<?php
/**
 * category_api.php — PHP 7+ — WooCommerce Category Lister
 * Input:  ?url=<category_url>&pages=<N>&debug=1&insecure=1
 * Output: JSON containing the list of products {name, url} for the first N pages of the category
 */

declare(strict_types=1);

// ---- Controls ----
// Both switches are opt-in and must be exactly the string "1". A strict
// comparison keeps look-alike values ("01", "1.0", " 1") and array input
// (?debug[]=1) from enabling them.
$DEBUG       = isset($_GET['debug']) && $_GET['debug'] === '1';
// insecure=1 disables TLS certificate verification in the cURL fetcher.
$ALLOW_INSEC = isset($_GET['insecure']) && $_GET['insecure'] === '1';

// In debug mode PHP diagnostics are written into the response body.
if ($DEBUG) { error_reporting(E_ALL); ini_set('display_errors', '1'); }
header('Access-Control-Allow-Origin: *');
header('Content-Type: application/json; charset=UTF-8');

// ---- Helpers ----
/**
 * Emit a JSON error document and end the request.
 *
 * @param int    $code  HTTP status code for the response.
 * @param string $msg   Message stored under the "error" key.
 * @param array  $extra Extra key/value pairs merged into the payload
 *                      (they override "ok"/"error" on key collision).
 */
function fail($code, $msg, $extra = []) {
    http_response_code($code);

    $payload = array_merge(['ok' => false, 'error' => $msg], $extra);
    $flags   = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT;

    echo json_encode($payload, $flags);
    exit;
}
/**
 * Tell whether $h begins with $n.
 *
 * An empty-string needle never matches.
 *
 * @param mixed $h Haystack (cast to string).
 * @param mixed $n Needle (cast to string).
 * @return bool
 */
function starts_with($h, $n) {
    if ($n === '') {
        return false;
    }
    $position = strpos((string)$h, (string)$n);
    return $position === 0;
}
/**
 * Return the trimmed text of a node, or '' when no node is given.
 *
 * @param object|null $node Any object exposing a textContent property, or a falsy value.
 * @return string
 */
function textContent($node) {
    if (!$node) {
        return '';
    }
    return trim($node->textContent);
}
/**
 * Resolve a (possibly relative) reference against a base URL.
 *
 * Handles absolute references (returned unchanged), protocol-relative
 * references ("//host/path"), root-relative and directory-relative paths,
 * and query-only / fragment-only references. "." and ".." segments are
 * collapsed in the path part only; a "?query" or "#fragment" suffix is
 * carried over verbatim.
 *
 * User-info (user:pass@) of the base URL is not carried into the result.
 *
 * @param string $base Absolute URL of the document containing the reference.
 * @param string $rel  Reference to resolve.
 * @return string The resolved URL, or $rel unchanged when it is empty,
 *                already has a scheme, or $base cannot be parsed.
 */
function abs_url($base, $rel) {
    if ($rel === '' || parse_url($rel, PHP_URL_SCHEME)) return $rel;
    $bp = parse_url($base);
    if (!$bp) return $rel;
    $scheme   = isset($bp['scheme']) ? $bp['scheme'] : 'http';
    $host     = isset($bp['host']) ? $bp['host'] : '';
    $port     = isset($bp['port']) ? ':' . $bp['port'] : '';
    $basePath = isset($bp['path']) ? $bp['path'] : '/';

    // Protocol-relative reference: it names its own host, only the scheme is inherited.
    if (substr($rel, 0, 2) === '//') {
        return $scheme . ':' . $rel;
    }

    // Separate the path from any ?query / #fragment so that dot-segment
    // normalisation below never rewrites query or fragment content.
    $cut     = strcspn($rel, '?#');
    $relPath = (string)substr($rel, 0, $cut);
    $suffix  = (string)substr($rel, $cut);

    if ($relPath === '') {
        // Query-only or fragment-only reference: the base path is kept as is.
        $path = $basePath;
        // A fragment-only reference also keeps the base query.
        if ($suffix[0] === '#' && isset($bp['query'])) {
            $suffix = '?' . $bp['query'] . $suffix;
        }
    } elseif ($relPath[0] === '/') {
        $path = $relPath;
    } else {
        // Relative to the "directory" of the base path (everything up to the last slash).
        $slash = strrpos($basePath, '/');
        $dir   = $slash === false ? '' : (string)substr($basePath, 0, $slash);
        $path  = $dir . '/' . $relPath;
    }

    // normalize ../ and ./
    $segments = [];
    foreach (explode('/', $path) as $part) {
        if ($part === '' || $part === '.') continue;
        if ($part === '..') { array_pop($segments); continue; }
        $segments[] = $part;
    }

    $normalized = implode('/', $segments);
    // Keep a trailing slash, but never emit "//" when the path collapsed to the root.
    if ($normalized !== '' && substr($path, -1) === '/') {
        $normalized .= '/';
    }

    return $scheme . '://' . $host . $port . '/' . $normalized . $suffix;
}

// ---- Input ----
// Array-style parameters (?url[]=...) are treated as missing instead of
// reaching trim(), which would raise a TypeError under strict_types.
$rawUrl = isset($_GET['url']) ? $_GET['url'] : '';
$catUrl = is_string($rawUrl) ? trim($rawUrl) : '';
$pages  = isset($_GET['pages']) ? (int)$_GET['pages'] : 1;

if ($catUrl === '' || !filter_var($catUrl, FILTER_VALIDATE_URL)) {
    fail(400, 'Invalid or missing ?url=');
}
// FILTER_VALIDATE_URL also accepts non-web schemes (file:, ftp:, ...). Only
// http(s) is allowed so the fetchers cannot be pointed at local files.
// NOTE(review): internal/loopback hosts are still reachable over http(s);
// add a host allow-list if this endpoint is exposed publicly.
$catScheme = strtolower((string)parse_url($catUrl, PHP_URL_SCHEME));
if ($catScheme !== 'http' && $catScheme !== 'https') {
    fail(400, 'Only http:// and https:// URLs are supported in ?url=');
}
if ($pages < 1) $pages = 1;
$pages = min($pages, 50); // conservative upper bound

// ---- Env checks ----
// DOM + libxml are mandatory; for HTTP either cURL or allow_url_fopen is enough.
$need = ['curl'=>extension_loaded('curl'), 'dom'=>extension_loaded('dom'), 'libxml'=>extension_loaded('libxml')];
if (!$need['dom'] || !$need['libxml']) fail(500, 'DOM/XML extensions are required', ['extensions'=>$need]);
if (!$need['curl'] && !ini_get('allow_url_fopen')) fail(500, 'Require cURL or allow_url_fopen', ['extensions'=>$need]);

// ---- Fetchers ----
/**
 * Fetch a URL with cURL, following up to 6 redirects.
 *
 * Requests and redirects are restricted to HTTP and HTTPS so that neither a
 * caller-supplied URL nor a redirect target can make cURL use another
 * protocol handler (file://, gopher://, ...).
 *
 * @param string $url      URL to fetch.
 * @param int    $timeout  Connect timeout and total timeout, in seconds.
 * @param bool   $insecure When true, TLS peer and host verification are disabled.
 * @return array [int $status, string|false $body, string $error, string $effectiveUrl]
 *               $status is 0 and $body is false when the transfer failed.
 */
function fetch_curl($url, $timeout = 20, $insecure = false) {
    $ch = curl_init($url);
    if ($ch === false) {
        // Same tuple shape as a failed transfer so callers need no special case.
        return [0, false, 'curl_init() failed', $url];
    }

    $webOnly = CURLPROTO_HTTP | CURLPROTO_HTTPS;
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER  => true,
        CURLOPT_FOLLOWLOCATION  => true,
        CURLOPT_MAXREDIRS       => 6,
        CURLOPT_PROTOCOLS       => $webOnly,
        CURLOPT_REDIR_PROTOCOLS => $webOnly,
        CURLOPT_CONNECTTIMEOUT  => $timeout,
        CURLOPT_TIMEOUT         => $timeout,
        CURLOPT_ENCODING        => '', // empty string: accept every encoding cURL supports
        CURLOPT_USERAGENT       => 'CategoryLister/1.0 PHP/' . PHP_VERSION,
        CURLOPT_SSL_VERIFYPEER  => !$insecure,
        CURLOPT_SSL_VERIFYHOST  => $insecure ? 0 : 2,
        CURLOPT_HTTPHEADER      => ['Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'],
    ]);
    $body = curl_exec($ch);
    $err  = curl_error($ch);
    $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $final= curl_getinfo($ch, CURLINFO_EFFECTIVE_URL) ?: $url;
    curl_close($ch);
    return [$code, $body, $err, $final];
}
/**
 * Fetch a URL through PHP's http(s) stream wrapper (fallback when cURL is absent).
 *
 * Returns the same tuple layout as fetch_curl():
 *   - only http:// and https:// URLs are requested; anything else (file://,
 *     php://, ...) is refused without touching the stream layer;
 *   - the status code is taken from the LAST status line, i.e. the final
 *     response after redirects;
 *   - a failed request yields status 0 (unless a status line was received)
 *     and a non-empty error string.
 *
 * TLS verification is always on in this path.
 *
 * @param string $url     URL to fetch.
 * @param int    $timeout Read timeout in seconds.
 * @return array [int $status, string|false $body, string $error, string $url]
 */
function fetch_fopen($url, $timeout = 20) {
    $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        return [0, false, 'Unsupported URL scheme', $url];
    }

    $ctx = stream_context_create([
        'http' => ['timeout'=>$timeout,'follow_location'=>1,'header'=>"User-Agent: CategoryLister/1.0 PHP/".PHP_VERSION."\r\n"],
        'ssl'  => ['verify_peer'=>true,'verify_peer_name'=>true]
    ]);

    // The warning is suppressed to keep the JSON output clean; its text is
    // recovered through error_get_last() below.
    error_clear_last();
    $body = @file_get_contents($url, false, $ctx);
    $meta = isset($http_response_header) ? $http_response_header : [];

    // Each hop of a redirect chain contributes its own status line; keep the last one.
    $code = 0;
    foreach ($meta as $h) {
        if (preg_match('#^HTTP/\S+\s+(\d+)#', $h, $m)) {
            $code = (int)$m[1];
        }
    }

    $err = '';
    if ($body === false) {
        $last = error_get_last();
        $err  = $last ? (string)$last['message'] : 'Request failed';
    } elseif ($code === 0) {
        // Body received but no parsable status line: treat as success.
        $code = 200;
    }

    return [$code, $body, $err, $url];
}
/**
 * Download a page, using cURL when the extension is loaded and the
 * stream-wrapper fetcher otherwise.
 *
 * @param string $url      URL to fetch.
 * @param bool   $insecure Passed to the cURL fetcher only.
 * @return array The tuple returned by the selected fetcher.
 */
function get_html($url, $insecure = false) {
    $haveCurl = extension_loaded('curl');

    if (!$haveCurl) {
        return fetch_fopen($url, 20);
    }
    return fetch_curl($url, 20, $insecure);
}
/**
 * Prepare raw markup for DOM parsing.
 *
 * - When mbstring is available and the input is not valid UTF-8, the source
 *   encoding is detected from mbstring's current detect order and the markup
 *   is converted to UTF-8. If no encoding matches, the bytes are left
 *   untouched rather than letting a failed conversion wipe the document.
 * - Markup without a doctype is wrapped in a minimal HTML skeleton that
 *   declares a UTF-8 charset.
 *
 * @param string $html Raw response body.
 * @return string Markup ready to be passed to the HTML parser.
 */
function normalize_html($html) {
    $html = (string)$html;

    if (extension_loaded('mbstring') && !mb_detect_encoding($html, 'UTF-8', true)) {
        // mb_convert_encoding() yields false when it has to guess the source
        // encoding and cannot, so detect explicitly and convert only on a match.
        $from = mb_detect_encoding($html, mb_detect_order(), true);
        if ($from !== false) {
            $converted = mb_convert_encoding($html, 'UTF-8', $from);
            if (is_string($converted) && $converted !== '') {
                $html = $converted;
            }
        }
    }
    if (stripos($html, '<!doctype') === false) {
        $html = "<!doctype html><html><head><meta charset='utf-8'></head><body>{$html}</body></html>";
    }
    return $html;
}
/**
 * Parse markup into a DOM document and return an XPath handle for it.
 *
 * Parser diagnostics are collected internally and discarded. The libxml
 * error-handling mode that was active before the call is restored
 * afterwards, so the function leaves no global state changed.
 *
 * @param string $html Markup to parse; an empty string yields an empty document.
 * @return DOMXPath
 */
function xpath_from_html($html) {
    $previousMode = libxml_use_internal_errors(true);

    $dom = new DOMDocument();
    // loadHTML() rejects an empty source, so skip parsing in that case.
    if ((string)$html !== '') {
        $dom->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR);
    }

    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    return new DOMXPath($dom);
}

// ---- Build page URLs (supports /page/N/ and ?paged=N) ----
/**
 * Build the list of URLs for pages 1..$pages of a category, using the
 * pretty-permalink pattern "<category>/page/N/".
 *
 * The "/page/N/" segment is inserted into the path; an existing query string
 * is re-attached after it. A "#fragment" is dropped because it is never sent
 * to the server.
 *
 * @param string $baseUrl Category URL (page 1).
 * @param int    $pages   Number of pages to generate.
 * @return array List of URLs; index 0 is page 1.
 */
function build_page_urls($baseUrl, $pages) {
    $baseUrl = (string)$baseUrl;

    $hashPos = strpos($baseUrl, '#');
    if ($hashPos !== false) {
        $baseUrl = (string)substr($baseUrl, 0, $hashPos);
    }

    $queryPos = strpos($baseUrl, '?');
    $query    = $queryPos === false ? '' : (string)substr($baseUrl, $queryPos);
    $path     = $queryPos === false ? $baseUrl : (string)substr($baseUrl, 0, $queryPos);

    // ensure trailing slash on category base (common in WP)
    $base = rtrim($path, '/') . '/';

    $urls = [];
    for ($i = 1; $i <= $pages; $i++) {
        // page 1 is the base itself; later pages use /page/N/
        $pagePath = $i === 1 ? $base : $base . 'page/' . $i . '/';
        $urls[]   = $pagePath . $query;
    }
    return $urls;
}
/**
 * Build the fallback pagination URL that uses the "paged" query parameter.
 *
 * A "#fragment" is removed first; otherwise the parameter would be appended
 * to the fragment and never reach the server.
 *
 * @param string $url  Category URL.
 * @param int    $page Page number to request.
 * @return string
 */
function alt_paged_url($url, $page) {
    $url = (string)$url;

    $hashPos = strpos($url, '#');
    if ($hashPos !== false) {
        $url = (string)substr($url, 0, $hashPos);
    }

    // fallback pattern ?paged=N (or &paged=N when a query already exists)
    if (strpos($url, '?') !== false) return $url . '&paged=' . $page;
    return rtrim($url, '/') . '/?paged=' . $page;
}

// ---- Product extractors ----
/**
 * Extract product entries from a parsed category page.
 *
 * Three strategies are tried in order; a later one only runs when the
 * earlier ones found nothing:
 *   1. WooCommerce's default loop markup: li elements whose class contains
 *      "product" inside a ul whose class contains "products".
 *   2. Any element whose class contains "product" and that carries a
 *      data-product_id attribute.
 *   3. Every link whose href contains "/product/", de-duplicated by URL.
 *
 * In every strategy the name is resolved the same way: heading text, then
 * the link's title attribute, then the link's own text.
 *
 * @param DOMXPath $xp      XPath handle for the page.
 * @param string   $baseUrl URL used to resolve relative hrefs.
 * @return array List of ['name' => string, 'url' => string].
 */
function extract_products_from_document(DOMXPath $xp, $baseUrl) {
    $items = [];

    // 1) Primary WooCommerce pattern: ul.products li.product
    $nodes = $xp->query("//ul[contains(@class,'products')]//li[contains(@class,'product')]");
    if ($nodes) {
        foreach ($nodes as $li) {
            $a = $xp->query(".//a[contains(@class,'woocommerce-LoopProduct-link') or contains(@href,'/product/')]", $li)->item(0);
            if (!$a) continue;
            $heading = $xp->query(".//h2[contains(@class,'woocommerce-loop-product__title')]", $li)->item(0);
            $item = product_item_from_link($a, $heading, $baseUrl);
            if ($item !== null) $items[] = $item;
        }
    }

    // 2) Fallback: any product card carrying data-product_id
    if (empty($items)) {
        $cards = $xp->query("//*[contains(@class,'product') and @data-product_id]");
        if ($cards) {
            foreach ($cards as $c) {
                $a = $xp->query(".//a[contains(@href,'/product/')]", $c)->item(0);
                if (!$a) continue;
                $heading = $xp->query(".//*[self::h2 or self::h3][contains(@class,'title')]", $c)->item(0);
                $item = product_item_from_link($a, $heading, $baseUrl);
                if ($item !== null) $items[] = $item;
            }
        }
    }

    // 3) Last resort: every /product/ link on the page
    if (empty($items)) {
        $links = $xp->query("//a[contains(@href,'/product/')]");
        $seen = [];
        if ($links) {
            foreach ($links as $a) {
                $item = product_item_from_link($a, null, $baseUrl);
                // A nameless link does not claim its URL, so a later link to
                // the same product that does have a name can still be used.
                if ($item === null || isset($seen[$item['url']])) continue;
                $seen[$item['url']] = true;
                $items[] = $item;
            }
        }
    }

    return $items;
}

/**
 * Build one product entry from its link element.
 *
 * @param DOMElement   $a       Link pointing at the product page.
 * @param DOMNode|null $heading Optional heading node holding the product name.
 * @param string       $baseUrl URL used to resolve a relative href.
 * @return array|null ['name' => string, 'url' => string], or null when the
 *                    link has no href or no name can be determined.
 */
function product_item_from_link($a, $heading, $baseUrl) {
    $href = trim($a->getAttribute('href'));
    if ($href === '') return null;
    if (strpos($href, 'http') !== 0) $href = abs_url($baseUrl, $href);

    $name = $heading ? trim($heading->textContent) : '';
    if ($name === '') $name = trim($a->getAttribute('title'));
    if ($name === '') $name = trim($a->textContent);
    if ($name === '') return null;

    return ['name' => $name, 'url' => $href];
}

// ---- Crawl ----
$pageUrls = build_page_urls($catUrl, $pages);
$all = [];
$visited = [];       // page URLs already requested
$seenProducts = [];  // product URLs already emitted (kept apart from page URLs)
$fetch_log = [];
$pagesCrawled = 0;   // pages that were actually downloaded and parsed
$pageCount = count($pageUrls);

for ($i = 0; $i < $pageCount; $i++) {
    $purl = $pageUrls[$i];

    // Skip page URLs that were already requested.
    if (isset($visited[$purl])) continue;
    $visited[$purl] = true;

    list($code, $html, $err, $final) = get_html($purl, $ALLOW_INSEC);
    $fetch_log[] = ['url'=>$purl, 'status'=>$code, 'final'=>$final, 'err'=>$err];

    // If page 2 or later did not answer under /page/N/, retry with the ?paged=N pattern.
    if (($code >= 400 || !$html) && $i >= 1) {
        $alt = alt_paged_url($catUrl, $i+1);
        list($code, $html, $err, $final) = get_html($alt, $ALLOW_INSEC);
        $fetch_log[] = ['url'=>$alt, 'status'=>$code, 'final'=>$final, 'err'=>$err];
        if ($code >= 400 || !$html) continue;
        $purl = $alt; // becomes the base URL for resolving relative links
    }

    if (!$html || $code >= 400) continue;
    $pagesCrawled++;

    $html = normalize_html($html);
    $xp   = xpath_from_html($html);

    $items = extract_products_from_document($xp, $purl);
    // Merge without duplicates (keyed by product URL).
    foreach ($items as $it) {
        $key = $it['url'];
        if (!isset($seenProducts[$key])) {
            $seenProducts[$key] = true;
            $all[] = $it;
        }
    }

    // Short politeness delay between pages; pointless after the last one.
    if ($i < $pageCount - 1) {
        usleep(150000); // 150ms
    }
}

// ---- Output ----
$out = [
    'ok' => true,
    'fetched' => [
        'requested_url' => $catUrl,
        'pages_requested' => $pages,
        'pages_crawled' => $pagesCrawled,
        'retrieved_at' => gmdate('c'),
        'debug' => $DEBUG ? ['log'=>$fetch_log] : null
    ],
    'count' => count($all),
    'products' => $all
];

echo json_encode($out, JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES|JSON_PRETTY_PRINT);
