<?php
/**
 * Category Products API for zibaloun.com (Woodmart/WooCommerce)
 * Input:
 *    GET ?url=<category-url>&pages=<N>
 * Output:
 *    JSON: { ok, source_category, pages_requested, pages_crawled, products_count, products:[{name,url}], warnings[], errors[] }
 */

// Every response produced by this script is JSON encoded as UTF-8.
header('Content-Type: application/json; charset=utf-8');
// CORS: any origin is allowed to call this endpoint.
header('Access-Control-Allow-Origin: *'); // restrict this if needed
// Make UTF-8 the default encoding for the mb_* string functions.
mb_internal_encoding('UTF-8');

// ---------------- helpers ----------------
/**
 * Send a JSON response with the given HTTP status code and terminate the script.
 *
 * The payload is pretty-printed with Unicode characters and slashes left
 * unescaped. If the payload cannot be serialized at all, a fixed JSON error
 * body is sent with status 500 instead of an empty response.
 *
 * @param mixed $data Value to serialize as the response body.
 * @param int   $code HTTP status code to send (default 200).
 * @return void Never returns to the caller; the script exits after output.
 */
function respond($data, $code = 200) {
    // JSON_PARTIAL_OUTPUT_ON_ERROR: a value that cannot be encoded (for
    // example a string containing invalid UTF-8 that came in through the
    // query string) is emitted as null instead of making json_encode()
    // return false, which would otherwise be echoed as an empty body.
    $flags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_PARTIAL_OUTPUT_ON_ERROR;
    $json = json_encode($data, $flags);

    if ($json === false) {
        // Last resort: never answer with an empty body and a success status.
        $code = 500;
        $json = '{"ok":false,"error":"Failed to encode JSON response"}';
    }

    http_response_code($code);
    echo $json;
    exit;
}

/**
 * Download a URL with cURL using browser-like request headers.
 *
 * Redirects are followed (at most 5), TLS peer and host verification are
 * enabled, the connect timeout is 15 seconds and the total timeout is 25
 * seconds.
 *
 * @param string $url URL to fetch.
 * @return array Two-element list: [body, null] on success, or
 *               [null, error message] when cURL reports an error, the HTTP
 *               status is 400 or higher, or the body is empty/falsy.
 */
function fetch_url($url) {
    $requestHeaders = [
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: fa-IR,fa;q=0.9,en-US;q=0.8,en;q=0.7',
        'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36'
    ];

    $options = [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 5,
        CURLOPT_CONNECTTIMEOUT => 15,
        CURLOPT_TIMEOUT => 25,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_HTTPHEADER => $requestHeaders
    ];

    $handle = curl_init();
    curl_setopt_array($handle, $options);

    // Collect everything we need from the handle before releasing it.
    $body      = curl_exec($handle);
    $curlError = curl_error($handle);
    $status    = curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
    curl_close($handle);

    // Transport-level failure (DNS, TLS, timeout, ...).
    if ($curlError) {
        return [null, "cURL error: $curlError"];
    }

    // HTTP-level failure, or a response without a usable body.
    if ($status >= 400 || !$body) {
        return [null, "HTTP $status fetching $url"];
    }

    return [$body, null];
}

/**
 * Normalize whitespace in a piece of text.
 *
 * Runs of CR/LF/TAB become a single space, then any run of two or more
 * whitespace characters is collapsed to one space, and the result is trimmed.
 *
 * @param string $s Raw text.
 * @return string Text with collapsed whitespace and no leading/trailing blanks.
 */
function clean_text($s) {
    // Applied in order: line breaks/tabs first, then runs of whitespace.
    $patterns = ["/[\\r\\n\\t]+/u", "/\\s{2,}/u"];
    $collapsed = preg_replace($patterns, " ", trim($s));
    return trim($collapsed);
}

/**
 * Rebuild an absolute URL so that its path ends with a slash.
 *
 * The result is assembled from scheme (default "https"), host, optional port,
 * path (default "/") and the query string if one is present. Other components
 * of the input (user info, fragment) are not carried over.
 *
 * @param string $url URL to normalize.
 * @return string Normalized URL, or the input unchanged when it cannot be
 *                parsed or has no host to rebuild an absolute URL from.
 */
function ensure_trailing_slash($url) {
    $parts = parse_url($url);
    // Without a host there is nothing to rebuild an absolute URL from; bail
    // out the same way as for unparsable input instead of reading a missing
    // array key and producing "https:///...".
    if (!$parts || !isset($parts['host']) || $parts['host'] === '') return $url;

    $path = $parts['path'] ?? '/';
    if (substr($path, -1) !== '/') $path .= '/';

    $port = isset($parts['port']) ? ':' . $parts['port'] : '';
    $rebuilt = ($parts['scheme'] ?? 'https') . '://' . $parts['host'] . $port . $path;

    // Keep the query string if there is one. Compare against '' explicitly:
    // empty() would also discard a legitimate query of "0".
    if (isset($parts['query']) && $parts['query'] !== '') $rebuilt .= '?' . $parts['query'];
    return $rebuilt;
}

/**
 * Build the URL of a given page of a paginated category listing.
 *
 * Page 1 (or anything lower) is the base URL itself, normalized to end with
 * a slash. Page N >= 2 is the base path followed by "page/N/". When the base
 * carries a query string, "page/N/" is inserted before it.
 *
 * @param string $base    Category URL.
 * @param int    $pageNum 1-based page number.
 * @return string URL of the requested page.
 */
function build_page_url($base, $pageNum) {
    // The base must end with a slash before the page segment is appended.
    $normalized = ensure_trailing_slash($base);
    if ($pageNum <= 1) {
        return $normalized;
    }

    // Split off the query string (if any) so the page segment lands in the path.
    $pieces = explode('?', $normalized, 2);
    $paged  = rtrim($pieces[0], '/') . '/page/' . intval($pageNum) . '/';

    if (isset($pieces[1])) {
        return $paged . '?' . $pieces[1];
    }
    return $paged;
}

/**
 * Resolve a link found on a page into an absolute URL.
 *
 * Handles absolute http(s) URLs (returned as-is), protocol-relative
 * references ("//host/path", which take the base URL's scheme), root-relative
 * paths ("/path") and paths relative to the base URL's directory. Dot
 * segments ("../") are not collapsed.
 *
 * @param string $base URL of the page the link was found on.
 * @param string $rel  Link target as written in the document.
 * @return string|null Absolute URL; null when $rel is empty/falsy; $rel
 *                     unchanged when $base cannot be parsed.
 */
function to_abs_url($base, $rel) {
    if (!$rel) return null;
    if (preg_match('~^https?://~i', $rel)) return $rel;

    $parts = parse_url($base);
    if (!$parts) return $rel;

    $scheme = $parts['scheme'] ?? 'https';

    // Protocol-relative reference: it already names its own host, so only the
    // scheme is inherited. This must be checked before the root-relative
    // case, which would otherwise glue it onto the base host.
    if (strncmp($rel, '//', 2) === 0) return $scheme . ':' . $rel;

    $host   = $parts['host'] ?? '';
    $port   = isset($parts['port']) ? (':'.$parts['port']) : '';
    $root   = "{$scheme}://{$host}{$port}";

    // Root-relative path: attach directly to scheme://host[:port].
    if ($rel[0] === '/') return $root . $rel;

    // Relative path: resolve against the directory of the base path, i.e. the
    // base path with everything after its last slash removed.
    $path = isset($parts['path']) ? preg_replace('~/[^/]*$~','/',$parts['path']) : '/';
    return $root . $path . $rel;
}

// ---------------- parsing ----------------
/**
 * Extract product names and links from the HTML of a category page.
 *
 * Each element whose class list contains "product-grid-item" is treated as
 * one product card. For every card the first strategy that yields a usable
 * result wins:
 *   1. the link inside the element with class "wd-entities-title";
 *   2. a link whose class contains "product-title" or
 *      "woocommerce-LoopProduct-link";
 *   3. the link with class "product-image-link", named after the alt text of
 *      the first image in the card (or a placeholder when there is none).
 *
 * @param string $url  URL the HTML was fetched from; used to resolve links.
 * @param string $html Raw HTML of the page.
 * @return array List of ['name' => string, 'url' => string] entries, in
 *               document order. Cards with no usable link are skipped.
 */
function parse_category_products($url, $html) {
    // Collect libxml parse errors internally instead of emitting warnings.
    // Remember the previous setting so the global libxml state can be restored.
    $previousSetting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // The XML encoding prolog makes the parser read the markup as UTF-8.
    $dom->loadHTML('<?xml encoding="utf-8" ?>' . $html, LIBXML_NOWARNING|LIBXML_NOERROR);
    // Drop the buffered parse errors: they are never inspected, and without
    // this they would pile up for every page parsed during the request.
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);
    $xp  = new DOMXPath($dom);

    $items = [];

    // One product card per .product-grid-item element (Woodmart grid).
    // Name + link normally live in: h3.wd-entities-title a
    $nodes = $xp->query("//*[contains(concat(' ', normalize-space(@class), ' '), ' product-grid-item ')]");
    foreach ($nodes as $card) {
        // Primary title/link.
        $a = $xp->query(".//*[contains(concat(' ', normalize-space(@class), ' '), ' wd-entities-title ')]//a", $card);
        if (!$a->length) {
            // Fallback for similar WooCommerce themes.
            $a = $xp->query(".//a[contains(@class,'product-title') or contains(@class,'woocommerce-LoopProduct-link')]", $card);
        }
        if ($a->length) {
            $an = $a->item(0);
            $name = clean_text($an->textContent);
            $href = $an->getAttribute('href');
            $href = to_abs_url($url, $href);
            if ($name && $href) {
                $items[] = ['name' => $name, 'url' => $href];
                continue;
            }
            // A title link without text or href falls through to the image link.
        }

        // Second fallback: take the URL from the image link.
        $imgLink = $xp->query(".//a[contains(@class,'product-image-link')]", $card);
        if ($imgLink->length) {
            $lnk = $imgLink->item(0)->getAttribute('href');
            $lnk = to_abs_url($url, $lnk);
            // No title available here, so use the image's alt text as the name.
            $img = $xp->query(".//img", $card);
            $name = null;
            if ($img->length) {
                $alt = clean_text($img->item(0)->getAttribute('alt'));
                if ($alt) $name = $alt;
            }
            // The placeholder literal below means "(untitled)".
            if ($lnk) $items[] = ['name' => $name ?: '(بدون عنوان)', 'url' => $lnk];
        }
    }

    return $items;
}

// ---------------- entrypoint ----------------
// Read the query parameters. A non-string `url` (e.g. `?url[]=x` arrives as
// an array) is treated as missing rather than being handed to trim(), which
// does not accept arrays.
$rawUrl = isset($_GET['url']) ? $_GET['url'] : '';
$url    = is_string($rawUrl) ? trim($rawUrl) : '';
$pages  = isset($_GET['pages']) ? intval($_GET['pages']) : 1;

if (!$url) respond(['ok'=>false,'error'=>'Missing ?url parameter'], 400);
if (!preg_match('~^https?://~i', $url)) respond(['ok'=>false,'error'=>'Invalid URL'], 400);
if ($pages < 1) $pages = 1;
if ($pages > 50) $pages = 50; // reasonable upper bound

// Security: restrict the target host and require a category-style path.
// parse_url() yields null/false for a missing or malformed component, so both
// values are cast to string before use. Hostnames are case-insensitive, hence
// the lowercase comparison against the allow-list.
$host = strtolower((string) parse_url($url, PHP_URL_HOST));
$path = (string) parse_url($url, PHP_URL_PATH);
$allowedHosts = ['zibaloun.com','www.zibaloun.com'];
if (!in_array($host, $allowedHosts, true)) {
    respond(['ok'=>false,'error'=>'Host not allowed. Only zibaloun.com supported.'], 403);
}
if (strpos($path, '/product-category/') === false) {
    respond(['ok'=>false,'error'=>'URL must be a WooCommerce category (contains /product-category/).'], 400);
}

$warnings = [];   // non-fatal notes, e.g. a page that yielded no products
$errors   = [];   // per-page fetch failures
$allItems = [];   // de-duplicated products in crawl order
$seenUrls = [];   // set of keys already added to $allItems

// Crawl pages 1..$pages. A page that fails to download is recorded in
// $errors and skipped; the crawl continues with the next page.
$pagesCrawled = 0;
for ($p = 1; $p <= $pages; $p++) {
    $pageUrl = build_page_url($url, $p);
    list($html, $err) = fetch_url($pageUrl);
    if ($err || !$html) {
        $errors[] = "Page $p: $err";
        continue;
    }
    $pagesCrawled++;

    $items = parse_category_products($pageUrl, $html);
    if (!$items) {
        $warnings[] = "Page $p: no products parsed";
    }

    // De-duplicate across pages, keyed by product URL (or by name if the URL
    // is empty).
    foreach ($items as $it) {
        $key = $it['url'] ?: $it['name'];
        if (!$key) continue;
        if (!isset($seenUrls[$key])) {
            $seenUrls[$key] = true;
            $allItems[] = $it;
        }
    }

    // (optional) short pause to go easy on the remote server
    // usleep(200000); // 200ms
}

respond([
    'ok' => true,
    'source_category' => ensure_trailing_slash($url),
    'pages_requested' => $pages,
    'pages_crawled' => $pagesCrawled,
    'products_count' => count($allItems),
    'products' => $allItems,
    'warnings' => $warnings,
    'errors' => $errors
], 200);