<?php
// Script setup. The rest of this file reads the GET parameters url,
// page_template, page_param, start and pages, downloads the requested listing
// pages and prints the extracted products as a JSON document.

// Every response of this script (success or error) is declared as UTF-8 JSON.
header('Content-Type: application/json; charset=utf-8');
// Make UTF-8 the default encoding for the mb_* string functions.
mb_internal_encoding('UTF-8');

/**
 * Build the URL of a single listing page.
 *
 * Two modes:
 *  - Template mode: when $pageTemplate is non-empty, every "{page}" placeholder
 *    in it is replaced by $page; $baseUrl and $pageParam are ignored.
 *  - Query mode: "$pageParam=$page" (both URL-encoded) is added to the query
 *    string of $baseUrl.
 *
 * @param string      $baseUrl      Category URL used in query mode.
 * @param int|string  $page         Page number to request.
 * @param string|null $pageTemplate Full URL containing a "{page}" placeholder, or null.
 * @param string      $pageParam    Name of the pagination query parameter.
 * @return string The URL for the requested page.
 */
function buildPageUrl($baseUrl, $page, $pageTemplate = null, $pageParam = 'page') {
    $page = (string)$page;

    if ($pageTemplate) {
        return str_replace('{page}', $page, $pageTemplate);
    }

    // Detach the fragment first: anything written after "#" belongs to the
    // fragment and is never sent to the server, so the query must go before it.
    $fragment = '';
    $hashPos = strpos($baseUrl, '#');
    if ($hashPos !== false) {
        $fragment = substr($baseUrl, $hashPos);
        $baseUrl  = substr($baseUrl, 0, $hashPos);
    }

    // Choose the separator from the string itself rather than parse_url(),
    // whose result for an empty query ("...?") differs between PHP versions
    // and which returns false for URLs it cannot parse.
    if (strpos($baseUrl, '?') === false) {
        $sep = '?';
    } elseif (in_array(substr($baseUrl, -1), ['?', '&'], true)) {
        $sep = ''; // query already ends with a separator
    } else {
        $sep = '&';
    }

    return $baseUrl . $sep . urlencode($pageParam) . '=' . urlencode($page) . $fragment;
}

/**
 * Convert a downloaded body to UTF-8 when it is not valid UTF-8 already.
 *
 * The source charset is taken from the Content-Type header, then from a
 * <meta ... charset=...> declaration near the top of the document. When no
 * usable charset is declared, Windows-1252 is assumed.
 *
 * @param string      $body        Raw response body.
 * @param string|null $contentType Value of the Content-Type response header, if any.
 * @return string UTF-8 text.
 */
function fetchBodyToUtf8($body, $contentType) {
    if ($body === '' || mb_check_encoding($body, 'UTF-8')) {
        return $body;
    }

    $declared = null;
    $charsetPattern = 'charset\s*=\s*["\']?([A-Za-z0-9._:-]+)';
    if (is_string($contentType) && preg_match('/' . $charsetPattern . '/i', $contentType, $m)) {
        $declared = $m[1];
    } elseif (preg_match('/<meta[^>]+' . $charsetPattern . '/i', substr($body, 0, 4096), $m)) {
        $declared = $m[1];
    }

    $source = 'Windows-1252';
    // mbstring also lists transfer/pseudo encodings; those are not charsets
    // and must never be selected by a remote page.
    $notCharset = '/^(auto|pass|wchar|byte[24](be|le)|base64|uuencode|html(-entities)?|quoted-printable|qprint|7bit|8bit)$/i';
    if ($declared !== null && !preg_match($notCharset, $declared)) {
        // Resolve the declared label against mbstring's own names and aliases
        // so an unknown label can never reach mb_convert_encoding().
        foreach (mb_list_encodings() as $known) {
            $labels = mb_encoding_aliases($known);
            $labels = is_array($labels) ? $labels : [];
            $labels[] = $known;
            foreach ($labels as $label) {
                if (strcasecmp($label, $declared) === 0) {
                    $source = $known;
                    break 2;
                }
            }
        }
    }

    return mb_convert_encoding($body, 'UTF-8', $source);
}

/**
 * Download $url over HTTP(S) and return the response body as UTF-8 text.
 *
 * Only http:// and https:// are permitted, for the initial request as well as
 * for redirects, so a caller-supplied URL cannot make cURL read local files
 * (file://) or use any other protocol handler.
 *
 * @param string $url Absolute URL to request.
 * @return string Response body as UTF-8.
 * @throws Exception When cURL cannot be initialised, the transfer fails, or
 *                   the final HTTP status is 400 or above.
 */
function fetch($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        throw new Exception("HTTP error fetching $url (code: 0, err: curl_init failed)");
    }

    $httpOnly = CURLPROTO_HTTP | CURLPROTO_HTTPS;
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 5,
        CURLOPT_PROTOCOLS => $httpOnly,
        CURLOPT_REDIR_PROTOCOLS => $httpOnly,
        CURLOPT_CONNECTTIMEOUT => 15,
        CURLOPT_TIMEOUT => 25,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible; CategoryCrawler/1.0; +https://example.com)',
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
    ]);

    $html        = curl_exec($ch);
    $err         = curl_error($ch);
    $code        = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $contentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
    curl_close($ch);

    if ($html === false || $code >= 400) {
        throw new Exception("HTTP error fetching $url (code: $code, err: $err)");
    }

    return fetchBodyToUtf8($html, $contentType);
}

/**
 * Extract product links from listing markup.
 *
 * Looks for TehranMelody-style cards:
 *   <h3 class="dir-ltr mt-4"><a href="...">Name</a></h3>
 *
 * @param string $html Page markup, expected to be UTF-8.
 * @return array List of ['name' => string, 'url' => string]; empty when nothing matches.
 */
function parseDomProducts($html) {
    $products = [];

    // DOMDocument::loadHTML() rejects an empty document (ValueError on PHP 8).
    if (!is_string($html) || trim($html) === '') {
        return $products;
    }

    // Collect libxml parse warnings silently, then restore the caller's
    // setting and drop the buffered errors so they do not pile up.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // Without an encoding hint libxml falls back to ISO-8859-1 and garbles
    // UTF-8 text; the XML-declaration prefix forces UTF-8 decoding.
    $loaded = $dom->loadHTML('<?xml encoding="UTF-8">' . $html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded) {
        return $products;
    }

    // Match whole class tokens, so "mt-4" does not also match "mt-40".
    $hasClass = function ($token) {
        return "contains(concat(' ', normalize-space(@class), ' '), ' $token ')";
    };
    $xpath = new DOMXPath($dom);
    $nodes = $xpath->query('//h3[' . $hasClass('dir-ltr') . ' and ' . $hasClass('mt-4') . ']/a[@href]');
    if ($nodes === false) {
        return $products;
    }

    foreach ($nodes as $a) {
        $name = trim($a->textContent);
        $href = trim($a->getAttribute('href'));
        if ($name !== '' && $href !== '') {
            $products[] = ['name' => $name, 'url' => $href];
        }
    }
    return $products;
}

/**
 * Extract products from JSON-LD blocks embedded in the page.
 *
 * Every <script type="application/ld+json"> block is decoded. Nodes are taken
 * from the document root (a single object or a top-level array) and from any
 * "@graph". Products are read from:
 *  - a CollectionPage node, via mainEntity.itemListElement, and
 *  - an ItemList node, via itemListElement.
 * A list element may carry "name"/"url" directly or inside a nested "item".
 *
 * @param string $html Page markup.
 * @return array List of ['name' => string, 'url' => string]; empty when nothing usable is found.
 */
function parseJsonLdItemList($html) {
    $out = [];
    if (!is_string($html) || $html === '') {
        return $out;
    }
    if (!preg_match_all('#<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>#si', $html, $m)) {
        return $out;
    }

    // "@type" may be a single string or an array of strings.
    $hasType = function ($node, $needle) {
        if (!isset($node['@type'])) {
            return false;
        }
        foreach ((array)$node['@type'] as $type) {
            if (is_string($type) && stripos($type, $needle) !== false) {
                return true;
            }
        }
        return false;
    };
    $isList = function ($value) {
        return is_array($value) && $value !== [] && array_keys($value) === range(0, count($value) - 1);
    };
    $stringAt = function ($node, $key) {
        return (isset($node[$key]) && is_string($node[$key])) ? $node[$key] : null;
    };

    foreach ($m[1] as $raw) {
        // Script content is raw text, so decode it as-is first; entity-decoding
        // valid JSON can break it (e.g. &quot; inside a string). Only fall back
        // to entity decoding for pages that emit HTML-escaped JSON.
        $data = json_decode($raw, true);
        if (!is_array($data)) {
            $data = json_decode(html_entity_decode($raw, ENT_QUOTES | ENT_HTML5, 'UTF-8'), true);
        }
        if (!is_array($data) || !$data) {
            continue;
        }

        // The root is either one object or an array of objects; each may hold a graph.
        $candidates = [];
        foreach (($isList($data) ? $data : [$data]) as $root) {
            if (!is_array($root)) {
                continue;
            }
            if (isset($root['@type'])) {
                $candidates[] = $root;
            }
            if (isset($root['@graph']) && is_array($root['@graph'])) {
                foreach ($root['@graph'] as $g) {
                    $candidates[] = $g;
                }
            }
        }

        foreach ($candidates as $obj) {
            if (!is_array($obj)) {
                continue;
            }

            $elements = null;
            if ($hasType($obj, 'CollectionPage')
                && isset($obj['mainEntity'])
                && is_array($obj['mainEntity'])
                && isset($obj['mainEntity']['itemListElement'])) {
                $elements = $obj['mainEntity']['itemListElement'];
            } elseif ($hasType($obj, 'ItemList') && isset($obj['itemListElement'])) {
                $elements = $obj['itemListElement'];
            }
            if (!is_array($elements)) {
                continue;
            }
            // A single element may be given as an object instead of a list.
            if (isset($elements['@type']) || isset($elements['url']) || isset($elements['item'])) {
                $elements = [$elements];
            }

            foreach ($elements as $item) {
                if (!is_array($item)) {
                    continue;
                }
                $name = $stringAt($item, 'name');
                $url  = $stringAt($item, 'url');
                if (isset($item['item'])) {
                    if (is_array($item['item'])) {
                        if ($name === null) {
                            $name = $stringAt($item['item'], 'name');
                        }
                        if ($url === null) {
                            $url = $stringAt($item['item'], 'url');
                        }
                    } elseif ($url === null && is_string($item['item'])) {
                        $url = $item['item']; // "item" given as a plain URL
                    }
                }
                if ($name === null || $url === null) {
                    continue;
                }
                $name = trim($name);
                $url  = trim($url);
                if ($name !== '' && $url !== '') {
                    $out[] = ['name' => $name, 'url' => $url];
                }
            }
        }
    }
    return $out;
}

/**
 * Drop entries whose 'url' was already seen, keeping the first occurrence of
 * each URL and the original order.
 *
 * @param array $items List of arrays that each carry a 'url' key.
 * @return array Re-indexed list with one entry per distinct URL.
 */
function uniqueByUrl($items) {
    // Keyed by URL; PHP arrays keep insertion order, so the first occurrence
    // of every URL also fixes its position in the result.
    $firstByUrl = [];
    foreach ($items as $entry) {
        $key = $entry['url'];
        if (isset($firstByUrl[$key])) {
            continue;
        }
        $firstByUrl[$key] = $entry;
    }
    return array_values($firstByUrl);
}

/**
 * Read a GET parameter as a trimmed string.
 *
 * @param string      $key     Parameter name.
 * @param string|null $default Returned when the parameter is missing, empty,
 *                             or not a string (e.g. sent as "key[]=...").
 * @return string|null
 */
function requestString($key, $default) {
    if (!isset($_GET[$key]) || !is_string($_GET[$key])) {
        return $default;
    }
    $value = trim($_GET[$key]);
    return ($value === '') ? $default : $value;
}

/**
 * Tell whether $url is a syntactically valid absolute http:// or https:// URL.
 *
 * FILTER_VALIDATE_URL alone also accepts schemes such as file:// or ftp://,
 * which this crawler must never request on behalf of a caller.
 *
 * @param mixed $url
 * @return bool
 */
function isHttpUrl($url) {
    if (!is_string($url) || filter_var($url, FILTER_VALIDATE_URL) === false) {
        return false;
    }
    $scheme = parse_url($url, PHP_URL_SCHEME);
    return is_string($scheme) && in_array(strtolower($scheme), ['http', 'https'], true);
}

/**
 * Send the HTTP status and the JSON body of the response.
 *
 * @param int   $status  HTTP status code.
 * @param array $payload Data to encode.
 * @return void
 */
function respondJson($status, $payload) {
    $flags = JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT;
    // Replace invalid UTF-8 in scraped text instead of failing the whole encode (PHP 7.2+).
    if (defined('JSON_INVALID_UTF8_SUBSTITUTE')) {
        $flags |= constant('JSON_INVALID_UTF8_SUBSTITUTE');
    }
    $json = json_encode($payload, $flags);
    if ($json === false) {
        $status = 500;
        $json = '{"ok":false,"error":"JSON encoding failed"}';
    }
    http_response_code($status);
    echo $json;
}

try {
    // Upper bound on pages fetched per request, so one call cannot trigger an
    // unbounded number of outbound requests.
    $maxPages = 100;

    $baseUrl      = requestString('url', '');
    $pageTemplate = requestString('page_template', null);  // e.g. https://site/category?sortpage={page}
    $pageParam    = requestString('page_param', 'page');   // e.g. sortpage
    $start        = (isset($_GET['start']) && is_scalar($_GET['start'])) ? max(1, (int)$_GET['start']) : 1;
    $pages        = (isset($_GET['pages']) && is_scalar($_GET['pages'])) ? max(1, (int)$_GET['pages']) : 1;
    $pages        = min($pages, $maxPages);

    if (!isHttpUrl($baseUrl)) {
        throw new InvalidArgumentException("پارامتر url الزامی است و باید یک URL معتبر باشد.");
    }
    // The template is fetched instead of the base URL, so it needs the same check.
    if ($pageTemplate !== null && !isHttpUrl(buildPageUrl('', $start, $pageTemplate, $pageParam))) {
        throw new InvalidArgumentException("پارامتر page_template باید یک URL معتبر (http یا https) باشد.");
    }

    $all = [];
    for ($offset = 0; $offset < $pages; $offset++) {
        $pageNum = $start + $offset;

        if ($pageTemplate !== null) {
            // Template mode: the base URL is ignored.
            $url = buildPageUrl('', $pageNum, $pageTemplate, $pageParam);
        } elseif ($pageNum === 1) {
            // Many sites only add the pagination parameter from page 2 on,
            // so page 1 is requested through the bare base URL.
            $url = $baseUrl;
        } else {
            $url = buildPageUrl($baseUrl, $pageNum, null, $pageParam);
        }

        $html = fetch($url);
        $list = parseDomProducts($html);
        if (empty($list)) {
            // Fallback: JSON-LD ItemList
            $list = parseJsonLdItemList($html);
        }
        $all = array_merge($all, $list);
    }

    $all = uniqueByUrl($all);

    respondJson(200, [
        'ok' => true,
        'count' => count($all),
        'products' => $all
    ]);
} catch (InvalidArgumentException $e) {
    // The caller sent bad parameters.
    respondJson(400, [
        'ok' => false,
        'error' => $e->getMessage()
    ]);
} catch (Exception $e) {
    // The remote site could not be fetched: an upstream failure, not a client error.
    respondJson(502, [
        'ok' => false,
        'error' => $e->getMessage()
    ]);
} catch (Throwable $e) {
    // PHP 7+ engine errors (TypeError, ValueError, ...) must still produce JSON.
    respondJson(500, [
        'ok' => false,
        'error' => 'Internal server error'
    ]);
}