<?php
/**
 * techno.php — Technolife multi-page JSON scraper (never blank, always JSON)
 * PHP 7.4+ compatible
 *
 * Query params (YOUR API):
 *   url              : category URL WITHOUT query (required)
 *   pg               : start page (int, default 1)
 *   pages            : how many pages to scan (int, default 1)
 *   stock            : 1/0 only in-stock (default 0)
 *   stop_when_empty  : 1/0 stop on first empty page (default 1)
 *   debug            : 1/0 include debug info (default 0)
 */

mb_internal_encoding('UTF-8');
header('Content-Type: application/json; charset=utf-8');

// --- Never-blank protections ---
// Report every error level, but never let PHP print raw error text into the
// response body: with display_errors enabled a fatal error is written to the
// output *before* the shutdown handler emits its JSON document, which makes
// the body unparseable for API clients. Errors still reach the error log and
// are turned into JSON by the handlers registered in this file.
ini_set('display_errors', '0');
error_reporting(E_ALL);

// Convert warnings/notices into ErrorException so they can be reported as JSON
// by the surrounding try/catch. Deprecation notices are deliberately NOT
// converted: they do not affect the result of the current request, and throwing
// on them would turn every PHP upgrade that deprecates a used function into a
// hard failure. They are written to the error log instead.
set_error_handler(function($severity, $message, $file, $line){
    // Respect the current error_reporting mask (and the @ operator).
    if (!(error_reporting() & $severity)) return false;
    if ($severity === E_DEPRECATED || $severity === E_USER_DEPRECATED) {
        error_log("Deprecated: $message in $file on line $line");
        return true; // handled; keep it out of the response body
    }
    throw new ErrorException($message, 0, $severity, $file, $line);
});

// Last line of defence: when the script ends without a response having been
// written and the last recorded error is a fatal one, emit a JSON error
// document (HTTP 500) from the shutdown hook.
$__response_sent = false;
register_shutdown_function(function() use (&$__response_sent) {
    if ($__response_sent) {
        return;
    }

    $last = error_get_last();
    if (!$last) {
        return;
    }

    // Each error level is a single bit, so a mask test selects the same
    // four fatal levels as an explicit list would.
    $fatalMask = E_ERROR | E_PARSE | E_CORE_ERROR | E_COMPILE_ERROR;
    if (($last['type'] & $fatalMask) === 0) {
        return;
    }

    $payload = [
        'ok' => false,
        'error' => 'Fatal error',
        'fatal' => [
            'type' => $last['type'],
            'message' => $last['message'],
            'file' => $last['file'],
            'line' => $last['line'],
        ]
    ];

    http_response_code(500);
    echo json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    $__response_sent = true;
});

if (!function_exists('str_starts_with')) {
    /**
     * Polyfill for PHP 8's str_starts_with() on PHP 7.4.
     *
     * Matches the native semantics, including the rule that every string
     * starts with the empty string (an empty $needle yields true).
     *
     * @param string $haystack String to inspect.
     * @param string $needle   Prefix to look for.
     * @return bool True when $haystack begins with $needle.
     */
    function str_starts_with(string $haystack, string $needle): bool {
        // Byte-wise comparison of the first strlen($needle) bytes; a length
        // of 0 compares equal, which gives the native empty-needle result.
        return strncmp($haystack, $needle, strlen($needle)) === 0;
    }
}
if (!function_exists('array_is_list')) {
    /**
     * Polyfill for PHP 8.1's array_is_list().
     *
     * @param array $array Array to inspect.
     * @return bool True when the keys are exactly 0, 1, 2, ... in that order
     *              (an empty array counts as a list).
     */
    function array_is_list(array $array): bool {
        $keys = array_keys($array);
        // array_keys() of a list of keys is 0..n-1, so the array is a list
        // exactly when its own key sequence is identical to that.
        return $keys === array_keys($keys);
    }
}

/**
 * Write the response as pretty-printed JSON and terminate the script.
 *
 * Does nothing if a response has already been sent. Invalid UTF-8 byte
 * sequences inside the payload (e.g. from a percent-decoded user URL) are
 * replaced with U+FFFD instead of making json_encode() fail, and if encoding
 * still fails a minimal JSON error document is sent with status 500 so the
 * body is never empty.
 *
 * @param mixed $arr  Payload to encode.
 * @param int   $code HTTP status code to send.
 */
function send_json($arr, int $code = 200) {
    global $__response_sent;
    if ($__response_sent) return;

    $flags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT
           | JSON_INVALID_UTF8_SUBSTITUTE;
    $json = json_encode($arr, $flags);

    if ($json === false) {
        // Still not encodable (depth limit, NAN/INF, ...): report that instead
        // of echoing `false`, which would produce an empty body.
        $code = 500;
        $json = json_encode(
            ['ok' => false, 'error' => 'JSON encoding failed: ' . json_last_error_msg()],
            $flags
        );
        if ($json === false) {
            $json = '{"ok":false,"error":"JSON encoding failed"}';
        }
    }

    http_response_code($code);
    echo $json;
    $__response_sent = true;
    exit;
}

/**
 * Build a JSON error payload and hand it to send_json().
 *
 * @param int    $code  HTTP status code for the response.
 * @param string $msg   Message stored under the "error" key.
 * @param array  $extra Additional fields merged over the base payload.
 */
function fail(int $code, string $msg, array $extra = []) {
    $base = ['ok' => false, 'error' => $msg];
    $payload = array_merge($base, $extra);
    send_json($payload, $code);
}

/**
 * Trim a user-supplied URL and undo up to three layers of percent-encoding.
 *
 * Decoding stops early as soon as a pass leaves the string unchanged.
 *
 * @param string $u Raw URL as received from the client.
 * @return string The trimmed, decoded URL ('' when the input is blank).
 */
function normalizeUserUrl(string $u): string {
    $current = trim($u);
    $passesLeft = 3;

    while ($current !== '' && $passesLeft > 0) {
        $next = rawurldecode($current);
        if ($next === $current) {
            break; // nothing left to decode
        }
        $current = $next;
        $passesLeft--;
    }

    return $current;
}

/**
 * Compose the upstream listing URL for one page.
 *
 * The page parameter is always sent (clamped to a minimum of 1);
 * only_available=true is appended when only in-stock items are wanted.
 *
 * @param string $base    Category URL without a query string.
 * @param int    $pg      Requested page number.
 * @param bool   $inStock Whether to restrict the listing to available items.
 * @return string $base followed by "?" and the encoded query.
 */
function buildTechnoUrl(string $base, int $pg, bool $inStock): string {
    $params = ['page' => (string)max(1, $pg)];
    if ($inStock) {
        $params['only_available'] = 'true';
    }
    return $base . '?' . http_build_query($params);
}

/**
 * Fetch a URL with cURL using browser-like headers.
 *
 * On a transport error, or an HTTP status of 0 or >= 400, the request is
 * terminated through fail() with a JSON error (502 for transport errors,
 * otherwise the upstream status code).
 *
 * @param string $url   Absolute URL to fetch.
 * @param bool   $debug When true, cURL info and a body snippet are attached
 *                      to the error payload.
 * @return array [string $body, array $curlInfo]
 */
function httpGet(string $url, bool $debug = false): array {
    $ch = curl_init();
    if ($ch === false) {
        fail(502, 'HTTP error: cURL could not be initialised');
    }
    $headers = [
        'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36',
        'Accept-Language: fa-IR,fa;q=0.9,en-US;q=0.8',
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Referer: https://www.technolife.com/',
    ];
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 5,
        CURLOPT_CONNECTTIMEOUT => 20,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_ENCODING => '', // accept every encoding cURL can decode
        CURLOPT_HTTPHEADER => $headers,
        // The URL is user-influenced: never let the request, or a redirect
        // it follows, leave HTTP(S) (no file://, ftp://, gopher://, ...).
        CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    ]);
    if (defined('CURL_IPRESOLVE_V4')) curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);

    $body = curl_exec($ch);
    $err  = curl_error($ch);
    $info = curl_getinfo($ch);
    // curl_close() has been a no-op since PHP 8.0; only call it where it
    // still releases the handle so newer runtimes do not flag the call.
    if (PHP_VERSION_ID < 80000) {
        curl_close($ch);
    }

    if ($body === false) {
        fail(502, "HTTP error: $err", $debug ? ['curl_info' => $info] : []);
    }
    $code = (int)($info['http_code'] ?? 0);
    if ($code >= 400 || $code === 0) {
        $extra = [];
        if ($debug) {
            // Error pages are not guaranteed to be valid UTF-8; scrub first so
            // the /u regex cannot return null and break mb_substr().
            $flat = preg_replace('~\s+~u', ' ', mb_scrub((string)$body, 'UTF-8'));
            $extra = [
                'curl_info' => $info,
                'body_snippet' => mb_substr((string)$flat, 0, 400),
            ];
        }
        fail($code ?: 502, "HTTP $code برای $url", $extra);
    }
    return [$body, $info];
}

/**
 * Parse an HTML string and return an XPath evaluator over the resulting DOM.
 *
 * @param string $html Raw HTML markup.
 * @return DOMXPath XPath object bound to the parsed document.
 */
function makeXPath(string $html): DOMXPath {
    // Keep libxml diagnostics in its internal buffer instead of raising PHP warnings.
    libxml_use_internal_errors(true);

    // The leading XML declaration tells the HTML parser to read the input as UTF-8.
    $markup = '<?xml encoding="utf-8" ?>' . $html;
    $parseFlags = LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_COMPACT;

    $document = new DOMDocument();
    $document->loadHTML($markup, $parseFlags);

    return new DOMXPath($document);
}

/**
 * Determine the category name of a listing page.
 *
 * Sources are tried in order:
 *   1. the last entry of a JSON-LD BreadcrumbList,
 *   2. the last breadcrumb link found in the DOM,
 *   3. the document <title>.
 *
 * Malformed JSON-LD (invalid JSON, a non-array breadcrumb entry, a name that
 * is not a scalar, a blank name) is skipped so the later sources still get
 * their turn instead of the whole request failing on a type error.
 *
 * @param DOMXPath $xp XPath evaluator for the page.
 * @return string|null Trimmed category name, or null when nothing was found.
 */
function extractCategory(DOMXPath $xp): ?string {
    $nodes = $xp->query('//script[@type="application/ld+json"]');
    foreach ($nodes as $n) {
        $txt = trim($n->textContent ?? '');
        if ($txt === '') continue;

        $data = json_decode($txt, true);
        if (!is_array($data)) continue; // invalid JSON or a bare scalar

        // A script may hold one object or a list of objects.
        $arr = array_is_list($data) ? $data : [$data];
        foreach ($arr as $obj) {
            if (!is_array($obj) || ($obj['@type'] ?? null) !== 'BreadcrumbList') continue;

            $items = $obj['itemListElement'] ?? [];
            if (!$items || !is_array($items)) continue;

            $last = end($items);
            if (!is_array($last)) continue;

            $name = $last['item']['name'] ?? ($last['name'] ?? null);
            // trim() throws on arrays, so only accept scalar names.
            if (!is_scalar($name)) continue;

            $name = trim((string)$name);
            if ($name !== '') return $name;
        }
    }
    $a = $xp->query('(//nav//ul/li[last()]/a | //ul[contains(@class,"breadcrumb")]/li[last()]/a)[1]');
    if ($a->length) return trim($a->item(0)->textContent);
    $t = $xp->query('//title');
    if ($t->length) return trim($t->item(0)->textContent);
    return null;
}

/**
 * Reduce a URL to "scheme://host".
 *
 * A missing scheme defaults to "https"; a missing host becomes an empty string.
 *
 * @param string $u URL to reduce.
 * @return string Scheme and host without port, path or query.
 */
function hostBase(string $u): string {
    $scheme = parse_url($u, PHP_URL_SCHEME);
    $host = parse_url($u, PHP_URL_HOST);

    return sprintf(
        '%s://%s',
        is_string($scheme) ? $scheme : 'https',
        is_string($host) ? $host : ''
    );
}

/**
 * Collect product titles and absolute URLs from a listing page.
 *
 * Cards are <section> elements inside an <article> that contain a product
 * link. If none exist, every product link that contains or is followed by an
 * <h2> is treated as a card.
 *
 * In that fallback the card IS the link, so links are looked up with
 * descendant-or-self (a descendant-only search can never match the card
 * itself and would drop every fallback card), and a title that is not inside
 * the link is taken from the next <h2> in document order.
 *
 * @param DOMXPath $xp       XPath evaluator for the page.
 * @param string   $hostBase "scheme://host" used to absolutise relative links.
 * @return array List of ['title' => string, 'url' => string]; the title falls
 *               back to the URL when no heading is found. Not de-duplicated.
 */
function extractProducts(DOMXPath $xp, string $hostBase): array {
    $cards = $xp->query('//article//section[.//a[contains(@href,"/product-")]]');
    if ($cards->length === 0) {
        $cards = $xp->query('//a[contains(@href,"/product-")][.//h2 or following::h2[1]]');
    }

    // Collapse whitespace runs and trim.
    $squash = static function (string $text): string {
        return trim((string)preg_replace('/\s+/u', ' ', $text));
    };

    $items = [];
    foreach ($cards as $card) {
        $cardIsAnchor = strtolower($card->nodeName) === 'a';
        $title = null;
        $href = null;

        // Preferred: the link that directly wraps the heading.
        $aNodes = $xp->query('descendant-or-self::a[h2]', $card);
        if ($aNodes->length) {
            $aNode = $aNodes->item(0);
            $href = trim($aNode->getAttribute('href'));
            $h2 = $xp->query('.//h2', $aNode);
            if ($h2->length) $title = $squash($h2->item(0)->textContent);
        }
        if (!$href) {
            $a2 = $xp->query('descendant-or-self::a[contains(@href,"/product-")]', $card);
            if ($a2->length) $href = trim($a2->item(0)->getAttribute('href'));
        }
        if (!$title) {
            $h2 = $xp->query('.//h2', $card);
            if (!$h2->length && $cardIsAnchor) {
                // Fallback layout: the heading follows the link.
                $h2 = $xp->query('following::h2[1]', $card);
            }
            if ($h2->length) $title = $squash($h2->item(0)->textContent);
        }
        if (!$href) continue;

        if (strncmp($href, '//', 2) === 0) {
            // Protocol-relative link: keep its host, borrow the page's scheme.
            $scheme = parse_url($hostBase, PHP_URL_SCHEME);
            $href = (is_string($scheme) ? $scheme : 'https') . ':' . $href;
        } elseif (!preg_match('~^https?://~i', $href)) {
            $href = rtrim($hostBase, '/') . '/' . ltrim($href, '/');
        }
        if (!$title) $title = $href;

        $items[] = ['title' => $title, 'url' => $href];
    }
    return $items;
}

/**
 * Read the "<number> کالا" product count from the listing header.
 *
 * Every <p> under the matching header container is inspected, not only the
 * first. Persian (U+06F0-06F9) and Arabic-Indic (U+0660-0669) digits are
 * mapped to ASCII before parsing, because an (int) cast of those digits
 * yields 0, and thousands separators (",", U+066C, U+060C) are stripped so
 * "1,234" is read as 1234 rather than 234.
 *
 * @param DOMXPath $xp XPath evaluator for the page.
 * @return int|null The reported total, or null when no count is present.
 */
function extractHeaderTotal(DOMXPath $xp): ?int {
    // Build the digit translation table from code points to avoid look-alike typos.
    $digitMap = [];
    for ($d = 0; $d <= 9; $d++) {
        $digitMap[mb_chr(0x06F0 + $d, 'UTF-8')] = (string)$d; // Persian
        $digitMap[mb_chr(0x0660 + $d, 'UTF-8')] = (string)$d; // Arabic-Indic
    }
    $pattern = '~([0-9][0-9,' . "\u{066C}\u{060C}" . ']*)\s*کالا~u';

    $nodes = $xp->query('//div[contains(@class,"h-12") and .//p[contains(text(),"کالا")]]//p');
    foreach ($nodes as $p) {
        $txt = strtr(trim($p->textContent), $digitMap);
        if (!preg_match($pattern, $txt, $m)) continue;

        // Drop separators; only ASCII digits remain.
        $digits = preg_replace('~[^0-9]~', '', $m[1]);
        if ($digits !== null && $digits !== '') return (int)$digits;
    }
    return null;
}

// Request entry point: validate the query parameters, scan the requested
// listing pages, aggregate unique products and answer with one JSON document.
// Any exception is reported as a JSON 500 response.
try {
    // ---- Input parsing ----
    $urlRaw = $_GET['url'] ?? '';
    // "url[]=..." arrives as an array; treat that as a missing parameter (400)
    // instead of letting the string type-hint raise a TypeError (500).
    $url = is_string($urlRaw) ? normalizeUserUrl($urlRaw) : '';
    $pgStart = isset($_GET['pg']) ? max(1, (int)$_GET['pg']) : 1;
    // NOTE(review): "pages" has no upper bound; consider capping it to limit
    // the number of upstream requests a single call can trigger.
    $pagesToScan = isset($_GET['pages']) ? max(1, (int)$_GET['pages']) : 1;
    $stock = isset($_GET['stock']) ? ((int)$_GET['stock'] ? 1 : 0) : 0;
    $stopWhenEmpty = isset($_GET['stop_when_empty']) ? ((int)$_GET['stop_when_empty'] ? 1 : 0) : 1;
    $debug = isset($_GET['debug']) ? ((int)$_GET['debug'] ? 1 : 0) : 0;

    if ($url === '') fail(400, 'پارامتر url لازم است (لینک دسته‌بندی بدون کوئری).');

    // ---- URL validation ----
    $parts = parse_url($url);
    $scheme = is_array($parts) ? strtolower((string)($parts['scheme'] ?? '')) : '';
    // Only http/https are accepted: the URL is handed to cURL, which would
    // otherwise also follow file://, ftp://, gopher:// and similar schemes.
    if (!$parts || !in_array($scheme, ['http', 'https'], true) || empty($parts['host']) || empty($parts['path'])) {
        fail(400, 'آدرس نامعتبر است.', $debug ? ['got_url' => $url] : []);
    }
    $host = strtolower($parts['host']);
    if (!preg_match('~(^|\.)technolife\.com$~i', $host)) {
        fail(400, 'فقط دامنه technolife.com مجاز است.');
    }
    // Rebuild from scheme + host + path only; query, fragment, port and
    // credentials supplied by the user are dropped.
    $baseCatUrl = $scheme . '://' . $host . $parts['path'];

    $hb = hostBase($baseCatUrl);
    $metaCategory = null;
    $headerTotal = null;
    $aggregated = [];
    $seen = [];
    $totalPagesScanned = 0;
    $pagesWithNoItems = 0;
    $technolifeUrls = [];
    $debugSnippets = [];

    // ---- Page scan ----
    for ($i = 0; $i < $pagesToScan; $i++) {
        // Throttle: pause between consecutive upstream requests. Sleeping
        // before every request but the first also covers the empty-body
        // path and avoids a pointless delay after the final page.
        if ($i > 0) usleep(250000);

        $page = $pgStart + $i;
        $fetchUrl = buildTechnoUrl($baseCatUrl, $page, (bool)$stock);
        $technolifeUrls[] = $fetchUrl;

        [$html, $info] = httpGet($fetchUrl, (bool)$debug);
        if (trim($html) === '') {
            $pagesWithNoItems++;
            if ($stopWhenEmpty) break;
            continue;
        }

        $xp = makeXPath($html);
        // Category and header total are taken from the first page that has them.
        if ($metaCategory === null) $metaCategory = extractCategory($xp);
        if ($headerTotal === null)  $headerTotal  = extractHeaderTotal($xp);

        $items = extractProducts($xp, $hb);
        $count = count($items);
        $totalPagesScanned++;

        if ($debug) {
            // Scrub invalid UTF-8 first so the /u regex cannot return null.
            $sample = preg_replace('~\s+~u', ' ', mb_scrub($html, 'UTF-8'));
            $debugSnippets[] = [
                'url' => $fetchUrl,
                'http_code' => (int)($info['http_code'] ?? 0),
                'found_items' => $count,
                'html_sample' => mb_substr((string)$sample, 0, 400)
            ];
        }

        if ($count === 0) {
            $pagesWithNoItems++;
            if ($stopWhenEmpty) break;
            continue;
        }

        // De-duplicate across pages by product URL; the first occurrence wins.
        foreach ($items as $it) {
            if (isset($seen[$it['url']])) continue;
            $seen[$it['url']] = true;
            $aggregated[] = $it;
        }
    }

    // ---- Response ----
    $out = [
        'ok' => true,
        'meta' => [
            'category'                  => $metaCategory,
            'user_category_url'         => $baseCatUrl,
            'start_page'                => $pgStart,
            'pages_requested'           => $pagesToScan,
            'stop_when_empty'           => (bool)$stopWhenEmpty,
            'in_stock'                  => (bool)$stock,
            'technolife_urls_used'      => $technolifeUrls,
            'pages_scanned'             => $totalPagesScanned,
            'pages_with_no_items'       => $pagesWithNoItems,
            'reported_total_in_header'  => $headerTotal,
            'total_products_aggregated' => count($aggregated),
        ],
        'products' => $aggregated,
    ];

    if ($debug) $out['debug'] = $debugSnippets;

    send_json($out, 200);

} catch (Throwable $e) {
    // Reduce the trace to file/line/function so it stays JSON-encodable.
    $trace = $e->getTrace();
    $cleanTrace = [];
    foreach ($trace as $f) {
        $cleanTrace[] = [
            'file' => $f['file'] ?? null,
            'line' => $f['line'] ?? null,
            'function' => $f['function'] ?? null,
        ];
    }
    fail(500, 'Unhandled exception: ' . $e->getMessage(), [
        'exception' => [
            'type' => get_class($e),
            'file' => $e->getFile(),
            'line' => $e->getLine(),
            'trace' => $cleanTrace,
        ]
    ]);
}