<?php
declare(strict_types=1);

/**
 * Product Scraper API – Extended (PHP 7.1+; uses nullable types and short list destructuring)
 * Usage:
 *   game.php?url=https://example.com/product/123
 *   game.php?url=...&debug=1
 *
 * Output: application/json (UTF-8)
 */

// Every response from this script, including errors, is a UTF-8 JSON document.
header('Content-Type: application/json; charset=utf-8');
// Make the mb_* string functions default to UTF-8.
mb_internal_encoding('UTF-8');

/**
 * Send a JSON error document and stop the script.
 *
 * The payload always contains `ok => false` and `error => $msg`; keys from
 * $extra are added only when they do not collide with those two.
 *
 * @param int    $code  HTTP status code to respond with.
 * @param string $msg   Human-readable error message.
 * @param array  $extra Optional additional fields for the payload.
 */
function error_out(int $code, string $msg, array $extra = []) {
    $payload = ['ok' => false, 'error' => $msg] + $extra;
    $flags   = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT;

    http_response_code($code);
    echo json_encode($payload, $flags);
    exit;
}

// Request parameters: `url` is the page to scrape, `debug` (anything but "0")
// adds diagnostic fields to the response.
$url   = $_GET['url']   ?? '';
$debug = isset($_GET['debug']) && $_GET['debug'] !== '0';

// FILTER_VALIDATE_URL accepts any scheme (file://, gopher://, ...). Only web
// URLs may be fetched, so the scheme is checked explicitly. `url[]=...`
// arrives as an array and is rejected as well.
$urlScheme = is_string($url) ? strtolower((string)parse_url($url, PHP_URL_SCHEME)) : '';
if (
    !is_string($url)
    || $url === ''
    || !filter_var($url, FILTER_VALIDATE_URL)
    || !in_array($urlScheme, ['http', 'https'], true)
) {
    error_out(400, 'پارامتر url نامعتبر است.');
}

/**
 * Fetch a page over HTTP(S) with cURL.
 *
 * Only http/https are allowed, both for the initial request and for any
 * redirect, so a caller-supplied URL cannot be used to read local files or
 * reach other protocol handlers.
 *
 * @param string $url Absolute URL to fetch.
 * @return array Positional tuple:
 *   [0] ?string body (null on transport failure),
 *   [1] int     HTTP status of the final response (0 on transport failure),
 *   [2] ?string cURL error message (null on success),
 *   [3] string  raw header text of all responses in the redirect chain,
 *   [4] array   lower-cased header name => value for the FINAL response only,
 *   [5] array   curl_getinfo() data.
 */
function fetch_html(string $url): array {
    $ch = curl_init($url);
    if ($ch === false) {
        return [null, 0, 'curl_init failed', '', [], []];
    }
    $web_only = CURLPROTO_HTTP | CURLPROTO_HTTPS;
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 5,
        CURLOPT_CONNECTTIMEOUT => 12,
        CURLOPT_TIMEOUT        => 25,
        CURLOPT_ENCODING       => '', // gzip/deflate/br
        CURLOPT_USERAGENT      => 'Mozilla/5.0 (compatible; ProductScraper/1.2; +https://localhost)',
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_PROTOCOLS      => $web_only, // untrusted URL: no file://, gopher://, ...
        CURLOPT_REDIR_PROTOCOLS => $web_only, // same restriction for redirect targets
        CURLOPT_HEADER         => true,
        CURLOPT_HTTPHEADER     => [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: fa-IR,fa;q=0.9,en-US;q=0.8,en;q=0.7',
        ],
    ]);
    $resp = curl_exec($ch);
    if ($resp === false) {
        $err = curl_error($ch);
        $info = curl_getinfo($ch);
        curl_close($ch);
        return [null, 0, $err, '', [], $info];
    }
    $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // With CURLOPT_HEADER the response starts with the header blocks of every
    // hop; CURLINFO_HEADER_SIZE is their combined length.
    $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $headers_raw = substr($resp, 0, $header_size);
    $body = substr($resp, $header_size);
    $info = curl_getinfo($ch);
    curl_close($ch);

    // Parse only the last header block, so that headers of intermediate
    // redirects do not leak into the map describing the final response.
    $blocks = preg_split("/(?:\r\n|\n|\r){2,}/", trim($headers_raw));
    $last_block = $blocks ? (string)end($blocks) : '';
    $headers = [];
    foreach (preg_split("/\r\n|\n|\r/", $last_block) as $line) {
        if (strpos($line, ':') !== false) {
            [$k, $v] = array_map('trim', explode(':', $line, 2));
            $headers[strtolower($k)] = $v;
        }
    }
    return [$body, $status, null, $headers_raw, $headers, $info];
}

[$html, $status, $err, $headers_raw, $headers, $fetch_info] = fetch_html($url);
if ($err) {
    // Transport-level failure (DNS, TLS, timeout, ...).
    error_out(502, 'خطا در دریافت صفحه: ' . $err, $debug ? ['fetch_info' => $fetch_info] : []);
}
if ($status >= 400 || !$html) {
    // Relay an upstream error status as-is. An empty body that arrived with a
    // non-error status is still a failure, so it is reported as 502 instead
    // of answering with a success code and an error payload.
    error_out($status >= 400 ? $status : 502, 'پاسخ نامعتبر از سرور مقصد.', $debug ? ['headers' => $headers] : []);
}

/** Load DOM */
// Collect libxml parse errors internally instead of raising PHP warnings;
// real-world HTML is rarely well-formed.
libxml_use_internal_errors(true);
$dom = new DOMDocument();
// The XML declaration prefix makes libxml decode the markup as UTF-8.
$dom->loadHTML('<?xml encoding="utf-8" ?>' . $html, LIBXML_NOERROR | LIBXML_NOWARNING);
// Nothing reads the buffered parse errors, so release them.
libxml_clear_errors();
$xpath = new DOMXPath($dom);

/** Helpers */
/**
 * Read an attribute from an element, trimmed.
 *
 * @return string|null The trimmed value, or null when the element is null or
 *                     does not carry the attribute.
 */
function get_attr(?DOMElement $n, string $attr): ?string {
    if ($n === null || !$n->hasAttribute($attr)) {
        return null;
    }
    return trim($n->getAttribute($attr));
}
/**
 * Text content of a node with every whitespace run collapsed to one space
 * and the ends trimmed. A null node yields an empty string.
 */
function node_text(?DOMNode $n): string {
    if ($n === null) {
        return '';
    }
    $collapsed = preg_replace('/\s+/u', ' ', $n->textContent);
    return trim($collapsed);
}
/**
 * Resolve a possibly relative reference against a base URL.
 *
 * Handles absolute URLs and other schemes (data:, mailto:, ...: returned
 * untouched), protocol-relative (`//host/x`), root-relative (`/x`),
 * path-relative (`x`, `./x`, `../x`), and query/fragment-only (`?a=1`, `#top`)
 * references. Dot segments are removed from the path only, never from the
 * query string, and `..` can never climb above the root into the host part.
 *
 * @param string $base             Absolute URL of the document.
 * @param string $possiblyRelative Reference found in the document.
 * @return string Absolute URL; an empty reference is returned as-is.
 */
function abs_url(string $base, string $possiblyRelative): string {
    $ref = trim($possiblyRelative);
    if ($ref === '') return $ref;

    // Anything that already has a scheme needs no resolution.
    if (preg_match('~^[a-z][a-z0-9+.\-]*:~i', $ref)) return $ref;

    $parts  = parse_url($base);
    $scheme = is_array($parts) ? ($parts['scheme'] ?? 'https') : 'https';

    // Protocol-relative: borrow only the scheme from the base.
    if (strpos($ref, '//') === 0) return $scheme . ':' . $ref;
    if (!$parts) return $ref;

    $authority = $scheme . '://' . ($parts['host'] ?? '') . (isset($parts['port']) ? ':' . $parts['port'] : '');
    $basePath  = $parts['path'] ?? '/';
    if ($basePath === '' || $basePath[0] !== '/') $basePath = '/';

    // Split the reference into its path and the trailing ?query/#fragment.
    $cut     = strcspn($ref, '?#');
    $refPath = substr($ref, 0, $cut);
    $suffix  = (string)substr($ref, $cut);

    if ($refPath === '') {
        // Query- or fragment-only reference: keep the base document path.
        if ($suffix[0] === '#' && isset($parts['query'])) {
            $suffix = '?' . $parts['query'] . $suffix;
        }
        return $authority . $basePath . $suffix;
    }

    if ($refPath[0] === '/') {
        $path = $refPath;
    } else {
        // Replace the last segment (file name) of the base path.
        $path = (string)preg_replace('~/[^/]*$~', '/', $basePath) . $refPath;
    }

    // Remove "." and ".." segments; index 0 is the empty root marker and is
    // never popped, so excess ".." stops at the root.
    $segments = explode('/', $path);
    $last     = end($segments);
    $resolved = [];
    foreach ($segments as $seg) {
        if ($seg === '.') continue;
        if ($seg === '..') {
            if (count($resolved) > 1) array_pop($resolved);
            continue;
        }
        $resolved[] = $seg;
    }
    // A trailing dot segment refers to a directory: keep the trailing slash.
    if ($last === '.' || $last === '..') $resolved[] = '';

    return $authority . implode('/', $resolved) . $suffix;
}
/**
 * Parse a price-like string into a float.
 *
 * Persian and Arabic-Indic digits are converted to ASCII, the Arabic decimal
 * separator (U+066B) becomes ".", and every other character (currency words,
 * thousands separators, spaces, direction marks) is dropped. When several
 * dots remain they cannot all be decimal points, so they are treated as
 * grouping separators. A sign is not preserved.
 *
 * @param string|null $s Raw text such as "۱۲٬۵۰۰ تومان" or "1,234.50".
 * @return float|null Parsed number, or null when the input has no digit.
 */
function to_number(?string $s): ?float {
    if ($s === null) return null;
    $s = trim($s);
    if ($s === '') return null;

    // Byte-wise replacement: safe even if the input is not valid UTF-8.
    $replacements = [
        '۰'=>'0','۱'=>'1','۲'=>'2','۳'=>'3','۴'=>'4','۵'=>'5','۶'=>'6','۷'=>'7','۸'=>'8','۹'=>'9',
        '٠'=>'0','١'=>'1','٢'=>'2','٣'=>'3','٤'=>'4','٥'=>'5','٦'=>'6','٧'=>'7','٨'=>'8','٩'=>'9',
        "\u{066B}" => '.', // Arabic decimal separator is a decimal point, not noise
    ];
    $s = strtr($s, $replacements);
    $s = (string)preg_replace('/[^0-9.]/', '', $s);

    // A trailing dot is sentence punctuation ("... 12500.").
    $s = rtrim($s, '.');
    if (substr_count($s, '.') > 1) {
        // A leading dot here comes from an abbreviation such as "Rs.".
        $s = ltrim($s, '.');
        // Still several dots: they are thousands separators ("1.234.567").
        if (substr_count($s, '.') > 1) $s = str_replace('.', '', $s);
    }

    if ($s === '' || !preg_match('/[0-9]/', $s)) return null;
    return (float)$s;
}
/**
 * Tell whether a text mentions the Toman unit in Persian.
 *
 * Tatweel/kashida (U+0640) is a purely decorative elongation that can appear
 * between any two letters, so it is stripped before matching. Both the
 * standard spelling and the colloquial one are recognised.
 */
function contains_persian_toman(string $t): bool {
    $plain = str_replace("\u{0640}", '', $t);
    return strpos($plain, 'تومان') !== false || strpos($plain, 'تومن') !== false;
}
/**
 * Tell whether a text contains the currency code "IRR" (case-insensitive).
 *
 * The code must not be part of a longer Latin word, so "Mirror" does not
 * count, while "1000IRR" and "IRR 5,000" do.
 */
function contains_irr_code(string $t): bool {
    return preg_match('/(?<![A-Za-z])IRR(?![A-Za-z])/i', $t) === 1;
}

/**
 * Collect Product and BreadcrumbList objects from JSON-LD script blocks.
 *
 * Each `<script type="application/ld+json">` may hold a single object, a list
 * of objects, or a container whose `@graph` lists the actual nodes; all three
 * layouts are searched. JSON with trailing commas gets one repair attempt.
 *
 * @param DOMXPath $xp XPath bound to the parsed page.
 * @return array List of decoded JSON-LD objects (associative arrays) whose
 *               `@type` mentions "Product" or "BreadcrumbList".
 */
function extract_jsonld_products(DOMXPath $xp): array {
    $data = [];
    foreach ($xp->query('//script[@type="application/ld+json"]') as $script) {
        $json = trim($script->textContent);
        if ($json === '') continue;
        $decoded = json_decode($json, true);
        if (!$decoded) {
            $json = preg_replace('/,\s*([\]}])/m', '$1', $json); // strip trailing commas
            $decoded = is_string($json) ? json_decode($json, true) : null;
        }
        if (!is_array($decoded) || !$decoded) continue;

        // A JSON object is one item; a JSON list is several.
        $isSingle = isset($decoded['@type']) || isset($decoded['@context']) || isset($decoded['@graph']);
        $queue = $isSingle ? [$decoded] : $decoded;

        while ($queue) {
            $it = array_shift($queue);
            if (!is_array($it)) continue;

            // Unpack @graph containers; their nodes are examined like any other item.
            if (isset($it['@graph']) && is_array($it['@graph'])) {
                $graph = isset($it['@graph']['@type']) ? [$it['@graph']] : $it['@graph'];
                foreach ($graph as $node) $queue[] = $node;
            }

            $type = $it['@type'] ?? null;
            if (is_array($type)) $type = implode(',', array_filter($type, 'is_string'));
            if (!is_string($type) || $type === '') continue;

            if (stripos($type, 'Product') !== false || stripos($type, 'BreadcrumbList') !== false) {
                $data[] = $it;
            }
        }
    }
    return $data;
}

/**
 * Read OpenGraph/product meta properties, the description and keywords meta
 * tags, and the document title.
 *
 * @param DOMXPath $xp XPath bound to the parsed page.
 * @return array Map keyed by the property/name that was found (plus `title`);
 *               keys are absent when the page lacks the corresponding tag.
 */
function extract_meta(DOMXPath $xp): array {
    $meta = [];

    // Which <meta> attribute identifies each group of keys.
    $lookups = [
        'property' => ['og:title', 'og:description', 'og:image', 'og:url', 'product:price:amount', 'product:price:currency'],
        'name'     => ['description', 'keywords'],
    ];
    foreach ($lookups as $attribute => $keys) {
        foreach ($keys as $key) {
            $node = $xp->query("//meta[@{$attribute}='{$key}']")->item(0);
            if ($node !== null) {
                $meta[$key] = get_attr($node, 'content');
            }
        }
    }

    $titleNode = $xp->query('//title')->item(0);
    if ($titleNode !== null) {
        $meta['title'] = node_text($titleNode);
    }
    return $meta;
}

/**
 * Guess current and old prices from common DOM patterns.
 *
 * Only texts that really contain a digit (ASCII, Persian or Arabic-Indic) are
 * kept as candidates. The pattern needs the `u` modifier: without it the
 * Persian digit range is read as a byte range and matches any Persian text.
 *
 * @param DOMXPath $xp XPath bound to the parsed page.
 * @return array {
 *   raw:       list of unique candidate texts for the current price,
 *   old_raw:   list of unique candidate texts for the old price,
 *   value:     ?float parsed from the first current-price candidate,
 *   old_value: ?float parsed from the first old-price candidate,
 *   unit_hint: 'TOMAN' | 'IRR' | null, from text near price elements
 * }
 */
function guess_prices(DOMXPath $xp): array {
    $hasDigit = '/[0-9۰-۹٠-٩]/u';

    $candidates = [];
    $paths = [
        "//*[@itemprop='price']",
        "//*[contains(@class,'price') and not(contains(@class,'old'))]",
        "//*[@id='price']",
        "//meta[@itemprop='price']/@content",
        "//span[contains(@class,'amount')]",
        "//input[@name='price_value']/@value",
    ];
    foreach ($paths as $p) {
        foreach ($xp->query($p) as $node) {
            // Attribute queries yield DOMAttr nodes, element queries yield elements.
            $val = $node instanceof DOMAttr ? $node->value : node_text($node);
            $val = trim($val);
            if ($val !== '' && preg_match($hasDigit, $val)) $candidates[] = $val;
        }
    }

    $oldCandidates = [];
    foreach ([
        "//*[contains(@class,'old') and contains(@class,'price')]",
        "//*[@itemprop='price']/*[contains(@class,'old')]",
        "//*[contains(@class,'price-old')]"
    ] as $p) {
        foreach ($xp->query($p) as $node) {
            $val = node_text($node);
            if (preg_match($hasDigit, $val)) $oldCandidates[] = $val;
        }
    }

    // The first price-like element that names a unit decides the hint.
    $unit_hint = null;
    foreach ($xp->query("//*[contains(@class,'price') or @id='price']") as $n) {
        $txt = node_text($n);
        if ($txt !== '') {
            if (contains_persian_toman($txt)) { $unit_hint = 'TOMAN'; break; }
            if (contains_irr_code($txt))      { $unit_hint = 'IRR';   break; }
        }
    }

    return [
        'raw'        => array_values(array_unique($candidates)),
        'old_raw'    => array_values(array_unique($oldCandidates)),
        'value'      => isset($candidates[0]) ? to_number($candidates[0]) : null,
        'old_value'  => isset($oldCandidates[0]) ? to_number($oldCandidates[0]) : null,
        'unit_hint'  => $unit_hint,
    ];
}

/**
 * Merge the price information from structured offers, meta tags and DOM
 * guesses into one price record and derive its value in Iranian rials.
 *
 * The first offer takes precedence for value and currency. Anything the offer
 * cannot supply (a missing price, the old price, the raw text) is filled from
 * the DOM guesses, whether or not offers exist.
 *
 * @param array $product Product record; reads `price` and `offers`
 *                       (offers are lists of arrays with `price`/`currency`).
 * @param array $meta    Meta map; reads `product:price:currency`.
 * @param array $g       Result of guess_prices().
 * @return array Price record with keys value, currency, raw, old_value,
 *               unit_hint, value_irr.
 */
function normalize_price(array $product, array $meta, array $g): array {
    $defaults = ['value'=>null,'currency'=>null,'raw'=>null,'old_value'=>null,'unit_hint'=>null,'value_irr'=>null];
    $price = (is_array($product['price'] ?? null) ? $product['price'] : []) + $defaults;
    $metaCurrency = $meta['product:price:currency'] ?? null;

    if (!empty($product['offers'])) {
        $first = $product['offers'][0] ?? [];
        if (!is_array($first)) $first = [];
        $price['value']    = $first['price'] ?? $price['value'];
        $price['currency'] = $first['currency'] ?? ($metaCurrency ?? $price['currency']);
    }

    // Fill whatever is still unknown from the DOM guesses.
    $price['value']     = $price['value'] ?? ($g['value'] ?? null);
    $price['old_value'] = $price['old_value'] ?? ($g['old_value'] ?? null);
    $price['raw']       = $price['raw'] ?? ($g['raw'][0] ?? null);
    $price['currency']  = $price['currency'] ?? $metaCurrency;

    // Unit hint: prefer the one seen near the price element, else look into the raw text.
    $hint = $g['unit_hint'] ?? null;
    if (!$hint && is_string($price['raw']) && $price['raw'] !== '') {
        $hint = contains_persian_toman($price['raw']) ? 'TOMAN' : (contains_irr_code($price['raw']) ? 'IRR' : null);
    }
    $price['unit_hint'] = $price['unit_hint'] ?? $hint;

    $value = $price['value'];
    if ($value !== null) {
        $code = is_string($price['currency']) ? strtoupper(trim($price['currency'])) : '';
        if ($code !== '') {
            if ($code === 'IRR') {
                $price['value_irr'] = (float)$value;
            } elseif ($code === 'IRT') {
                // Informal code for Toman; 1 Toman = 10 rials.
                $price['value_irr'] = (float)$value * 10.0;
            } else {
                $price['value_irr'] = null;
            }
        } else {
            // No declared currency: rely on the unit seen in the page text.
            if ($price['unit_hint'] === 'TOMAN') {
                $price['currency']  = 'IRR';
                $price['value_irr'] = (float)$value * 10.0;
            } elseif ($price['unit_hint'] === 'IRR') {
                $price['currency']  = 'IRR';
                $price['value_irr'] = (float)$value;
            } else {
                $price['value_irr'] = null;
            }
        }
    }
    return $price;
}

/**
 * Gather candidate product image URLs from the DOM.
 *
 * Sources, in priority order: og:image, link rel=image_src, product-looking
 * <img> tags, gallery/zoom <img> tags, and lazy-load data-src attributes.
 * URLs are made absolute against $baseUrl and de-duplicated
 * case-insensitively, keeping the first occurrence.
 *
 * @param DOMXPath $xp      XPath bound to the parsed page.
 * @param string   $baseUrl Base URL for resolving relative references.
 * @return array List of absolute image URLs.
 */
function collect_images(DOMXPath $xp, string $baseUrl): array {
    $queries = [
        "//meta[@property='og:image']/@content",
        "//link[@rel='image_src']/@href",
        "//img[contains(@class,'product') or contains(@id,'product') or contains(@src,'product')]/@src",
        "//img[contains(@class,'gallery') or contains(@class,'zoom')]/@src",
        "//img[@data-src]/@data-src"
    ];

    // Lower-cased URL => URL as first seen; insertion order is the result order.
    $unique = [];
    foreach ($queries as $expr) {
        foreach ($xp->query($expr) as $attrNode) {
            $raw = $attrNode->value;
            if (!$raw) {
                continue;
            }
            $resolved    = abs_url($baseUrl, $raw);
            $fingerprint = mb_strtolower($resolved);
            if (!isset($unique[$fingerprint])) {
                $unique[$fingerprint] = $resolved;
            }
        }
    }
    return array_values($unique);
}

/**
 * Extract product specifications as a key => value map.
 *
 * Sources are applied in this order; later sources overwrite earlier keys:
 *   (0) span pairs in `div.mb-2` rows following an <h6> "خصوصیات کلیدی" heading,
 *   (1) `.attribute-name` / `.attribute-value` element pairs,
 *   (2) rows of spec/attribute/detail tables,
 *   (3) <dt>/<dd> pairs of definition lists,
 *   (4) <li> items of spec/feature lists; "key: value" items become map
 *       entries, other items are appended with numeric keys.
 * A trailing colon (ASCII or full-width) is removed from keys.
 *
 * @param DOMXPath $xp    XPath bound to the parsed page.
 * @param bool     $debug When true, return ['data' => specs, '_trace' => counters].
 * @return array The specs map, or the debug wrapper described above. Trace
 *               counters: kv_divs = pairs from (0), attr_pairs = "k => v"
 *               samples from (0)/(1) (debug only), tables = matched table
 *               rows, dls = pairs stored from (3), lists = items stored from (4).
 */
function extract_specs(DOMXPath $xp, bool $debug = false): array {
    // Collapse whitespace runs and trim.
    $clean = static function (string $raw): string {
        return trim((string)preg_replace('/\s+/u', ' ', $raw));
    };
    // Drop a trailing ":" / "：" from a label.
    $asKey = static function (string $label): string {
        return (string)preg_replace('/[:：]\s*$/u', '', $label);
    };

    $specs = [];
    $trace = ['kv_divs'=>0, 'attr_pairs'=>[], 'tables'=>0, 'dls'=>0, 'lists'=>0];

    // (0) Key/Value rows under "خصوصیات کلیدی": the first two non-empty spans of a row.
    $kvRows = $xp->query("//h6[contains(normalize-space(.), 'خصوصیات کلیدی')]/following::*[self::div][contains(@class,'mb-2')][position() <= 80]");
    foreach ($kvRows as $row) {
        $spans = [];
        foreach ($row->getElementsByTagName('span') as $sp) {
            $txt = $clean($sp->textContent);
            if ($txt !== '') $spans[] = $txt;
            if (count($spans) >= 2) break;
        }
        if (count($spans) < 2) continue;
        $k = $asKey($spans[0]);
        $v = $spans[1];
        if ($k !== '' && $v !== '') {
            $specs[$k] = $v;
            $trace['kv_divs']++;
            if ($debug) $trace['attr_pairs'][] = mb_substr("{$k} => {$v}", 0, 160);
        }
    }

    // (1) .attribute-name / .attribute-value (matched as whole class tokens).
    $valueClass = "contains(concat(' ', normalize-space(@class), ' '), ' attribute-value ')";
    foreach ($xp->query("//*[contains(concat(' ', normalize-space(@class), ' '), ' attribute-name ')]") as $nameEl) {
        $key = $clean($nameEl->textContent);
        if ($key === '') continue;
        // Look for the value next to the name, then inside the parent, then the grandparent.
        $valEl = $xp->query("following-sibling::*[{$valueClass}][1]", $nameEl)->item(0);
        if (!$valEl && $nameEl->parentNode instanceof DOMElement) {
            $valEl = $xp->query(".//*[{$valueClass}][1]", $nameEl->parentNode)->item(0);
        }
        if (!$valEl && $nameEl->parentNode && $nameEl->parentNode->parentNode instanceof DOMElement) {
            $valEl = $xp->query(".//*[{$valueClass}][1]", $nameEl->parentNode->parentNode)->item(0);
        }
        $val = $valEl ? $clean($valEl->textContent) : '';
        if ($val !== '') {
            $kk = $asKey($key);
            $specs[$kk] = $val;
            if ($debug) $trace['attr_pairs'][] = mb_substr("{$kk} => {$val}", 0, 160);
        }
    }

    // (2) Tables: first cell is the key, second cell the value.
    $rows = $xp->query("//table[contains(@class,'spec') or contains(@class,'attribute') or contains(@class,'detail') or contains(@id,'spec')]//tr");
    if ($rows && $rows->length) $trace['tables'] = $rows->length;
    foreach ($rows as $tr) {
        $cells = [];
        foreach ($tr->childNodes as $td) {
            if ($td instanceof DOMElement && in_array(strtolower($td->tagName), ['td','th'], true)) {
                $cells[] = $clean($td->textContent);
            }
        }
        if (count($cells) >= 2 && $cells[0] !== '' && $cells[1] !== '') {
            $specs[$asKey($cells[0])] = $cells[1];
        }
    }

    // (3) Definition lists: every <dl> is considered.
    foreach ($xp->query('//dl') as $dl) {
        foreach ($dl->getElementsByTagName('dt') as $dt) {
            // The matching <dd> is the next sibling <dd> before another <dt>.
            $dd = null;
            for ($s = $dt->nextSibling; $s; $s = $s->nextSibling) {
                if (!($s instanceof DOMElement)) continue;
                $tag = strtolower($s->tagName);
                if ($tag === 'dd') { $dd = $s; break; }
                if ($tag === 'dt') { break; }
            }
            $k = $clean($dt->textContent);
            $v = $dd ? $clean($dd->textContent) : '';
            if ($k !== '' && $v !== '') {
                $specs[$asKey($k)] = $v;
                $trace['dls']++;
            }
        }
    }

    // (4) Lists: <ul class="spec/feature/..."><li>key: value</li>
    $lis = $xp->query("//ul[contains(@class,'spec') or contains(@class,'feature') or contains(@id,'spec')]//li");
    foreach ($lis as $li) {
        $t = $clean($li->textContent);
        if ($t === '') continue;
        if (strpos($t, ':') !== false || mb_strpos($t, '：') !== false) {
            // Split at the first colon only; the value may contain colons.
            $parts = preg_split('/[:：]/u', $t, 2);
            $k = trim($parts[0] ?? '');
            $v = trim($parts[1] ?? '');
            if ($k !== '' && $v !== '') {
                $specs[$k] = $v;
                $trace['lists']++;
            }
        } else {
            $specs[] = $t;
            $trace['lists']++;
        }
    }

    return $debug ? ['data' => $specs, '_trace' => $trace] : $specs;
}

/**
 * Extract product variants/options (OpenCart markup plus generic patterns).
 *
 * Sources:
 *   (A) <select> elements whose name/id/class mentions "variant" or "option",
 *   (B) OpenCart <select name="option[ID]"> elements,
 *   (C) OpenCart radio/checkbox inputs named "option[ID]", grouped by name,
 *   (D) generic swatch/color/size containers (one single-option entry per element).
 * A <select> matched by both (A) and (B) is reported once.
 *
 * @param DOMXPath $xp XPath bound to the parsed page.
 * @return array List of ['name' => string, 'options' => list of ['label', 'value']].
 */
function extract_variants(DOMXPath $xp): array {
    $vars = [];
    // XPath 1.0 has no lower-case(); translate() emulates it for ASCII letters.
    $lowerName = "translate(@name,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz')";
    $lowerType = "translate(@type,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz')";

    // (A) + (B) selects. The node path identifies an element, so a select
    // that satisfies both queries is only emitted the first time.
    $selectQueries = [
        "//select[contains({$lowerName},'variant') or contains({$lowerName},'option') or contains(@id,'variant') or contains(@class,'variant')]",
        "//select[starts-with({$lowerName},'option[')]",
    ];
    $seenSelects = [];
    foreach ($selectQueries as $query) {
        foreach ($xp->query($query) as $sel) {
            $path = $sel->getNodePath();
            if ($path !== null) {
                if (isset($seenSelects[$path])) continue;
                $seenSelects[$path] = true;
            }
            $name = get_attr($sel, 'name') ?? get_attr($sel, 'id') ?? 'option';
            $options = [];
            foreach ($sel->getElementsByTagName('option') as $opt) {
                $label = node_text($opt);
                $val = get_attr($opt, 'value');
                // Skip options that carry neither a label nor a value.
                if ($label !== '' || ($val !== null && $val !== '')) {
                    $options[] = ['label'=>$label, 'value'=>$val];
                }
            }
            if ($options) $vars[] = ['name' => $name, 'options' => $options];
        }
    }

    // (C) OpenCart: radio/checkbox inputs name="option[ID]"
    $inputs = $xp->query("//input[({$lowerType}='radio' or {$lowerType}='checkbox') and starts-with({$lowerName},'option[')]");
    $grouped = [];
    foreach ($inputs as $inp) {
        $name  = get_attr($inp, 'name') ?? 'option';
        $value = get_attr($inp, 'value') ?? '';
        $labelText = '';
        $id = get_attr($inp, 'id');
        if ($id) {
            // XPath 1.0 string literals cannot escape quotes: choose the quote
            // character the id does not contain, and skip the lookup when it
            // contains both.
            $literal = null;
            if (strpos($id, "'") === false) {
                $literal = "'" . $id . "'";
            } elseif (strpos($id, '"') === false) {
                $literal = '"' . $id . '"';
            }
            if ($literal !== null) {
                $lbl = $xp->query("//label[@for={$literal}]")->item(0);
                if ($lbl) $labelText = node_text($lbl);
            }
        }
        // Fallbacks: a wrapping <label>, then the raw value.
        if ($labelText === '' && $inp->parentNode instanceof DOMElement && strtolower($inp->parentNode->tagName) === 'label') {
            $labelText = node_text($inp->parentNode);
        }
        if ($labelText === '') $labelText = $value;
        $grouped[$name][] = ['label' => $labelText, 'value' => $value];
    }
    foreach ($grouped as $name => $options) {
        if ($options) $vars[] = ['name' => $name, 'options' => $options];
    }

    // (D) swatches (generic)
    foreach ($xp->query("//*[contains(@class,'swatch') or contains(@class,'color') or contains(@class,'size')]//*[self::a or self::button or self::span]") as $el) {
        $txt = node_text($el);
        if ($txt) $vars[] = ['name' => 'swatch', 'options' => [['label'=>$txt,'value'=>$txt]]];
    }

    return $vars;
}

/**
 * Find the first visible element mentioning a warranty.
 *
 * An element qualifies when one of its own text nodes contains "گارانتی" or
 * "warranty" (any letter case). <script>, <style>, <noscript> and <title>
 * are skipped so that inline code, JSON-LD or the page title is never
 * returned as the warranty text.
 *
 * @param DOMXPath $xp XPath bound to the parsed page.
 * @return string|null Whitespace-normalised text of that element without a
 *                     trailing colon, or null when nothing matches.
 */
function extract_warranty(DOMXPath $xp): ?string {
    // text()[...] tests every direct text node; contains(text(), ...) would
    // only look at the first one.
    $nodes = $xp->query(
        "//*[not(self::script or self::style or self::noscript or self::title)]"
        . "[text()[contains(., 'گارانتی') or contains(translate(., 'WARRANTY', 'warranty'), 'warranty')]]"
    );
    foreach ($nodes as $n) {
        $t = node_text($n);
        if ($t !== '') {
            return (string)preg_replace('/[:：]\s*$/u', '', $t);
        }
    }
    return null;
}

/**
 * Build the breadcrumb trail, preferring JSON-LD over the DOM.
 *
 * JSON-LD: every BreadcrumbList block contributes its itemListElement
 * entries. An entry's `item` may be an object (`@id`/`url`/`name`) or a plain
 * URL string; both forms are understood. `@type` may be a string or a list.
 * DOM fallback: all links inside elements whose class contains "breadcrumb".
 *
 * @param DOMXPath $xp      XPath bound to the parsed page.
 * @param array    $jsonlds Decoded JSON-LD objects of the page.
 * @return array List of ['name' => string, 'url' => ?string].
 */
function extract_breadcrumbs(DOMXPath $xp, array $jsonlds): array {
    $out = [];
    foreach ($jsonlds as $blk) {
        if (!is_array($blk)) continue;
        $type  = $blk['@type'] ?? '';
        $types = is_array($type) ? $type : [$type];
        if (!in_array('BreadcrumbList', $types, true)) continue;

        $elements = $blk['itemListElement'] ?? null;
        if (!is_array($elements)) continue;

        foreach ($elements as $it) {
            if (!is_array($it)) continue;
            $item = $it['item'] ?? null;

            $name = $it['name'] ?? (is_array($item) ? ($item['name'] ?? null) : null);
            if (is_array($item)) {
                $url = $item['@id'] ?? $item['url'] ?? null;
            } elseif (is_string($item) && $item !== '') {
                $url = $item; // "item": "https://..." form
            } else {
                $url = null;
            }
            if ($name) $out[] = ['name'=>$name, 'url'=>$url];
        }
    }
    if ($out) return $out;

    foreach ($xp->query("//*[contains(@class,'breadcrumb')]//a") as $a) {
        $out[] = ['name'=>node_text($a), 'url'=>get_attr($a, 'href')];
    }
    return $out;
}

/** MAIN: assemble the product record from all extractors and print it as JSON. */
$meta    = extract_meta($xpath);
$jsonlds = extract_jsonld_products($xpath);

// Base for absolute URLs: a <base href> overrides the requested URL. The href
// may itself be relative, so it is resolved against the requested URL first.
$baseForAbs = $url;
$baseNode = $xpath->query('//base')->item(0);
if ($baseNode && ($bh = get_attr($baseNode,'href'))) $baseForAbs = abs_url($url, $bh);

// JSON-LD `image` may be a URL string, an ImageObject ({url|contentUrl: ...})
// or a list mixing both. Flatten any of these into a list of URL strings.
$jsonld_image_urls = static function ($image) use (&$jsonld_image_urls): array {
    if (is_string($image)) return $image !== '' ? [$image] : [];
    if (!is_array($image)) return [];
    foreach (['url', 'contentUrl'] as $key) {
        if (isset($image[$key]) && is_string($image[$key]) && $image[$key] !== '') return [$image[$key]];
    }
    if (isset($image['@type'])) return []; // an object without a usable URL
    $urls = [];
    foreach ($image as $entry) {
        foreach ($jsonld_image_urls($entry) as $u) $urls[] = $u;
    }
    return $urls;
};

// Result skeleton: every key is always present in the output.
$product = [
    'ok'          => true,
    'source_url'  => $url,
    'fetched_at'  => gmdate('c'),
    'title'       => $meta['title'] ?? null,
    'name'        => null,
    'description' => null,
    'brand'       => null,
    'sku'         => null,
    'mpn'         => null,
    'gtin'        => null,
    'categories'  => [],
    'breadcrumbs' => [],
    'images'      => [],
    'price'       => ['value'=>null,'currency'=>null,'raw'=>null,'old_value'=>null,'unit_hint'=>null,'value_irr'=>null],
    'offers'      => [],
    'availability'=> null,
    'seller'      => null,
    'warranty'    => null,
    'rating'      => ['value'=>null,'count'=>null],
    'variants'    => [],
    'specs'       => [],
    'raw'         => ['jsonld'=>[]],
];

// Merge every JSON-LD Product block; for scalar fields the first block that
// provides a value wins.
foreach ($jsonlds as $blk) {
    if (isset($blk['@type']) && (stripos(is_array($blk['@type']) ? implode(',',$blk['@type']) : $blk['@type'], 'Product') !== false)) {
        $p = $blk;
        $product['raw']['jsonld'][] = $p;
        $product['name']        = $product['name'] ?? ($p['name'] ?? null);
        $product['description'] = $product['description'] ?? ($p['description'] ?? null);
        // brand is either a plain string or a Brand/Organization object.
        $brand = $p['brand'] ?? null;
        $product['brand']       = $product['brand'] ?? (is_array($brand) ? ($brand['name'] ?? null) : $brand);
        $product['sku']         = $product['sku'] ?? ($p['sku'] ?? null);
        $product['mpn']         = $product['mpn'] ?? ($p['mpn'] ?? null);
        $product['gtin']        = $product['gtin'] ?? ($p['gtin'] ?? ($p['gtin13'] ?? ($p['gtin14'] ?? null)));

        if (!empty($p['image'])) {
            foreach ($jsonld_image_urls($p['image']) as $im) {
                $product['images'][] = abs_url($baseForAbs, $im);
            }
        }

        if (!empty($p['offers'])) {
            // A single Offer object is wrapped so both shapes are handled alike.
            $offers = isset($p['offers'][0]) ? $p['offers'] : [$p['offers']];
            foreach ($offers as $of) {
                if (!is_array($of)) continue;
                // AggregateOffer carries lowPrice instead of price.
                $rawPrice = $of['price'] ?? ($of['lowPrice'] ?? null);
                if (is_int($rawPrice) || is_float($rawPrice)) {
                    $offerPrice = (float)$rawPrice;
                } elseif (is_string($rawPrice)) {
                    // Strings such as "1,200,000" are not numeric for a plain cast.
                    $offerPrice = is_numeric($rawPrice) ? (float)$rawPrice : to_number($rawPrice);
                } else {
                    $offerPrice = null;
                }
                $product['offers'][] = [
                    'price'       => $offerPrice,
                    'currency'    => $of['priceCurrency'] ?? null,
                    'availability'=> $of['availability'] ?? null,
                    'url'         => $of['url'] ?? null,
                    'seller'      => is_array($of['seller'] ?? null) ? ($of['seller']['name'] ?? null) : ($of['seller'] ?? null),
                    'valid_until' => $of['priceValidUntil'] ?? null,
                ];
            }
        }

        if (!empty($p['aggregateRating']) && is_array($p['aggregateRating'])) {
            $ar = $p['aggregateRating'];
            $product['rating']['value'] = isset($ar['ratingValue']) ? (float)$ar['ratingValue'] : $product['rating']['value'];
            // Sites publish either reviewCount or ratingCount.
            $count = $ar['reviewCount'] ?? ($ar['ratingCount'] ?? null);
            $product['rating']['count'] = $count !== null ? (int)$count : $product['rating']['count'];
        }
    }
}

// Breadcrumbs & categories
$product['breadcrumbs'] = extract_breadcrumbs($xpath, $jsonlds);
if ($product['breadcrumbs']) {
    $product['categories'] = array_values(array_filter(array_map(function($b){ return $b['name'] ?? null; }, $product['breadcrumbs'])));
}

// Description fallback
if (!$product['description'] && !empty($meta['og:description'])) $product['description'] = $meta['og:description'];

// Images fallback + OG merge
if (empty($product['images'])) {
    $product['images'] = collect_images($xpath, $baseForAbs);
} else {
    if (!empty($meta['og:image'])) {
        $og = abs_url($baseForAbs, $meta['og:image']);
        if (!in_array($og, $product['images'], true)) $product['images'][] = $og;
    }
}
// Case-insensitive de-duplication, keeping the first occurrence.
$product['images'] = (function(array $imgs){
    $seen=[]; $out=[];
    foreach ($imgs as $u){ $k=mb_strtolower($u); if(isset($seen[$k])) continue; $seen[$k]=true; $out[]=$u; }
    return $out;
})($product['images']);

// Price resolve & normalize
$gprices       = guess_prices($xpath);
$product['price'] = normalize_price($product, $meta, $gprices);
if (!$product['availability'] && !empty($product['offers'][0]['availability'])) {
    $product['availability'] = $product['offers'][0]['availability'];
}
// Reduce schema.org URLs such as "https://schema.org/InStock" to the bare token.
if (is_string($product['availability']) && $product['availability'] !== '') {
    if (stripos($product['availability'], 'InStock') !== false) {
        $product['availability'] = 'InStock';
    } elseif (stripos($product['availability'], 'OutOfStock') !== false) {
        $product['availability'] = 'OutOfStock';
    }
}
if (!$product['seller'] && !empty($product['offers'][0]['seller'])) {
    $product['seller'] = $product['offers'][0]['seller'];
}

// Name fallback: first <h1>, then og:title
if (!$product['name']) {
    $h1 = $xpath->query('//h1')->item(0);
    if ($h1) $product['name'] = node_text($h1);
}
if (!$product['name'] && !empty($meta['og:title'])) $product['name'] = $meta['og:title'];

// Specs / Variants / Warranty
$specs_extracted = extract_specs($xpath, $debug);
if ($debug && is_array($specs_extracted) && isset($specs_extracted['_trace'])) {
    // In debug mode extract_specs wraps its result together with trace counters.
    $product['specs'] = $specs_extracted['data'];
    $product['_debug']['specs_trace'] = $specs_extracted['_trace'];
} else {
    $product['specs'] = $specs_extracted;
}
$product['variants'] = extract_variants($xpath);
$product['warranty'] = extract_warranty($xpath);

// Output. JSON_PARTIAL_OUTPUT_ON_ERROR replaces values that cannot be encoded
// (e.g. invalid UTF-8 scraped from the page) with null instead of making
// json_encode() fail and the response come out empty.
echo json_encode($product, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_PARTIAL_OUTPUT_ON_ERROR);
