<?php
/**
 * EXO.ir Single Product Scraper (PHP 7.x)
 * Input : ?url=<product-url>
 * Output: JSON with name, price, images, features, description, etc.
 */

declare(strict_types=1);

// Every response of this endpoint is UTF-8 JSON and may be read from any origin.
foreach ([
    'Content-Type: application/json; charset=utf-8',
    'Access-Control-Allow-Origin: *',
] as $headerLine) {
    header($headerLine);
}

// Default timezone for any date/time function called during the request.
date_default_timezone_set('Asia/Tehran');

/**
 * Send a JSON response with the given HTTP status and terminate the script.
 *
 * The payload is encoded pretty-printed, with Unicode characters and slashes
 * left unescaped. If encoding fails (e.g. malformed UTF-8 in scraped text),
 * the encode is retried with JSON_PARTIAL_OUTPUT_ON_ERROR so the client still
 * receives a JSON document instead of an empty body.
 *
 * @param array $arr  Payload to serialise as the response body.
 * @param int   $code HTTP status code to send (defaults to 200).
 * @return void This function never returns; it ends the script with exit.
 */
function respond(array $arr, int $code = 200): void {
    $flags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT;

    $json = json_encode($arr, $flags);
    if ($json === false) {
        // Unencodable values are substituted rather than failing the whole document.
        $json = json_encode($arr, $flags | JSON_PARTIAL_OUTPUT_ON_ERROR);
    }
    if ($json === false) {
        // Last resort: report the encoder error itself as a server error.
        $code = 500;
        $json = '{"error":"json_encode failed: ' . addslashes(json_last_error_msg()) . '"}';
    }

    http_response_code($code);
    echo $json;
    exit;
}

/**
 * Rebuild a URL with every path segment percent-encoded exactly once.
 *
 * Each path segment is decoded and then re-encoded with rawurlencode(), so raw
 * non-ASCII characters or spaces become percent-escapes while already-encoded
 * segments are not double-encoded. Decoding uses rawurldecode() because a "+"
 * inside a path is a literal plus sign, not an encoded space. Scheme, userinfo,
 * host, port, query and fragment are copied through unchanged.
 *
 * @param string $url URL to normalise.
 * @return string The rebuilt URL, or $url unchanged when parse_url() cannot parse it.
 */
function normalizeUrl(string $url): string {
    $parts = parse_url($url);
    if (!$parts) return $url;

    $scheme = isset($parts['scheme']) ? $parts['scheme'].'://' : '';

    // Keep credentials embedded in the URL instead of silently dropping them.
    $auth = '';
    if (isset($parts['user'])) {
        $auth = $parts['user'] . (isset($parts['pass']) ? ':'.$parts['pass'] : '') . '@';
    }

    $host = $parts['host'] ?? '';
    $port = isset($parts['port']) ? ':'.$parts['port'] : '';

    $path = '';
    if (isset($parts['path'])) {
        $segments = [];
        // Split first, then decode: an encoded "%2F" stays inside its own segment.
        foreach (explode('/', $parts['path']) as $segment) {
            $segments[] = rawurlencode(rawurldecode($segment));
        }
        $path = implode('/', $segments);
    }

    $query = isset($parts['query']) ? '?'.$parts['query'] : '';
    $frag  = isset($parts['fragment']) ? '#'.$parts['fragment'] : '';

    return $scheme.$auth.$host.$port.$path.$query.$frag;
}

/**
 * Download a page over HTTP(S) and return its body with cURL's transfer info.
 *
 * On a transport failure or an HTTP status >= 400 this function does not
 * return: it sends a 502 JSON error through respond(), which ends the script.
 *
 * @param string $url Absolute URL to fetch.
 * @return array Two-element list: [string $body, array $info], where $info is
 *               the curl_getinfo() result for the transfer.
 */
function curl_get(string $url): array {
    $ch = curl_init();
    if ($ch === false) {
        respond([
            'error'     => 'خطا در دریافت صفحه محصول.',
            'http_code' => null,
            'curl_err'  => 'curl_init() failed',
            'url'       => $url
        ], 502);
    }

    // The URL comes from the request, so only plain web protocols are allowed,
    // both for the initial request and for any redirect that is followed.
    $webOnly = CURLPROTO_HTTP | CURLPROTO_HTTPS;

    curl_setopt_array($ch, [
        CURLOPT_URL             => $url,
        CURLOPT_RETURNTRANSFER  => true,
        CURLOPT_FOLLOWLOCATION  => true,
        CURLOPT_MAXREDIRS       => 5,
        CURLOPT_PROTOCOLS       => $webOnly,
        CURLOPT_REDIR_PROTOCOLS => $webOnly,
        CURLOPT_CONNECTTIMEOUT  => 15,
        CURLOPT_TIMEOUT         => 25,
        // Empty string: advertise every content encoding this cURL build supports.
        CURLOPT_ENCODING        => '',
        // Verify the certificate chain; host-name checking alone gives no protection.
        CURLOPT_SSL_VERIFYPEER  => true,
        CURLOPT_SSL_VERIFYHOST  => 2,
        CURLOPT_HTTPHEADER      => [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            'Accept-Language: fa-IR,fa;q=0.9,en;q=0.5',
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36'
        ],
    ]);

    $body = curl_exec($ch);
    $info = curl_getinfo($ch);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($body === false || ($info['http_code'] ?? 0) >= 400) {
        respond([
            'error'     => 'خطا در دریافت صفحه محصول.',
            'http_code' => $info['http_code'] ?? null,
            'curl_err'  => $err,
            'url'       => $url
        ], 502);
    }
    return [$body, $info];
}

/**
 * Run an XPath query and return its first match.
 *
 * @param DOMXPath $xp XPath evaluator bound to the document.
 * @param string   $q  XPath expression.
 * @return DOMNode|null First matching node, or null when the query fails or matches nothing.
 */
function xp_first(DOMXPath $xp, string $q) {
    $matches = $xp->query($q);
    if (!$matches || !$matches->length) {
        return null;
    }
    return $matches->item(0);
}

/**
 * Return a node's text content as a single clean line.
 *
 * HTML entities still present in the text are decoded, every run of
 * whitespace is collapsed to one space, and the ends are trimmed.
 *
 * @param DOMNode $n Node whose text is wanted.
 * @return string Normalised text.
 */
function text_of(DOMNode $n): string {
    $decoded   = html_entity_decode($n->textContent, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    $collapsed = preg_replace('/\s+/u', ' ', $decoded);
    return trim($collapsed);
}

/**
 * Collect schema.org data from every <script type="application/ld+json"> element.
 *
 * Each script body is JSON-decoded and walked breadth-first, so objects nested
 * inside "@graph" or any other container are found too. The first object typed
 * "Product" is kept as the product; every "BreadcrumbList" contributes its
 * entries to the breadcrumb trail.
 *
 * @param DOMXPath $xp XPath evaluator bound to the parsed page.
 * @return array ['product' => array|null,
 *                'breadcrumbs' => list of ['name' => string, 'url' => string|null]]
 */
function parse_jsonld(DOMXPath $xp): array {
    $out = ['product' => null, 'breadcrumbs' => []];

    $nodes = $xp->query("//script[@type='application/ld+json']");
    if (!$nodes) return $out;

    foreach ($nodes as $node) {
        // Remove a UTF-8 BOM as one 3-byte sequence; a character class would
        // strip only its first byte and leave the JSON undecodable.
        $raw  = preg_replace('/^\xEF\xBB\xBF/', '', trim($node->textContent));
        $data = json_decode((string)$raw, true);
        if (!is_array($data)) continue;

        // Breadth-first walk; $head is the read cursor into the growing queue.
        $queue = [$data];
        for ($head = 0; $head < count($queue); $head++) {
            $cur = $queue[$head];

            // "@type" may be a single name or a list of names.
            $types = isset($cur['@type']) ? (array)$cur['@type'] : [];

            if ($out['product'] === null && in_array('Product', $types, true)) {
                $out['product'] = $cur;
            }

            if (in_array('BreadcrumbList', $types, true)
                && isset($cur['itemListElement']) && is_array($cur['itemListElement'])) {
                foreach ($cur['itemListElement'] as $el) {
                    if (!is_array($el)) continue;

                    // "item" is either a URL string or an object carrying "@id"/"url"
                    // (and possibly the "name" when the list item itself has none).
                    $item = $el['item'] ?? null;
                    $name = $el['name'] ?? (is_array($item) ? ($item['name'] ?? null) : null);
                    if (!is_string($name) && !is_numeric($name)) continue;

                    if (is_array($item)) {
                        $item = $item['@id'] ?? ($item['url'] ?? null);
                    }

                    $out['breadcrumbs'][] = [
                        'name' => (string)$name,
                        'url'  => is_string($item) ? $item : null,
                    ];
                }
            }

            foreach ($cur as $child) {
                if (is_array($child)) $queue[] = $child;
            }
        }
    }
    return $out;
}

/**
 * Decode HTML entities (named and numeric, including both quote styles) into UTF-8 text.
 *
 * @param string $s Text that may contain HTML entities.
 * @return string The decoded text.
 */
function decode_entities_str($s) {
    $flags   = ENT_QUOTES | ENT_HTML5;
    $decoded = html_entity_decode($s, $flags, 'UTF-8');
    return $decoded;
}

// ---------- Input ----------
// ?url[]=... arrives as an array; treat anything that is not a string as missing
// instead of letting trim() throw under strict_types.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';

// FILTER_VALIDATE_URL checks syntax only, so the scheme is checked separately:
// only web URLs may be fetched (no file://, ftp://, ...).
// NOTE(review): any http(s) host is still accepted; consider an allow-list of
// hosts if this endpoint is publicly reachable.
$inputScheme = ($url !== '') ? strtolower((string)parse_url($url, PHP_URL_SCHEME)) : '';

if ($url === ''
    || !filter_var($url, FILTER_VALIDATE_URL)
    || !in_array($inputScheme, ['http', 'https'], true)) {
    respond([
        'error' => 'پارامتر url الزامی و باید یک URL معتبر باشد.',
        'hint'  => 'نمونه: exo_single.php?url=' . rawurlencode('https://exo.ir/product/msi-rtx-5070-12g-shadow-3x-oc'),
        'received_url' => $url ?: null,
    ], 400);
}
$target = normalizeUrl($url);

// ---------- Download & Parse ----------
list($html, $info) = curl_get($target);

// DOMDocument::loadHTML() rejects empty input (warning on PHP 7, ValueError on
// PHP 8), so an empty body is reported as a failed fetch.
if (!is_string($html) || trim($html) === '') {
    respond([
        'error'     => 'خطا در دریافت صفحه محصول.',
        'http_code' => $info['http_code'] ?? null,
        'curl_err'  => 'empty response body',
        'url'       => $target
    ], 502);
}

// Collect HTML parse warnings in libxml's buffer instead of printing them into the JSON output.
libxml_use_internal_errors(true);
$dom = new DOMDocument();
// Turn every non-ASCII character into a numeric entity so the parser reads the
// page as UTF-8 regardless of its declared charset. This replaces
// mb_convert_encoding(..., 'HTML-ENTITIES', ...), which is deprecated since PHP 8.2.
$dom->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
libxml_clear_errors();
$xp = new DOMXPath($dom);

// Canonical URL taken from <link rel="canonical">; empty string when the page has none.
$canonicalAttr = xp_first($xp, "//link[@rel='canonical']/@href");
$canonical = $canonicalAttr ? $canonicalAttr->nodeValue : '';

// Structured data: $jl holds the parsed JSON-LD, $productLD the Product
// object from it (an empty array when no Product was found).
$jl = parse_jsonld($xp);
$productLD = [];
if ($jl['product']) {
    $productLD = $jl['product'];
}

// Name: JSON-LD first, then the first <h1>, then the og:title meta tag.
$name = '';
if (isset($productLD['name'])) {
    $name = (string)$productLD['name'];
}
if ($name === '') {
    $heading = xp_first($xp, "//h1");
    if ($heading) {
        $name = text_of($heading);
    }
}
if ($name === '') {
    $ogTitle = xp_first($xp, "//meta[@property='og:title']/@content");
    if ($ogTitle) {
        $name = (string)$ogTitle->nodeValue;
    }
}

// SKU / Brand: both come from JSON-LD; the brand may be an object with a
// "name" or a plain string.
$sku = (string)($productLD['sku'] ?? '');

$brand   = '';
$ldBrand = $productLD['brand'] ?? null;
if (is_array($ldBrand)) {
    if (isset($ldBrand['name'])) {
        $brand = (string)$ldBrand['name'];
    }
} elseif (is_string($ldBrand)) {
    $brand = $ldBrand;
}

// Fallback: a <small> element containing the manufacturer label followed by ": <brand>".
if ($brand === '') {
    $makerNode = xp_first($xp, "//small[contains(.,'تولیدکننده')]");
    if ($makerNode && preg_match('/تولیدکننده\s*:\s*(.+)$/u', text_of($makerNode), $makerMatch)) {
        $brand = $makerMatch[1];
    }
}

// Price / Availability
// $priceIRR stays null until a source yields a usable number, so each fallback
// below only runs when the previous ones found nothing.
$priceIRR = null;
$currency = null;
$availability = null;

// Persian (U+06F0..U+06F9) and Arabic-Indic (U+0660..U+0669) digits are mapped
// to ASCII first: a (float) cast cannot read them and would silently give 0.
$toAsciiDigits = static function (string $s): string {
    return strtr($s, [
        "\u{06F0}" => '0', "\u{06F1}" => '1', "\u{06F2}" => '2', "\u{06F3}" => '3', "\u{06F4}" => '4',
        "\u{06F5}" => '5', "\u{06F6}" => '6', "\u{06F7}" => '7', "\u{06F8}" => '8', "\u{06F9}" => '9',
        "\u{0660}" => '0', "\u{0661}" => '1', "\u{0662}" => '2', "\u{0663}" => '3', "\u{0664}" => '4',
        "\u{0665}" => '5', "\u{0666}" => '6', "\u{0667}" => '7', "\u{0668}" => '8', "\u{0669}" => '9',
    ]);
};

// 1) JSON-LD offer. "offers" may be one Offer object or a list of them; for a
//    list the first entry is used. The price is taken as-is (treated as IRR).
if (isset($productLD['offers']) && is_array($productLD['offers'])) {
    $offers = $productLD['offers'];
    if (isset($offers[0]) && is_array($offers[0])) {
        $offers = $offers[0];
    }

    $rawPrice = $offers['price'] ?? null;
    if (is_int($rawPrice) || is_float($rawPrice)) {
        $priceIRR = (float)$rawPrice;
    } elseif (is_string($rawPrice)) {
        // Drop thousands separators and currency text before casting.
        $cleanPrice = preg_replace('/[^0-9.]/', '', $toAsciiDigits($rawPrice));
        if (is_numeric($cleanPrice)) $priceIRR = (float)$cleanPrice;
    }

    if (isset($offers['priceCurrency'])) $currency = (string)$offers['priceCurrency'];
    if (isset($offers['availability']))  $availability = (string)$offers['availability'];
}

// 2) Hidden form field holding the price in Toman (x10 gives Rial). An empty
//    or non-numeric value is skipped so the next fallback can still run.
if ($priceIRR === null) {
    if ($pv = xp_first($xp, "//input[@name='price_value']/@value")) {
        $cleanToman = preg_replace('/[^0-9.]/', '', $toAsciiDigits((string)$pv->nodeValue));
        if (is_numeric($cleanToman)) $priceIRR = (float)$cleanToman * 10.0;
    }
}

// 3) Visible price element: keep only the digits of its text (Toman, x10 gives Rial).
if ($priceIRR === null) {
    if ($pnode = xp_first($xp, "//*[@id='price']")) {
        $digits = preg_replace('/[^0-9]/', '', $toAsciiDigits(text_of($pnode)));
        if ($digits !== null && $digits !== '') $priceIRR = (float)$digits * 10.0;
    }
}

if ($currency === null) $currency = 'IRR';
$priceToman = ($priceIRR !== null) ? (int)round($priceIRR / 10.0) : null;

// Images
$images = [];

// JSON-LD "image" may be a URL string, a list of URLs, a single ImageObject,
// or a list of ImageObjects. Normalise all of these to a list first.
$ldImages = $productLD['image'] ?? [];
if (is_string($ldImages)) {
    $ldImages = [$ldImages];
} elseif (is_array($ldImages) && $ldImages !== []
    && array_keys($ldImages) !== range(0, count($ldImages) - 1)) {
    // String keys: this is one ImageObject, not a list.
    $ldImages = [$ldImages];
}
if (is_array($ldImages)) {
    foreach ($ldImages as $img) {
        if (is_array($img)) {
            $img = $img['url'] ?? ($img['contentUrl'] ?? null);
        }
        if (is_string($img)) $images[] = $img;
    }
}

// Image URLs found in the page markup: gallery thumbnails (lazy-loaded via
// data-src), the zoom preview and the gallery modal.
$imageQueries = [
    "//*[@id='product-image-gallery']//img[@data-src]/@data-src",
    "//*[@id='zoom-preview']//img/@src",
    "//*[@id='product-image-gallery-modal']//img/@src",
];
foreach ($imageQueries as $imageQuery) {
    $attrs = $xp->query($imageQuery);
    if (!$attrs) continue;
    foreach ($attrs as $attr) {
        $images[] = $attr->nodeValue;
    }
}

// Rewrite thumbnail file names to the 1500x1500 variant, drop blanks, and
// de-duplicate afterwards so a thumbnail and its full-size twin collapse into one.
$fullSize = [];
foreach ($images as $src) {
    $src = trim((string)$src);
    if ($src === '') continue;
    $upgraded = preg_replace('/-(?:74x74|300x300)\.webp$/', '-1500x1500.webp', $src);
    $fullSize[] = $upgraded ?? $src;
}
$images = array_values(array_unique($fullSize));

// Description: inner HTML of the description tab, else the meta description,
// else the JSON-LD description.
$description_html = '';

$descTab = xp_first($xp, "//*[@id='tab-description']");
if ($descTab) {
    // Serialise each child separately so the wrapper element itself is left out.
    $pieces = [];
    foreach ($descTab->childNodes as $child) {
        $pieces[] = $dom->saveHTML($child);
    }
    $description_html = trim(implode('', $pieces));
}

if ($description_html === '') {
    $metaDesc = xp_first($xp, "//meta[@name='description']/@content");
    if ($metaDesc) {
        $description_html = (string)$metaDesc->nodeValue;
    }
}

if ($description_html === '' && isset($productLD['description'])) {
    $description_html = (string)$productLD['description'];
}

// Features: a label => value map built from two page sections. An entry from
// the second section overwrites one with the same label from the first.
$features = [];

// Specification tab: table rows that contain exactly two <td> cells.
$specRows = $xp->query("//*[@id='tab-specification']//table//tr");
if ($specRows) {
    foreach ($specRows as $specRow) {
        $cells = $specRow->getElementsByTagName('td');
        if ($cells->length !== 2) {
            continue;
        }
        $label = text_of($cells->item(0));
        $value = text_of($cells->item(1));
        if ($label === '' || $value === '') {
            continue;
        }
        $features[$label] = $value;
    }
}

// Key-properties box: each d-flex row carries "<span>label:</span><span>value</span>".
$keyRows = $xp->query("//div[h6[contains(.,'خصوصیات کلیدی')]]/div[contains(@class,'d-flex')]");
if ($keyRows) {
    foreach ($keyRows as $keyRow) {
        $spans = $keyRow->getElementsByTagName('span');
        if ($spans->length < 2) {
            continue;
        }
        // Strip the trailing colon (and surrounding whitespace) from the label.
        $label = preg_replace('/\s*:\s*$/u', '', text_of($spans->item(0)));
        $value = text_of($spans->item(1));
        if ($label === '' || $value === '') {
            continue;
        }
        $features[$label] = $value;
    }
}

// -------- Decode HTML entities --------
// Scalar fields: decode only when non-empty, otherwise keep the value as it is.
// NOTE(review): $description_html may be an HTML fragment; decoding it also
// turns escaped text such as "&lt;" into live markup. Confirm this is intended.
$name             = empty($name) ? $name : decode_entities_str($name);
$brand            = empty($brand) ? $brand : decode_entities_str($brand);
$sku              = empty($sku) ? $sku : decode_entities_str($sku);
$canonical        = empty($canonical) ? $canonical : decode_entities_str($canonical);
$description_html = empty($description_html) ? $description_html : decode_entities_str($description_html);

// Breadcrumbs: decode name and url of every entry in place.
if (!empty($jl['breadcrumbs'])) {
    foreach ($jl['breadcrumbs'] as $idx => $crumb) {
        if (!empty($crumb['name'])) {
            $jl['breadcrumbs'][$idx]['name'] = decode_entities_str($crumb['name']);
        }
        if (!empty($crumb['url'])) {
            $jl['breadcrumbs'][$idx]['url'] = decode_entities_str($crumb['url']);
        }
    }
}

// Features: both label and value are decoded, so the map is rebuilt.
if (!empty($features)) {
    $cleanFeatures = [];
    foreach ($features as $rawLabel => $rawValue) {
        $cleanLabel = decode_entities_str((string)$rawLabel);
        $cleanFeatures[$cleanLabel] = decode_entities_str((string)$rawValue);
    }
    $features = $cleanFeatures;
}

// -------- Output --------
// Price figures stay null when no price could be determined.
$valueIrr = null;
if ($priceIRR !== null) {
    $valueIrr = (int)$priceIRR;
}
$displayToman = null;
if ($priceToman !== null) {
    $displayToman = number_format($priceToman, 0, '.', ',') . ' تومان';
}

$priceInfo = [
    'value_irr'     => $valueIrr,
    'display_toman' => $displayToman,
    'currency'      => $currency,
];

// Empty scalar fields are reported as null; respond() sends the JSON and exits.
respond([
    'input_url'    => $url,
    'canonical'    => $canonical ?: null,
    'name'         => $name ?: null,
    'sku'          => $sku ?: null,
    'brand'        => $brand ?: null,
    'availability' => $availability ?: null,
    'price'        => $priceInfo,
    'images'       => $images,
    'breadcrumbs'  => $jl['breadcrumbs'],
    'features'     => $features,
    'description'  => $description_html,
]);