<?php
declare(strict_types=1);

/**
 * Korians Product Scraper API (WooCommerce-like) — Persianized & Hardened
 * PHP 7.4+
 *
 * Params:
 *   ?url=PRODUCT_URL              (required)
 *   &format=json|clean            (optional; default: json)
 *   &unit=rial|toman              (optional; default: rial) -> toman divides prices by 10
 *   &debug=1                      (optional) -> include raw og/jsonld for troubleshooting
 */

// Never print PHP diagnostics into the response: a stray notice or warning would be
// written in front of (or inside) the JSON body and make it unparseable for clients.
// Everything is still reported at full verbosity, but to the server error log.
ini_set('display_errors', '0');
ini_set('display_startup_errors', '0');
ini_set('log_errors', '1');
error_reporting(E_ALL);

if (!headers_sent()) {
    header('Content-Type: application/json; charset=utf-8');
    header('Access-Control-Allow-Origin: *');
    header('Access-Control-Allow-Methods: GET, OPTIONS');
    header('Access-Control-Allow-Headers: *');
}

// CORS preflight: the headers above advertise OPTIONS, so answer it here with an empty
// success response. Without this the request would fall through to the URL validation
// and get a 400, which browsers treat as a failed preflight.
if (($_SERVER['REQUEST_METHOD'] ?? '') === 'OPTIONS') {
    http_response_code(204);
    exit;
}

/**
 * Send a JSON response and terminate the script.
 *
 * @param mixed $data   Payload to encode.
 * @param int   $status HTTP status code (only applied while headers can still be sent).
 */
function respond($data, int $status = 200): void {
    if (!headers_sent()) {
        http_response_code($status);
        header('Content-Type: application/json; charset=utf-8');
    }

    // Scraped data is untrusted: invalid UTF-8 bytes or INF/NAN floats make a plain
    // json_encode() return false, which would be echoed as an empty body.
    // SUBSTITUTE replaces bad bytes with U+FFFD, PARTIAL_OUTPUT replaces unencodable values.
    $flags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT
           | JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR;
    $body = json_encode($data, $flags);

    if ($body === false) {
        // Last resort so the client always receives a well-formed JSON document.
        if (!headers_sent()) {
            http_response_code(500);
        }
        $body = '{"success":false,"error":"Failed to encode response as JSON"}';
    }

    echo $body;
    exit;
}

/* ---------- Environment checks ---------- */
// Only extensions the scraper cannot run without are treated as fatal.
// cURL is intentionally not listed: fetch_html() falls back to file_get_contents()
// when curl_init() does not exist, so a missing cURL must not abort the request.
$missing = [];
if (!function_exists('mb_internal_encoding')) $missing[] = 'mbstring';
if (!extension_loaded('libxml')) $missing[] = 'libxml';
if (!class_exists('DOMDocument')) $missing[] = 'dom';
if ($missing) {
    respond([
        'success' => false,
        'error'   => 'Missing PHP extensions',
        'details' => $missing
    ], 500);
}
mb_internal_encoding('UTF-8');

/* ---------- Helpers ---------- */
/**
 * Normalise a piece of scraped text: decode HTML entities, collapse every run of
 * whitespace into a single space and strip leading/trailing whitespace.
 * A null input is treated as an empty string.
 */
function text_clean(?string $t): string {
    $decoded   = html_entity_decode($t ?? '', ENT_QUOTES | ENT_HTML5, 'UTF-8');
    $collapsed = preg_replace('/\s+/u', ' ', $decoded);

    // preg_replace() yields null on failure; the cast turns that into ''.
    return trim((string)$collapsed);
}
/** Cleaned text content of a DOM node, or '' when no node was found. */
function node_text(?DOMNode $n): string {
    if ($n === null) {
        return '';
    }
    return text_clean($n->textContent);
}
/**
 * Append a trimmed value to $arr unless it is empty or already present
 * (strict comparison). The array is modified in place.
 */
function unique_push(array &$arr, ?string $val) {
    $candidate = trim($val ?? '');
    if ($candidate === '') {
        return;
    }
    if (in_array($candidate, $arr, true)) {
        return;
    }
    $arr[] = $candidate;
}
/**
 * Report whether a value is an array whose keys are exactly 0, 1, 2, … in order.
 * Non-arrays are never lists; the empty array is.
 */
function array_is_list_compat($arr) {
    if (!is_array($arr)) {
        return false;
    }
    $expected = 0;
    foreach ($arr as $key => $_unused) {
        if ($key !== $expected) {
            return false;
        }
        $expected++;
    }
    return true;
}

/* ---------- Fetch HTML ---------- */
/**
 * Fetch a URL through PHP's HTTP stream wrapper.
 *
 * Used when cURL is unavailable or when the cURL attempt failed. The same timeout and
 * request headers are applied in every case.
 *
 * @return string|false Response body, or false when the request failed.
 */
function fetch_html_via_stream(string $url, int $timeout) {
    $ctx = stream_context_create([
        'http' => [
            'method'  => 'GET',
            'timeout' => $timeout,
            'header'  => "User-Agent: ProductAPI/1.2\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"
        ]
    ]);
    return @file_get_contents($url, false, $ctx);
}

/**
 * Download the HTML of a product page.
 *
 * cURL is tried first; if it errors or returns a status outside 200–399 the stream
 * wrapper is tried once before giving up. On failure the script ends through respond()
 * with HTTP 502, so this function only ever returns on success.
 *
 * @param string $url     Absolute http(s) URL.
 * @param int    $timeout Connect and total timeout in seconds.
 * @return string Raw response body.
 */
function fetch_html(string $url, int $timeout = 25): string {
    $ch = function_exists('curl_init') ? curl_init($url) : false;

    if ($ch === false) {
        // No cURL (or the handle could not be created): stream wrapper only.
        $html = fetch_html_via_stream($url, $timeout);
        if ($html === false) respond(['success'=>false,'error'=>'Failed to fetch URL via file_get_contents'], 502);
        return $html;
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 5,
        CURLOPT_CONNECTTIMEOUT => $timeout,
        CURLOPT_TIMEOUT        => $timeout,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        // The URL comes from the client: never let it (or a redirect) leave http/https.
        CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        // Empty string = advertise and transparently decode every encoding libcurl supports.
        CURLOPT_ENCODING       => '',
        CURLOPT_USERAGENT      => 'Mozilla/5.0 (compatible; ProductAPI/1.2; +https://example.com)',
        CURLOPT_HTTPHEADER     => ['Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8']
    ]);

    $html = curl_exec($ch);
    $err  = $html === false ? curl_error($ch) : '';
    $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($html !== false && $code >= 200 && $code < 400) {
        return (string)$html;
    }

    // Second chance through the stream wrapper before reporting the cURL failure.
    $fallback = fetch_html_via_stream($url, $timeout);
    if ($fallback !== false) return $fallback;

    if ($html === false) {
        respond(['success'=>false,'error'=>"cURL error: $err",'http_status'=>$code], 502);
    }
    respond(['success'=>false,'error'=>"HTTP status $code from target URL"], 502);
}

/**
 * Parse an HTML document and return an XPath evaluator for it.
 *
 * libxml picks the document encoding from a <meta> declaration. When the page carries
 * none, a UTF-8 declaration is prepended so the text is not read as Latin-1. Pages that
 * already declare a charset (either <meta charset=…> or the http-equiv form) are left
 * untouched so a non-UTF-8 declaration is honoured instead of being overridden.
 *
 * Parse errors are collected silently; if loading fails outright the script ends
 * through respond() with HTTP 500.
 */
function load_dom(string $html): DOMXPath {
    libxml_use_internal_errors(true);
    $dom = new DOMDocument();

    // Matches both <meta charset="x"> and <meta http-equiv=… content="…; charset=x">.
    $declaresCharset = preg_match('~<meta\b[^>]*\bcharset\s*=~i', $html) === 1;
    if (!$declaresCharset) {
        $html = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'.$html;
    }

    $ok = @$dom->loadHTML($html);
    if (!$ok) {
        $errs = libxml_get_errors();
        libxml_clear_errors();
        respond(['success'=>false,'error'=>'DOM loadHTML failed','details'=>$errs], 500);
    }
    libxml_clear_errors();
    return new DOMXPath($dom);
}

/* ---------- Parsers ---------- */
/**
 * Collect Open Graph, product and Twitter <meta> tags.
 *
 * @return array Map of tag name (the "property" attribute, else "name") to the list of
 *               its "content" values in document order.
 */
function get_meta_og(DOMXPath $xp): array {
    $collected = [];
    $nodes = $xp->query('//meta[starts-with(@property,"og:") or starts-with(@property,"product:") or @name="twitter:card" or starts-with(@name,"twitter:")]');

    foreach ($nodes as $el) {
        /** @var DOMElement $el */
        $key = $el->getAttribute('property');
        if (!$key) {
            $key = $el->getAttribute('name');
        }
        if (!$key) {
            continue;
        }
        $collected[$key][] = $el->getAttribute('content');
    }
    return $collected;
}
/**
 * Decode every <script type="application/ld+json"> block in the document.
 *
 * Each block is first decoded exactly as written. Only when that fails is a lenient
 * repair attempted (trailing commas removed, raw control characters replaced by
 * spaces). Repairing unconditionally would rewrite valid documents whose string values
 * happen to contain ", ]" or ", }".
 *
 * @return array List of decoded, non-empty JSON-LD documents; undecodable blocks are skipped.
 */
function decode_jsonld_scripts(DOMXPath $xp): array {
    $all = [];
    foreach ($xp->query('//script[@type="application/ld+json"]') as $script) {
        $raw = trim($script->textContent);
        if ($raw === '') continue;

        $json = json_decode($raw, true);
        if (json_last_error() !== JSON_ERROR_NONE) {
            $fixed = preg_replace('/,\s*([\]}])/u', '$1', $raw);
            if (is_string($fixed)) {
                $fixed = preg_replace('/[\x00-\x1F\x7F]/u', ' ', $fixed);
            }
            // preg_replace() returns null on failure; json_decode() must not receive it.
            if (!is_string($fixed)) continue;

            $json = json_decode($fixed, true);
            if (json_last_error() !== JSON_ERROR_NONE) continue;
        }

        if ($json) $all[] = $json;
    }
    return $all;
}
/**
 * Find every node typed "Product" (case-insensitive) anywhere inside the decoded
 * JSON-LD documents.
 *
 * The documents are walked depth-first in document order using an explicit stack;
 * a matching node is reported before any product nested inside it.
 *
 * @param array $jsonlds Decoded JSON-LD documents.
 * @return array List of matching nodes.
 */
function find_jsonld_products(array $jsonlds): array {
    $matches = [];
    // Reversed so that array_pop() hands the nodes back in their original order.
    $pending = array_reverse(array_values($jsonlds));

    while ($pending) {
        $node = array_pop($pending);
        if (!is_array($node)) {
            continue;
        }

        if (isset($node['@type'])) {
            $declared = is_array($node['@type']) ? $node['@type'] : [$node['@type']];
            $lowered  = array_map('strtolower', $declared);
            if (in_array('product', $lowered, true)) {
                $matches[] = $node;
            }
        }

        // Queue array children last-to-first so the first child is visited next.
        foreach (array_reverse($node) as $child) {
            if (is_array($child)) {
                $pending[] = $child;
            }
        }
    }
    return $matches;
}
/**
 * Reduce a decoded JSON-LD value to a string.
 *
 * Strings and numbers are returned as strings. For an array the "@value" entry, the
 * "name" entry or the first list element is used (in that order). Anything else gives null.
 *
 * @param mixed $value
 */
function schema_value_to_string($value): ?string {
    if (is_string($value)) return $value;
    if (is_int($value) || is_float($value)) return (string)$value;
    if (is_array($value)) {
        foreach (['@value', 'name', 0] as $key) {
            if (isset($value[$key])) return schema_value_to_string($value[$key]);
        }
    }
    return null;
}

/**
 * Map a schema.org Product node (decoded JSON-LD) onto the flat structure used by the API.
 *
 * JSON-LD is loosely typed in practice, so every field is coerced before it reaches a
 * string-typed helper: WooCommerce, for example, emits the numeric product id as "sku"
 * when no SKU is set, which would otherwise raise a TypeError under strict_types.
 *
 * @param array $p Product node.
 * @return array Keys: name, description, sku, brand (string|null), images (list of URLs),
 *               price (float, non-numeric string, or null), currency, availability,
 *               priceValidUntil (string|null), attributes (name => list of values).
 */
function extract_from_schema_product(array $p): array {
    // Coerce to string, clean, and map "nothing usable" to null.
    $clean = static function ($value): ?string {
        $text = schema_value_to_string($value);
        if ($text === null) return null;
        $text = text_clean($text);
        return $text !== '' ? $text : null;
    };

    /* ----- images ----- */
    $images = [];
    $rawImages = $p['image'] ?? [];
    // A single URL or a single ImageObject (associative array) is wrapped so that the
    // object's fields are not mistaken for a list of images.
    if (!is_array($rawImages) || !array_is_list_compat($rawImages)) $rawImages = [$rawImages];
    foreach ($rawImages as $im) {
        if (is_array($im)) $im = $im['url'] ?? $im['contentUrl'] ?? $im['@id'] ?? null;
        if (is_string($im)) unique_push($images, $im);
    }

    /* ----- offer ----- */
    $price = $currency = $availability = $validUntil = null;
    $offers = $p['offers'] ?? null;
    if (is_array($offers) && $offers) {
        $offer = array_is_list_compat($offers) ? $offers[0] : $offers;
        if (is_array($offer)) {
            // priceSpecification may be a single object or a list of them.
            $spec = $offer['priceSpecification'] ?? null;
            if (is_array($spec) && array_is_list_compat($spec)) $spec = $spec[0] ?? null;
            if (!is_array($spec)) $spec = [];

            // lowPrice covers AggregateOffer, which has no single "price".
            $price        = $offer['price'] ?? $spec['price'] ?? $offer['lowPrice'] ?? null;
            $currency     = $offer['priceCurrency'] ?? $spec['priceCurrency'] ?? null;
            $availability = $offer['availability'] ?? null;
            if (is_string($availability)) $availability = basename(str_replace('https://schema.org/','',$availability));
            $validUntil   = $offer['priceValidUntil'] ?? null;
        }
    }

    if (is_string($price)) $price = trim($price);
    if (is_numeric($price)) {
        $price = (float)$price;
    } elseif (!is_string($price) || $price === '') {
        // Not a usable price: report null so the caller can fall back to the DOM price.
        $price = null;
    }

    /* ----- additionalProperty -> attributes ----- */
    $attributes = [];
    $props = $p['additionalProperty'] ?? null;
    if (is_array($props) && $props) {
        if (!array_is_list_compat($props)) $props = [$props]; // single PropertyValue object
        foreach ($props as $prop) {
            if (!is_array($prop)) continue;
            $n = $clean($prop['name'] ?? null);
            $v = $clean($prop['value'] ?? ($prop['valueReference']['name'] ?? null));
            if ($n === null || $v === null) continue;

            if (!isset($attributes[$n])) $attributes[$n] = [];
            foreach (preg_split('/\s*,\s*/u', $v) ?: [] as $vv) {
                if ($vv !== '') $attributes[$n][] = $vv;
            }
            $attributes[$n] = array_values(array_unique($attributes[$n]));
        }
    }

    return [
        'name'            => $clean($p['name'] ?? null),
        'description'     => $clean($p['description'] ?? null),
        'sku'             => $clean($p['sku'] ?? null),
        'brand'           => $clean($p['brand'] ?? null),
        'images'          => array_values(array_unique($images)),
        'price'           => $price,
        'currency'        => (is_string($currency) && $currency !== '') ? $currency : null,
        'availability'    => (is_string($availability) && $availability !== '') ? $availability : null,
        'priceValidUntil' => schema_value_to_string($validUntil),
        'attributes'      => $attributes,
    ];
}

/* ---------- DOM fallbacks ---------- */
/**
 * Look for a product description in the page markup.
 *
 * The XPath candidates are tried in order (short description, description tab,
 * theme-specific block, meta description); the first one that yields non-empty
 * cleaned text is returned.
 *
 * @return string|null Description text, or null when no candidate produced any.
 */
function extract_dom_description(DOMXPath $xp): ?string {
    $queries = [
        '//*[contains(@class,"woocommerce-product-details__short-description")]',
        '//*[@id="tab-description"]',
        '//*[@class="product-short-description"]',
        '//meta[@name="description"]/@content',
    ];

    foreach ($queries as $expr) {
        $hit = $xp->query($expr)->item(0);

        if ($hit === null) {
            $raw = '';
        } elseif ($hit instanceof DOMAttr) {
            $raw = $hit->value;
        } else {
            $raw = node_text($hit);
        }

        $cleaned = text_clean($raw);
        if ($cleaned) {
            return $cleaned;
        }
    }
    return null;
}
/**
 * Look for the product name in the page markup.
 *
 * Tries the WooCommerce title element, then og:title, then <title>; the first
 * candidate with non-empty cleaned text wins.
 *
 * @return string|null Product name, or null when nothing was found.
 */
function extract_dom_name(DOMXPath $xp): ?string {
    $queries = [
        '//*[contains(@class,"product_title")]',
        '//meta[@property="og:title"]/@content',
        '//title',
    ];

    foreach ($queries as $expr) {
        $hit = $xp->query($expr)->item(0);

        if ($hit === null) {
            $raw = '';
        } elseif ($hit instanceof DOMAttr) {
            $raw = $hit->value;
        } else {
            $raw = node_text($hit);
        }

        $cleaned = text_clean($raw);
        if ($cleaned) {
            return $cleaned;
        }
    }
    return null;
}
/**
 * Collect product image URLs from the page markup.
 *
 * Sources, in order: the WooCommerce gallery, og:image, and the single-product /
 * featured image elements. For <img> elements the lazy-load attribute (data-src), the
 * full-size attribute (data-large_image) and src are tried in that order, and every
 * srcset candidate is added as well.
 *
 * Inline "data:" placeholders (used by lazy-load plugins) and svg/ico files are
 * dropped, including when the URL carries a query string or fragment.
 *
 * @return array List of unique image URLs, as written in the page (not resolved).
 */
function extract_dom_images(DOMXPath $xp): array {
    $imgs = [];
    $qs = [
        '//*[contains(@class,"woocommerce-product-gallery")]//img',
        '//meta[@property="og:image"]/@content',
        '//img[contains(@class,"attachment-woocommerce_single")]',
        '//img[contains(@class,"wp-post-image")]',
    ];
    foreach ($qs as $q) {
        foreach ($xp->query($q) as $img) {
            if ($img instanceof DOMAttr) {
                unique_push($imgs, $img->value);
                continue;
            }
            if (!$img instanceof DOMElement) continue;

            $src = $img->getAttribute('data-src') ?: $img->getAttribute('data-large_image') ?: $img->getAttribute('src');
            unique_push($imgs, $src);

            $srcset = trim($img->getAttribute('srcset'));
            // A data: URI contains commas itself, so such a srcset cannot be split on
            // "," without producing garbage entries; it is a placeholder anyway.
            if ($srcset !== '' && stripos($srcset, 'data:') === false) {
                foreach (explode(',', $srcset) as $part) {
                    // "url descriptor" pairs may be separated by any whitespace.
                    $tokens = preg_split('/\s+/', trim($part));
                    unique_push($imgs, $tokens[0] ?? '');
                }
            }
        }
    }

    return array_values(array_filter($imgs, static function ($u) {
        if (!$u || stripos($u, 'data:') === 0) return false;
        // Judge the extension on the path only, ignoring ?query and #fragment.
        $path = preg_replace('~[?#].*$~s', '', $u) ?? $u;
        return !preg_match('~\.(svg|ico)$~i', $path);
    }));
}
/**
 * Parse the first number found in a price text.
 *
 * Persian and Arabic-Indic digits are mapped to ASCII first (a plain float cast reads
 * them as 0). Commas and Arabic thousands separators are removed. Several dots in one
 * number ("1.250.000") can only be thousands separators and are removed as well; a
 * single dot is kept as the decimal point.
 *
 * @return float|null The amount, or null when the text holds no number.
 */
function parse_price_text(string $text): ?float {
    $text = strtr($text, [
        '۰'=>'0','۱'=>'1','۲'=>'2','۳'=>'3','۴'=>'4','۵'=>'5','۶'=>'6','۷'=>'7','۸'=>'8','۹'=>'9',
        '٠'=>'0','١'=>'1','٢'=>'2','٣'=>'3','٤'=>'4','٥'=>'5','٦'=>'6','٧'=>'7','٨'=>'8','٩'=>'9',
    ]);
    $text = str_replace(['٬', '،'], '', $text); // Arabic thousands separator / Arabic comma
    $text = str_replace('٫', '.', $text);       // Arabic decimal separator

    if (!preg_match('/[0-9][0-9.,]*/', $text, $m)) return null;

    $num = rtrim(str_replace(',', '', $m[0]), '.');
    if (substr_count($num, '.') > 1) $num = str_replace('.', '', $num);

    return is_numeric($num) ? (float)$num : null;
}

/**
 * Read the product price from the page markup.
 *
 * The product:price:* meta tags are preferred. Otherwise the first price element
 * (inside the product summary if there is one) is parsed; when it shows a sale, the
 * current price inside <ins> is used rather than the struck-through <del> price.
 * A three-letter upper-case code in the price text is taken as the currency.
 *
 * @return array [float|null $amount, string|null $currency]
 */
function extract_dom_price(DOMXPath $xp): array {
    $amount = null; $currency = null;

    $ogAmount = $xp->query('//meta[@property="product:price:amount"]/@content')->item(0);
    $ogCurrency = $xp->query('//meta[@property="product:price:currency"]/@content')->item(0);
    // An empty or non-numeric meta value yields null so the DOM fallback still runs.
    if ($ogAmount) $amount = parse_price_text($ogAmount->value);
    if ($ogCurrency) $currency = $ogCurrency->value;

    if ($amount === null) {
        $node = $xp->query('//*[contains(@class,"summary")]//*[contains(@class,"price")]')->item(0)
              ?: $xp->query('//*[contains(@class,"price")]')->item(0);
        if ($node) {
            // WooCommerce renders a sale as <del>regular</del> <ins>current</ins>.
            $current = $xp->query('.//ins', $node)->item(0) ?: $node;
            $amount = parse_price_text($current->textContent);
            if (preg_match('/\b([A-Z]{3})\b/u', $node->textContent, $m2)) $currency = $m2[1];
        }
    }
    return [$amount, $currency];
}
/**
 * Read product attributes and variations from WooCommerce markup.
 *
 * Attributes come from the specification table (shop_attributes) and from the
 * selectable attribute dropdowns of the variations form. Variations come from the
 * form's data-product_variations JSON.
 *
 * @return array [array $attributes (label => list of values),
 *                array $variations (list of sku/price/regular_price/image/attributes/is_in_stock)]
 */
function extract_attributes_and_variations(DOMXPath $xp): array {
    $attributes = [];

    // Specification table: th holds the name, td a comma-separated list of values
    foreach ($xp->query('//*[contains(@class,"shop_attributes")]//tr') as $tr) {
        $name = node_text($xp->query('.//th', $tr)->item(0));
        $val  = node_text($xp->query('.//td', $tr)->item(0));
        if ($name && $val) {
            $parts = array_map('trim', preg_split('/\s*,\s*/u', $val) ?: []);
            $parts = array_values(array_filter($parts));
            if (!empty($parts)) $attributes[$name] = $parts;
        }
    }

    // Selectable attributes (select[name^="attribute_"]); labelled by the preceding
    // <label> sibling if there is one, otherwise by the select's name
    foreach ($xp->query('//form[contains(@class,"variations_form")]//select[starts-with(@name,"attribute_")]') as $sel) {
        /** @var DOMElement $sel */
        $label = node_text($xp->query('preceding-sibling::label[1]', $sel)->item(0)) ?: $sel->getAttribute('name');
        $vals = [];
        foreach ($xp->query('.//option[normalize-space(@value)!=""]', $sel) as $opt) {
            $vals[] = text_clean($opt->textContent ?: $opt->getAttribute('value'));
        }
        if ($label && $vals) $attributes[$label] = array_values(array_unique($vals));
    }

    // Variations from data-product_variations
    $variations = [];
    foreach ($xp->query('//form[contains(@class,"variations_form")]') as $form) {
        /** @var DOMElement $form */
        $raw = $form->getAttribute('data-product_variations');
        if (!$raw) continue;

        // The DOM has already decoded the attribute's HTML entities, so the value is
        // normally valid JSON as it stands. Decoding entities a second time would turn
        // an escaped &quot; inside a string value into a bare quote and invalidate the
        // whole document, so that is only tried when the direct decode fails.
        $data = json_decode($raw, true);
        if (!is_array($data)) {
            $data = json_decode(html_entity_decode($raw, ENT_QUOTES | ENT_HTML5, 'UTF-8'), true);
        }
        if (!is_array($data)) continue; // e.g. "false" when variations are loaded via AJAX

        foreach ($data as $v) {
            if (!is_array($v)) continue;

            $attrs = [];
            if (!empty($v['attributes']) && is_array($v['attributes'])) {
                foreach ($v['attributes'] as $k=>$val) {
                    // Keys are cast because preg_replace() rejects integers under strict_types.
                    $attrs[preg_replace('~^attribute_~','',(string)$k)] = $val;
                }
            }
            $variations[] = [
                'sku'           => $v['sku'] ?? null,
                'price'         => isset($v['display_price']) ? (float)$v['display_price'] : null,
                'regular_price' => isset($v['display_regular_price']) ? (float)$v['display_regular_price'] : null,
                'image'         => $v['image']['url'] ?? null,
                'attributes'    => $attrs,
                'is_in_stock'   => $v['is_in_stock'] ?? null
            ];
        }
    }
    return [$attributes, $variations];
}

/* ---------- MAIN ---------- */
// Query parameters can arrive as arrays (?unit[]=x). Anything that is not a plain
// string is treated as absent so later string functions cannot throw under strict_types.
$url    = is_string($_GET['url'] ?? null)    ? trim($_GET['url']) : '';
$format = is_string($_GET['format'] ?? null) ? $_GET['format']    : 'json';   // json | clean
$unit   = is_string($_GET['unit'] ?? null)   ? $_GET['unit']      : 'rial';   // rial | toman
$debug  = isset($_GET['debug']);

if (!$url || !filter_var($url, FILTER_VALIDATE_URL) || !preg_match('~^https?://~i', $url)) {
    respond(['success'=>false,'error'=>'آدرس محصول صحیح نیست. پارامتر ?url=... را به‌درستی بدهید.'], 400);
}

// The endpoint fetches whatever URL the caller supplies, so refuse targets on the
// loopback, private or otherwise reserved ranges (best effort: the host is resolved
// here, redirects issued by the target are not re-checked).
$targetHost = trim((string)parse_url($url, PHP_URL_HOST), '[]');
$targetIps  = filter_var($targetHost, FILTER_VALIDATE_IP) !== false
    ? [$targetHost]
    : (gethostbynamel($targetHost) ?: []);
foreach ($targetIps as $ip) {
    if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE) === false) {
        respond(['success'=>false,'error'=>'دسترسی به آدرس‌های داخلی مجاز نیست.'], 400);
    }
}

$html = fetch_html($url);
$xp   = load_dom($html);

// 1) Structured data: decode the JSON-LD blocks and keep the first Product node found
$jsonlds  = decode_jsonld_scripts($xp);
$products = find_jsonld_products($jsonlds);
if ($products) {
    $fromSchema = extract_from_schema_product($products[0]);
} else {
    $fromSchema = [];
}

// 2) Open Graph / Twitter meta tags
$og = get_meta_og($xp);

// 3) Values read straight from the markup, used where structured data is missing
$domName = extract_dom_name($xp);
$domDesc = extract_dom_description($xp);
$domImgs = extract_dom_images($xp);
[$domPrice, $domCurrency] = extract_dom_price($xp);
[$domAttrs, $variations]  = extract_attributes_and_variations($xp);

/* ---------- Compose base result ---------- */
// Structured data wins; DOM values fill the gaps.
$name        = $fromSchema['name'] ?? $domName;
$description = $fromSchema['description'] ?? $domDesc;
$sku         = $fromSchema['sku'] ?? null;
$brand       = $fromSchema['brand'] ?? null;

// Schema images first, then any DOM image not already listed.
$images = $fromSchema['images'] ?? [];
foreach ($domImgs as $domImage) {
    unique_push($images, $domImage);
}

$price = $fromSchema['price'] ?? $domPrice;

// Currency: schema, else DOM; an empty result defaults to IRR.
$currency = $fromSchema['currency'] ?? $domCurrency;
if (!$currency) {
    $currency = 'IRR';
}
$availability = $fromSchema['availability'] ?? null;

// Merge DOM attribute values into the schema attributes without duplicating values.
$attributes = $fromSchema['attributes'] ?? [];
foreach ($domAttrs as $attrName => $domValues) {
    $merged = $attributes[$attrName] ?? [];
    foreach ($domValues as $domValue) {
        if (!in_array($domValue, $merged, true)) {
            $merged[] = $domValue;
        }
    }
    $attributes[$attrName] = $merged;
}
$technical_specs = $domAttrs;

/* ---------- Result ---------- */
// Which extraction sources actually produced data for this page.
$signals = [
    'schema_product' => !empty($products),
    'og_meta'        => !empty($og),
    'dom_variations' => !empty($variations)
];

$result = [
    'success'         => true,
    'url'             => $url,
    'name'            => $name,
    'sku'             => $sku,
    'brand'           => $brand,
    'description'     => $description,
    'price'           => $price,
    'currency'        => $currency,
    'availability'    => $availability,
    'priceValidUntil' => $fromSchema['priceValidUntil'] ?? null,
    'images'          => array_values($images),
    'attributes'      => $attributes,
    'technical_specs' => $technical_specs,
    'variants'        => $variations,
    'source'          => ['signals' => $signals]
];

/* ---------- CLEAN MODE: Persian labels and tidy-up ---------- */
if ($format === 'clean') {
    // Turn a WooCommerce slug ("%d8%a2%d8%a8%db%8c-%d8%aa%db%8c%d8%b1%d9%87") into
    // readable text: percent-decode it and replace hyphens/dashes with spaces.
    // rawurldecode() is used rather than urldecode() so a literal "+" in a value
    // (e.g. "A+") is not turned into a space. Non-strings are returned unchanged.
    $readable = static function ($value) {
        if (!is_string($value)) return $value;
        $text = preg_replace('~[-\x{2010}-\x{2014}]+~u', ' ', rawurldecode($value));
        // null means the decoded bytes were not valid UTF-8: keep the original value.
        return is_string($text) ? trim($text) : $value;
    };

    // Canonical mapping of attribute / pa_* keys to Persian labels
    $labelMap = [
        'pa_color'   => 'رنگ',
        'attribute_pa_color' => 'رنگ',
        'pa_size'    => 'سایز',
        'attribute_pa_size'  => 'سایز',
        'pa_material'=> 'جنس',
        'attribute_pa_material'=> 'جنس',
    ];

    // Label for an attribute key: mapped label if known; otherwise a percent-encoded
    // key (custom attributes with Persian names) is decoded; anything else is kept.
    $labelFor = static function (string $key) use ($labelMap): string {
        if (isset($labelMap[$key])) return $labelMap[$key];
        if (strpos($key, '%') === false) return $key;
        $decoded = rawurldecode($key);
        return preg_match('//u', $decoded) === 1 ? $decoded : $key;
    };

    // Title without the " | site name" suffix
    if (!empty($result['name']) && is_string($result['name'])) {
        $result['name'] = preg_replace('~\s*\|\s*.*$~u', '', $result['name']) ?? $result['name'];
    }

    // Fix a 10× scale mismatch between the product price and variant prices.
    // The schema price can be a non-numeric string; only divide by real numbers.
    $basePrice = is_numeric($result['price'] ?? null) ? (float)$result['price'] : 0.0;
    if ($basePrice > 0 && !empty($result['variants'])) {
        foreach ($result['variants'] as &$v) {
            foreach (['price', 'regular_price'] as $field) {
                if (!empty($v[$field]) && $v[$field] > 0 && abs(($basePrice / $v[$field]) - 10) < 0.01) {
                    $v[$field] *= 10;
                }
            }
        }
        unset($v);
    }

    // Image clean-up: drop placeholders and strip "-WxH" thumbnail size suffixes
    $cleanImages = [];
    foreach ((array)($result['images'] ?? []) as $u) {
        if (!is_string($u) || $u === '') continue;
        if (preg_match('~/woocommerce-placeholder~i', $u)) continue;
        $u = preg_replace('~(-\d{2,4}x\d{2,4})(?=\.[a-z]{3,4}$)~i', '', $u) ?? $u;
        if (!in_array($u, $cleanImages, true)) $cleanImages[] = $u;
    }
    $result['images'] = $cleanImages;
    // Always present in clean mode so the response shape does not depend on the data.
    $result['main_image'] = $cleanImages[0] ?? null;

    // Variant attributes: Persian keys and decoded, readable values
    foreach ($result['variants'] as &$v) {
        if (empty($v['attributes']) || !is_array($v['attributes'])) continue;
        $newAttrs = [];
        foreach ($v['attributes'] as $ak => $av) {
            $newAttrs[$labelFor((string)$ak)] = $readable($av);
        }
        $v['attributes'] = $newAttrs;
    }
    unset($v);

    // Product-level attributes: rename keys the same way, merging values when the
    // target label already exists (this also folds attribute_pa_color into its label)
    foreach (array_keys($result['attributes']) as $k) {
        $newK = $labelFor((string)$k);
        if ($newK === (string)$k) continue;
        $result['attributes'][$newK] = array_values(array_unique(array_merge(
            (array)($result['attributes'][$newK] ?? []),
            (array)$result['attributes'][$k]
        )));
        unset($result['attributes'][$k]);
    }

    // Product-level attribute values: decode, drop empties, de-duplicate
    foreach ($result['attributes'] as $k => $vals) {
        if (!is_array($vals)) continue;
        $decoded = [];
        foreach ($vals as $vv) {
            $dv = $readable(is_scalar($vv) ? (string)$vv : '');
            if ($dv !== '') $decoded[] = $dv;
        }
        $result['attributes'][$k] = array_values(array_unique($decoded));
    }
}

/* ---------- Unit conversion (Rial → Toman) ---------- */
// 1 toman = 10 rial. The division is only valid for rial amounts: prices already quoted
// in toman (IRT, common on Persian WooCommerce shops) are just relabelled, and prices in
// any other currency are left untouched instead of being divided and mislabelled.
if (is_string($unit) && strtolower($unit) === 'toman') {
    $sourceCurrency = is_string($result['currency'] ?? null) ? strtoupper(trim($result['currency'])) : '';

    if ($sourceCurrency === 'IRT' || $sourceCurrency === 'TOMAN') {
        $result['currency'] = 'TOMAN';
    } elseif ($sourceCurrency === 'IRR' || $sourceCurrency === '') {
        if (isset($result['price']) && is_numeric($result['price'])) {
            $result['price'] = round(((float)$result['price']) / 10);
        }
        if (!empty($result['variants'])) {
            foreach ($result['variants'] as &$v) {
                if (isset($v['price']) && is_numeric($v['price']))               $v['price'] = round(((float)$v['price']) / 10);
                if (isset($v['regular_price']) && is_numeric($v['regular_price'])) $v['regular_price'] = round(((float)$v['regular_price']) / 10);
            }
            unset($v);
        }
        $result['currency'] = 'TOMAN';
    }
}

/* ---------- Debug ---------- */
// With ?debug present, attach the raw inputs the result was derived from.
if ($debug) {
    $firstProduct = $products[0] ?? null;
    $result['debug'] = [
        'jsonld_products_found' => count($products),
        'jsonld_first_product'  => $firstProduct,
        'og'                    => $og,
        'format'                => $format,
        'unit'                  => $unit,
    ];
}

// Emit the final document and stop.
respond($result);