<?php
/**
 * Single Product scraper – SornaShop (WooCommerce + Woodmart)
 * Usage:
 *  GET  ?url=https://www.sornashop.com/product/....        [optional debug=1]
 *  POST ?html=BASE64_OR_RAW_HTML                            [optional debug=1]
 * Output: JSON with standard fields
 */
// Every response, success or failure, is emitted as UTF-8 JSON.
header('Content-Type: application/json; charset=UTF-8');
mb_internal_encoding('UTF-8');

try {
    $input = get_input_html();
    $url   = $input[0];
    $html  = $input[1];
    $debug = $input[2];

    // 1) Structured data: JSON-LD / meta tags (RankMath / OG / Twitter)
    $meta = parse_meta_blocks($html);

    // 2) DOM selectors (Woo + Woodmart); on key collisions the DOM value wins
    $domData = parse_dom($html, $url ?: null);
    $product = array_merge($meta, $domData);

    // 3) Price normalization
    normalize_prices($product);

    // 4) Fall back to the requested URL when nothing else supplied one
    if ($url && empty($product['url'])) {
        $product['url'] = $url;
    }

    $response = ['ok' => true, 'product' => $product];
    if ($debug) {
        $response['_debug'] = ['have_meta'=>!empty($meta), 'have_dom'=>!empty($domData)];
    }
    $jsonFlags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT;
    echo json_encode($response, $jsonFlags);

} catch (Throwable $e) {
    // Any failure is reported as HTTP 500 with the exception message.
    http_response_code(500);
    $failure = ['ok'=>false, 'error'=>$e->getMessage()];
    echo json_encode($failure, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);
}

/* -------------------------- helpers -------------------------- */

/**
 * Resolve the HTML to scrape from the current request.
 *
 * Reads `html` from POST (falling back to GET) and `url` / `debug` from GET.
 * When `html` is supplied it is used directly, after an optional base64
 * decode; otherwise the page at `url` is downloaded with cURL.
 *
 * @return array [string $url, string $html, bool $debug]
 * @throws Exception when neither input is given, when the URL scheme is not
 *                   http/https, or when the download fails / answers HTTP >= 400
 */
function get_input_html() {
    // Array-valued parameters (e.g. ?url[]=x) are treated as absent instead of
    // raising a TypeError further down.
    $url   = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
    $html  = isset($_POST['html']) ? $_POST['html'] : (isset($_GET['html']) ? $_GET['html'] : '');
    if (!is_string($html)) $html = '';
    $debug = isset($_GET['debug']) && $_GET['debug']=='1';

    if ($html !== '') {
        // Strict base64 only succeeds on pure base64 input; the length floor
        // keeps short raw strings that happen to be valid base64 untouched.
        $decoded = base64_decode($html, true);
        if ($decoded !== false && strlen($decoded) > 200) $html = $decoded;
    } else {
        if ($url === '') throw new Exception("please provide 'url' or 'html'");

        // The URL comes straight from the request: only plain web schemes may
        // reach cURL, otherwise file://, gopher:// etc. would be fetchable.
        // NOTE(review): the host is not restricted, so internal http(s) hosts
        // remain reachable; add an allow-list if this endpoint is public.
        $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
        if ($scheme !== 'http' && $scheme !== 'https') {
            throw new Exception("unsupported URL scheme (only http/https allowed)");
        }

        $ch = curl_init($url);
        if ($ch === false) throw new Exception("HTTP 0 curl init failed");
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER=>true,
            CURLOPT_FOLLOWLOCATION=>true,
            CURLOPT_MAXREDIRS=>5,
            CURLOPT_TIMEOUT=>30,
            CURLOPT_CONNECTTIMEOUT=>15,
            CURLOPT_ENCODING=>'',
            CURLOPT_SSL_VERIFYPEER=>true,
            CURLOPT_SSL_VERIFYHOST=>2,
            // Redirects must not be able to switch to another protocol either.
            CURLOPT_PROTOCOLS=>CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_REDIR_PROTOCOLS=>CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_HTTPHEADER=>[
                'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari/537.36',
                'Accept-Language: fa-IR,fa;q=0.9,en;q=0.8'
            ],
        ]);
        $html = curl_exec($ch);
        $err  = curl_error($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);
        if ($html===false || $code>=400) throw new Exception("HTTP $code $err");
    }

    // Both input paths end with the same normalization: anything that is not
    // valid UTF-8 is converted using mbstring's auto-detection order.
    if (!mb_detect_encoding($html, 'UTF-8', true)) $html = mb_convert_encoding($html, 'UTF-8', 'auto');
    return [$url, $html, $debug];
}

/**
 * Extract product data from <meta> tags and JSON-LD <script> blocks.
 *
 * Possible keys in the result: price_rial_meta, currency_meta,
 * availability_meta, brand, price_text_toman_meta, name, sku,
 * price_rial_jsonld, currency_jsonld, availability_jsonld, url,
 * images_jsonld. Empty-string and null values are dropped.
 *
 * @param string $html full page markup
 * @return array
 */
function parse_meta_blocks($html) {
    $res = [];

    // Open Graph / RankMath product metas (price, currency, availability, brand)
    $priceAmount = match_meta($html, 'property="product:price:amount"', 'content');
    $priceCurr   = match_meta($html, 'property="product:price:currency"', 'content');
    $availability= match_meta($html, 'property="product:availability"', 'content');
    $brandMeta   = match_meta($html, 'property="product:brand"', 'content');

    if ($priceAmount !== '') $res['price_rial_meta'] = to_int_price($priceAmount);
    if ($priceCurr   !== '') $res['currency_meta']   = $priceCurr;
    if ($availability!== '') $res['availability_meta']= $availability;
    if ($brandMeta   !== '') $res['brand']           = $brandMeta;

    // Twitter card – price as display text
    $twPriceText = match_meta($html, 'name="twitter:data1"', 'content');
    if ($twPriceText !== '') $res['price_text_toman_meta'] = trim_html($twPriceText);

    // JSON-LD Product / Offer
    if (preg_match_all('#<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>#si', $html, $mm)) {
        $entityFlags = ENT_QUOTES|ENT_HTML5;
        foreach ($mm[1] as $block) {
            $data = json_decode(html_entity_decode($block, $entityFlags, 'UTF-8'), true);
            if (!is_array($data)) {
                // Decoding entities up front turns an &quot; inside a JSON
                // string into a bare quote and breaks the document. Retry on
                // the raw text and decode entities per value instead.
                $data = json_decode($block, true);
                if (is_array($data)) {
                    array_walk_recursive($data, function (&$v) use ($entityFlags) {
                        if (is_string($v)) $v = html_entity_decode($v, $entityFlags, 'UTF-8');
                    });
                }
            }
            if (!is_array($data) || !$data) continue;

            // A block is either one object or a top-level list of objects;
            // each object may be a node itself and/or carry an @graph list.
            $roots = isset($data[0]) ? $data : [$data];
            $nodes = [];
            foreach ($roots as $root) {
                if (!is_array($root)) continue;
                if (isset($root['@type'])) $nodes[] = $root;
                if (isset($root['@graph']) && is_array($root['@graph'])) {
                    foreach ($root['@graph'] as $g) $nodes[] = $g;
                }
            }

            foreach ($nodes as $n) {
                if (!is_array($n) || !isset($n['@type'])) continue;

                // @type may be a string or a list of strings.
                $isProduct = false;
                foreach ((array) $n['@type'] as $t) {
                    if (is_string($t) && stripos($t, 'Product') !== false) { $isProduct = true; break; }
                }
                if (!$isProduct) continue;

                if (!empty($n['name'])) $res['name'] = $n['name'];
                if (!empty($n['sku']))  $res['sku']  = $n['sku'];

                if (!empty($n['brand'])) {
                    $brand = $n['brand'];
                    if (is_array($brand) && isset($brand[0])) $brand = $brand[0]; // list of brands: use the first
                    if (is_array($brand)) {
                        if (!empty($brand['name']))    $brand = $brand['name'];
                        elseif (!empty($brand['@id'])) $brand = $brand['@id'];
                        else                           $brand = null;
                    }
                    // Only overwrite the meta brand when JSON-LD actually names one.
                    if ($brand !== null && $brand !== '') $res['brand'] = $brand;
                }

                if (!empty($n['offers']) && is_array($n['offers'])) {
                    $offers = $n['offers'];
                    if (!isset($offers[0])) $offers = [$offers]; // single Offer object
                    foreach ($offers as $o) {
                        if (!is_array($o)) continue;
                        if (!empty($o['price']))         $res['price_rial_jsonld'] = to_int_price($o['price']);
                        elseif (!empty($o['lowPrice']))  $res['price_rial_jsonld'] = to_int_price($o['lowPrice']); // AggregateOffer
                        if (!empty($o['priceCurrency'])) $res['currency_jsonld']    = $o['priceCurrency'];
                        if (!empty($o['availability']))  $res['availability_jsonld']= $o['availability'];
                        if (!empty($o['url']))           $res['url'] = $o['url'];
                    }
                }

                // Images: a URL string, an ImageObject, or a list of either
                if (!empty($n['image'])) {
                    $image = $n['image'];
                    if (is_array($image) && !isset($image[0])) $image = [$image]; // single ImageObject
                    $imgs = [];
                    foreach ((array) $image as $im) {
                        if (is_array($im)) { if (!empty($im['url'])) $imgs[] = $im['url']; }
                        elseif (is_string($im) && $im !== '') $imgs[] = $im;
                    }
                    if ($imgs) $res['images_jsonld'] = array_values(array_unique($imgs));
                }
            }
        }
    }

    return array_filter($res, function($v){ return !($v==='' || $v===null); });
}

/**
 * Read one attribute from the first <meta> tag that carries a given marker.
 *
 * The tag is located by a case-insensitive search for $needle (for example
 * `property="og:title"`); the same marker written with the other quote style
 * is accepted too. Attribute order inside the tag does not matter, and a
 * quoted value may contain the other kind of quote.
 *
 * @param string $html   markup to search
 * @param string $needle attribute/value fragment identifying the tag
 * @param string $attr   name of the attribute whose value is returned
 * @return string the raw (not entity-decoded) value, or '' when not found
 */
function match_meta($html, $needle, $attr){
    if (!preg_match_all('#<meta\b[^>]*>#si', $html, $tags)) return '';

    // Accept the marker with either quote style: name="x" or name='x'.
    $needles = array_unique([$needle, strtr($needle, "\"'", "'\"")]);

    // The look-behind keeps e.g. "data-content" from passing as "content".
    $attrRe = '#(?<![\w:-])'.preg_quote($attr,'#').'\s*=\s*(?:"([^"]*)"|\'([^\']*)\')#si';

    foreach ($tags[0] as $tag) {
        $hit = false;
        foreach ($needles as $nd) {
            if ($nd !== '' && stripos($tag, $nd) !== false) { $hit = true; break; }
        }
        if (!$hit || !preg_match($attrRe, $tag, $m)) continue;

        // Group 1 is the double-quoted form, group 2 the single-quoted form.
        $val = (isset($m[2]) && $m[2] !== '') ? $m[2] : $m[1];
        if ($val !== '') return $val;
    }
    return '';
}

/**
 * Extract product fields from the rendered page via XPath (WooCommerce +
 * Woodmart markup).
 *
 * Possible keys in the result: name, price_text_toman_dom, availability_dom,
 * sku, brand, description_html, description_text, attributes, images_dom, url.
 * Empty-string and null values are dropped.
 *
 * @param string      $html    full page markup (UTF-8)
 * @param string|null $baseUrl when truthy, copied into the result as `url`
 * @return array
 */
function parse_dom($html, $baseUrl=null) {
    // Collect libxml parse errors internally instead of emitting warnings,
    // and hand the previous setting back once parsing is done.
    $prevErrors = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // The XML-encoding prefix forces libxml to read the markup as UTF-8.
    $dom->loadHTML('<?xml encoding="UTF-8">'.$html);
    libxml_clear_errors();
    libxml_use_internal_errors($prevErrors);

    $xp = new DOMXPath($dom);
    $root = $dom->documentElement;

    $res = [];

    // Product title
    $name = xtext($xp, $root, "//h1[contains(@class,'product_title')]");
    if ($name!=='') $res['name'] = $name;

    // Displayed price. On sale items the first <bdi> sits inside <del> (the
    // struck-through regular price); the current price is in the sibling <ins>.
    $priceText = '';
    $firstBdi = $xp->query("//p[contains(@class,'price')]//bdi | //span[contains(@class,'price')]//bdi", $root)->item(0);
    if ($firstBdi) {
        $priceText = xtext($xp, $firstBdi, "ancestor::del[1]/following-sibling::ins[1]//bdi");
        if ($priceText === '') $priceText = xtext($xp, $firstBdi, ".");
    }
    if ($priceText!=='') $res['price_text_toman_dom'] = $priceText;

    // Stock status
    $stock = xtext($xp, $root, "//p[contains(@class,'stock')]");
    if ($stock!=='') $res['availability_dom'] = $stock;

    // SKU: match the exact class token first, so a wrapper such as
    // "sku_wrapper" (label + value) is not picked up; the loose match is
    // kept only as a fallback.
    $sku = xtext($xp, $root, "//*[contains(concat(' ', normalize-space(@class), ' '), ' sku ')]");
    if ($sku==='') $sku = xtext($xp, $root, "//*[@class='sku' or contains(@class,'sku')]");
    if ($sku!=='') $res['sku'] = $sku;

    // Brand (Woodmart brand links, or any link whose href contains /brand/)
    $brand = xtext($xp, $root, "//div[contains(@class,'wd-product-brands-links')]//a | //a[contains(@href,'/brand/')]");
    if ($brand!=='') $res['brand'] = $brand;

    // Description (Description tab or short description). The union comes
    // back in document order, so whichever block appears first is used.
    $descNode = $xp->query("//div[@id='tab-description'] | //div[contains(@class,'woocommerce-product-details__short-description')]")->item(0);
    if ($descNode) {
        $cleanDesc = tidy_html(inner_html($descNode));
        $res['description_html'] = $cleanDesc;
        // Build the text from the tidied markup so inline script/style
        // contents do not leak into it.
        $res['description_text'] = trim_html($cleanDesc);
    }

    // Attribute table (Additional information)
    $attrs = [];
    foreach($xp->query("//table[contains(@class,'shop_attributes')]//tr") as $tr){
        $k = xtext($xp, $tr, ".//th");
        $v = xtext($xp, $tr, ".//td");
        if ($k!=='' && $v!=='') $attrs[$k] = $v;
    }
    if ($attrs) $res['attributes'] = $attrs;

    // Image gallery: data-large_image is preferred over src
    $imgs = [];
    foreach($xp->query("//figure[contains(@class,'woocommerce-product-gallery')]//img | //div[contains(@class,'product-images')]//img") as $img){
        $src = $img->getAttribute('data-large_image') ?: $img->getAttribute('src');
        if ($src) $imgs[] = $src;
    }
    $imgs = array_values(array_filter(array_unique($imgs)));
    if ($imgs) $res['images_dom'] = $imgs;

    if ($baseUrl) $res['url'] = $baseUrl;
    return array_filter($res, function($v){ return !($v==='' || $v===null); });
}

/**
 * Text content of the first node matched by an XPath expression, with every
 * whitespace run collapsed to one space and the ends trimmed.
 *
 * @param DOMXPath     $xp  evaluator bound to the document
 * @param DOMNode|null $ctx context node for the expression
 * @param string       $x   XPath expression
 * @return string '' when nothing matches
 */
function xtext($xp,$ctx,$x){
    $matches = $xp->query($x, $ctx);
    $first   = $matches->item(0);
    if ($first === null) {
        return '';
    }
    $collapsed = preg_replace('/\s+/u', ' ', $first->textContent);
    return trim($collapsed);
}
/**
 * Serialize the children of a node (not the node itself) back to HTML.
 *
 * @param DOMNode $n node whose content is wanted
 * @return string concatenated HTML of all child nodes, '' when there are none
 */
function inner_html(DOMNode $n){
    $owner = $n->ownerDocument;
    $parts = [];
    for ($child = $n->firstChild; $child !== null; $child = $child->nextSibling) {
        $parts[] = $owner->saveHTML($child);
    }
    return implode('', $parts);
}
/**
 * Light HTML cleanup: drop <script>/<style> elements together with their
 * contents, turn non-breaking spaces (raw or as &nbsp;) into plain spaces,
 * and trim the result.
 *
 * @param string $html markup fragment
 * @return string
 */
function tidy_html($html){
    $withoutCode = preg_replace('#<(script|style)[^>]*>.*?</\1>#si','', $html);
    $rawNbspGone = str_replace("\xc2\xa0", ' ', $withoutCode);
    $entityGone  = str_replace("&nbsp;", ' ', $rawNbspGone);
    return trim($entityGone);
}
/**
 * Reduce an HTML fragment to plain single-line text: tags removed, entities
 * decoded, non-breaking spaces made ordinary, whitespace runs collapsed.
 *
 * @param string $t markup or text
 * @return string
 */
function trim_html($t){
    $plain   = strip_tags($t);
    $decoded = html_entity_decode($plain, ENT_QUOTES|ENT_HTML5, 'UTF-8');
    $spaced  = str_replace(["\xc2\xa0","&nbsp;"], ' ', $decoded);
    $single  = preg_replace('/\s+/u',' ', $spaced);
    return trim($single);
}

/**
 * Convert Persian (U+06F0–U+06F9) and Arabic-Indic (U+0660–U+0669) digits to
 * ASCII digits, and the Arabic thousands separator / Arabic comma to ','.
 * Every other character is left untouched.
 *
 * @param string $s text that may contain non-ASCII digits
 * @return string
 */
function fa_digits_to_en($s){
    $map = [
        // Persian digits
        '۰'=>'0','۱'=>'1','۲'=>'2','۳'=>'3','۴'=>'4','۵'=>'5','۶'=>'6','۷'=>'7','۸'=>'8','۹'=>'9',
        // Arabic-Indic digits (different code points, often mixed into Persian text)
        '٠'=>'0','١'=>'1','٢'=>'2','٣'=>'3','٤'=>'4','٥'=>'5','٦'=>'6','٧'=>'7','٨'=>'8','٩'=>'9',
        // Arabic thousands separator and Arabic comma
        '٬'=>',','،'=>',',
    ];
    return str_replace(array_keys($map), array_values($map), $s);
}
/**
 * Parse a price (number or display text) into a whole, non-negative integer.
 *
 * Persian/Arabic digits, thousands separators, currency words and any other
 * non-digit characters are ignored. A trailing decimal fraction of one or two
 * digits ("1250000.00") is rounded half-up instead of being glued onto the
 * integer part; a dot followed by three digits is still read as a thousands
 * separator ("1.250.000").
 *
 * @param mixed $s price as string, int or float
 * @return int|null null when no digits are present or the value is unusable
 */
function to_int_price($s){
    // Numeric JSON values can arrive as floats: round them rather than
    // stringifying, which would merge the fraction into the number.
    if (is_float($s)) {
        if (!is_finite($s) || abs($s) >= PHP_INT_MAX) return null;
        return (int) round(abs($s));
    }
    if (!is_scalar($s)) return null;

    $s = fa_digits_to_en((string) $s);
    $s = html_entity_decode($s, ENT_QUOTES|ENT_HTML5, 'UTF-8');

    // Decimal fraction: '.' or the Arabic decimal separator (U+066B, bytes
    // D9 AB) followed by 1-2 digits and no further digits up to the end.
    $roundUp = false;
    if (preg_match('/^(.*[0-9])(?:\.|\xD9\xAB)([0-9]{1,2})[^0-9]*$/s', $s, $m)) {
        $s = $m[1];
        $roundUp = ((int) $m[2][0]) >= 5;
    }

    $digits = preg_replace('/[^0-9]/', '', $s);
    if ($digits === null || $digits === '') return null;
    return (int) $digits + ($roundUp ? 1 : 0);
}

/**
 * Fold the per-source helper fields of a product into final fields, in place.
 *
 * Writes price_rial, price_toman, currency, availability and images when the
 * data for them exists, then removes the helper keys (price_rial_meta,
 * price_rial_jsonld, currency_meta, currency_jsonld, availability_meta,
 * availability_jsonld, price_text_toman_dom, price_text_toman_meta,
 * images_jsonld, images_dom). availability_dom is left in place.
 *
 * @param array $p product record, modified by reference
 * @return void
 */
function normalize_prices(&$p){
    // Structured amount: meta wins over JSON-LD. Each amount is paired with
    // the currency from its own source, falling back to the other source.
    $amount = null;
    $amountCurrency = '';
    if (!empty($p['price_rial_meta'])) {
        $amount = $p['price_rial_meta'];
        $amountCurrency = !empty($p['currency_meta']) ? $p['currency_meta']
            : (!empty($p['currency_jsonld']) ? $p['currency_jsonld'] : '');
    } elseif (!empty($p['price_rial_jsonld'])) {
        $amount = $p['price_rial_jsonld'];
        $amountCurrency = !empty($p['currency_jsonld']) ? $p['currency_jsonld']
            : (!empty($p['currency_meta']) ? $p['currency_meta'] : '');
    }

    // The amount is in Rial unless its own currency says Toman (IRT).
    $rial = null;
    $structuredToman = null;
    if ($amount) {
        if (is_string($amountCurrency) && strtoupper(trim($amountCurrency)) === 'IRT') {
            $structuredToman = (int) $amount;
            $rial = $structuredToman * 10;
        } else {
            $rial = $amount;
        }
    }

    // Display text (Toman): DOM first, then the Twitter-card text.
    $tomanText = isset($p['price_text_toman_dom']) ? $p['price_text_toman_dom'] : (isset($p['price_text_toman_meta']) ? $p['price_text_toman_meta'] : null);
    $toman = $tomanText ? to_int_price($tomanText) : null;
    if (!$toman && $structuredToman) $toman = $structuredToman;

    // Only Rial known: Toman = Rial / 10
    if (!$toman && $rial) $toman = (int) round($rial / 10);

    // Only Toman known: Rial = Toman * 10
    if (!$rial && $toman) $rial = (int) ($toman * 10);

    if ($rial)  $p['price_rial']  = $rial;
    if ($toman) $p['price_toman'] = $toman;

    // Currency: declared value first, otherwise inferred from what was found
    if (empty($p['currency'])) {
        if (!empty($p['currency_meta']))   $p['currency'] = $p['currency_meta'];
        elseif (!empty($p['currency_jsonld'])) $p['currency'] = $p['currency_jsonld'];
        elseif ($toman) $p['currency'] = 'IRT'; // Toman
        elseif ($rial)  $p['currency'] = 'IRR'; // Rial
    }

    // Availability: surface the first source that has a value, so the
    // structured values are not lost when the helper keys are removed.
    if (empty($p['availability'])) {
        foreach (['availability_meta','availability_jsonld','availability_dom'] as $k) {
            if (!empty($p[$k])) { $p['availability'] = $p[$k]; break; }
        }
    }

    // Merge JSON-LD and DOM images, JSON-LD first, without duplicates
    $imgs = [];
    if (!empty($p['images_jsonld'])) $imgs = array_merge($imgs, (array)$p['images_jsonld']);
    if (!empty($p['images_dom']))    $imgs = array_merge($imgs, (array)$p['images_dom']);
    if ($imgs) $p['images'] = array_values(array_unique($imgs));

    // Drop the helper fields
    foreach (['price_rial_meta','currency_meta','availability_meta','images_jsonld','price_rial_jsonld','currency_jsonld','availability_jsonld','price_text_toman_dom','price_text_toman_meta','images_dom'] as $k){
        unset($p[$k]);
    }
}