<?php
/**
 * Single Product scraper for Sazkala (WooCommerce + custom theme) – PHP 7 Compatible
 * Inputs:
 *   GET url=https://sazkala.com/product/...
 *   or  html=BASE64_OR_RAW_SOURCE
 * Output (JSON):
 *   title, url, price(int), regular_price(int), sale_price(int),
 *   price_text, regular_price_text, sale_price_text,
 *   currency, availability, brand, sku,
 *   images[], categories[{name,url}],
 *   description_html, specs_html, specs{key=>value}
 */

// Every response of this script (success or error) is emitted as UTF-8 JSON.
header('Content-Type: application/json; charset=UTF-8');
// Make the mb_* string functions used below (e.g. mb_strpos) operate on UTF-8 by default.
mb_internal_encoding('UTF-8');

/**
 * Reads the request parameters and returns the page source to scrape.
 *
 * Inputs:
 *   - GET  url   : address of the product page (trimmed).
 *   - POST html / GET html : page source, either raw or base64-encoded. POST wins over GET.
 *   - GET  debug : "1" switches the debug flag on.
 *
 * When `html` is supplied it is used as-is (after an optional base64 decode) and no
 * network request is made; otherwise the page behind `url` is downloaded.
 *
 * @return array [string $url, string $html, bool $debug]
 * @throws Exception when neither `url` nor `html` is given, when `url` uses a scheme
 *                   other than http/https, or when the download fails.
 */
function get_input_html() {
    // Only accept scalar strings: "url[]=x" style parameters arrive as arrays and
    // would otherwise blow up inside trim()/base64_decode().
    $url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';

    $html = '';
    if (isset($_POST['html']) && is_string($_POST['html'])) {
        $html = $_POST['html'];
    } elseif (!isset($_POST['html']) && isset($_GET['html']) && is_string($_GET['html'])) {
        $html = $_GET['html'];
    }

    $debug = isset($_GET['debug']) && $_GET['debug'] === '1';

    if ($html !== '') {
        // Strict base64: raw HTML contains characters outside the alphabet and fails to
        // decode. The length check avoids treating short plain words as base64.
        $decoded = base64_decode($html, true);
        if ($decoded !== false && strlen($decoded) > 200) $html = $decoded;
        return array($url, $html, $debug);
    }
    if ($url === '') throw new Exception("please provide 'url' or 'html'");

    // The URL comes straight from the client: refuse explicit non-web schemes such as
    // file://, gopher:// or ftp:// so the script cannot be used to read local files.
    // Scheme-less input (e.g. "sazkala.com/product/x") is still passed through.
    $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));
    if ($scheme !== '' && $scheme !== 'http' && $scheme !== 'https') {
        throw new Exception("only http(s) URLs can be fetched: $url");
    }

    $html = fetch_html($url, $debug);
    return array($url, $html, $debug);
}

/**
 * Downloads a page over HTTP(S) with cURL and returns its body.
 *
 * Redirects are followed (at most 5), compressed responses are decoded, TLS
 * certificates are verified, and both the initial request and every redirect are
 * limited to the http/https protocols.
 *
 * @param string $url   Address to download.
 * @param bool   $debug Accepted for interface compatibility; currently not used.
 * @return string Response body; converted to UTF-8 when a different source encoding
 *                can be detected, otherwise returned unchanged.
 * @throws Exception on transport errors or an HTTP status >= 400.
 */
function fetch_html($url, $debug=false) {
    $ch = curl_init($url);
    if ($ch === false) {
        throw new Exception("HTTP 0 while fetching: $url | curl_init failed");
    }
    curl_setopt_array($ch, array(
        CURLOPT_RETURNTRANSFER=>true,
        CURLOPT_FOLLOWLOCATION=>true,
        CURLOPT_MAXREDIRS=>5,
        CURLOPT_TIMEOUT=>25,
        CURLOPT_CONNECTTIMEOUT=>15,
        CURLOPT_ENCODING=>'',
        CURLOPT_SSL_VERIFYPEER=>true,
        CURLOPT_SSL_VERIFYHOST=>2,
        // The URL is client-supplied: never let cURL speak file://, gopher://, ftp:// …
        // either directly or after a redirect.
        CURLOPT_PROTOCOLS=>CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS=>CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_HTTPHEADER=>array(
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari/537.36',
            'Accept-Language: fa-IR,fa;q=0.9,en;q=0.8'
        ),
    ));
    $html = curl_exec($ch);
    $err  = curl_error($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    if ($html===false || $code>=400) {
        throw new Exception("HTTP $code while fetching: $url" . ($err ? " | $err" : ""));
    }
    if (!mb_detect_encoding($html,'UTF-8',true)) {
        // Convert only when the source encoding can actually be identified. A blind
        // mb_convert_encoding(..., 'auto') returns false (plus a warning) when detection
        // fails, which would make this function return false instead of the body.
        $from = mb_detect_encoding($html, mb_detect_order(), true);
        if ($from !== false) {
            $converted = mb_convert_encoding($html, 'UTF-8', $from);
            if (is_string($converted) && $converted !== '') $html = $converted;
        }
    }
    return $html;
}

/**
 * Collects product data from every <script type="application/ld+json"> block.
 *
 * Handles a single node, an "@graph" container and a top-level list of nodes. Any
 * node whose "@type" contains "Product" contributes to the result; later nodes
 * overwrite keys set by earlier ones.
 *
 * @param string $html Page source.
 * @return array Subset of: name, url, sku, brand, price, currency, availability,
 *               images (list of URLs), description_html. Empty array when nothing found.
 */
function parse_jsonld($html) {
    $out = array();
    if (!preg_match_all('#<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>#si',$html,$m)) return $out;
    foreach ($m[1] as $block) {
        // Some generators HTML-escape the JSON, so decode entities first. If that breaks
        // the JSON (e.g. &quot; inside a string value becomes a bare quote), fall back to
        // the raw script content instead of dropping the block.
        $data = json_decode(html_entity_decode($block, ENT_QUOTES|ENT_HTML5, 'UTF-8'), true);
        if (!is_array($data)) $data = json_decode($block, true);
        if (!is_array($data) || !$data) continue;

        // A block may hold one document or a list of documents.
        $docs = isset($data[0]) ? $data : array($data);
        $nodes = array();
        foreach ($docs as $doc) {
            if (!is_array($doc)) continue;
            if (isset($doc['@type'])) $nodes[] = $doc;
            if (isset($doc['@graph']) && is_array($doc['@graph'])) $nodes = array_merge($nodes, $doc['@graph']);
        }

        foreach ($nodes as $n) {
            if (!is_array($n)) continue;
            $types = isset($n['@type']) ? $n['@type'] : '';
            if (is_array($types)) $types = implode(',', $types);
            if (stripos((string)$types, 'Product') === false) continue;

            $p = array();
            if (isset($n['name'])) $p['name'] = $n['name'];
            if (isset($n['url']))  $p['url']  = $n['url'];
            if (isset($n['sku']))  $p['sku']  = $n['sku'];

            if (isset($n['brand'])) {
                $brand = $n['brand'];
                // A list of brands: use the first entry.
                if (is_array($brand) && isset($brand[0])) $brand = $brand[0];
                $p['brand'] = is_array($brand) ? (isset($brand['name']) ? $brand['name'] : null) : $brand;
            }

            // "offers" is either one offer object (with or without "@type") or a list of them.
            $offers = isset($n['offers']) ? $n['offers'] : array();
            if (!is_array($offers)) {
                $offers = array();
            } elseif ($offers && !isset($offers[0])) {
                $offers = array($offers);
            }
            foreach ($offers as $o) {
                if (!is_array($o)) continue;
                if (!isset($p['price']) && isset($o['price'])) $p['price'] = $o['price'];
                if (!isset($p['currency']) && isset($o['priceCurrency'])) $p['currency'] = $o['priceCurrency'];
                if (!isset($p['availability']) && !empty($o['availability'])) $p['availability'] = $o['availability'];
            }

            // "image" is a URL, an ImageObject, or a list mixing both. A lone ImageObject
            // must be wrapped, otherwise its own keys ("@type", "url", ...) get iterated.
            $image = isset($n['image']) ? $n['image'] : array();
            if (is_array($image) && (isset($image['url']) || isset($image['@type']))) {
                $image = array($image);
            }
            $imgs = array();
            foreach ((array)$image as $im) {
                $src = is_array($im) ? (isset($im['url']) ? $im['url'] : '') : $im;
                if (is_string($src) && $src !== '') $imgs[] = $src;
            }
            $imgs = array_values(array_unique($imgs));
            if ($imgs) $p['images'] = $imgs;

            if (!empty($n['description'])) $p['description_html'] = $n['description'];

            // Drop empty values so they cannot overwrite data from an earlier node.
            foreach ($p as $k=>$v) { if ($v===null || $v==='') unset($p[$k]); }
            $out = array_merge($out, $p);
        }
    }
    return $out;
}

/**
 * Returns the trimmed `content` of the first <meta property="..."> tag, or null
 * when the document has no such tag.
 *
 * @param DOMXPath $xp   XPath evaluator bound to the document.
 * @param string   $prop Value of the `property` attribute to look for.
 * @return string|null
 */
function metaProp($xp, $prop) {
    $matches = $xp->query("//meta[@property='$prop']/@content");
    if (!$matches->length) {
        return null;
    }
    $contentAttr = $matches->item(0);
    return trim($contentAttr->nodeValue);
}
/**
 * Serialises the children of a node (its "inner HTML") and trims the result.
 *
 * @param DOMDocument  $dom Document that owns the node.
 * @param DOMNode|null $n   Node whose children are serialised.
 * @return string Inner HTML, or '' when no node is given.
 */
function node_html($dom, $n) {
    if (!$n) {
        return '';
    }
    $pieces = array();
    foreach ($n->childNodes as $kid) {
        $pieces[] = $dom->saveHTML($kid);
    }
    return trim(implode('', $pieces));
}
/**
 * Evaluates an XPath expression and returns the text of the first match with all
 * whitespace runs collapsed to single spaces and the ends trimmed.
 *
 * @param DOMXPath     $xp  XPath evaluator.
 * @param DOMNode|null $ctx Context node for relative expressions.
 * @param string       $x   XPath expression.
 * @return string Normalised text, or '' when nothing matches.
 */
function text($xp, $ctx, $x) {
    $first = $xp->query($x, $ctx)->item(0);
    if (!$first) {
        return '';
    }
    $collapsed = preg_replace('/\s+/u', ' ', $first->textContent);
    return trim($collapsed);
}
/**
 * Converts Persian (۰-۹) and Arabic-Indic (٠-٩) digits to ASCII digits and removes
 * thousands/decimal separators and the currency words (toman / rial spellings).
 *
 * @param string $s Raw price text.
 * @return string Text with ASCII digits and without separators or currency words.
 */
function normalize_digits_to_en($s) {
    $map = array(
        // Persian digits
        '۰'=>'0','۱'=>'1','۲'=>'2','۳'=>'3','۴'=>'4','۵'=>'5','۶'=>'6','۷'=>'7','۸'=>'8','۹'=>'9',
        // Arabic-Indic digits (produced by Arabic keyboards / locales)
        '٠'=>'0','١'=>'1','٢'=>'2','٣'=>'3','٤'=>'4','٥'=>'5','٦'=>'6','٧'=>'7','٨'=>'8','٩'=>'9',
        // separators
        '٬'=>'','،'=>'',','=>'','٫'=>'','.'=>'',
        // currency words
        'تومان'=>'','ريال'=>'','ریال'=>'',
    );
    return str_replace(array_keys($map), array_values($map), $s);
}

/**
 * Parses a price text into an integer amount.
 *
 * A trailing fraction of one or two digits ("1250000.00", "12,500.50 تومان") is
 * discarded before the separators are stripped; previously its digits were glued
 * onto the integer part, inflating the price by a factor of 10 or 100. Groups of
 * three digits after a dot ("1.250.000") are still read as thousands separators.
 *
 * @param string|int|float|null $s Raw price text.
 * @return int|null Integer price, or null when the input is null or holds no digits.
 */
function to_int_price($s) {
    if ($s === null) return null;
    $s = (string)$s;

    // \p{Nd} covers ASCII, Persian and Arabic-Indic digits alike.
    $withoutFraction = preg_replace('/(?<=\p{Nd})[.٫]\p{Nd}{1,2}(?!\p{Nd})/u', '', $s);
    // preg_replace yields null on invalid UTF-8; keep the original text in that case.
    if ($withoutFraction !== null) $s = $withoutFraction;

    $s = normalize_digits_to_en($s);
    $s = preg_replace('/[^\d]/', '', $s);
    if ($s === '') return null;
    return (int)$s;
}

/**
 * Extracts the current, regular and sale price in several stages
 * (DOM -> META -> JSON-LD); the first stage that yields text wins.
 *
 * @param DOMXPath     $xp     XPath evaluator bound to the page.
 * @param DOMNode|null $root   Context node for the relative DOM queries.
 * @param array        $jsonld Optional JSON-LD data; its 'price' is the last fallback.
 * @return array Keys: price, regular_price, sale_price (int|null) and price_text,
 *               regular_price_text, sale_price_text (string|null).
 */
function extract_prices($xp, $root, $jsonld = array()) {
    $priceNowTxt = '';
    $priceRegTxt = '';
    $priceSaleTxt = '';

    // 1) <bdi> inside a ".price" container. The <ins> (discounted) amount is queried
    //    on its own first: an XPath union returns nodes in document order, so
    //    "ins//bdi | //bdi" would hand back the struck-through <del> amount that
    //    precedes <ins> in the markup and the sale price would never be detected.
    $nNow = $xp->query(".//*[contains(@class,'price')]//ins//bdi", $root)->item(0);
    if (!$nNow) {
        $nNow = $xp->query(".//*[contains(@class,'price')]//bdi[not(ancestor::del)]", $root)->item(0);
    }
    if ($nNow) $priceNowTxt = trim($nNow->textContent);

    // 2) woocommerce-Price-amount (again ignoring struck-through amounts)
    if ($priceNowTxt === '') {
        $n = $xp->query(".//span[contains(@class,'woocommerce-Price-amount')]/bdi[not(ancestor::del)]", $root)->item(0);
        if ($n) $priceNowTxt = trim($n->textContent);
    }

    // 3) itemprop=price
    if ($priceNowTxt === '') {
        $n = $xp->query(".//*[@itemprop='price']/@content | .//*[@itemprop='price']", $root)->item(0);
        if ($n) $priceNowTxt = trim($n->nodeValue ? $n->nodeValue : $n->textContent);
    }

    // 4) data-price / data-amount
    if ($priceNowTxt === '') {
        $n = $xp->query(".//*[@data-price or @data-amount][1]", $root)->item(0);
        if ($n) {
            $attr = $n->attributes->getNamedItem('data-price');
            if ($attr && $attr->nodeValue !== '') $priceNowTxt = $attr->nodeValue;
            else {
                $attr2 = $n->attributes->getNamedItem('data-amount');
                if ($attr2 && $attr2->nodeValue !== '') $priceNowTxt = $attr2->nodeValue;
            }
        }
    }

    // 5) struck-through (regular) price
    $nReg = $xp->query(".//*[contains(@class,'price')]//del//bdi", $root)->item(0);
    if ($nReg) $priceRegTxt = trim($nReg->textContent);

    // 6) still empty: Open Graph product meta tag
    if ($priceNowTxt === '') {
        $metaNow = $xp->query("//meta[@property='product:price:amount']/@content")->item(0);
        if ($metaNow) $priceNowTxt = trim($metaNow->nodeValue);
    }
    // 7) then JSON-LD
    if ($priceNowTxt === '' && isset($jsonld['price']) && $jsonld['price'] !== '') {
        $priceNowTxt = (string)$jsonld['price'];
    }

    $price          = to_int_price($priceNowTxt);
    $regular_price  = to_int_price($priceRegTxt);
    $sale_price     = null;

    if ($regular_price && $price && $price < $regular_price) {
        $sale_price = $price;
        $priceSaleTxt = $priceNowTxt;
    } elseif ($regular_price && !$price) {
        // Only a struck-through amount is known: report it as the price, and keep the
        // text in step with the number when no other price text was found.
        $price = $regular_price;
        if ($priceNowTxt === '') $priceNowTxt = $priceRegTxt;
    }

    // Compare against '' explicitly: with "?:" a literal "0" would be reported as null.
    return array(
        'price' => $price,
        'regular_price' => $regular_price,
        'sale_price' => $sale_price,
        'price_text' => $priceNowTxt !== '' ? $priceNowTxt : null,
        'regular_price_text' => $priceRegTxt !== '' ? $priceRegTxt : null,
        'sale_price_text' => $priceSaleTxt !== '' ? $priceSaleTxt : null,
    );
}

/**
 * Scrapes a Sazkala / WooCommerce product page through the DOM.
 *
 * @param string      $html    Page source (UTF-8).
 * @param string|null $baseUrl Page address; copied to the 'url' key when given.
 * @return array Any of: title, availability, sku, brand, categories[{name,url}],
 *               description_html, specs{key=>value}, specs_html, images[], url, plus
 *               the six price keys of extract_prices(). Empty-string values are removed.
 */
function parse_dom_sazkala($html, $baseUrl=null) {
    // Collect libxml parse warnings internally (real-world HTML is rarely valid)
    // and restore the previous error mode once the document is loaded.
    $prevErrorMode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // The XML declaration prefix makes libxml read the markup as UTF-8.
    $dom->loadHTML('<?xml encoding="UTF-8">'.$html);
    libxml_clear_errors();
    libxml_use_internal_errors($prevErrorMode);

    $xp=new DOMXPath($dom);
    $root=$dom->documentElement;

    $res = array();

    // Title
    $res['title'] = text($xp,$root,"//h1[contains(@class,'product_title') or contains(@class,'entry-title')]");

    // Availability: the raw class attribute of the stock paragraph
    $stock = $xp->query("//p[contains(@class,'stock')]")->item(0);
    if ($stock) $res['availability'] = trim($stock->getAttribute('class'));

    // SKU: prefer an element carrying the exact class token "sku". A plain substring
    // match hits WooCommerce's "sku_wrapper" first and would return the label together
    // with the code; it is kept only as a fallback.
    $sku = text($xp,$root,"//*[contains(concat(' ', normalize-space(@class), ' '), ' sku ')]");
    if ($sku==='') $sku = text($xp,$root,"//*[@class='sku' or contains(@class,'sku')]");
    if ($sku!=='') $res['sku']=$sku;

    // Brand
    $brand = text($xp,$root,"//div[contains(@class,'rb_product_brand_image')]//img/@alt | //a[contains(@href,'/brand/') or contains(@href,'/brand-')]");
    if ($brand!=='') $res['brand']=$brand;

    // Categories (breadcrumb links)
    $cats=array();
    foreach($xp->query("//p[@id='breadcrumbs']//a | //nav[contains(@class,'breadcrumb')]//a") as $a){
        $t=trim($a->textContent); $href=$a->getAttribute('href');
        // Skip only the "home" crumb itself. A substring test would also drop real
        // categories whose name merely contains that word (e.g. "آشپزخانه").
        if($t!=='' && $t!=='خانه') $cats[]=array('name'=>$t,'url'=>$href);
    }
    if ($cats) $res['categories']=$cats;

    // Description
    $descNode = $xp->query("//div[@id='about_product' and contains(@class,'product_content')]")->item(0);
    if (!$descNode) {
        $descNode = $xp->query("//div[@id='tab-description'] | //div[contains(@class,'woocommerce-Tabs-panel--description')] | //div[contains(@class,'woocommerce-product-details__short-description')]")->item(0);
    }
    if ($descNode) {
        $res['description_html'] = node_html($dom, $descNode);
    }

    // Technical specifications: theme table first, then the stock WooCommerce
    // attributes table (later rows overwrite earlier ones with the same label).
    $attrs = array();
    foreach($xp->query("//div[@id='product_info']//table[contains(@class,'product_meta_detail_table')]//tr") as $tr){
        $k = text($xp,$tr,".//td[contains(@class,'item_label')]");
        $v = text($xp,$tr,".//td[contains(@class,'item_value')]");
        if ($k!=='' && $v!=='') $attrs[$k]=$v;
    }
    foreach($xp->query("//table[contains(@class,'shop_attributes')]//tr") as $tr){
        $k = text($xp,$tr,".//th");
        $v = text($xp,$tr,".//td");
        if ($k!=='' && $v!=='') $attrs[$k]=$v;
    }
    if (!empty($attrs)) $res['specs'] = $attrs;

    $specsNode = $xp->query("//div[@id='product_info']//table[contains(@class,'product_meta_detail_table')]")->item(0);
    if (!$specsNode) $specsNode = $xp->query("//table[contains(@class,'shop_attributes')]")->item(0);
    if ($specsNode) $res['specs_html'] = node_html($dom, $specsNode);

    // Images: the full-size URL (data-large_image) when present, else src
    $imgs=array();
    foreach($xp->query("//div[contains(@class,'rb_product_main')]//img | //div[contains(@class,'product-images')]//img | //figure[contains(@class,'woocommerce-product-gallery')]//img") as $img){
        $src='';
        if ($img->hasAttribute('data-large_image')) $src = $img->getAttribute('data-large_image');
        if ($src==='') $src = $img->getAttribute('src');
        if($src) $imgs[]=$src;
    }
    $imgs=array_values(array_unique(array_filter($imgs)));
    if ($imgs) $res['images']=$imgs;

    // Prices
    $prices = extract_prices($xp, $root, array());
    $res = array_merge($res, $prices);

    if ($baseUrl) $res['url']=$baseUrl;

    // Remove empty-string values
    foreach ($res as $k=>$v) {
        if (is_string($v) && $v==='') unset($res[$k]);
    }
    return $res;
}

// -------- MAIN --------
// Reads the input, scrapes it through JSON-LD and the DOM, merges both results and
// prints {"ok":true,"product":{...}}; any failure is reported as
// {"ok":false,"error":"..."} with HTTP status 500.
try {
    list($url, $html, $debug) = get_input_html();

    $fromJsonLd = parse_jsonld($html);
    $fromDom    = parse_dom_sazkala($html, $url ? $url : null);

    // Merge: DOM values win; description_html falls back to JSON-LD when the DOM has none
    $product = array_merge($fromJsonLd, $fromDom);
    if ((!isset($fromDom['description_html']) || $fromDom['description_html']==='') && isset($fromJsonLd['description_html'])) {
        $product['description_html'] = $fromJsonLd['description_html'];
    }

    // JSON-LD calls the product title "name"; use it when the DOM produced no title
    // so the documented "title" field is still filled.
    if ((!isset($product['title']) || $product['title']==='')
        && isset($fromJsonLd['name']) && is_string($fromJsonLd['name']) && $fromJsonLd['name']!=='') {
        $product['title'] = $fromJsonLd['name'];
    }

    // Numeric price missing from the DOM but present in JSON-LD
    if (!isset($product['price']) && isset($fromJsonLd['price'])) {
        $product['price'] = to_int_price((string)$fromJsonLd['price']);
        $product['price_text'] = (string)$fromJsonLd['price'];
    }
    if ($url && (!isset($product['url']) || $product['url']==='')) $product['url'] = $url;

    // JSON_PARTIAL_OUTPUT_ON_ERROR: a value that cannot be encoded (e.g. invalid UTF-8
    // in the echoed url) becomes null instead of turning the whole response into an
    // empty body.
    $json = json_encode(
        array('ok'=>true, 'product'=>$product),
        JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES|JSON_PRETTY_PRINT|JSON_PARTIAL_OUTPUT_ON_ERROR
    );
    if ($json === false) {
        throw new Exception('failed to encode result: ' . json_last_error_msg());
    }
    echo $json;

} catch (Throwable $e) {
    http_response_code(500);
    echo json_encode(
        array('ok'=>false, 'error'=>$e->getMessage()),
        JSON_UNESCAPED_UNICODE|JSON_PRETTY_PRINT|JSON_PARTIAL_OUTPUT_ON_ERROR
    );
}