<?php
/**
 * Product Info API for zibaloun.com (WooCommerce)
 * Input:  GET ?url=<product-page-url>
 * Output: JSON (UTF-8) with robust category/brand extraction and availability detection
 */

// Declare the response body as JSON encoded in UTF-8.
header('Content-Type: application/json; charset=utf-8');
// Send a wildcard CORS header so the response is readable from any origin.
header('Access-Control-Allow-Origin: *');
// Set UTF-8 as the default encoding for the mb_* string functions used below.
mb_internal_encoding('UTF-8');

/*---------------- helpers ----------------*/
/**
 * Send a JSON response and stop the script.
 *
 * The payload is pretty-printed with Unicode characters and slashes left
 * unescaped.
 *
 * @param mixed $data Value to serialise as the response body.
 * @param int   $code HTTP status code to send (defaults to 200).
 */
function respond($data, $code = 200) {
    $flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

    http_response_code($code);
    $body = json_encode($data, $flags);
    echo $body;

    // Nothing after the response may run.
    exit;
}
/**
 * Resolve a possibly relative reference against a base URL.
 *
 * Handled forms:
 *  - absolute http(s) URLs and references carrying any other scheme
 *    (data:, mailto:, ...) are returned unchanged;
 *  - protocol-relative references ("//host/path") inherit the base scheme;
 *  - root-relative references ("/path") are attached to the base origin;
 *  - everything else is appended to the directory of the base path.
 *
 * Dot segments ("./", "../") are not collapsed.
 *
 * @param string      $base Absolute URL of the page the reference was found on.
 * @param string|null $rel  Reference to resolve.
 * @return string|null Resolved URL, or null when $rel is empty.
 */
function to_abs_url($base, $rel) {
    if (!$rel) return null;
    if (preg_match('~^https?://~i', $rel)) return $rel;

    $parts  = parse_url($base);
    $scheme = (is_array($parts) && isset($parts['scheme'])) ? $parts['scheme'] : 'https';

    // Protocol-relative reference: only the scheme comes from the base.
    // Without this branch it would be mistaken for a root-relative path.
    if (strncmp($rel, '//', 2) === 0) return $scheme . ':' . $rel;

    // A reference that already has a scheme is absolute by definition and
    // must not be glued onto the base path.
    if (preg_match('~^[a-z][a-z0-9+.\-]*:~i', $rel)) return $rel;

    if (!$parts) return $rel;

    $host = $parts['host'] ?? '';
    $port = isset($parts['port']) ? (':'.$parts['port']) : '';
    $root = "{$scheme}://{$host}{$port}";

    if ($rel[0] === '/') return $root . $rel;

    // Drop the last path segment of the base to get its directory.
    $path = isset($parts['path']) ? preg_replace('~/[^/]*$~','/',$parts['path']) : '/';
    return $root . $path . $rel;
}
/**
 * Download a page over HTTP(S) and return its body.
 *
 * On any failure (transport error, HTTP status >= 400, empty body) the
 * function does not return: it emits a 502 JSON error through respond().
 *
 * @param string $url Absolute URL to fetch.
 * @return string Response body.
 */
function fetch_url($url) {
    // The URL originates from the request, so both the initial request and
    // any redirect are pinned to HTTP/HTTPS; other schemes libcurl supports
    // must never be reachable through this endpoint.
    // NOTE(review): redirects are still followed to any host; the caller's
    // host allow-list is only applied to the initial URL.
    $webOnly = CURLPROTO_HTTP | CURLPROTO_HTTPS;

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_PROTOCOLS => $webOnly,
        CURLOPT_REDIR_PROTOCOLS => $webOnly,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 5,
        CURLOPT_CONNECTTIMEOUT => 15,
        CURLOPT_TIMEOUT => 25,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_HTTPHEADER => [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: fa-IR,fa;q=0.9,en-US;q=0.8,en;q=0.7',
            'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36'
        ]
    ]);
    $html = curl_exec($ch);
    $err  = curl_error($ch);
    $code = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
    curl_close($ch);

    // Transport-level failure (DNS, TLS, timeout, disallowed protocol, ...).
    if ($err) respond(['ok'=>false,'error'=>"cURL error: $err"], 502);
    // Upstream answered with an error status or with no usable body.
    if ($code >= 400 || !$html) respond(['ok'=>false,'error'=>"HTTP $code fetching url"], 502);
    return $html;
}
/**
 * Convert Persian and Arabic-Indic digits to ASCII digits and strip the
 * separators (ASCII comma, ASCII space, Arabic thousands separator,
 * Arabic comma) from a string.
 *
 * @param string $s Text to normalise.
 * @return string Text with digits converted and separators removed.
 */
function normalize_digits($s) {
    $persianDigits = [
        '۰'=>'0', '۱'=>'1', '۲'=>'2', '۳'=>'3', '۴'=>'4',
        '۵'=>'5', '۶'=>'6', '۷'=>'7', '۸'=>'8', '۹'=>'9',
    ];
    $arabicDigits = [
        '٠'=>'0', '١'=>'1', '٢'=>'2', '٣'=>'3', '٤'=>'4',
        '٥'=>'5', '٦'=>'6', '٧'=>'7', '٨'=>'8', '٩'=>'9',
    ];
    $separators = [
        ',' => '',
        ' ' => '',
        '٬' => '',
        '،' => '',
    ];

    // The three tables have disjoint keys, so the union is a plain merge.
    $replacements = $persianDigits + $arabicDigits + $separators;

    return strtr($s, $replacements);
}
/**
 * Collapse whitespace in a piece of text.
 *
 * Runs of CR/LF/TAB become a single space, then any run of two or more
 * whitespace characters becomes a single space; the result is trimmed.
 *
 * @param string $s Raw text.
 * @return string Single-line, trimmed text.
 */
function clean_text($s) {
    $text = trim($s);

    // Applied in order: line breaks and tabs first, then leftover runs.
    $rules = ["/[\\r\\n\\t]+/u", "/\\s{2,}/u"];
    foreach ($rules as $rule) {
        $text = preg_replace($rule, " ", $text);
    }

    return trim($text);
}
/**
 * Serialise the children of a node as an HTML string.
 *
 * @param DOMNode $element Node whose children are serialised.
 * @return string Concatenated HTML of all child nodes ('' when there are none).
 */
function innerHTML(DOMNode $element) {
    $pieces = [];
    foreach ($element->childNodes as $child) {
        // Each child is serialised through the document that owns the node.
        $pieces[] = $element->ownerDocument->saveHTML($child);
    }
    return implode('', $pieces);
}
/**
 * Return the first node of a node list.
 *
 * @param DOMNodeList|null|false $nodelist Result of an XPath or tag query.
 * @return DOMNode|null First node, or null when the list is missing or empty.
 */
function pick_first($nodelist) {
    if (!$nodelist) {
        return null;
    }
    if (!$nodelist->length) {
        return null;
    }
    return $nodelist->item(0);
}
/**
 * Return the last path segment of a URL, ignoring trailing slashes.
 *
 * @param string $u URL to inspect.
 * @return string Last path segment, or '' when the URL has no usable path.
 */
function slug_from_url($u){
    $path = parse_url($u, PHP_URL_PATH);
    if (!$path) {
        return '';
    }

    $trimmed   = rtrim($path, '/');
    $lastSlash = strrpos($trimmed, '/');

    // No slash left: the whole remainder is the segment (possibly '').
    if ($lastSlash === false) {
        return $trimmed;
    }
    return substr($trimmed, $lastSlash + 1);
}
/**
 * Tell whether a URL contains a "/product-category/" path segment
 * (matched case-insensitively).
 *
 * @param string $href URL to test.
 * @return bool True when the segment is present.
 */
function is_product_category_link($href){
    $matched = preg_match('~/product-category/~i', $href);
    return $matched === 1;
}
/**
 * Tell whether a breadcrumb label is one of the generic "home" / "shop"
 * entries (Persian or English, matched case-insensitively against the
 * whole label).
 *
 * @param string $label Breadcrumb label.
 * @return bool True for a home/shop label.
 */
function is_home_or_shop($label){
    $matched = preg_match('~^(خانه|فروشگاه|صفحه اصلی|home|shop)$~ui', $label);
    return $matched === 1;
}
/**
 * Tell whether a category item is the top-level "brands" category rather
 * than an individual brand.
 *
 * An item with neither a url nor a slug is never treated as the root.
 * Otherwise it is the root when its slug is "brands", its URL ends in
 * "/product-category/brands", or its name is the Persian word for "brands".
 *
 * @param array $item Category item with optional 'url', 'slug', 'name' keys.
 * @return bool True when the item is the brands root.
 */
function is_brands_root($item){
    $slug = empty($item['slug']) ? null : $item['slug'];
    $url  = empty($item['url'])  ? null : $item['url'];
    $name = empty($item['name']) ? null : $item['name'];

    if ($slug === null && $url === null) {
        return false;
    }

    $bySlug = ($slug !== null) && strtolower($slug) === 'brands';
    if ($bySlug) {
        return true;
    }

    $byUrl = ($url !== null) && preg_match('~/product-category/brands/?$~i', $url);
    if ($byUrl) {
        return true;
    }

    return ($name !== null) && trim($name) === 'برندها';
}
/**
 * Tell whether a value is an associative array, i.e. an array whose keys
 * are not exactly 0..n-1 in order.
 *
 * An empty array is a (trivial) list and therefore not associative; the
 * previous range()-based comparison misreported it because range(0, -1)
 * yields [0, -1] rather than an empty list.
 *
 * @param mixed $arr Value to inspect.
 * @return bool True only for non-list arrays.
 */
function is_assoc($arr){
    if (!is_array($arr)) return false;

    $expected = 0;
    foreach ($arr as $key => $_) {
        // The first key that breaks the 0,1,2,... sequence proves it is a map.
        if ($key !== $expected) return true;
        $expected++;
    }
    return false;
}
/**
 * Tell whether an element's class attribute contains a given class name
 * as a whole whitespace-separated token.
 *
 * @param DOMElement $el    Element to inspect.
 * @param string     $token Class name to look for.
 * @return bool True when the token is present.
 */
function has_class_token(DOMElement $el, $token){
    // Collapse any whitespace run so tokens are separated by single spaces.
    $normalized = preg_replace('/\s+/', ' ', $el->getAttribute('class'));

    // Pad both sides so the first and last tokens match like the others.
    $haystack = ' ' . $normalized . ' ';
    $needle   = ' ' . $token . ' ';

    $position = strpos($haystack, $needle);
    return $position !== false;
}

/*---------------- category extractors ----------------*/
/**
 * Collect product-category entries from the page's breadcrumb markup.
 *
 * Every <a> and <span> inside an element with the "breadcrumb" class is
 * visited. Only anchors whose (absolutised) href is a product-category
 * link and whose label is not a home/shop label are kept.
 *
 * @param DOMXPath $xp       XPath evaluator for the parsed page.
 * @param string   $base_url URL of the page, used to resolve relative hrefs.
 * @return array[] Items with keys name, url, slug, is_brand, is_in_breadcrumb.
 */
function extract_categories_from_breadcrumb_html(DOMXPath $xp, $base_url){
    $found = [];
    $candidates = $xp->query("//*[contains(concat(' ', normalize-space(@class), ' '), ' breadcrumb ')]//a|//*[contains(concat(' ', normalize-space(@class), ' '), ' breadcrumb ')]//span");

    foreach ($candidates as $node){
        $text = clean_text($node->textContent);
        if ($text === '') {
            continue;
        }

        // Spans are matched by the query but carry no link target.
        $link = '';
        if ($node->nodeName === 'a') {
            $link = $node->getAttribute('href');
        }
        if ($link && !preg_match('~^https?://~',$link)) {
            $link = to_abs_url($base_url, $link);
        }

        if (!$link || !is_product_category_link($link) || is_home_or_shop($text)) {
            continue;
        }

        $found[] = [
            'name'  => $text,
            'url'   => $link,
            'slug'  => slug_from_url($link),
            'is_brand' => (bool)preg_match('~/product-category/brands/~i', $link),
            'is_in_breadcrumb' => true
        ];
    }

    return $found;
}
/**
 * Collect product-category entries from links inside elements carrying
 * the "posted_in" class.
 *
 * @param DOMXPath $xp       XPath evaluator for the parsed page.
 * @param string   $base_url URL of the page, used to resolve relative hrefs.
 * @return array[] Items with keys name, url, slug, is_brand, is_in_breadcrumb
 *                 (is_in_breadcrumb is always false for this source).
 */
function extract_categories_from_posted_in(DOMXPath $xp, $base_url){
    $found = [];
    $anchors = $xp->query("//*[contains(concat(' ', normalize-space(@class), ' '), ' posted_in ')]//a[@href]");

    foreach ($anchors as $anchor){
        $link = $anchor->getAttribute('href');
        $text = clean_text($anchor->textContent);

        if ($link && !preg_match('~^https?://~',$link)) {
            $link = to_abs_url($base_url, $link);
        }

        // Skip anything that is not a labelled product-category link.
        if (!$link || !is_product_category_link($link) || !$text || is_home_or_shop($text)) {
            continue;
        }

        $found[] = [
            'name'=>$text,
            'url'=>$link,
            'slug'=>slug_from_url($link),
            'is_brand'=>(bool)preg_match('~/product-category/brands/~i', $link),
            'is_in_breadcrumb'=>false
        ];
    }

    return $found;
}
/**
 * Collect product-category entries from JSON-LD BreadcrumbList data.
 *
 * Every <script type="application/ld+json"> is decoded; nodes found at the
 * top level, in a top-level list, or under "@graph" are inspected. For each
 * BreadcrumbList, both ListItem shapes are accepted:
 *  - "item" as an object:  {"item": {"@id"|"url": "...", "name": "..."}}
 *  - "item" as a URL string with the label on the ListItem itself:
 *    {"name": "...", "item": "https://..."}
 *
 * Only product-category links with a non home/shop label are kept, and the
 * result is de-duplicated by URL (first occurrence wins).
 *
 * @param DOMDocument $dom      Parsed page.
 * @param string      $base_url URL of the page, used to resolve relative links.
 * @return array[] Items with keys name, url, slug, is_brand, is_in_breadcrumb.
 */
function extract_categories_from_jsonld(DOMDocument $dom, $base_url){
    $items = [];
    $scripts = $dom->getElementsByTagName('script');
    foreach ($scripts as $s){
        if (strtolower($s->getAttribute('type')) !== 'application/ld+json') continue;
        $json = trim($s->nodeValue);
        if ($json==='') continue;
        $decoded = json_decode($json, true);
        if (!is_array($decoded)) continue;

        $candidates = [];
        if (isset($decoded['@type']) || isset($decoded[0])) $candidates[] = $decoded;
        if (isset($decoded['@graph'])) $candidates[] = $decoded['@graph'];

        foreach ($candidates as $cand){
            // "@graph" is not guaranteed to be an array in malformed markup.
            if (!is_array($cand)) continue;
            $arr = is_assoc($cand) ? [$cand] : $cand;
            foreach ($arr as $item){
                if (!is_array($item)) continue;
                $type = isset($item['@type']) ? (is_array($item['@type']) ? implode(',',$item['@type']) : $item['@type']) : '';
                if (!is_string($type) || stripos($type,'BreadcrumbList') === false) continue;
                if (empty($item['itemListElement']) || !is_array($item['itemListElement'])) continue;

                foreach ($item['itemListElement'] as $pos){
                    if (!is_array($pos)) continue;

                    $target = $pos['item'] ?? null;
                    if (is_array($target)) {
                        // Object form: label and link live on the nested item;
                        // fall back to the ListItem's own name when absent.
                        $name = $target['name'] ?? ($pos['name'] ?? '');
                        $href = $target['@id'] ?? ($target['url'] ?? '');
                    } elseif (is_string($target)) {
                        // String form: "item" is the URL, "name" sits beside it.
                        $name = $pos['name'] ?? '';
                        $href = $target;
                    } else {
                        continue;
                    }

                    // Guard against structured (non-string) values before
                    // handing them to the string helpers.
                    if (!is_string($name) || !is_string($href)) continue;

                    $name = clean_text($name);
                    if ($href && !preg_match('~^https?://~',$href)) $href = to_abs_url($base_url, $href);
                    if ($name && $href && is_product_category_link($href) && !is_home_or_shop($name)){
                        $items[] = [
                            'name'=>$name,
                            'url'=>$href,
                            'slug'=>slug_from_url($href),
                            'is_brand'=>(bool)preg_match('~/product-category/brands/~i', $href),
                            'is_in_breadcrumb'=>true
                        ];
                    }
                }
            }
        }
    }

    // unique by URL, keeping the first occurrence
    $uniq = []; $out=[];
    foreach ($items as $it){
        if (!isset($uniq[$it['url']])) { $uniq[$it['url']] = 1; $out[] = $it; }
    }
    return $out;
}

/*---------------- availability (robust) ----------------*/
/**
 * Read product availability from the page's JSON-LD structured data.
 *
 * Each <script type="application/ld+json"> is decoded. The document may be
 * a single node, a top-level list of nodes, or carry its nodes under
 * "@graph"; all three layouts are flattened into one candidate list.
 * For each candidate whose @type mentions "Product" its "offers" are
 * inspected, and a candidate whose @type mentions "Offer" is inspected
 * itself. The first offer that yields a recognised availability wins.
 *
 * @param DOMDocument $dom Parsed page.
 * @return string|null 'in_stock', 'out_of_stock', 'backorder', or null when
 *                     no recognised availability is present.
 */
function get_jsonld_availability(DOMDocument $dom){
    foreach ($dom->getElementsByTagName('script') as $s){
        if (strtolower($s->getAttribute('type')) !== 'application/ld+json') continue;
        $json = trim($s->nodeValue);
        if ($json==='') continue;
        $obj = json_decode($json, true);
        if (!is_array($obj) || !$obj) continue;

        // A top-level JSON array is a list of nodes, not a node itself.
        $roots = isset($obj[0]) ? $obj : [$obj];

        // normalize to a flat list of nodes to inspect (@graph members first)
        $cands = [];
        foreach ($roots as $root){
            if (!is_array($root)) continue;
            if (isset($root['@graph']) && is_array($root['@graph'])) $cands = array_merge($cands, $root['@graph']);
            $cands[] = $root;
        }

        foreach ($cands as $it){
            if (!is_array($it)) continue;

            // Offers may be nested under Product or be Offer directly.
            $offersList = [];

            if (!empty($it['@type'])) {
                $t = is_array($it['@type']) ? implode(',', $it['@type']) : $it['@type'];
                if (stripos($t,'Product') !== false && !empty($it['offers'])) {
                    $offersList[] = $it['offers'];
                }
                if (stripos($t,'Offer') !== false) {
                    $offersList[] = $it;
                }
            }

            foreach ($offersList as $off){
                if (is_array($off) && isset($off[0])) {
                    // "offers" given as a list of offers
                    foreach ($off as $o) {
                        $res = _availability_from_offer($o);
                        if ($res) return $res;
                    }
                } else {
                    $res = _availability_from_offer($off);
                    if ($res) return $res;
                }
            }
        }
    }
    return null;
}

/**
 * Map one JSON-LD offer's "availability" to the API's status vocabulary.
 *
 * The value may be a plain string/URL or an object holding the URL under
 * "@id" or "url". Matching is a case-insensitive substring test, so both
 * "InStock" and "https://schema.org/InStock" are recognised.
 *
 * @param mixed $off Decoded offer node.
 * @return string|null 'in_stock', 'out_of_stock', 'backorder', or null.
 */
function _availability_from_offer($off){
    if (!is_array($off)) return null;
    $a = $off['availability'] ?? null;
    if (!$a) return null;
    if (is_array($a)) $a = ($a['@id'] ?? ($a['url'] ?? ''));
    // Anything still structured cannot be an availability URL.
    if (!is_scalar($a)) return null;
    $a = strtolower((string)$a);
    if (strpos($a,'instock')!==false)    return 'in_stock';
    // schema.org SoldOut means the same as OutOfStock for this API.
    if (strpos($a,'outofstock')!==false || strpos($a,'soldout')!==false) return 'out_of_stock';
    if (strpos($a,'backorder')!==false)  return 'backorder';
    return null;
}

/**
 * Determine product availability, trying progressively weaker signals:
 *  1) JSON-LD offer data
 *  2) stock classes on the product root element
 *     (outofstock / onbackorder / instock)
 *  3) text of the first ".stock" element (Persian and English phrases)
 *  4) an add-to-cart button that is neither disabled nor labelled
 *     as out of stock
 *
 * @param DOMXPath    $xp     XPath evaluator for the parsed page.
 * @param DOMDocument $dom    Parsed page.
 * @param string|null $source Receives the signal that decided the result:
 *                            jsonld | class | stock_text | button | unknown.
 * @return string|null 'in_stock', 'out_of_stock', 'backorder', or null
 *                     when nothing conclusive was found.
 */
function detect_availability(DOMXPath $xp, DOMDocument $dom, &$source = null){
    // 1) JSON-LD
    $fromJsonLd = get_jsonld_availability($dom);
    if ($fromJsonLd) {
        $source = 'jsonld';
        return $fromJsonLd;
    }

    // 2) Classes on product root (negative states are checked first)
    $roots = $xp->query("//*[starts-with(@id, 'product-') and contains(concat(' ', normalize-space(@class), ' '), ' product ')]");
    if ($roots->length){
        /** @var DOMElement $rootEl */
        $rootEl = $roots->item(0);
        $statusByClass = [
            'outofstock'  => 'out_of_stock',
            'onbackorder' => 'backorder',
            'instock'     => 'in_stock',
        ];
        foreach ($statusByClass as $className => $status){
            if (has_class_token($rootEl, $className)) {
                $source = 'class';
                return $status;
            }
        }
    }

    // 3) .stock text; groups are tried in order so that negative phrases
    //    win over the shorter positive phrases they contain.
    $stockNodes = $xp->query("//*[contains(concat(' ', normalize-space(@class), ' '), ' stock ')]");
    if ($stockNodes->length){
        $text = mb_strtolower(trim(preg_replace("/\\s+/u"," ", $stockNodes->item(0)->textContent)));
        $phrasesByStatus = [
            'out_of_stock' => ['ناموجود', 'موجود نیست', 'موجود نمی', 'اتمام موجود', 'اتمام موجودی', 'ناموجود شد', 'out of stock', 'unavailable', 'sold out'],
            'backorder'    => ['سفارش مجدد', 'backorder', 'available on backorder'],
            'in_stock'     => ['موجود', 'in stock', 'available'],
        ];
        foreach ($phrasesByStatus as $status => $phrases){
            foreach ($phrases as $phrase){
                if (mb_strpos($text, $phrase) !== false) {
                    $source = 'stock_text';
                    return $status;
                }
            }
        }
    }

    // 4) Add-to-cart button (only if clearly active)
    $buttons = $xp->query("//form[contains(@class,'cart')]//button[contains(@class,'single_add_to_cart_button')]");
    if ($buttons->length){
        /** @var DOMElement $button */
        $button = $buttons->item(0);
        $label  = mb_strtolower(trim($button->textContent));

        $isDisabled = $button->hasAttribute('disabled')
            || has_class_token($button, 'disabled')
            || has_class_token($button, 'wc-variation-is-unavailable');
        $saysOutOfStock = mb_strpos($label, 'ناموجود') !== false
            || mb_strpos($label, 'out of stock') !== false;

        if (!($isDisabled || $saysOutOfStock)) {
            $source = 'button';
            return 'in_stock';
        }
    }

    $source = 'unknown';
    return null; // unknown
}

/*---------------- main parser ----------------*/
/**
 * Parse a WooCommerce product page into the API response structure.
 *
 * Extracts name, SKU, price, availability, images, descriptions,
 * categories/brand and a few meta tags, and records a warning for each
 * major field that could not be found.
 *
 * Price value: when the price node contains several amounts, the current
 * price is taken from the amount inside <ins> (sale price); otherwise from
 * the first amount that is not inside <del> (for a price range this is the
 * first amount shown). Only when no amount element exists are all digits of
 * the raw price text used. 'raw' always holds the complete price text.
 *
 * @param string $url  URL the page was fetched from (used to resolve links).
 * @param string $html Raw HTML of the page.
 * @return array Response structure; see the $data skeleton below for keys.
 */
function parse_product($url, $html) {
    libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // The XML prolog forces libxml to read the markup as UTF-8.
    $dom->loadHTML('<?xml encoding="utf-8" ?>' . $html, LIBXML_NOWARNING|LIBXML_NOERROR);
    // Parse errors are not reported anywhere; drop them instead of letting
    // them pile up in libxml's internal buffer.
    libxml_clear_errors();
    $xp  = new DOMXPath($dom);

    // XPath predicate body matching an element by one whole class token.
    $hasClass = function ($token) {
        return "contains(concat(' ', normalize-space(@class), ' '), ' {$token} ')";
    };

    $data = [
        'ok' => true,
        'source_url' => $url,
        'name' => null,
        'brand' => null,
        'sku' => null,
        'price' => ['value'=>null,'currency'=>null,'raw'=>null],
        'availability' => null,
        'availability_source' => null,
        'images' => [],
        'thumbnail' => null,
        'description_html' => null,
        'description_text' => null,
        'short_description_html' => null,
        'short_description_text' => null,
        'breadcrumbs' => [],
        'categories' => [],
        'category_path' => null,
        'meta' => [
            'og_title' => null,
            'og_description' => null,
            'og_image' => null,
            'canonical' => null,
            'modified_time' => null
        ],
        'warnings' => []
    ];

    /* name */
    $n = $xp->query('//*[' . $hasClass('product_title') . ']');
    if ($n->length) $data['name'] = clean_text($n->item(0)->textContent);
    if (!$data['name']) {
        $ogt = $xp->query("//meta[@property='og:title']/@content");
        if ($ogt->length) $data['name'] = clean_text($ogt->item(0)->nodeValue);
    }

    /* sku */
    $sku = $xp->query('//*[' . $hasClass('sku') . ']');
    if ($sku->length) $data['sku'] = clean_text($sku->item(0)->textContent);

    /* price (Woodmart/Elementor) */
    $amountExpr = '*[' . $hasClass('woocommerce-Price-amount') . ']';
    $priceNode = pick_first($xp->query('//*[' . $hasClass('wd-single-price') . ']//*[' . $hasClass('price') . ']'));
    if (!$priceNode) $priceNode = pick_first($xp->query('//*[' . $hasClass('wd-single-price') . ']'));
    if (!$priceNode) $priceNode = pick_first($xp->query('//*[' . $hasClass('entry-summary') . ' or ' . $hasClass('summary') . ']//*[' . $hasClass('price') . ']'));

    if ($priceNode) {
        $raw = clean_text($priceNode->textContent);
        if ($raw === '') {
            $amtNode = pick_first($xp->query('.//' . $amountExpr, $priceNode));
            if ($amtNode) $raw = clean_text($amtNode->textContent);
        }
        $data['price']['raw'] = $raw;

        $currency = null;
        if (preg_match('~تومان~u', $raw)) $currency = 'IRT';
        if (preg_match('~ریال~u', $raw))  $currency = 'IRR';
        if (!$currency) {
            $cSym = pick_first($xp->query('.//*[' . $hasClass('woocommerce-Price-currencySymbol') . ']', $priceNode));
            if ($cSym) {
                $sym = clean_text($cSym->textContent);
                if ($sym === 'تومان') $currency = 'IRT';
                elseif ($sym === 'ریال') $currency = 'IRR';
            }
        }
        $data['price']['currency'] = $currency ?: 'IRT';

        // Pick ONE amount to read the value from. Joining the digits of the
        // whole price text would glue the old and new price of a sale item
        // (or both ends of a range) into a single meaningless number.
        $current = pick_first($xp->query('.//ins//' . $amountExpr, $priceNode));
        if (!$current) $current = pick_first($xp->query('.//' . $amountExpr . '[not(ancestor::del)]', $priceNode));

        $valueTexts = [];
        if ($current) $valueTexts[] = clean_text($current->textContent);
        $valueTexts[] = $raw; // last resort: every digit of the raw text

        foreach ($valueTexts as $valueText) {
            if (!preg_match_all('~[\d۰-۹٠-٩]+~u', $valueText, $m)) continue;
            $num = normalize_digits(implode('', $m[0]));
            if ($num !== '') {
                $data['price']['value'] = (int)$num;
                break;
            }
        }
    }

    /* availability (robust) */
    $src = null;
    $avail = detect_availability($xp, $dom, $src);
    $data['availability'] = $avail;                 // in_stock | out_of_stock | backorder | null
    $data['availability_source'] = $src;            // jsonld | class | stock_text | button | unknown

    /* images */
    $imgs = [];
    foreach ($xp->query('//*[' . $hasClass('woocommerce-product-gallery__image') . ']//img') as $img) {
        // Lazy-load attributes take precedence over the plain src.
        $srcImg = $img->getAttribute('data-src') ?: $img->getAttribute('data-large_image') ?: $img->getAttribute('src');
        if ($srcImg) $imgs[] = $srcImg;
    }
    if (empty($imgs)) {
        $ogimg = $xp->query("//meta[@property='og:image']/@content");
        if ($ogimg->length) $imgs[] = $ogimg->item(0)->nodeValue;
    }
    // absolutise and de-duplicate, keeping first-seen order
    $abs = []; $seen = [];
    foreach ($imgs as $imgSrc) {
        $u = to_abs_url($url, $imgSrc);
        if ($u && !isset($seen[$u])) { $seen[$u] = 1; $abs[] = $u; }
    }
    $data['images']    = $abs;
    $data['thumbnail'] = $abs[0] ?? null;

    /* descriptions */
    $descNode = $xp->query('//*[' . $hasClass('woocommerce-Tabs-panel--description') . ']');
    if ($descNode->length) {
        $descHtml = innerHTML($descNode->item(0));
        $data['description_html'] = trim($descHtml);
        $data['description_text'] = clean_text(strip_tags($descHtml));
    }
    $shortNode = $xp->query('//*[' . $hasClass('woocommerce-product-details__short-description') . ']');
    if ($shortNode->length) {
        $shortHtml = innerHTML($shortNode->item(0));
        $data['short_description_html'] = trim($shortHtml);
        $data['short_description_text'] = clean_text(strip_tags($shortHtml));
    }

    /* --------- CATEGORIES (enhanced) --------- */
    $cats = [];

    // 1) breadcrumb HTML
    $bc = extract_categories_from_breadcrumb_html($xp, $url);
    foreach ($bc as $it) $cats[$it['url']] = $it;

    // 2) JSON-LD BreadcrumbList
    $jl = extract_categories_from_jsonld($dom, $url);
    foreach ($jl as $it) {
        if (!isset($cats[$it['url']])) $cats[$it['url']] = $it;
        else $cats[$it['url']]['is_in_breadcrumb'] = ($cats[$it['url']]['is_in_breadcrumb'] || $it['is_in_breadcrumb']);
    }

    // 3) posted_in
    $pi = extract_categories_from_posted_in($xp, $url);
    foreach ($pi as $it) if (!isset($cats[$it['url']])) $cats[$it['url']] = $it;

    // ordered all (with brands)
    $ordered = array_values($cats);

    // keep breadcrumb order when present; entries outside the breadcrumb
    // go last, sorted by name
    if (!empty($bc)) {
        $pref  = array_column($bc,'url');
        usort($ordered, function($a,$b) use ($pref){
            $ia = array_search($a['url'], $pref, true);
            $ib = array_search($b['url'], $pref, true);
            $ia = ($ia===false)?PHP_INT_MAX:$ia;
            $ib = ($ib===false)?PHP_INT_MAX:$ib;
            if ($ia === $ib) return strcmp($a['name'],$b['name']);
            return $ia <=> $ib;
        });
    }

    // product-category only, remove home/shop
    $ordered = array_values(array_filter($ordered, function($it){
        return ($it['name'] && is_product_category_link($it['url']) && !is_home_or_shop($it['name']));
    }));

    // brand detection (from brand categories, excluding brands root)
    $brand = null;
    foreach ($ordered as $it){
        if (!empty($it['is_brand']) && !is_brands_root($it)) {
            $brand = $it['name']; // last valid brand
        }
    }
    // fallback: look for a known brand name inside the product name
    if (!$brand && !empty($data['name'])) {
        $brandCandidates = ['Dunhill','Lacoste','Calvin Klein','Chanel','Dior','NICE','Salvador Dali','Narciso','Versace','Armaf','Mont Blanc','Mercedes-Benz','Jaguar','Davidoff','Hugo Boss','Adidas','Azzaro','Bvlgari','Burberry','Carolina Herrera','YSL','Gucci','Tom Ford','Creed','Prada','Givenchy','Joop','Bentley','Ferrari','Ajmal','Lattafa','Nishane'];
        foreach ($brandCandidates as $bcName) {
            if (mb_stripos($data['name'], $bcName) !== false) { $brand = $bcName; break; }
        }
    }
    $data['brand'] = $brand ?: ($data['brand'] ?? null);

    // remove all brand items from categories (both brands root & brand itself)
    $ordered = array_values(array_filter($ordered, function($it){
        return empty($it['is_brand']);
    }));

    // final categories (without brands)
    $data['categories'] = $ordered;

    // category_path from breadcrumb (without brands)
    $breadcrumb_chain = array_values(array_filter($ordered, function($x){ return !empty($x['is_in_breadcrumb']); }));
    if (empty($breadcrumb_chain)) $breadcrumb_chain = $ordered;
    if (!empty($breadcrumb_chain)) {
        $names = array_column($breadcrumb_chain, 'name');
        $data['category_path'] = implode(' > ', $names);
    }

    /* meta */
    $ogt = $xp->query("//meta[@property='og:title']/@content");
    if ($ogt->length) $data['meta']['og_title'] = clean_text($ogt->item(0)->nodeValue);
    $ogd = $xp->query("//meta[@property='og:description']/@content");
    if ($ogd->length) $data['meta']['og_description'] = clean_text($ogd->item(0)->nodeValue);
    // Reported whenever the tag exists. Gating it on a missing thumbnail
    // left this field permanently null, because og:image is also the
    // thumbnail fallback.
    $ogi = $xp->query("//meta[@property='og:image']/@content");
    if ($ogi->length) $data['meta']['og_image'] = to_abs_url($url, $ogi->item(0)->nodeValue);
    $can = $xp->query("//link[@rel='canonical']/@href");
    if ($can->length) $data['meta']['canonical'] = $can->item(0)->nodeValue;
    $mod = $xp->query("//meta[@property='article:modified_time']/@content");
    if ($mod->length) $data['meta']['modified_time'] = $mod->item(0)->nodeValue;

    /* warnings */
    if (!$data['name']) $data['warnings'][] = 'Product name not found';
    if (!$data['price']['value']) $data['warnings'][] = 'Price not detected';
    if (!$data['images']) $data['warnings'][] = 'No images found';
    if (!$data['categories']) $data['warnings'][] = 'No categories found (after removing brands)';
    if (!$data['availability']) $data['warnings'][] = 'Availability not detected';

    return $data;
}

/*---------------- entrypoint ----------------*/
// Read and validate the ?url= parameter. PHP turns "?url[]=x" into an
// array, which must be rejected as a client error before it reaches the
// string functions below.
$rawUrl = $_GET['url'] ?? '';
if (!is_string($rawUrl)) respond(['ok'=>false,'error'=>'Invalid URL'], 400);
$url = trim($rawUrl);
if (!$url) respond(['ok'=>false,'error'=>'Missing ?url parameter'], 400);
if (!preg_match('~^https?://~i', $url)) respond(['ok'=>false,'error'=>'Invalid URL'], 400);

// security: only zibaloun.com
// Host names are case-insensitive, so compare in lower case; an
// unparsable URL yields an empty host and is rejected.
$host = parse_url($url, PHP_URL_HOST);
$host = is_string($host) ? strtolower($host) : '';
$allowedHosts = ['zibaloun.com','www.zibaloun.com'];
if (!in_array($host, $allowedHosts, true)) respond(['ok'=>false,'error'=>'Host not allowed. Only zibaloun.com supported.'], 403);

// Fetch the page, parse it and emit the result; each helper reports its
// own failure through respond() and does not return in that case.
$html = fetch_url($url);
$result = parse_product($url, $html);
respond($result, 200);
