<?php
/**
 * Woo Product Details Scraper API (PHP 7+ compatible)
 *
 * GET params:
 *   url   (required) : full product URL, e.g. https://www.petmarket24.ir/product/papi-dry-food-mofeed/
 *   debug (optional) : 1 to include diagnostics
 *
 * Response: application/json; charset=utf-8
 */

// Response headers sent for every request: UTF-8 JSON body, readable from any origin.
foreach ([
    'Content-Type: application/json; charset=utf-8',
    'Access-Control-Allow-Origin: *',
] as $responseHeader) {
    header($responseHeader);
}

// --------- helpers (PHP 7 safe) ----------
/**
 * Tells whether $haystack begins with $needle (byte-wise, case-sensitive).
 * An empty needle always matches.
 */
function starts_with($haystack, $needle){
    if ($needle === '') {
        return true;
    }
    $position = strpos($haystack, $needle);
    return $position === 0;
}
/**
 * Tells whether $haystack finishes with $needle (byte-wise, case-sensitive).
 * An empty needle always matches.
 */
function ends_with($haystack, $needle){
    if ($needle === '') {
        return true;
    }
    // Take as many trailing bytes as the needle is long and compare them.
    $tail = substr($haystack, -strlen($needle));
    return $tail === $needle;
}
/**
 * Collapses every run of whitespace to a single space and trims both ends.
 *
 * @param mixed $s Value to normalise; it is cast to string first.
 * @return string Normalised text ('' for null/empty input).
 */
function norm_ws($s){
    $text = (string)$s;
    $collapsed = preg_replace('/\s+/u', ' ', $text);
    if ($collapsed === null) {
        // The /u pattern fails (returns null) on malformed UTF-8; fall back to a byte-wise
        // ASCII-whitespace collapse so the text is kept instead of being silently dropped.
        $collapsed = preg_replace('/[ \t\r\n\f\x0B]+/', ' ', $text);
    }
    return trim((string)$collapsed);
}
/**
 * Emits a JSON error document and terminates the script.
 *
 * @param string $message Human-readable error text placed under "error".
 * @param int    $code    HTTP status code to send (only if headers are not sent yet).
 * @param array  $extra   Additional keys merged into the payload (may override the defaults).
 */
function json_error($message, $code=400, $extra=[]){
    if (!headers_sent()) http_response_code($code);
    $payload = array_merge(['ok'=>false,'error'=>$message], $extra);
    // JSON_PARTIAL_OUTPUT_ON_ERROR: values that cannot be encoded (e.g. a caller-supplied URL
    // containing malformed UTF-8) become null instead of making json_encode() return false.
    $json = json_encode($payload, JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES|JSON_PRETTY_PRINT|JSON_PARTIAL_OUTPUT_ON_ERROR);
    // Never answer with an empty body, even if encoding still fails.
    echo $json === false ? '{"ok":false,"error":"json_encode failed"}' : $json;
    exit;
}
// Promote PHP warnings/notices to ErrorException so they can be caught and reported as JSON.
// Errors silenced with @ or excluded by the current error_reporting mask are not promoted;
// returning false hands them back to PHP's standard handling.
set_error_handler(function($sev,$msg,$file,$line){
    if (!(error_reporting() & $sev)) {
        return false;
    }
    throw new ErrorException($msg,0,$sev,$file,$line);
});

// --------- input ----------
// debug: only the literal value "1" switches diagnostics on.
$debug = isset($_GET['debug']) && $_GET['debug'] === '1';
// url: query parameters can arrive as arrays (?url[]=...); anything that is not a string is
// treated as missing so trim() is never called on a non-string.
$rawUrl = isset($_GET['url']) ? $_GET['url'] : '';
$url    = is_string($rawUrl) ? trim($rawUrl) : '';
if ($url==='') json_error('پارامتر url الزامی است.');
if (!preg_match('~^https?://~i',$url)) json_error('url باید با http(s) شروع شود.');

// --------- fetch ----------
$ua = 'Mozilla/5.0 (compatible; ProductDetailsScraper/1.1; +https://example.test)';
$html = '';
$http_info = []; $curl_err = null;

try {
    if (function_exists('curl_init')) {
        // NOTE(review): the target URL is caller-supplied and hosts are not filtered, so this
        // endpoint can still be pointed at internal addresses; consider a host allow-list.
        $ch = curl_init($url);
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER=>true,
            CURLOPT_FOLLOWLOCATION=>true,
            CURLOPT_MAXREDIRS=>5,
            CURLOPT_CONNECTTIMEOUT=>10,
            CURLOPT_TIMEOUT=>25,
            CURLOPT_SSL_VERIFYPEER=>true,
            CURLOPT_SSL_VERIFYHOST=>2,
            // Only ever speak HTTP(S), including after redirects.
            CURLOPT_PROTOCOLS=>CURLPROTO_HTTP|CURLPROTO_HTTPS,
            CURLOPT_REDIR_PROTOCOLS=>CURLPROTO_HTTP|CURLPROTO_HTTPS,
            CURLOPT_USERAGENT=>$ua,
            CURLOPT_HTTPHEADER=>['Accept: text/html,application/xhtml+xml;q=0.9,*/*;q=0.8'],
        ]);
        $html = curl_exec($ch);
        $http_info = curl_getinfo($ch) ?: [];
        $curl_err = curl_error($ch);
        curl_close($ch);
    } else {
        $context = stream_context_create([
            'http'=>[
                'method'=>'GET',
                'header'=>"User-Agent: $ua\r\nAccept: text/html,application/xhtml+xml\r\n",
                'timeout'=>25
            ],
            'ssl'=>['verify_peer'=>true,'verify_peer_name'=>true,'allow_self_signed'=>false]
        ]);
        // A failed stream fetch raises a warning (promoted to an exception by the script's
        // error handler); catch it here so it is reported as a 502 fetch failure below.
        try {
            $html = file_get_contents($url,false,$context);
        } catch (Throwable $fetchError) {
            $html = false;
            $curl_err = $fetchError->getMessage();
        }
        $http_info = ['note'=>'curl_disabled_fallback'];
    }
    // Treat transport failures, empty bodies and HTTP error statuses alike: nothing to parse.
    $status = isset($http_info['http_code']) ? (int)$http_info['http_code'] : 0;
    if (!$html || $status >= 400) {
        json_error('عدم دریافت محتوا از آدرس هدف.', 502, ['curl_error'=>$curl_err,'target'=>$url,'http_code'=>$status]);
    }

    // --------- parse DOM ----------
    if (!class_exists('DOMDocument')) json_error('DOM extension روی سرور فعال نیست.', 500, ['fix'=>'enable php-xml / dom']);
    libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // The XML prolog prefix is an encoding hint so the markup is read as UTF-8.
    $dom->loadHTML('<?xml encoding="utf-8" ?>'.$html, LIBXML_NOWARNING|LIBXML_NOERROR);
    libxml_clear_errors();
    $xp = new DOMXPath($dom);

    // --------- local helpers ----------
    // First node matching $q, or null. When $ctx is given the query is evaluated relative to
    // that node (needed for the ".//..." lookups inside table rows and review items).
    $selOne = function($xp,$q,$ctx=null) {
        $n = $ctx ? $xp->query($q,$ctx) : $xp->query($q);
        return ($n && $n->length) ? $n->item(0) : null;
    };
    // Maps Persian (U+06F0..U+06F9) and Arabic-Indic (U+0660..U+0669) digits to ASCII so
    // numeric casts and digit regexes produce real numbers on localized pages.
    $toAsciiDigits = function($s){
        return strtr((string)$s, [
            "\u{06F0}"=>'0',"\u{06F1}"=>'1',"\u{06F2}"=>'2',"\u{06F3}"=>'3',"\u{06F4}"=>'4',
            "\u{06F5}"=>'5',"\u{06F6}"=>'6',"\u{06F7}"=>'7',"\u{06F8}"=>'8',"\u{06F9}"=>'9',
            "\u{0660}"=>'0',"\u{0661}"=>'1',"\u{0662}"=>'2',"\u{0663}"=>'3',"\u{0664}"=>'4',
            "\u{0665}"=>'5',"\u{0666}"=>'6',"\u{0667}"=>'7',"\u{0668}"=>'8',"\u{0669}"=>'9',
        ]);
    };
    // Extracts "<n> out of 5" from a star-rating aria-label; returns float or null.
    // \x{0627}\x{0632} is the Persian word for "of", accepted as an alternative separator
    // (presumably what translated storefronts emit - verify against the target site).
    $parseRating = function($label) use ($toAsciiDigits){
        if (preg_match('~([\d\.,]+)\s*(?:out\s*of|\x{0627}\x{0632})\s*5~iu', $toAsciiDigits($label), $hit)) {
            return (float)str_replace(',','.',$hit[1]);
        }
        return null;
    };

    // absolute URL resolver (based on the requested URL's scheme, host, port and directory)
    $bp = parse_url($url); $scheme = $bp['scheme']??'https'; $host=$bp['host']??''; $port=isset($bp['port'])?':'.$bp['port']:'';
    $path = $bp['path'] ?? '/'; $dir = preg_replace('~/[^/]*$~','/',$path);
    $abs = function($href) use($scheme,$host,$port,$dir){
        $href = trim($href);
        if ($href==='') return $href;
        if (preg_match('~^https?://~i',$href)) return $href;
        if (starts_with($href,'//')) return $scheme.':'.$href;
        if (starts_with($href,'/')) return $scheme.'://'.$host.$port.$href;
        return $scheme.'://'.$host.$port.$dir.$href;
    };

    // --------- extract: meta/canonical/lang/og ---------
    $meta = [];
    $langEl = $selOne($xp, "//html[@lang]"); if ($langEl) $meta['lang'] = $langEl->getAttribute('lang');
    $can = $selOne($xp, "//link[@rel='canonical' or contains(@rel,'canonical')]");
    if ($can) $meta['canonical'] = $can->getAttribute('href');
    // Every <meta property=...> is collected under 'og', keyed by its property name.
    foreach ($xp->query("//meta[@property and @content]") as $m){
        $prop = $m->getAttribute('property'); $meta['og'][$prop] = $m->getAttribute('content');
    }
    foreach ($xp->query("//meta[@name and @content]") as $m){
        $name = strtolower($m->getAttribute('name'));
        if (in_array($name,['description','keywords','twitter:title','twitter:description'],true)) {
            $meta['meta'][$name] = $m->getAttribute('content');
        }
    }

    // --------- title/H1 ---------
    $titleNode = $selOne($xp, "//h1[contains(@class,'product_title') or contains(@class,'entry-title')]");
    $title = $titleNode ? norm_ws($titleNode->textContent) : '';

    // --------- breadcrumbs ---------
    $breadcrumbs = [];
    $bc = $xp->query("//nav[contains(@class,'breadcrumb') or contains(@class,'woocommerce-breadcrumb')]//a | //nav[contains(@class,'woocommerce-breadcrumb')]//span");
    if (!$bc || !$bc->length) $bc = $xp->query("//*[contains(@class,'woocommerce-breadcrumb')]//a | //*[contains(@class,'woocommerce-breadcrumb')]//span");
    foreach ($bc as $node) {
        $text = norm_ws($node->textContent);
        $href = $node->nodeName==='a' ? $node->getAttribute('href') : '';
        $breadcrumbs[] = ['text'=>$text, 'url'=>$href ? $abs($href) : null];
    }

    // --------- categories/tags/brand (typical locations) ---------
    $categories=[]; $tags=[]; $brand=null;
    foreach ($xp->query("//*[contains(@class,'product_meta')]//a[contains(@href,'/product-category/')]") as $a) {
        $categories[] = ['name'=>norm_ws($a->textContent),'url'=>$abs($a->getAttribute('href'))];
    }
    foreach ($xp->query("//*[contains(@class,'product_meta')]//a[contains(@href,'/product-tag/')]") as $a) {
        $tags[] = ['name'=>norm_ws($a->textContent),'url'=>$abs($a->getAttribute('href'))];
    }
    // Brand (some themes: .posted_in .brand, others: a row of the attributes table)
    $brandNode = $selOne($xp, "//*[contains(@class,'product_meta')]//*[contains(@class,'brand')]//a | //table[contains(@class,'woocommerce-product-attributes')]//th[contains(.,'برند')]/following-sibling::td");
    if ($brandNode) $brand = norm_ws($brandNode->textContent);

    // --------- price & availability ---------
    $price_raw = ''; $price_text = '';
    $priceNode = $selOne($xp, "//*[contains(@class,'summary')]//*[contains(@class,'price')] | //p[contains(@class,'price')]");
    if ($priceNode) {
        $price_raw = norm_ws($priceNode->textContent);
        // On sale the node holds both the struck-through (<del>) and current (<ins>) price;
        // derive the number from <ins> only so the two amounts are not concatenated.
        $saleNode = $selOne($xp, ".//ins", $priceNode);
        $price_text = $saleNode ? norm_ws($saleNode->textContent) : $price_raw;
    }
    $availability = null;
    $stock = $selOne($xp, "//p[contains(@class,'stock')]");
    if ($stock) $availability = norm_ws($stock->textContent);

    // try to guess currency/number
    $price_numeric = null; $price_currency = null;
    if ($price_raw !== '') {
        // rial vs. toman depends on the site
        if (mb_strpos($price_raw,'تومان')!==false) $price_currency = 'IRT';
        elseif (mb_strpos($price_raw,'ریال')!==false) $price_currency = 'IRR';
        $digits = preg_replace('~[^0-9]~','',$toAsciiDigits($price_text));
        if ($digits!=='') $price_numeric = (float)$digits;
    }

    // --------- SKU ---------
    $sku = null;
    $skuNode = $selOne($xp, "//*[contains(@class,'product_meta')]//*[contains(@class,'sku')]");
    if ($skuNode) $sku = norm_ws($skuNode->textContent);

    // --------- description / additional info (tabs) ---------
    // Serialises the children of a node back to an HTML string.
    $getInnerHTML = function(DOMNode $node){
        $doc = $node->ownerDocument; $inner = '';
        foreach ($node->childNodes as $child) $inner .= $doc->saveHTML($child);
        return $inner;
    };
    $desc_html = null; $addl_html = null;
    $descNode = $selOne($xp, "//*[@id='tab-description' or contains(@class,'woocommerce-Tabs-panel--description')]");
    if ($descNode) $desc_html = trim($getInnerHTML($descNode));
    // fallback: short description
    if (!$desc_html) {
        $short = $selOne($xp, "//*[contains(@class,'woocommerce-product-details__short-description')]");
        if ($short) $desc_html = trim($getInnerHTML($short));
    }
    $addlNode = $selOne($xp, "//*[@id='tab-additional_information' or contains(@class,'woocommerce-Tabs-panel--additional_information')]");
    if ($addlNode) $addl_html = trim($getInnerHTML($addlNode));

    // --------- attributes table (key/value) ---------
    $attributes = [];
    foreach ($xp->query("//table[contains(@class,'woocommerce-product-attributes')]//tr") as $tr){
        $th = $selOne($xp, ".//th", $tr); $td = $selOne($xp, ".//td", $tr);
        if (!$th || !$td) continue;
        $key = norm_ws($th->textContent); $val = norm_ws($td->textContent);
        if ($key !== '') $attributes[$key] = $val;
    }

    // --------- gallery images ---------
    $images = [];
    // Woo gallery
    foreach ($xp->query("//*[contains(@class,'woocommerce-product-gallery')]//a[@href]") as $a) {
        $images[] = $abs($a->getAttribute('href'));
    }
    // direct <img> fallbacks
    if (empty($images)) {
        foreach ($xp->query("//*[contains(@class,'woocommerce-product-gallery')]//img[@src] | //div[contains(@class,'images')]//img[@src]") as $img) {
            $images[] = $abs($img->getAttribute('src'));
        }
    }
    $images = array_values(array_unique(array_filter($images)));

    // --------- rating summary ---------
    $rating = ['average'=>null,'count'=>null];
    // Common: <div class="star-rating" role="img" aria-label="Rated 5.00 out of 5">
    // Prefer the product's own rating box; fall back to the first star-rating on the page.
    $ariaNode = $selOne($xp, "//*[contains(@class,'woocommerce-product-rating')]//*[contains(@class,'star-rating')]");
    if (!$ariaNode) $ariaNode = $selOne($xp, "//*[contains(@class,'star-rating')]");
    if ($ariaNode && $ariaNode->hasAttribute('aria-label')) {
        $rating['average'] = $parseRating($ariaNode->getAttribute('aria-label'));
    }
    $countNode = $selOne($xp, "//*[contains(@class,'woocommerce-review-link')]");
    if ($countNode && preg_match('~[0-9]+~',$toAsciiDigits($countNode->textContent),$m)) $rating['count'] = (int)$m[0];

    // --------- reviews list ---------
    $reviews = [];
    foreach ($xp->query("//*[@id='reviews']//*[contains(@class,'commentlist')]//li[contains(@class,'review')]") as $li) {
        $author = $selOne($xp, ".//*[contains(@class,'woocommerce-review__author')]", $li);
        $date   = $selOne($xp, ".//*[contains(@class,'woocommerce-review__published-date')]", $li);
        $body   = $selOne($xp, ".//*[contains(@class,'description')]", $li);
        $rnode  = $selOne($xp, ".//*[contains(@class,'star-rating')]", $li);
        $reviews[] = [
            'author' => $author ? norm_ws($author->textContent) : null,
            'date'   => $date ? norm_ws($date->textContent) : null,
            'rating' => ($rnode && $rnode->hasAttribute('aria-label')) ? $parseRating($rnode->getAttribute('aria-label')) : null,
            'content'=> $body ? norm_ws($body->textContent) : null,
        ];
    }

    // --------- related / upsells products (name+url+price) ---------
    $related = [];
    foreach ($xp->query("//*[contains(@class,'related')]//li[contains(concat(' ',normalize-space(@class),' '),' product ')]") as $li) {
        $linkNode = $selOne($xp, ".//a[contains(@class,'woocommerce-LoopProduct-link') or contains(@class,'woocommerce-loop-product__link')]", $li);
        if (!$linkNode) $linkNode = $selOne($xp, ".//a", $li);
        $href = $linkNode ? $linkNode->getAttribute('href') : '';
        $nameNode = $selOne($xp, ".//h2[contains(@class,'woocommerce-loop-product__title')]|.//h3", $li);
        $name = $nameNode ? norm_ws($nameNode->textContent) : '';
        $loopPrice = $selOne($xp, ".//*[contains(@class,'price')]", $li);
        $price = $loopPrice ? norm_ws($loopPrice->textContent) : null;
        if ($href && $name) $related[] = ['name'=>$name,'url'=>$abs($href),'price'=>$price];
    }

    // --------- JSON-LD Product (if available) ---------
    $jsonld = [];
    if (preg_match_all('~<script[^>]+type=["\']application/ld\+json["\'][^>]*>([\s\S]*?)</script>~iu', $html, $m)) {
        foreach ($m[1] as $chunk) {
            $data = json_decode(trim($chunk), true);
            // Skip invalid JSON, empty documents and scalar payloads.
            if (!is_array($data) || !$data) continue;
            // Candidates: the members of an "@graph" container, a single object, or (as some
            // themes do) several objects placed in one top-level array.
            if (isset($data['@graph']) && is_array($data['@graph'])) {
                $arr = $data['@graph'];
            } else {
                $arr = is_assoc($data) ? [$data] : $data;
            }
            foreach ($arr as $obj) {
                if (!is_array($obj) || !isset($obj['@type'])) continue;
                // "@type" may be a single string or a list of strings.
                $types = is_array($obj['@type']) ? $obj['@type'] : [$obj['@type']];
                foreach ($types as $type) {
                    if (is_string($type) && stripos($type,'Product') !== false) {
                        $jsonld[] = $obj;
                        break;
                    }
                }
            }
        }
    }
} catch(Throwable $e){
    // Anything unexpected (including PHP warnings promoted to exceptions) becomes a JSON 500.
    json_error('Exception: '.$e->getMessage(), 500);
}

/**
 * Tells whether $arr is an associative array, i.e. its keys are not exactly 0..n-1 in order.
 * Non-arrays and the empty array are reported as not associative.
 *
 * Top-level function declarations are available throughout the file, so this can be called
 * from code that appears earlier in the script.
 *
 * @param mixed $arr
 * @return bool
 */
function is_assoc($arr){
    // range(0, -1) yields [0, -1], so the empty array must be handled before the comparison.
    if (!is_array($arr) || $arr === []) return false;
    return array_keys($arr) !== range(0, count($arr) - 1);
}

// --------- build output ----------
$out = [
    'ok'           => true,
    'url'          => $url,
    'title'        => $title,
    'breadcrumbs'  => $breadcrumbs,
    'categories'   => $categories,
    'tags'         => $tags,
    'brand'        => $brand,
    'sku'          => $sku,
    'price'        => ['raw'=>$price_raw, 'numeric'=>$price_numeric, 'currency'=>$price_currency, 'availability'=>$availability],
    'description_html'         => $desc_html,
    'additional_information_html'=> $addl_html,
    'attributes'   => $attributes,
    'images'       => $images,
    'rating'       => $rating,
    'reviews'      => $reviews,
    'related_products' => $related,
    'meta'         => $meta,
    'jsonld_product'=> $jsonld, // raw JSON-LD for more accurate price/availability/rating (if present)
];

if ($debug) {
    $out['debug'] = [
        'http'=>$http_info,
        'php'=>[
            'version'=>PHP_VERSION,
            'curl'=> function_exists('curl_init')?'enabled':'disabled',
            'dom' => class_exists('DOMDocument')?'enabled':'missing'
        ]
    ];
}

// JSON_PARTIAL_OUTPUT_ON_ERROR: a value that cannot be encoded (e.g. malformed UTF-8 in the
// caller-supplied url) is replaced instead of making json_encode() return false, which
// would otherwise produce an empty 200 response.
$json = json_encode($out, JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES|JSON_PRETTY_PRINT|JSON_PARTIAL_OUTPUT_ON_ERROR);
if ($json === false) {
    json_error('Exception: '.json_last_error_msg(), 500);
}
echo $json;