<?php
/**
 * Product Scraper API (WooCommerce/Woodmart friendly)
 * Usage: product_api.php?url=https://example.com/product/slug/
 * Output: JSON
 * - Extracts Product JSON-LD first, then fills gaps from HTML/XPath
 * - Builds label map for variant attributes to return Persian labels instead of slugs
 */

header('Content-Type: application/json; charset=UTF-8');
// header('Access-Control-Allow-Origin: *'); // enable for testing if needed

// The target URL comes straight from the query string and is fetched server-side,
// so it is validated strictly before anything else happens.
// is_string() guards against ?url[]=... which would make trim() throw a TypeError.
$inputUrl = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
if ($inputUrl === '') {
    http_response_code(400);
    echo json_encode(['ok' => false, 'error' => 'Missing ?url parameter']);
    exit;
}

// FILTER_VALIDATE_URL alone also accepts schemes such as file:// or mailto:,
// so the scheme is restricted to http(s) and a host is required.
$inputParts  = filter_var($inputUrl, FILTER_VALIDATE_URL) ? parse_url($inputUrl) : false;
$inputScheme = is_array($inputParts) ? strtolower($inputParts['scheme'] ?? '') : '';
if (!in_array($inputScheme, ['http', 'https'], true) || empty($inputParts['host'])) {
    http_response_code(400);
    echo json_encode(['ok' => false, 'error' => 'Invalid URL']);
    exit;
}

try {
    $html = fetchHtml($inputUrl);
    if (!$html) throw new Exception('Failed to fetch HTML');

    $dom = new DOMDocument();
    libxml_use_internal_errors(true);
    // Non-ASCII characters are converted to numeric entities before parsing so the
    // result does not depend on libxml's charset detection. This replaces
    // mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), deprecated since PHP 8.2.
    $dom->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, ~0], 'UTF-8'));
    libxml_clear_errors();
    $xp = new DOMXPath($dom);

    // Base origin (scheme://host[:port]) for making absolute URLs
    $base = parse_url($inputUrl);
    if (!is_array($base) || empty($base['scheme']) || empty($base['host'])) {
        throw new Exception('Invalid URL');
    }
    $baseOrigin = $base['scheme'] . '://' . $base['host']
        . (isset($base['port']) ? ':' . $base['port'] : '');

    // === 1) JSON-LD Product ===
    $fromLd = parseJsonLdProduct($xp);

    // === 2) HTML fallback (incl. label map & variations) ===
    $fromHtml = parseHtmlFallback($xp, $baseOrigin);

    // === 3) Merge (LD first priority for semantic fields) ===
    // parseJsonLdProduct() always returns every key, with null for missing fields.
    // Passing it last to array_replace_recursive() as-is would overwrite values found
    // in the HTML with those nulls, so a copy with null/'' leaves removed (at any
    // depth) is what gets the final say. $fromLd is still applied once in full
    // beforehand so every key it defines remains present in the output.
    $dropEmpty = static function (array $a) use (&$dropEmpty): array {
        foreach ($a as $k => $val) {
            if (is_array($val)) {
                $a[$k] = $dropEmpty($val);
            } elseif ($val === null || $val === '') {
                unset($a[$k]);
            }
        }
        return $a;
    };
    // NOTE(review): list fields (images, variants) are still merged index by index,
    // as before; LD and HTML variants at the same index are assumed to correspond.
    $data = array_replace_recursive(
        [
            'name'        => null,
            'brand'       => null,
            'price'       => null,
            'currency'    => null,
            'images'      => [],
            'description' => null,
            'features'    => [],
            'variants'    => [],
            'url'         => $inputUrl,
            'source'      => 'scraper'
        ],
        $fromLd,
        $fromHtml,
        $dropEmpty($fromLd)
    );

    // === 4) Cleanups & normalizations ===
    // JSON-LD may carry non-scalar values here; the text helpers only accept strings.
    $asText = static fn($x) => is_scalar($x) ? (string)$x : null;
    $data['name']        = safeText($asText($data['name']));
    $data['brand']       = safeText($asText($data['brand']));
    $data['description'] = normalizeDescription($asText($data['description']));

    // Images -> absolute, then unique (deduplicating after absolutizing also catches
    // the same image referenced once relatively and once absolutely)
    $images = [];
    foreach ((array)$data['images'] as $img) {
        if (!is_string($img)) continue;
        $abs = absolutizeUrl(trim($img), $baseOrigin);
        if ($abs !== null) $images[] = $abs;
    }
    $data['images'] = array_values(array_unique($images));

    // Price & currency normalization
    if (!empty($data['price'])) {
        [$num, $cur] = normalizePrice($data['price'], $data['currency'] ?? null);
        $data['price']    = $num;
        $data['currency'] = $cur;
    } else {
        $data['price'] = null;
    }

    // === 5) Translate variant attributes (slugs -> Persian labels) ===
    // Prefer labelMap extracted from HTML (selects/swatches). If absent, keep original.
    $labelMap = $fromHtml['_labelMap'] ?? [];
    if (!empty($data['variants']) && is_array($data['variants'])) {
        foreach ($data['variants'] as &$v) {
            if (!is_array($v)) continue;
            $v['attributes']   = (isset($v['attributes']) && is_array($v['attributes'])) ? $v['attributes'] : [];
            $v['availability'] = $v['availability'] ?? null;

            // Normalize price per variant; the variant's own currency (if any) takes
            // precedence over the product-level currency as the fallback.
            $variantCur = $v['currency'] ?? ($data['currency'] ?? null);
            if (!empty($v['price'])) {
                [$vnum, $vcur] = normalizePrice($v['price'], $variantCur);
                $v['price']    = $vnum;
                $v['currency'] = $vcur;
            } else {
                $v['currency'] = $variantCur;
            }

            // Absolutize image
            if (!empty($v['image']) && is_string($v['image'])) {
                $v['image'] = absolutizeUrl($v['image'], $baseOrigin);
            }

            // Translate attributes using labelMap (exact attribute names like attribute_pa_color)
            // NOTE(review): translateAttributes() reduces array values to their first
            // element, which also collapses the option lists produced by the
            // select-based fallback in parseHtmlFallback() — confirm this is intended.
            if (!empty($labelMap)) {
                $v['attributes'] = translateAttributes($v['attributes'], $labelMap);
            }

            // Fallback: if a color attribute is still a slug / English word after the
            // label map was applied, try the minimal built-in dictionary.
            foreach ($v['attributes'] as $ak => $av) {
                if (!is_string($av)) continue; // faColorFallback() only accepts strings
                $akLower = mb_strtolower((string)$ak, 'UTF-8');
                if ($akLower === 'attribute_pa_color' || strpos($akLower, 'color') !== false || strpos($akLower, 'rang') !== false) {
                    $maybeFa = faColorFallback($av);
                    if ($maybeFa) $v['attributes'][$ak] = $maybeFa;
                }
            }
        }
        unset($v);
    }

    // Remove internal helper
    unset($data['_labelMap']);

    // JSON_INVALID_UTF8_SUBSTITUTE: scraped pages may contain broken byte sequences,
    // which would otherwise make json_encode() return false (an empty response body).
    echo json_encode(
        ['ok' => true, 'data' => $data],
        JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE
    );

} catch (Exception $e) {
    http_response_code(500);
    echo json_encode(['ok' => false, 'error' => $e->getMessage()]);
    exit;
} catch (Throwable $e) {
    // Engine errors (e.g. TypeError from unexpected page data) still get a JSON reply,
    // but without exposing internal details.
    http_response_code(500);
    echo json_encode(['ok' => false, 'error' => 'Internal error while processing the page']);
    exit;
}

/* ===================== Helpers ===================== */

/**
 * Download a page over HTTP(S) and return its body.
 *
 * @param string $url Absolute URL to fetch.
 * @return string|null Response body, or null when the handle could not be created,
 *                     the transfer failed, the server answered with a status >= 400,
 *                     or the body is empty.
 */
function fetchHtml(string $url): ?string {
    $ch = curl_init($url);
    if ($ch === false) return null;

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER  => true,
        CURLOPT_FOLLOWLOCATION  => true,
        CURLOPT_MAXREDIRS       => 5,
        CURLOPT_CONNECTTIMEOUT  => 10,
        CURLOPT_TIMEOUT         => 20,
        CURLOPT_SSL_VERIFYPEER  => true,
        // The URL is caller-supplied: never let cURL speak anything but HTTP(S),
        // neither for the initial request nor after a redirect (file://, gopher://, ...).
        CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_HTTPHEADER      => [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: fa,en;q=0.9',
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36'
        ]
    ]);
    $body   = curl_exec($ch);
    $errno  = curl_errno($ch);
    $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($errno !== 0 || !is_string($body) || $body === '') return null;
    // An error page (404, 403, 5xx, ...) is not the product page; do not parse it.
    if ($status >= 400) return null;
    return $body;
}

/** ===================== JSON-LD Product ===================== */

/**
 * Collect product data from every <script type="application/ld+json"> element.
 *
 * Each script body is decoded (after being passed through
 * splitPossiblyConcatenatedJson()), and every entry recognised by isProduct() is
 * folded into the result with mergeLdProduct(). Entries are examined in this
 * order: members of "@graph", members of a top-level list, then the document itself.
 *
 * @param DOMXPath $xp XPath handle on the parsed page.
 * @return array Product fields; keys that were not found keep their null / [] defaults.
 */
function parseJsonLdProduct(DOMXPath $xp): array {
    $product = [
        'name'        => null,
        'brand'       => null,
        'price'       => null,
        'currency'    => null,
        'images'      => [],
        'description' => null,
        'features'    => [],
        'variants'    => []
    ];

    $scripts = $xp->query('//script[@type="application/ld+json"]');
    if (!$scripts || $scripts->length === 0) {
        return $product;
    }

    foreach ($scripts as $script) {
        $raw = trim($script->textContent);
        if ($raw === '') {
            continue;
        }

        foreach (splitPossiblyConcatenatedJson($raw) as $chunk) {
            $decoded = json_decode($chunk, true);
            if (!is_array($decoded)) {
                continue;
            }

            // Gather everything that might be a Product, preserving evaluation order.
            $entries = [];
            if (isset($decoded['@graph']) && is_array($decoded['@graph'])) {
                foreach ($decoded['@graph'] as $graphEntry) {
                    $entries[] = $graphEntry;
                }
            }
            if (isset($decoded[0])) {
                foreach ($decoded as $listEntry) {
                    $entries[] = $listEntry;
                }
            }
            $entries[] = $decoded;

            foreach ($entries as $entry) {
                if (isProduct($entry)) {
                    $product = mergeLdProduct($product, $entry);
                }
            }
        }
    }

    return $product;
}

/**
 * Tell whether a decoded JSON-LD entry describes a product.
 *
 * The "@type" (or, failing that, "type") value is inspected; a list of types is
 * joined with commas first. Any type containing "Product" (case-insensitive) counts.
 *
 * @param mixed $d Decoded JSON-LD entry.
 * @return bool True when $d is an array whose type mentions "Product".
 */
function isProduct($d): bool {
    if (!is_array($d)) {
        return false;
    }

    $declared = $d['@type'] ?? $d['type'] ?? null;
    if (is_array($declared)) {
        $declared = implode(',', $declared);
    }
    if (!$declared) {
        return false;
    }

    return stripos($declared, 'Product') !== false;
}

/**
 * Fold one JSON-LD Product entry into the accumulated result.
 *
 * Scalar fields (name, description, brand, price, currency) are only filled when
 * still empty; images and variants are appended; features are added per name when
 * not already present.
 *
 * @param array $res Result accumulated so far (shape as built by parseJsonLdProduct()).
 * @param array $p   Decoded JSON-LD Product entry.
 * @return array Updated result.
 */
function mergeLdProduct(array $res, array $p): array {
    $res['name']        = $res['name']        ?: ($p['name'] ?? null);
    $res['description'] = $res['description'] ?: ($p['description'] ?? null);

    // Brand: plain string, a Brand object, or a list of either
    if (!$res['brand']) {
        $brand = $p['brand'] ?? null;
        if (is_array($brand)) {
            $first = $brand[0] ?? null;
            if (isset($brand['name'])) {
                $res['brand'] = $brand['name'];
            } elseif (is_array($first)) {
                $res['brand'] = $first['name'] ?? null;
            } else {
                $res['brand'] = is_string($first) ? $first : null;
            }
        } else {
            $res['brand'] = $brand;
        }
    }

    // Images: a URL string, an ImageObject, or a list of either
    foreach (['image', 'images'] as $k) {
        if (!isset($p[$k])) continue;
        $value = $p[$k];
        // A single ImageObject is an associative array; it must be handled as ONE
        // image. Iterating it would push values such as "ImageObject" or an "@id"
        // into the image list.
        $isList = is_array($value) && $value === array_values($value);
        foreach (($isList ? $value : [$value]) as $im) {
            if (is_string($im) && $im !== '') {
                $res['images'][] = $im;
            } elseif (is_array($im)) {
                $src = $im['url'] ?? ($im['contentUrl'] ?? null);
                if (is_string($src) && $src !== '') $res['images'][] = $src;
            }
        }
    }

    // Offers / variants
    $offers = $p['offers'] ?? null;
    if (is_array($offers) && $offers !== []) {
        // "@type" may itself be a list of types
        $offersType = $offers['@type'] ?? '';
        if (is_array($offersType)) $offersType = implode(',', $offersType);

        if (is_string($offersType) && stripos($offersType, 'AggregateOffer') !== false) {
            $low  = $offers['lowPrice']  ?? null;
            $high = $offers['highPrice'] ?? null;
            $cur  = $offers['priceCurrency'] ?? null;
            $res['price']    = $res['price']    ?: ($low ?: $high);
            $res['currency'] = $res['currency'] ?: $cur;

            $nested = $offers['offers'] ?? null;
            if (is_array($nested) && $nested !== []) {
                foreach ((is_assoc($nested) ? [$nested] : $nested) as $of) {
                    if (!is_array($of)) continue;
                    $res['variants'][] = ldOfferToVariant($of, $cur);
                }
            }
        } else {
            $ofs = is_assoc($offers) ? [$offers] : $offers;
            foreach ($ofs as $of) {
                if (!is_array($of)) continue;
                $variant = ldOfferToVariant($of, null);
                if (!$res['price'] && !empty($variant['price'])) {
                    $res['price']    = $variant['price'];
                    $res['currency'] = $variant['currency'] ?? ($res['currency'] ?? null);
                }
                // Offers without their own currency inherit the product-level one
                $variant['currency'] = $variant['currency'] ?? ($res['currency'] ?? null);
                $res['variants'][] = $variant;
            }
        }
    }

    // Features / additionalProperty
    foreach (['additionalProperty', 'additionalProperties', 'properties'] as $k) {
        if (empty($p[$k]) || !is_array($p[$k])) continue;
        // A single PropertyValue object instead of a list
        $props = isset($p[$k]['name']) ? [$p[$k]] : $p[$k];
        foreach ($props as $prop) {
            if (!is_array($prop)) continue;
            $pn = $prop['name']  ?? null;
            $pv = $prop['value'] ?? null;
            // Only strings/ints are legal array keys
            if ((is_string($pn) || is_int($pn)) && $pn && $pv && empty($res['features'][$pn])) {
                $res['features'][$pn] = $pv;
            }
        }
    }

    return $res;
}

/**
 * Convert one JSON-LD Offer into the variant shape used by the scraper output.
 *
 * @param array       $of               Decoded Offer.
 * @param string|null $fallbackCurrency Currency to use when the offer declares none.
 * @return array Variant with keys sku, price, currency, availability, attributes, image, url.
 */
function ldOfferToVariant(array $of, $fallbackCurrency): array {
    // Price and currency may live in a nested priceSpecification (object or list)
    $spec = $of['priceSpecification'] ?? null;
    if (is_array($spec) && isset($spec[0]) && is_array($spec[0])) $spec = $spec[0];
    $specPrice = is_array($spec) ? ($spec['price'] ?? null) : null;
    $specCur   = is_array($spec) ? ($spec['priceCurrency'] ?? null) : null;

    $availability = $of['availability'] ?? null;
    $name         = $of['name'] ?? null;

    return [
        'sku'         => $of['sku'] ?? ($of['mpn'] ?? null),
        'price'       => $of['price'] ?? $specPrice,
        'currency'    => $of['priceCurrency'] ?? ($specCur ?? $fallbackCurrency),
        // The helpers below only accept strings; anything else is treated as absent
        'availability'=> getAvailability(is_string($availability) ? $availability : null),
        'attributes'  => extractAttributesFromName(is_string($name) ? $name : null),
        'image'       => $of['image'] ?? null,
        'url'         => $of['url']   ?? null,
    ];
}

/** ===================== HTML Fallback + Label Map ===================== */

/**
 * Extract product data directly from the page markup (WooCommerce-style selectors).
 *
 * Besides the regular product fields, the result carries an internal "_labelMap"
 * entry (attribute name => slug => visible label) built by buildAttributeLabelMap().
 *
 * @param DOMXPath $xp         XPath handle on the parsed page.
 * @param string   $baseOrigin Origin of the page; not used by this function itself.
 * @return array Product fields plus "_labelMap".
 */
function parseHtmlFallback(DOMXPath $xp, string $baseOrigin): array {
    $out = [
        'name'        => null,
        'brand'       => null,
        'price'       => null,
        'currency'    => null,
        'images'      => [],
        'description' => null,
        'features'    => [],
        'variants'    => [],
        '_labelMap'   => [], // used to translate slugs into their Persian display names
    ];

    // Build label map (from selects and swatches)
    $labelMap = buildAttributeLabelMap($xp);
    $out['_labelMap'] = $labelMap;

    // Elements carrying the "summary" class token. An exact @class="summary" test
    // would miss the usual multi-class form (e.g. class="summary entry-summary").
    $summary = '//*[contains(concat(" ", normalize-space(@class), " "), " summary ")]';

    // Name
    $out['name'] = firstText($xp, [
        '//h1[contains(@class,"product_title")]',
        '//meta[@property="og:title"]/@content',
        '//title'
    ]);

    // Brand
    $out['brand'] = firstText($xp, [
        '//*[contains(@class,"product-brands")]/a',
        '//a[contains(@href,"/brand/") or contains(@href,"/brands/")]',
    ]);

    // Price
    $priceRaw = firstText($xp, [
        '//*[contains(@class,"price")]//*[contains(@class,"amount")]',
        $summary . '//*[contains(@class,"price")]',
        '//meta[@property="product:price:amount"]/@content'
    ]);
    $out['price'] = $priceRaw ?: null;

    // Currency
    $out['currency'] = firstText($xp, [
        '//meta[@property="product:price:currency"]/@content'
    ]) ?: guessCurrencyFromLocale($xp);

    // Images: gallery links first, then gallery <img>, then og:image
    $imgs = allAttr($xp, '//figure[contains(@class,"woocommerce-product-gallery__image")]//a/@href');
    if (!$imgs) {
        $imgs = allAttr($xp, '//*[contains(@class,"woocommerce-product-gallery")]//img/@src');
    }
    if (!$imgs) {
        $imgs = allAttr($xp, '//meta[@property="og:image"]/@content');
    }
    $out['images'] = $imgs;

    // Description
    $out['description'] = firstHtml($xp, [
        '//*[@id="tab-description"]',
        '//*[contains(@class,"woocommerce-product-details__short-description")]',
        $summary . '//div[contains(@class,"short-description")]'
    ]);

    // Features (spec table)
    $features = [];
    foreach ($xp->query('//table[contains(@class,"woocommerce-product-attributes")]//tr') as $tr) {
        $k = trim(nodeText($tr->getElementsByTagName('th')->item(0) ?? null));
        $v = trim(nodeText($tr->getElementsByTagName('td')->item(0) ?? null));
        if ($k && $v) $features[$k] = $v;
    }
    if (empty($features)) {
        // Fallback: "key: value" list items
        foreach ($xp->query('//*[contains(@class,"attributes") or contains(@class,"specs")]//li') as $li) {
            $txt = trim(nodeText($li));
            if (strpos($txt, ':') !== false) {
                [$k, $v] = array_map('trim', explode(':', $txt, 2));
                if ($k && $v) $features[$k] = $v;
            }
        }
    }
    $out['features'] = $features;

    // Variations
    $variants = [];
    foreach ($xp->query('//form[contains(@class,"variations_form")]') as $form) {
        // data-product_variations holds the variations as JSON
        $jsonAttr = $form->getAttribute('data-product_variations');
        $decoded  = null;
        if ($jsonAttr) {
            // getAttribute() already returns the entity-decoded value; decoding it a
            // second time can corrupt valid JSON (e.g. an escaped "&quot;" inside a
            // string). The extra decode is therefore only a fallback.
            $decoded = json_decode($jsonAttr, true);
            if (!is_array($decoded)) {
                $decoded = json_decode(html_entity_decode($jsonAttr, ENT_QUOTES | ENT_HTML5), true);
            }
        }

        if (is_array($decoded)) {
            foreach ($decoded as $v) {
                if (!is_array($v)) continue;
                $attrsRaw = (isset($v['attributes']) && is_array($v['attributes'])) ? $v['attributes'] : [];
                // translate attributes using label map
                $attrs = translateAttributes($attrsRaw, $labelMap);

                // fallback color translation if the value is still the untranslated slug
                foreach ($attrs as $ak => $av) {
                    if (!is_string($av)) continue; // faColorFallback() only accepts strings
                    $akLower = mb_strtolower((string)$ak, 'UTF-8');
                    if ($akLower === 'attribute_pa_color' || strpos($akLower, 'color') !== false || strpos($akLower, 'rang') !== false) {
                        if ($av === ($attrsRaw[$ak] ?? $av)) {
                            $fa = faColorFallback($av);
                            if ($fa) $attrs[$ak] = $fa;
                        }
                    }
                }

                $variants[] = [
                    'sku'         => $v['sku'] ?? null,
                    'price'       => $v['display_price'] ?? ($v['price_html'] ?? null),
                    'availability'=> ($v['is_in_stock'] ?? false) ? 'InStock' : 'OutOfStock',
                    'attributes'  => $attrs,
                    'image'       => $v['image']['full_src'] ?? ($v['image']['src'] ?? null),
                ];
            }
            continue;
        }

        // No usable JSON (attribute missing, or a non-list value such as the literal
        // "false"): list the options of each select as raw attribute sets.
        $attrs = [];
        foreach ($xp->query('.//table[contains(@class,"variations")]//select', $form) as $sel) {
            $aname = $sel->getAttribute('name') ?: $sel->getAttribute('id'); // attribute_pa_*
            $label = $xp->query('ancestor::tr[1]//label', $sel)->item(0);
            $lname = $label ? trim($label->textContent) : $aname;
            $opts  = [];
            foreach ($xp->query('.//option[@value!=""]', $sel) as $opt) {
                $slug = trim($opt->getAttribute('value'));
                $txt  = trim(html_entity_decode($opt->textContent, ENT_QUOTES | ENT_HTML5, 'UTF-8'));
                // label map first, then the visible option text, then the slug itself
                $opts[] = $labelMap[$aname][$slug] ?? ($txt !== '' ? $txt : $slug);
            }
            if ($lname && $opts) $attrs[$lname] = $opts;
        }
        if (!empty($attrs)) {
            $variants[] = ['attributes' => $attrs];
        }
    }
    $out['variants'] = $variants;

    return $out;
}

/**
 * Build the attribute label map: attribute name => [slug => visible label].
 *
 * Two sources are read, in this order (later entries overwrite earlier ones):
 *  1. <select name="attribute_*"> options inside a variations form,
 *  2. swatch elements carrying data-attribute / data-value inside a
 *     "variable-items-wrapper" container.
 *
 * @param DOMXPath $xp XPath handle on the parsed page.
 * @return array Nested map of labels; empty when nothing was found.
 */
function buildAttributeLabelMap(DOMXPath $xp): array {
    $labels = [];
    $clean  = static function (string $text): string {
        return trim(html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8'));
    };

    // 1) <select name="attribute_pa_*"><option value="slug">visible label</option>
    $selects = $xp->query('//form[contains(@class,"variations_form")]//select[starts-with(@name,"attribute_")]');
    foreach ($selects as $select) {
        $attribute = $select->getAttribute('name'); // e.g. attribute_pa_color
        if (!$attribute) {
            continue;
        }
        foreach ($xp->query('.//option[@value!=""]', $select) as $option) {
            $slug  = trim($option->getAttribute('value'));
            $label = $clean($option->textContent);
            if ($slug === '' || $label === '') {
                continue;
            }
            $labels[$attribute][$slug] = $label;
        }
    }

    // 2) Swatches, e.g.
    // <li class="variable-item" data-attribute="attribute_pa_color" data-value="abi-tirh" title="...">
    $swatches = $xp->query('//*[contains(@class,"variable-items-wrapper")]//*[@data-attribute and @data-value]');
    foreach ($swatches as $item) {
        $attribute = $item->getAttribute('data-attribute');
        $slug      = $item->getAttribute('data-value');

        // title, then data-title, then the element's own text
        $label = $item->getAttribute('title');
        if (!$label) {
            $label = $item->getAttribute('data-title');
        }
        if (!$label) {
            $label = trim($item->textContent);
        }
        $label = $clean($label);

        if ($attribute && $slug && $label) {
            $labels[$attribute][$slug] = $label;
        }
    }

    return $labels;
}

/** ===================== Utilities ===================== */

/**
 * Return the text of the first node matched by the first XPath expression that
 * yields a non-empty value.
 *
 * Only the first match of each expression is looked at; its text is trimmed and
 * entity-decoded before the emptiness check.
 *
 * @param DOMXPath $xp     XPath handle on the parsed page.
 * @param array    $xpaths Expressions to try, in priority order.
 * @return string|null The text found, or null when every expression came up empty.
 */
function firstText(DOMXPath $xp, array $xpaths): ?string {
    foreach ($xpaths as $expression) {
        $matches = $xp->query($expression);
        if (!$matches || !$matches->length) {
            continue;
        }

        $raw  = trim($matches->item(0)->textContent);
        $text = html_entity_decode($raw, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        if ($text !== '') {
            return $text;
        }
    }

    return null;
}
/**
 * Return the sanitized inner HTML of the first node matched by the first XPath
 * expression that yields non-empty markup.
 *
 * @param DOMXPath $xp     XPath handle on the parsed page.
 * @param array    $xpaths Expressions to try, in priority order.
 * @return string|null Inner HTML after sanitizeHtml(), or null when nothing usable was found.
 */
function firstHtml(DOMXPath $xp, array $xpaths): ?string {
    foreach ($xpaths as $expression) {
        $matches = $xp->query($expression);
        if (!$matches || !$matches->length) {
            continue;
        }

        $markup = sanitizeHtml(innerHTML($matches->item(0)));
        if ($markup !== '') {
            return $markup;
        }
    }

    return null;
}
/**
 * Collect the trimmed node values of every node matched by an XPath expression
 * (typically an attribute selection such as //img/@src).
 *
 * @param DOMXPath $xp    XPath handle on the parsed page.
 * @param string   $xpath Expression to evaluate.
 * @return array List of values in document order; empty when nothing matched.
 */
function allAttr(DOMXPath $xp, string $xpath): array {
    $matches = $xp->query($xpath);
    if (!$matches || !$matches->length) {
        return [];
    }

    return array_map(
        static function ($node) {
            return trim($node->nodeValue);
        },
        iterator_to_array($matches, false)
    );
}
/**
 * Trimmed, entity-decoded text content of a node; '' when no node is given.
 *
 * @param DOMNode|null $n Node to read, or null.
 * @return string
 */
function nodeText(?DOMNode $n): string {
    if (!$n) {
        return '';
    }

    $text = trim($n->textContent);

    return html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
}
/**
 * Serialize the children of a node to an HTML string (the node's own tag is not included).
 *
 * @param DOMNode $el Node whose children are serialized.
 * @return string Concatenated markup of all child nodes; '' when there are none.
 */
function innerHTML(DOMNode $el): string {
    $document = $el->ownerDocument;
    $pieces   = [];

    foreach ($el->childNodes as $childNode) {
        $pieces[] = $document->saveHTML($childNode);
    }

    return implode('', $pieces);
}
/**
 * Remove <script> and <style> elements (including their content) from a markup
 * fragment and trim the result. Other tags are left untouched.
 *
 * @param string $html Markup fragment.
 * @return string Cleaned, trimmed markup.
 */
function sanitizeHtml(string $html): string {
    $blocked = [
        '#<script\b[^>]*>.*?</script>#is',
        '#<style\b[^>]*>.*?</style>#is',
    ];

    // Patterns are applied one after the other, scripts first.
    return trim(preg_replace($blocked, '', $html));
}
/**
 * Entity-decode and trim a string; null and blank results become null.
 *
 * @param string|null $s Raw text.
 * @return string|null Cleaned text, or null when nothing is left.
 */
function safeText(?string $s): ?string {
    if ($s === null) {
        return null;
    }

    $clean = trim(html_entity_decode($s, ENT_QUOTES | ENT_HTML5, 'UTF-8'));
    if ($clean === '') {
        return null;
    }

    return $clean;
}
/**
 * Replace Persian (U+06F0..U+06F9) and Arabic-Indic (U+0660..U+0669) digits with
 * ASCII digits and normalise the related numeric punctuation.
 *
 * The Arabic thousands separator is removed, the Arabic decimal separator becomes
 * '.', and the Arabic date separator becomes '/'.
 *
 * @param string $s Text possibly containing Persian/Arabic digits.
 * @return string Text with ASCII digits.
 */
function persianToLatinDigits(string $s): string {
    // The original map listed the thousands separator twice ('٬' => ',' and later
    // '٬' => ''); in a PHP array literal the later entry wins, so only the
    // effective mapping (removal) is kept here.
    $map = [
        '۰'=>'0','۱'=>'1','۲'=>'2','۳'=>'3','۴'=>'4',
        '۵'=>'5','۶'=>'6','۷'=>'7','۸'=>'8','۹'=>'9',
        '٠'=>'0','١'=>'1','٢'=>'2','٣'=>'3','٤'=>'4',
        '٥'=>'5','٦'=>'6','٧'=>'7','٨'=>'8','٩'=>'9',
        '٬'=>'','٫'=>'.','؍'=>'/',
    ];
    return strtr($s, $map);
}
/**
 * Turn a raw price (number, text, or price HTML) into a numeric amount and a currency code.
 *
 * @param mixed       $raw         Raw price: number, string (may contain markup,
 *                                 Persian digits, a currency word), or array.
 * @param string|null $fallbackCur Currency to report when none is recognised in $raw.
 * @return array [float|null $amount, string|null $currency]; the currency is
 *               'IRR' or 'IRT' when recognised, otherwise $fallbackCur.
 */
function normalizePrice($raw, $fallbackCur = null): array {
    $s = is_array($raw) ? json_encode($raw, JSON_UNESCAPED_UNICODE) : (string)$raw;
    // Price HTML: drop tags and decode entities first, otherwise digits inside
    // markup or entities (e.g. "&#36;") could be mistaken for the amount.
    $s = html_entity_decode(strip_tags($s), ENT_QUOTES | ENT_HTML5, 'UTF-8');
    $s = persianToLatinDigits($s);

    $cur = null;
    // The `u` modifier plus explicit letter look-arounds replace \b: without them
    // the ASCII-only word boundary never matches before a Persian word that follows
    // a space, so "1,000 تومان" was not recognised.
    if (preg_match('/(?<!\p{L})(IRR|IRT|Tomans?|تومان|ریال|ريال)(?!\p{L})/iu', $s, $m)) {
        $label = mb_strtolower($m[1], 'UTF-8');
        $cur   = in_array($label, ['irr', 'ریال', 'ريال'], true) ? 'IRR' : 'IRT';
    }

    if (preg_match('/\d[\d,.]*/', $s, $m)) {
        $n = str_replace(',', '', $m[0]);
        // More than one dot cannot be a decimal point: treat dots as grouping ("1.250.000")
        if (substr_count($n, '.') > 1) {
            $n = str_replace('.', '', $n);
        }
        return [ (float)$n, $cur ?: $fallbackCur ];
    }
    return [ null, $cur ?: $fallbackCur ];
}
/**
 * Map an availability value (e.g. a schema.org URL) to 'InStock' / 'OutOfStock'.
 *
 * @param string|null $s Raw availability value.
 * @return string|null 'InStock', 'OutOfStock', or null when empty or unrecognised.
 */
function getAvailability(?string $s): ?string {
    if (!$s) {
        return null;
    }

    $haystack = strtolower($s);
    $known    = [
        'instock'    => 'InStock',
        'outofstock' => 'OutOfStock',
    ];
    foreach ($known as $token => $label) {
        if (strpos($haystack, $token) !== false) {
            return $label;
        }
    }

    return null;
}
/**
 * Guess variant attributes (Storage, RAM, Color) from an offer name.
 *
 * @param string|null $name Offer / variant name.
 * @return array Subset of ['Storage' => 'NGB', 'RAM' => 'NGB', 'Color' => string],
 *               in that key order; empty when nothing is recognised.
 */
function extractAttributesFromName(?string $name): array {
    if (!$name) return [];
    $out = [];

    if (preg_match('/(\d+)\s*(?:gb)?\s*\/\s*(\d+)\s*gb/i', $name, $m)) {
        // "8/128GB" or "128GB/8GB": two capacities. Previously both Storage and RAM
        // ended up with the second number. Heuristic: the larger value is the
        // storage, the smaller one the RAM.
        $a = (int)$m[1];
        $b = (int)$m[2];
        $out['Storage'] = max($a, $b) . 'GB';
        $out['RAM']     = min($a, $b) . 'GB';
    } elseif (preg_match('/(\d+)\s*gb/i', $name, $m)) {
        $out['Storage'] = $m[1] . 'GB';
    }

    // `u` modifier + explicit look-arounds instead of \b: the ASCII-only word
    // boundary never matches around a Persian word delimited by spaces.
    if (preg_match('/(?<![\p{L}\p{N}])(black|green|silver|white|blue|red|مشکی|نقره‌ای|سبز|آبی|سفید|قرمز)(?![\p{L}\p{N}])/iu', $name, $m)) {
        $out['Color'] = $m[1];
    }
    return $out;
}
/**
 * Guess the currency from the page's og:locale meta tag.
 *
 * @param DOMXPath $xp XPath handle on the parsed page.
 * @return string|null 'IRR' when the locale contains "fa" (case-insensitive), otherwise null.
 */
function guessCurrencyFromLocale(DOMXPath $xp): ?string {
    $locale = firstText($xp, ['//meta[@property="og:locale"]/@content']);
    if (!$locale) {
        return null;
    }

    return stripos($locale, 'fa') === false ? null : 'IRR';
}
/**
 * Split a script body that may contain several JSON objects written back to back
 * ("{...}{...}") into individual JSON documents.
 *
 * Input that already is one valid JSON document (or looks like a JSON list) is
 * returned untouched as a single-element list.
 *
 * @param string $json Raw script content.
 * @return array List of JSON strings to decode one by one.
 */
function splitPossiblyConcatenatedJson(string $json): array {
    $trim = trim($json);
    if ($trim === '') return [$json];
    if ($trim[0] === '[' && substr($trim, -1) === ']') return [$trim];

    // Valid as a whole: never split. A "}{" sequence inside a string value would
    // otherwise tear a perfectly good document into broken fragments.
    json_decode($trim, true);
    if (json_last_error() === JSON_ERROR_NONE) return [$json];

    // Objects may be separated by whitespace ("} {"), so detection uses the same
    // pattern as the split itself.
    if (!preg_match('/}\s*{/', $trim)) return [$json];

    $parts = preg_split('/}\s*{/', $trim);
    $last  = count($parts) - 1;
    $out   = [];
    foreach ($parts as $i => $p) {
        // Restore the braces consumed by the separator
        if ($i > 0)     $p = '{' . $p;
        if ($i < $last) $p = $p . '}';
        $out[] = $p;
    }
    return $out;
}
/**
 * Tell whether an array is associative, i.e. its keys are not exactly 0..n-1 in order.
 *
 * @param array $arr Array to inspect.
 * @return bool False for lists (including the empty array), true otherwise.
 */
function is_assoc(array $arr): bool {
    // range(0, -1) yields [0, -1], so without this guard an empty array would be
    // reported as associative.
    if ($arr === []) return false;
    return array_keys($arr) !== range(0, count($arr) - 1);
}
/**
 * Make a URL absolute against the site origin.
 *
 * Relative paths are resolved against the origin only (not against the page's
 * directory), since the origin is all that is passed in.
 *
 * @param string|null $url        URL as found in the page.
 * @param string      $baseOrigin Origin such as "https://example.com".
 * @return string|null Absolute URL, or null for an empty input.
 */
function absolutizeUrl(?string $url, string $baseOrigin): ?string {
    if (!$url) return null;

    // Already absolute (http:, https:, data:, ...): leave untouched
    if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $url)) return $url;

    // Protocol-relative ("//cdn.example.com/x.jpg"): only the scheme is missing.
    // Prefixing the whole origin would produce "https://site//cdn.example.com/x.jpg".
    if (strncmp($url, '//', 2) === 0) {
        $scheme = parse_url($baseOrigin, PHP_URL_SCHEME) ?: 'https';
        return $scheme . ':' . $url;
    }

    return rtrim($baseOrigin, '/') . '/' . ltrim($url, '/');
}
/**
 * Reduce description markup to plain text.
 *
 * Tags are stripped and entities decoded (serialized DOM fragments can carry
 * non-ASCII text as numeric entities, which would otherwise leak into the output
 * as "&#1587;..."). Non-breaking spaces become regular spaces.
 *
 * @param string|null $html Description markup.
 * @return string|null Plain text, or null when nothing is left.
 */
function normalizeDescription(?string $html): ?string {
    if ($html === null) return null;
    $text = html_entity_decode(strip_tags($html), ENT_QUOTES | ENT_HTML5, 'UTF-8');
    $text = trim(str_replace("\u{00A0}", ' ', $text));
    return $text === '' ? null : $text;
}

/**
 * Translate attribute values through the label map (slug -> visible label).
 *
 * Array values are reduced to a single value first: their 'value' entry when
 * present, otherwise their first element (or '' when that is empty). Values
 * without a label-map entry are returned unchanged.
 *
 * @param array $attrs    Attribute name => raw value.
 * @param array $labelMap Attribute name => [slug => label].
 * @return array Attribute name => translated value.
 */
function translateAttributes(array $attrs, array $labelMap): array {
    $translated = [];

    foreach ($attrs as $name => $value) {
        if (is_array($value)) {
            // Some plugins deliver the value as an array; keep a single entry only
            $value = $value['value'] ?? (reset($value) ?: '');
        }

        $translated[$name] = $labelMap[$name][$value] ?? $value;
    }

    return $translated;
}

/**
 * Minimal fallback dictionary from English colour names / known slugs to Persian.
 *
 * The lookup is done on the lower-cased, trimmed input.
 *
 * @param string|null $val Colour name or slug.
 * @return string|null Persian colour name, or null when the input is empty or unknown.
 */
function faColorFallback(?string $val): ?string {
    if (!$val) {
        return null;
    }

    $known = [
        // English names
        'black'     => 'مشکی',
        'deep-blue' => 'آبی تیره',
        'blue'      => 'آبی',
        'silver'    => 'نقره‌ای',
        'green'     => 'سبز',
        'white'     => 'سفید',
        'red'       => 'قرمز',
        'gray'      => 'خاکستری',
        'grey'      => 'خاکستری',
        'gold'      => 'طلایی',
        'purple'    => 'بنفش',
        'pink'      => 'صورتی',
        'yellow'    => 'زرد',
        'orange'    => 'نارنجی',
        // Transliterated slugs; extend as needed
        'abi-tirh'  => 'آبی تیره',
        'nghrh-ai'  => 'نقره‌ای',
    ];

    $key = trim(mb_strtolower($val, 'UTF-8'));

    return $known[$key] ?? null;
}