<?php
/**
 * Product Info JSON API — cleaned name & toman price
 * Requires PHP 7.4 or newer; single-file script.
 *
 * Usage:
 *   GET product_api.php?url=<product_url>&pretty=1
 */

declare(strict_types=1);

// ---------- Basics ----------
/**
 * Send $data as a JSON HTTP response and terminate the script.
 *
 * Invalid UTF-8 sequences (common in scraped HTML) are substituted instead of
 * making json_encode() fail, which previously produced an empty body with the
 * requested status code.
 *
 * @param mixed $data   Value to serialise.
 * @param int   $code   HTTP status code.
 * @param bool  $pretty Pretty-print the JSON when true.
 */
function respond_json($data, int $code = 200, bool $pretty = false): void {
    $opt = JSON_UNESCAPED_UNICODE
         | JSON_UNESCAPED_SLASHES
         | JSON_INVALID_UTF8_SUBSTITUTE
         | JSON_PARTIAL_OUTPUT_ON_ERROR;
    if ($pretty) {
        $opt |= JSON_PRETTY_PRINT;
    }

    $body = json_encode($data, $opt);
    if ($body === false) {
        // Encoding still failed: report it instead of sending an empty 200.
        $code = 500;
        $body = json_encode(
            ['ok' => false, 'error' => 'JSON encoding failed: ' . json_last_error_msg(), 'meta' => []],
            JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE
        );
        if ($body === false) {
            $body = '{"ok":false,"error":"JSON encoding failed","meta":[]}';
        }
    }

    // Headers can no longer be changed once output has started.
    if (!headers_sent()) {
        http_response_code($code);
        header('Content-Type: application/json; charset=utf-8');
        header('Access-Control-Allow-Origin: *');
    }
    echo $body;
    exit;
}
/**
 * Emit a pretty-printed JSON error envelope and stop (via respond_json()).
 *
 * @param string $message Human-readable error text.
 * @param int    $code    HTTP status code.
 * @param array  $meta    Extra diagnostic data placed under "meta".
 */
function error_json(string $message, int $code = 400, array $meta = []): void {
    $payload = [
        'ok'    => false,
        'error' => $message,
        'meta'  => $meta,
    ];
    respond_json($payload, $code, true);
}
/**
 * Replace Persian (۰-۹) and Arabic-Indic (٠-٩) digits with ASCII digits.
 *
 * @param string|null $s Input text.
 * @return string|null   Text with the digits converted, or null for null input.
 */
function fa2en(?string $s): ?string {
    if ($s === null) {
        return null;
    }
    $digitMap = [
        '۰' => '0', '۱' => '1', '۲' => '2', '۳' => '3', '۴' => '4',
        '۵' => '5', '۶' => '6', '۷' => '7', '۸' => '8', '۹' => '9',
        '٠' => '0', '١' => '1', '٢' => '2', '٣' => '3', '٤' => '4',
        '٥' => '5', '٦' => '6', '٧' => '7', '٨' => '8', '٩' => '9',
    ];
    return strtr($s, $digitMap);
}
/**
 * Extract an integer amount from a free-form price string.
 *
 * Persian/Arabic digits are converted first (fa2en), then every non-digit is
 * dropped. A trailing decimal fraction of one or two digits ("68400000.00",
 * "1200.5") is discarded beforehand so it is not glued onto the integer part;
 * three-digit groups ("6.840.000", "6,840,000") are still treated as
 * thousands separators.
 *
 * @param string|null $s Raw price text.
 * @return int|null      The amount, or null when no digits are present.
 */
function normalize_price_to_int(?string $s): ?int {
    if ($s === null) return null;
    $s = fa2en($s);
    if ($s === null) return null;

    // "." or the Arabic decimal separator (U+066B) followed by 1-2 digits and
    // then no further digit/separator is a fraction, not a thousands group.
    // If the subject is not valid UTF-8 the /u match fails and we keep $s.
    $s = preg_replace('/[.\x{066B}][0-9]{1,2}(?![0-9.,\x{066B}\x{066C}])/u', '', $s) ?? $s;

    // ASCII-only class: with /u, \d would also keep digits from other scripts
    // that intval() cannot read.
    $digits = preg_replace('/[^0-9]+/', '', $s);
    if ($digits === null || $digits === '') return null;
    return intval($digits, 10);
}
/**
 * Decode a JSON-LD payload leniently.
 *
 * Decoding is attempted in three stages, stopping at the first success:
 *   1. the text as-is (so valid JSON is never altered by the repairs below);
 *   2. after removing HTML comments and trailing commas before "}" / "]";
 *   3. after re-interpreting the repaired text as ISO-8859-1 and converting
 *      it to UTF-8.
 *
 * @param string $json Raw contents of a JSON-LD script element.
 * @return array       Decoded data, or [] when nothing decodes to an array.
 */
function decode_jsonld(string $json): array {
    $json = trim($json);
    if ($json === '') return [];

    $tryDecode = static function (string $candidate): ?array {
        $decoded = json_decode($candidate, true);
        return (json_last_error() === JSON_ERROR_NONE && is_array($decoded)) ? $decoded : null;
    };

    // 1) Untouched input first: the repair regexes below would otherwise
    //    rewrite string values that merely contain ", ]" or "<!-- -->".
    $result = $tryDecode($json);
    if ($result !== null) return $result;

    // 2) Common breakage: HTML comments and trailing commas.
    $fixed = preg_replace('/<!--.*?-->/s', '', $json) ?? $json;
    $fixed = preg_replace('/,\s*([}\]])/', '$1', $fixed) ?? $fixed;
    if ($fixed !== $json) {
        $result = $tryDecode($fixed);
        if ($result !== null) return $result;
    }

    // 3) Latin-1 fallback (replacement for utf8_encode(), deprecated in PHP 8.2).
    $utf8 = false;
    if (function_exists('mb_convert_encoding')) {
        $utf8 = mb_convert_encoding($fixed, 'UTF-8', 'ISO-8859-1');
    } elseif (function_exists('iconv')) {
        $utf8 = iconv('ISO-8859-1', 'UTF-8', $fixed);
    }
    if (is_string($utf8)) {
        $result = $tryDecode($utf8);
        if ($result !== null) return $result;
    }
    return [];
}
/**
 * Perform an HTTP GET with cURL using browser-like default headers.
 *
 * @param string $url         Address to request.
 * @param array  $headers     Extra raw header lines appended to the defaults.
 * @param bool   $withHeaders When true the raw response headers are captured
 *                            and returned under "headers".
 * @param int    $timeout     Total transfer timeout in seconds.
 * @return array [body|null, meta]; on a cURL failure the body is null and meta
 *               holds "error" and "detail", otherwise meta holds "status",
 *               "final_url" and, if requested, "headers".
 */
function curl_fetch(string $url, array $headers = [], bool $withHeaders = true, int $timeout = 25): array {
    $userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36 ProductInfoBot/2.1';
    $defaultHeaders = [
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: fa-IR,fa;q=0.9,en-US;q=0.8,en;q=0.7',
        'Cache-Control: no-cache',
        'Pragma: no-cache',
        'User-Agent: ' . $userAgent,
    ];

    $handle = curl_init();
    curl_setopt_array($handle, [
        CURLOPT_URL            => $url,
        CURLOPT_HTTPHEADER     => array_merge($defaultHeaders, $headers),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 5,
        CURLOPT_CONNECTTIMEOUT => 15,
        CURLOPT_TIMEOUT        => $timeout,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_ENCODING       => '',
        CURLOPT_HEADER         => $withHeaders,
    ]);

    $response = curl_exec($handle);
    if ($response === false) {
        $detail = curl_error($handle);
        curl_close($handle);
        return [null, ['error' => 'curl_error', 'detail' => $detail]];
    }

    // Collect transfer info before the handle is closed.
    $info = [
        'status'    => curl_getinfo($handle, CURLINFO_RESPONSE_CODE),
        'final_url' => curl_getinfo($handle, CURLINFO_EFFECTIVE_URL),
    ];
    $headerBytes = $withHeaders ? curl_getinfo($handle, CURLINFO_HEADER_SIZE) : 0;
    curl_close($handle);

    if (!$withHeaders) {
        return [$response, $info];
    }

    // With CURLOPT_HEADER on, the response starts with the header block.
    $info['headers'] = substr($response, 0, $headerBytes);
    return [substr($response, $headerBytes), $info];
}
/**
 * Load the HTML for $url.
 *
 * http(s) URLs are fetched with curl_fetch(). Any other value is treated as a
 * local file path, which is only honoured when running from the command line:
 * over a web SAPI the value comes straight from the request, and reading it
 * would let any caller download arbitrary files from the server.
 *
 * @param string $url Remote URL or (CLI only) local file path.
 * @return array      [html|null, meta]; meta always contains "source"
 *                    ("remote" or "local") and, on failure, "error".
 */
function fetch_content(string $url): array {
    if (preg_match('#^https?://#i', $url)) {
        list($html, $meta) = curl_fetch($url, [], true, 25);
        $meta['source'] = 'remote';
        return [$html, $meta];
    }

    // Security: never read server-side files on behalf of a web request.
    if (PHP_SAPI !== 'cli' && PHP_SAPI !== 'phpdbg') {
        return [null, ['source' => 'local', 'error' => 'local_files_not_allowed']];
    }

    // is_file() keeps directories away from file_get_contents().
    if (!is_file($url) || !is_readable($url)) {
        return [null, ['source' => 'local', 'error' => 'file_not_readable']];
    }
    $html = file_get_contents($url);
    return [$html === false ? null : $html, ['source' => 'local']];
}
/**
 * Parse HTML into a DOMDocument and a DOMXPath bound to it.
 *
 * Ordinary script elements are removed before parsing, but
 * application/ld+json scripts are kept: the JSON-LD extractor queries them
 * through XPath, so stripping them would leave it with nothing to read.
 *
 * @param string $html Raw page markup.
 * @return array       [DOMDocument, DOMXPath]
 */
function dom_and_xpath(string $html): array {
    libxml_use_internal_errors(true);
    $dom = new DOMDocument();

    // The negative lookahead skips any <script> whose opening tag declares
    // type=application/ld+json.
    $pattern = '#<script\b(?![^>]*\btype\s*=\s*["\']?application/ld\+json)[^>]*>.*?</script>#is';
    $clean = preg_replace($pattern, '', $html) ?? $html;

    // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8).
    if (trim($clean) === '') {
        $clean = '<html><body></body></html>';
    }

    // Without a declared charset libxml may not decode the bytes as UTF-8;
    // this prefix is the customary way to hint UTF-8 to loadHTML().
    if (!preg_match('/<meta\b[^>]*\bcharset\s*=/i', $clean)) {
        $clean = '<?xml encoding="UTF-8">' . $clean;
    }

    $dom->loadHTML($clean);
    libxml_clear_errors(); // drop the parse warnings buffered for this document

    $xp = new DOMXPath($dom);
    return [$dom, $xp];
}
/**
 * Read the content attribute of the first <meta> element whose $attr value,
 * lower-cased for the comparison, equals $val.
 *
 * @param DOMXPath $xp   XPath context for the document.
 * @param string   $attr Attribute to match on (e.g. "property" or "name").
 * @param string   $val  Expected attribute value, given in lower case.
 * @return string|null   Trimmed content value, or null when no tag matches.
 */
function meta_get(DOMXPath $xp, string $attr, string $val): ?string {
    $attrName = strtolower($attr);
    $upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
    $lower = 'abcdefghijklmnopqrstuvwxyz';

    // XPath 1.0 has no lower-case(); translate() does the case folding.
    $expr = '//meta[translate(@' . $attrName . ',"' . $upper . '","' . $lower . '")="' . $val . '"]/@content';

    $hits = $xp->query($expr);
    if (!$hits || $hits->length === 0) {
        return null;
    }
    return trim($hits->item(0)->nodeValue);
}
/**
 * Return the trimmed text content of the first node matching $xpath.
 *
 * @param DOMXPath $xp    XPath context for the document.
 * @param string   $xpath Expression to evaluate.
 * @return string|null    The text, or null when nothing matches or the text
 *                        is empty after trimming.
 */
function node_text(DOMXPath $xp, string $xpath): ?string {
    $matches = $xp->query($xpath);
    if ($matches && $matches->length > 0) {
        $text = trim($matches->item(0)->textContent);
        if ($text !== '') {
            return $text;
        }
    }
    return null;
}
/**
 * Collect attribute values from the nodes matched by $xpath.
 *
 * For element nodes the value of attribute $attr is taken; when the XPath
 * itself selects attribute nodes (e.g. "//img/@src") their own value is used.
 * Other node types are ignored. Values are trimmed, empty values are skipped
 * and duplicates are removed, keeping first-seen order.
 *
 * @param DOMXPath $xp    XPath context for the document.
 * @param string   $xpath Expression selecting elements or attributes.
 * @param string   $attr  Attribute name read from element nodes.
 * @return array          List of unique, non-empty values.
 */
function nodes_attr(DOMXPath $xp, string $xpath, string $attr = 'src'): array {
    $out = [];
    $list = $xp->query($xpath);
    if (!$list) return $out;

    foreach ($list as $node) {
        // Check the node class first: hasAttribute() exists only on elements,
        // so calling it on an attribute node is a fatal error.
        if ($node instanceof DOMElement) {
            if (!$node->hasAttribute($attr)) continue;
            $v = trim($node->getAttribute($attr));
        } elseif ($node instanceof DOMAttr) {
            $v = trim((string)$node->nodeValue);
        } else {
            continue;
        }
        if ($v !== '') $out[] = $v;
    }
    return array_values(array_unique($out));
}
/**
 * Collapse every run of whitespace to a single space and trim the ends.
 *
 * @param string|null $html Text or markup fragment.
 * @return string|null      Normalised text, or null for null input.
 */
function cleanup_whitespace(?string $html): ?string {
    if ($html === null) {
        return null;
    }
    $collapsed = preg_replace('/\s+/', ' ', $html);
    return trim($collapsed);
}

// ---------- Extractors ----------
/**
 * Gather every JSON-LD item typed "Product" from the document.
 *
 * Each application/ld+json script is decoded with decode_jsonld(). The
 * candidates examined are the object itself when it has an "@type", otherwise
 * the entries of its "@graph", otherwise the decoded array's own entries.
 *
 * @param DOMXPath $xp XPath context for the document.
 * @return array       List of raw Product arrays in document order.
 */
function extract_jsonld_products(DOMXPath $xp): array {
    $products = [];
    $scripts = $xp->query('//script[@type="application/ld+json"]');
    if (!$scripts) {
        return $products;
    }

    foreach ($scripts as $script) {
        $decoded = decode_jsonld($script->textContent ?? '');
        if (!$decoded) {
            continue;
        }

        if (isset($decoded['@type'])) {
            $candidates = [$decoded];
        } elseif (isset($decoded['@graph']) && is_array($decoded['@graph'])) {
            $candidates = $decoded['@graph'];
        } else {
            // decode_jsonld() is declared to return an array, so this is a
            // list (or map) of items.
            $candidates = $decoded;
        }

        foreach ($candidates as $item) {
            if (!is_array($item)) {
                continue;
            }
            $type = $item['@type'] ?? null;
            if (is_array($type)) {
                $isProduct = in_array('Product', $type, true);
            } else {
                $isProduct = strcasecmp((string)$type, 'Product') === 0;
            }
            if ($isProduct) {
                $products[] = $item;
            }
        }
    }
    return $products;
}
/**
 * Flatten one schema.org Product (decoded JSON-LD) into the fields this API
 * reports.
 *
 * Differences from a naive read of the JSON:
 *  - "offers" may be a single offer or a list (the first entry is used);
 *    anything else is treated as "no offer".
 *  - The price is taken from "price", then "lowPrice" (AggregateOffer), then
 *    "priceSpecification.price". Numeric values and plain decimal strings are
 *    truncated to an integer instead of having the decimal point stripped.
 *  - "image" may be a URL string, a list of strings/ImageObjects, or a single
 *    ImageObject; only URL strings end up in "images".
 *
 * @param array $p Decoded Product object.
 * @return array   Normalised product data (name, brand, images, offers, ...).
 */
function extract_product_from_jsonld(array $p): array {
    $get = static function (array $a, string $k, $default = null) {
        return $a[$k] ?? $default;
    };

    // --- Offer ---------------------------------------------------------
    $offer = $get($p, 'offers', []);
    if (is_array($offer) && isset($offer[0])) $offer = $offer[0];
    if (!is_array($offer)) $offer = [];

    $spec = $offer['priceSpecification'] ?? null;
    if (is_array($spec) && isset($spec[0])) $spec = $spec[0];
    if (!is_array($spec)) $spec = [];

    // Convert one raw JSON price value to an integer, or null if unusable.
    $toInt = static function ($raw): ?int {
        if (is_int($raw)) return $raw >= 0 ? $raw : null;
        if (is_float($raw)) {
            return ($raw >= 0 && $raw < (float)PHP_INT_MAX) ? (int)$raw : null;
        }
        if (!is_string($raw)) return null;
        $raw = trim($raw);
        if ($raw === '') return null;
        // Plain decimal such as "68400000.00": truncate the fraction. Strings
        // like "6.840" or "6,840,000" are left to the generic normaliser,
        // which treats the separators as thousands groups.
        if (preg_match('/^[0-9]+(?:\.[0-9]{1,2}|\.0+)?$/', $raw)) {
            $f = (float)$raw;
            return $f < (float)PHP_INT_MAX ? (int)$f : null;
        }
        return normalize_price_to_int($raw);
    };

    $price = null;
    foreach ([$offer['price'] ?? null, $offer['lowPrice'] ?? null, $spec['price'] ?? null] as $candidate) {
        $price = $toInt($candidate);
        if ($price !== null) break;
    }

    // --- Images --------------------------------------------------------
    $imgs = [];
    $image = $p['image'] ?? null;
    if (is_string($image)) {
        $image = [$image];
    } elseif (is_array($image) && (isset($image['url']) || isset($image['contentUrl']) || isset($image['@type']))) {
        // A single ImageObject, not a list: iterating it would yield its
        // "@type" string as if it were an image URL.
        $image = [$image];
    }
    if (is_array($image)) foreach ($image as $im) {
        if (is_array($im)) $im = $im['url'] ?? ($im['contentUrl'] ?? null);
        if (is_string($im) && trim($im) !== '') $imgs[] = trim($im);
    }

    // --- Brand / seller ------------------------------------------------
    $brand = $get($p, 'brand');
    if (is_array($brand)) $brand = $brand['name'] ?? ($brand['@id'] ?? null);

    $seller = $offer['seller'] ?? null;
    if (is_array($seller)) {
        $logo = $seller['logo'] ?? null;
        $seller = [
            'name' => $seller['name'] ?? null,
            'url'  => $seller['url'] ?? null,
            'logo' => is_array($logo) ? ($logo['url'] ?? null) : $logo,
            '@id'  => $seller['@id'] ?? null,
        ];
    } elseif (!is_string($seller)) {
        $seller = null;
    }

    $mainEntity = $get($p, 'mainEntityOfPage');
    if (is_array($mainEntity)) $mainEntity = $mainEntity['@id'] ?? null;

    $rating = null;
    if (isset($p['aggregateRating']) && is_array($p['aggregateRating'])) {
        $ar = $p['aggregateRating'];
        $rating = [
            'ratingValue' => (float)($ar['ratingValue'] ?? 0),
            // schema.org allows either reviewCount or ratingCount.
            'reviewCount' => (int)($ar['reviewCount'] ?? ($ar['ratingCount'] ?? 0)),
        ];
    }

    return [
        'name'        => $get($p, 'name'),
        'description' => $get($p, 'description'),
        'sku'         => $get($p, 'sku'),
        'mpn'         => $get($p, 'mpn'),
        'gtin13'      => $get($p, 'gtin13'),
        'gtin14'      => $get($p, 'gtin14'),
        'brand'       => $brand,
        'category'    => $get($p, 'category'),
        'images'      => array_values(array_unique($imgs)),
        'mainEntityOfPage' => $mainEntity,
        'offers' => [
            'price'           => $price,
            'priceCurrency'   => $offer['priceCurrency'] ?? ($spec['priceCurrency'] ?? null),
            'priceValidUntil' => $offer['priceValidUntil'] ?? null,
            'availability'    => $offer['availability'] ?? null,
            'itemCondition'   => $offer['itemCondition'] ?? null,
            'url'             => $offer['url'] ?? null,
            'seller'          => $seller,
        ],
        'aggregateRating' => $rating,
        '@id'      => $get($p, '@id'),
        '@context' => $get($p, '@context'),
    ];
}
/**
 * Collect head-level metadata: Open Graph, Twitter card, product:* tags and
 * basic document attributes, plus the integer price candidates derived from
 * product:price:amount and twitter:data1.
 *
 * @param DOMXPath $xp XPath context for the document.
 * @return array       Keys: "base", "og", "twitter", "product",
 *                     "price_candidates".
 */
function extract_from_meta(DOMXPath $xp): array {
    // Open Graph: <meta property="og:*">
    $og = [];
    foreach (['title', 'description', 'image', 'url', 'site_name', 'type', 'updated_time', 'locale'] as $field) {
        $og[$field] = meta_get($xp, 'property', 'og:' . $field);
    }

    // Twitter card: <meta name="twitter:*">; data1 is e.g. "6,840,000 تومان"
    $tw = [];
    foreach (['card', 'title', 'description', 'image', 'label1', 'data1', 'label2', 'data2'] as $field) {
        $tw[$field] = meta_get($xp, 'name', 'twitter:' . $field);
    }

    // Product tags: <meta property="product:*">
    $prod = [];
    foreach (['brand', 'price:amount', 'price:currency', 'availability', 'retailer_item_id'] as $field) {
        $prod[$field] = meta_get($xp, 'property', 'product:' . $field);
    }

    $canonicalHrefs = nodes_attr($xp, '//link[@rel="canonical"]', 'href');
    $htmlLangs      = nodes_attr($xp, '//html', 'lang');
    $htmlDirs       = nodes_attr($xp, '//html', 'dir');
    $base = [
        'title'            => node_text($xp, '//title'),
        'meta_description' => meta_get($xp, 'name', 'description'),
        'canonical'        => $canonicalHrefs[0] ?? null,
        'lang'             => $htmlLangs[0] ?? null,
        'dir'              => $htmlDirs[0] ?? null,
    ];

    // Candidate order matters to callers: product:price:amount (IRR) first,
    // then twitter:data1 (often TOMAN, carries "تومان").
    $candidates = [];
    foreach ([$prod['price:amount'] ?? null, $tw['data1'] ?? null] as $rawPrice) {
        $parsed = normalize_price_to_int($rawPrice);
        if ($parsed !== null) {
            $candidates[] = $parsed;
        }
    }

    return [
        'base'    => $base,
        'og'      => $og,
        'twitter' => $tw,
        'product' => $prod,
        'price_candidates' => $candidates,
    ];
}
/**
 * Pick the largest image candidate from an HTML srcset attribute.
 *
 * Candidates with a width descriptor ("800w") win over candidates with a
 * pixel-density descriptor ("2x"); a candidate without a descriptor counts as
 * "1x". Within the same kind the largest value wins and ties keep the first
 * candidate. Previously any candidate without a width descriptor replaced the
 * best width found so far.
 *
 * Candidates are split on commas, so URLs that themselves contain a comma are
 * not supported.
 *
 * @param string|null $srcset Raw srcset attribute value.
 * @return string|null        URL of the best candidate, or null when empty.
 */
function parse_srcset_best(?string $srcset): ?string {
    if ($srcset === null || trim($srcset) === '') return null;

    $bestUrl = null;
    $bestRank = -1;      // 2 = width descriptor, 1 = density descriptor / none
    $bestValue = -1.0;

    foreach (explode(',', $srcset) as $part) {
        $part = trim($part);
        if ($part === '') continue;

        $tokens = preg_split('/\s+/', $part);
        $url = $tokens[0];
        $descriptor = count($tokens) > 1 ? $tokens[count($tokens) - 1] : '';

        if (preg_match('/^(\d+)w$/', $descriptor, $m)) {
            $rank = 2;
            $value = (float)$m[1];
        } elseif (preg_match('/^(\d+(?:\.\d+)?)x$/', $descriptor, $m)) {
            $rank = 1;
            $value = (float)$m[1];
        } else {
            $rank = 1;
            $value = 1.0;
        }

        if ($rank > $bestRank || ($rank === $bestRank && $value > $bestValue)) {
            $bestRank = $rank;
            $bestValue = $value;
            $bestUrl = $url;
        }
    }
    return $bestUrl;
}
/**
 * List image URLs found inside elements whose class contains
 * "woocommerce-product-gallery".
 *
 * For each <img> the data-large_image, data-src, data-zoom-image and src
 * attributes are considered, followed by the best srcset candidate. URLs that
 * end in a "-WxH.ext" size suffix (two or three digits each) are skipped as
 * thumbnails.
 *
 * @param DOMXPath $xp XPath context for the document.
 * @return array       Unique URLs in first-seen order.
 */
function get_wc_gallery_images(DOMXPath $xp): array {
    $sourceAttributes = ['data-large_image', 'data-src', 'data-zoom-image', 'src'];
    $thumbPattern = '/-\d{2,3}x\d{2,3}\.(jpg|jpeg|png|webp)$/i';

    $collected = [];
    $imgNodes = $xp->query('//*[contains(@class,"woocommerce-product-gallery")]//img');
    if ($imgNodes) {
        foreach ($imgNodes as $imgNode) {
            $candidates = [];
            foreach ($sourceAttributes as $attrName) {
                if (!$imgNode->hasAttribute($attrName)) {
                    continue;
                }
                $value = trim($imgNode->getAttribute($attrName));
                if ($value !== '') {
                    $candidates[] = $value;
                }
            }
            if ($imgNode->hasAttribute('srcset')) {
                $largest = parse_srcset_best($imgNode->getAttribute('srcset'));
                if ($largest) {
                    $candidates[] = $largest;
                }
            }
            foreach ($candidates as $candidate) {
                if (!preg_match($thumbPattern, $candidate)) {
                    $collected[] = $candidate;
                }
            }
        }
    }

    // All entries are strings, so array_unique keeps exactly the first
    // occurrence of each URL.
    return array_values(array_unique($collected));
}
/**
 * Scrape product fields from WooCommerce-style page markup: title, price,
 * SKU, stock text, short/long description HTML, attribute table, images,
 * breadcrumbs and category links.
 *
 * @param DOMXPath $xp XPath context for the document.
 * @return array       Keys: title, price_text, price_int, sku, stock_text,
 *                     short_description_html, description_html, attributes,
 *                     images, breadcrumbs, categories.
 */
function extract_from_markup(DOMXPath $xp): array {
    // Serialise the children of the first matched node (its inner HTML) with
    // whitespace collapsed; null when the query matched nothing.
    $innerHtml = static function ($nodeList): ?string {
        if (!$nodeList || $nodeList->length === 0) {
            return null;
        }
        $host = $nodeList->item(0);
        $buffer = '';
        foreach ($host->childNodes as $child) {
            $buffer .= $host->ownerDocument->saveHTML($child);
        }
        return cleanup_whitespace($buffer);
    };

    // Turn a list of <a> nodes into [{title, url}, ...].
    $linkList = static function ($anchors): array {
        $links = [];
        if ($anchors) {
            foreach ($anchors as $anchor) {
                $links[] = [
                    'title' => trim($anchor->textContent),
                    'url'   => $anchor->getAttribute('href'),
                ];
            }
        }
        return $links;
    };

    $title = node_text($xp, '//*[@class="product_title" or contains(@class,"product_title")]');
    if ($title === null) {
        $title = node_text($xp, '//h1');
    }

    $priceText = node_text($xp, '//*[@class="price"]//*[contains(@class,"amount")]');
    if ($priceText === null) {
        $priceText = node_text($xp, '//*[@class="price"]');
    }

    $shortHtml = $innerHtml($xp->query('//*[contains(@class,"woocommerce-product-details__short-description")]'));
    $descHtml  = $innerHtml($xp->query('//*[@id="tab-description" or contains(@class,"woocommerce-Tabs-panel--description")]'));

    // Attribute table: one {name, value} pair per row having both th and td.
    $attributes = [];
    $rows = $xp->query('//table[contains(@class,"shop_attributes")]//tr');
    if ($rows) {
        foreach ($rows as $row) {
            $header = $row->getElementsByTagName('th')->item(0);
            $cell   = $row->getElementsByTagName('td')->item(0);
            if (!$header || !$cell) {
                continue;
            }
            $label = trim($header->textContent);
            $text  = trim($cell->textContent);
            if ($label !== '' && $text !== '') {
                $attributes[] = ['name' => $label, 'value' => $text];
            }
        }
    }

    // Gallery images first; fall back to the featured image.
    $images = get_wc_gallery_images($xp);
    if (!$images) {
        $images = nodes_attr($xp, '//img[contains(@class,"wp-post-image") or contains(@class,"attachment-woocommerce_single")]', 'src');
    }

    return [
        'title'  => $title,
        'price_text' => $priceText,
        'price_int'  => normalize_price_to_int($priceText),
        'sku'    => node_text($xp, '//*[@class="sku"]'),
        'stock_text' => node_text($xp, '//*[contains(@class,"stock")]'),
        'short_description_html' => $shortHtml,
        'description_html'       => $descHtml,
        'attributes' => $attributes,
        'images' => $images,
        'breadcrumbs' => $linkList($xp->query('//*[contains(@class,"woocommerce-breadcrumb")]//a')),
        'categories'  => $linkList($xp->query('//*[contains(@class,"posted_in")]//a')),
    ];
}

// ---------- Normalizers ----------
/**
 * Map an availability value to its canonical schema.org ItemAvailability URL.
 *
 * Accepts schema.org URLs in any case or scheme ("http://schema.org/instock")
 * as well as plain keywords ("in stock", "out_of_stock", "PreOrder").
 * A schema.org URL with an unrecognised term is returned trimmed but
 * otherwise untouched; an unrecognised keyword is returned trimmed and
 * lower-cased.
 *
 * @param string|null $v Raw availability value.
 * @return string|null   Canonical URL, the fallback described above, or null.
 */
function normalize_availability(?string $v): ?string {
    if ($v === null) return null;

    // Lower-cased term => schema.org spelling.
    $canonical = [
        'instock'             => 'InStock',
        'outofstock'          => 'OutOfStock',
        'preorder'            => 'PreOrder',
        'backorder'           => 'BackOrder',
        'discontinued'        => 'Discontinued',
        'instoreonly'         => 'InStoreOnly',
        'limitedavailability' => 'LimitedAvailability',
        'onlineonly'          => 'OnlineOnly',
        'presale'             => 'PreSale',
        'soldout'             => 'SoldOut',
    ];

    $raw = trim($v);
    $lower = strtolower($raw);

    if (strpos($lower, 'schema.org') !== false) {
        // Keep only what follows the last "/" or "#", e.g. "InStock".
        $term = strtolower(preg_replace('~^.*[/#]~', '', rtrim($raw, '/')) ?? '');
        // Lower-casing the whole URL would no longer match the published term.
        return isset($canonical[$term]) ? 'https://schema.org/' . $canonical[$term] : $raw;
    }

    $key = str_replace([' ', '_', '-'], '', $lower);
    return isset($canonical[$key]) ? 'https://schema.org/' . $canonical[$key] : $lower;
}
/**
 * Remove site branding from product names, especially "آسان جی اس ام".
 *
 * Steps, in order: drop a "<separator> brand" suffix, drop the brand anywhere
 * else, drop separators (- – — |) left dangling at either end, drop a leading
 * "خرید و قیمت", then collapse repeated whitespace. The brand pattern also
 * tolerates a zero-width non-joiner between its words and the Arabic form of
 * the letter Yeh.
 *
 * If a pattern cannot be applied (preg_replace() returns null, e.g. for
 * invalid UTF-8) the text cleaned so far is returned instead of passing null
 * on to the next step.
 *
 * @param string|null $name Raw product name.
 * @return string|null      Cleaned name, or null for null input.
 */
function strip_branding(?string $name): ?string {
    if ($name === null) return null;

    $gap = '[\s\x{200C}]*';
    $brand = 'آسان' . $gap . 'ج[یي]' . $gap . 'اس' . $gap . 'ام';
    $edgeSeparators = '/^\s*[-–—|]+\s*|\s*[-–—|]+\s*$/u';

    $steps = [
        ['/\s*[-–—|]\s*' . $brand . '\s*$/u', ''],   // " - brand" / " | brand" suffix
        ['/' . $brand . '/u', ''],                   // brand anywhere else
        [$edgeSeparators, ''],                       // separators left behind
        ['/^\s*خرید\s*و\s*قیمت\s*/u', ''],           // leading "خرید و قیمت"
        [$edgeSeparators, ''],
        ['/\s{2,}/u', ' '],                          // collapse extra spaces
    ];

    foreach ($steps as list($pattern, $replacement)) {
        $next = preg_replace($pattern, $replacement, $name);
        if ($next === null) {
            return trim($name);
        }
        $name = $next;
    }
    return trim($name);
}
/**
 * Convert an amount in rial (IRR) to toman using integer division by ten.
 *
 * @param int|null $irr Amount in rial.
 * @return int|null     Amount in toman, or null for null input.
 */
function price_to_toman(?int $irr): ?int {
    if ($irr === null) {
        return null;
    }
    return intdiv($irr, 10);
}
/**
 * Format a toman amount for display: comma-grouped thousands followed by the
 * word "تومان".
 *
 * @param int|null $toman Amount in toman.
 * @return string|null    e.g. "6,840,000 تومان", or null for null input.
 */
function price_display_toman(?int $toman): ?string {
    if ($toman === null) {
        return null;
    }
    $grouped = number_format($toman, 0, '.', ',');
    return $grouped . ' تومان';
}

// ---------- Main ----------
// Request handling: read the parameters, fetch and parse the page, merge the
// three extraction sources (JSON-LD, meta tags, page markup) and print JSON.
$pretty = isset($_GET['pretty']) && $_GET['pretty'] === '1';
$url = $_GET['url'] ?? $_POST['url'] ?? null;
// "url[]=..." arrives as an array; only a non-empty string is usable.
if (!is_string($url) || trim($url) === '') {
    error_json('Missing "url" parameter. Example: ?url=https://example.com/product/123&pretty=1');
}
$url = trim($url);

list($html, $fetchMeta) = fetch_content($url);
// An empty body cannot be parsed, so treat it like a failed fetch.
if ($html === null || trim($html) === '') error_json('Failed to fetch content', 502, $fetchMeta);

list($dom, $xp) = dom_and_xpath($html);

$meta   = extract_from_meta($xp);
$jsonld = array_map('extract_product_from_jsonld', extract_jsonld_products($xp));
$markup = extract_from_markup($xp);

// Choose primary block: the first JSON-LD product, if any
$primary = $jsonld[0] ?? null;
$offers  = $primary['offers'] ?? [];

// Images combined (JSON-LD -> OG -> markup)
$images = $primary['images'] ?? [];
if (!$images && !empty($meta['og']['image'])) $images[] = $meta['og']['image'];
$images = array_values(array_unique(array_merge($images, $markup['images'] ?? [])));

// Price resolution (number only): JSON-LD offer, then meta candidates, then markup
$priceNum = $offers['price'] ?? ($meta['price_candidates'][0] ?? ($markup['price_int'] ?? null));
$priceCurrencyIn = $offers['priceCurrency'] ?? ($meta['product']['price:currency'] ?? null);
$currencyCode = is_string($priceCurrencyIn) ? strtoupper(trim($priceCurrencyIn)) : '';

// Convert to TOMAN if input is IRR
$original_price = ['value' => $priceNum, 'currency' => $priceCurrencyIn];
$priceOut = $priceNum;
$currencyOut = $priceCurrencyIn;

if ($priceNum !== null) {
    if ($currencyCode === 'IRR') {
        $priceOut = price_to_toman($priceNum);
        $currencyOut = 'TOMAN';
    } elseif ($currencyCode === 'IRT' || $currencyCode === 'TOMAN') {
        // Already toman; only the currency label is normalised.
        $currencyOut = 'TOMAN';
    } else {
        // twitter:data1 often carries the toman price, e.g. "6,840,000 تومان".
        $twRaw = $meta['twitter']['data1'] ?? '';
        if ($twRaw && mb_strpos($twRaw, 'تومان') !== false) {
            $twToman = normalize_price_to_int($twRaw);
            if ($currencyCode === '' && $twToman !== null && $twToman > 0 && $priceNum === $twToman * 10) {
                // No declared currency and the number is exactly ten times the
                // toman figure: the structured price is in rial.
                $priceOut = $twToman;
                $currencyOut = 'TOMAN';
            } elseif ($currencyCode === '' || $twToman === $priceNum) {
                // With no declared currency, assume toman; with another
                // declared currency, only when the two numbers agree.
                $currencyOut = 'TOMAN';
            }
        }
    }
}
$priceDisplay = price_display_toman($currencyOut === 'TOMAN' ? $priceOut : null);

// Availability (JSON-LD values are not guaranteed to be strings)
$availability_raw = $offers['availability'] ?? ($meta['product']['availability'] ?? null);
$availability = is_string($availability_raw) ? normalize_availability($availability_raw) : null;

// Name & Category
$name = $markup['title'] ?? ($primary['name'] ?? ($meta['og']['title'] ?? null));
$name = strip_branding(is_string($name) ? $name : null); // remove "آسان جی اس ام" and the marketing prefix

$category = $primary['category'] ?? null;
if ($category === null && !empty($markup['breadcrumbs'])) {
    // Fall back to the last breadcrumb link.
    $crumbs = $markup['breadcrumbs'];
    $last = $crumbs[count($crumbs) - 1];
    $category = $last['title'] ?? null;
}

// Canonical decoded (optional)
$canonical = $meta['base']['canonical'] ?? null;
$canonical_decoded = $canonical ? urldecode($canonical) : null;

// Build JSON
$result = [
    'ok' => true,
    'source' => [
        'requested_url' => $url,
        'fetch' => $fetchMeta,
    ],
    'base' => [
        'title' => $meta['base']['title'],
        'meta_description' => $meta['base']['meta_description'],
        'canonical' => $canonical,
        'canonical_decoded' => $canonical_decoded,
        'lang' => $meta['base']['lang'],
        'dir'  => $meta['base']['dir'],
    ],
    'seo_meta' => [
        'og'          => $meta['og'],
        'twitter'     => $meta['twitter'],
        'product_meta'=> $meta['product'],
    ],
    'product' => [
        'name'        => $name,                 // without "آسان جی اس ام"
        'brand'       => $primary['brand'] ?? ($meta['product']['brand'] ?? null),
        'sku'         => $primary['sku'] ?? ($markup['sku'] ?? null),
        'category'    => $category,
        'description' => $primary['description'] ?? null,
        'short_description_html' => $markup['short_description_html'] ?? null,
        'description_html'       => $markup['description_html'] ?? null,
        'attributes'  => $markup['attributes'] ?? null,
        'images'      => $images,
        'breadcrumbs' => $markup['breadcrumbs'] ?? null,
        'categories'  => $markup['categories'] ?? null,
    ],
    'offers' => [
        'price'            => $priceOut,        // in toman when the currency could be determined
        'priceCurrency'    => $currencyOut ?: 'TOMAN',
        'price_display'    => $priceDisplay,    // "6,840,000 تومان"
        'original_price'   => $original_price,  // the value as found, for transparency
        'availability'     => $availability,
        'availability_raw' => $availability_raw,
        'availability_text'=> $markup['stock_text'] ?? ($meta['twitter']['data2'] ?? null),
        'itemCondition'    => $offers['itemCondition'] ?? null,
        'priceValidUntil'  => $offers['priceValidUntil'] ?? null,
        'url'              => $offers['url'] ?? ($meta['og']['url'] ?? $canonical ?? null),
        'seller'           => $offers['seller'] ?? null,
    ],
    'rating' => $primary['aggregateRating'] ?? null,
    'raw' => [
        'jsonld_products' => $jsonld, // for debugging
    ],
];

respond_json($result, 200, $pretty);
