<?php
/**
 * MyPhone.ir Product Scraper Web Service (PHP 7+)
 * Usage:
 *   GET  /myphone.php?url=<encoded product url>
 *   POST /myphone.php  { "url": "https://myphone.ir/product/..." }
 *
 * Output: JSON (UTF-8)
 *   { ok, url, name, description_html, description_text, price_toman, currency_source, price_raw,
 *     images[], sku, brand, categories[], attributes[], debug{} }
 */

// Every response of this endpoint is JSON; multibyte string functions work on UTF-8.
header('Content-Type: application/json; charset=utf-8');
mb_internal_encoding('UTF-8');
ini_set('display_errors', '0');   // keep PHP's HTML error output out of the response (avoids a blank/broken page)
error_reporting(E_ALL);

/* -------------------- Read and normalise the input URL -------------------- */
// Sources are tried in order: $_GET['url'], the raw query string, then a JSON POST body.
$inputUrl = '';

if (isset($_GET['url']) && is_string($_GET['url'])) {
    // PHP has already percent-decoded $_GET once. Decoding a well-formed URL again
    // would corrupt it ('+' and '%20' become spaces, '%2F' becomes a path separator),
    // so decode further only while the value is visibly still over-encoded: it does
    // not start with http(s):// yet and still contains %XX escapes. At most two
    // extra passes, matching the leniency of the previous implementation.
    $inputUrl = trim($_GET['url']);
    for ($pass = 0; $pass < 2; $pass++) {
        if ($inputUrl === ''
            || preg_match('#^https?://#i', $inputUrl)
            || !preg_match('/%[0-9a-f]{2}/i', $inputUrl)) {
            break;
        }
        $inputUrl = rawurldecode($inputUrl);
    }
}

// Fallback: take url=... straight from the raw query string when it is the last parameter.
if ($inputUrl === '' && !empty($_SERVER['QUERY_STRING'])) {
    if (preg_match('/(?:^|&)url=([^&]+)\z/i', $_SERVER['QUERY_STRING'], $m)) {
        $inputUrl = rawurldecode($m[1]);
    }
}

// Fallback: JSON body of a POST request, e.g. { "url": "https://myphone.ir/product/..." }.
if ($inputUrl === '' && (($_SERVER['REQUEST_METHOD'] ?? '') === 'POST')) {
    $raw = file_get_contents('php://input');
    if ($raw) {
        $js = json_decode($raw, true);
        // Only accept a scalar "url"; casting an array/object would yield the literal "Array".
        if (is_array($js) && isset($js['url']) && is_scalar($js['url'])) {
            $inputUrl = trim((string)$js['url']);
        }
    }
}

// Scheme-less input ("myphone.ir/product/...") is assumed to be https.
$inputUrl = trim($inputUrl);
if ($inputUrl !== '' && !preg_match('#^https?://#i', $inputUrl)) {
    $inputUrl = 'https://' . ltrim($inputUrl, '/');
}

// Split the URL so its host can be validated; '@' silences any warning parse_url() may raise.
$pu   = @parse_url($inputUrl);
$host = isset($pu['host']) ? strtolower($pu['host']) : '';

// No URL at all, or a URL without a host component: answer 400 with a usage hint.
if ($inputUrl === '' || $host === '') {
    http_response_code(400);
    echo json_encode(['ok'=>false,'error'=>'invalid_or_missing_url','hint'=>'Pass ?url=<encoded product url> for myphone.ir']);
    exit;
}
// Allow-list: only myphone.ir itself or a host ending in ".myphone.ir" is accepted.
if (!preg_match('/(^|\.)myphone\.ir$/i', $host)) {
    http_response_code(400);
    echo json_encode(['ok'=>false,'error'=>'only_myphone_ir_is_allowed']);
    exit;
}

/* ------------------------------ Scrape ------------------------------ */
// Fetches the product page, extracts the fields with XPath (falling back to JSON-LD
// where the markup yields nothing) and prints the result as JSON. Any Throwable is
// reported as a JSON error object with HTTP status 500.
try {
    $html = fetch_url($inputUrl);
    if ($html === null) throw new Exception('fetch_failed');

    // DOM + XPath; libxml errors for invalid HTML are collected internally instead of being emitted.
    libxml_use_internal_errors(true);
    $dom = new DOMDocument('1.0', 'UTF-8');
    if (stripos($html, '<meta charset=') === false) {
        // No HTML5 charset declaration found: prepend one declaring UTF-8 before parsing.
        $html = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />' . $html;
    }
    $dom->loadHTML($html);
    libxml_clear_errors();
    $xp = new DOMXPath($dom);

    /* ---------- myphone.ir specific extraction ---------- */

    // Product name
    $name = xp_text($xp, '//h1[contains(@class,"product_title")]');
    if ($name === null) $name = xp_text($xp, '//h1[contains(@class,"entry-title")]');

    // Description: description tab first, then the tab panel class, then the short description
    $description_html = xp_inner_html($xp, '(//div[@id="tab-description"])[1]');
    if ($description_html === null) $description_html = xp_inner_html($xp, '(//div[contains(@class,"woocommerce-Tabs-panel--description")])[1]');
    if ($description_html === null) $description_html = xp_inner_html($xp, '(//div[contains(@class,"product-short-description")])[1]');

    // Price: JSON-LD -> OG -> Twitter (inside extract_price_priority), then the visible price element
    list($price_raw, $currency) = extract_price_priority($xp);
    if ($price_raw === null) {
        $price_raw = normalize_price_string((string)xp_text($xp, '(//span[contains(@class,"woocommerce-Price-amount")]//bdi)[1]'));
    }
    if ($currency === null) {
        $currency = xp_attr($xp, '(//meta[@itemprop="priceCurrency"])[1]', 'content');
        if ($currency) $currency = strtoupper(trim($currency));
    }
    $price_toman = price_to_toman($price_raw, $currency);

    // Gallery images + og:image
    $images = [];
    foreach ($xp->query('//div[contains(@class,"woocommerce-product-gallery")]//img') as $img) {
        /** @var DOMElement $img */
        $src = $img->getAttribute('data-large_image');
        if ($src === '') $src = $img->getAttribute('src');
        if ($src === '') $src = $img->getAttribute('data-src');
        if ($src !== '') $images[] = absolutize_url($src, $inputUrl);
    }
    $ogImage = xp_meta($xp, 'og:image');
    if ($ogImage) $images[] = absolutize_url($ogImage, $inputUrl);
    $images = array_values(array_unique($images));

    // Attributes (specification table)
    $attributes = [];
    foreach ($xp->query('//table[contains(@class,"woocommerce-product-attributes")]//tr') as $tr) {
        /** @var DOMElement $tr */
        $label = xp_text($xp, './/th', $tr);
        $value = xp_text($xp, './/td', $tr);
        if ($label && $value) {
            $value = trim(preg_replace('/\s+/', ' ', $value));
            $attributes[] = ['name'=>$label, 'value'=>$value];
        }
    }

    // SKU: first look for an element whose class list contains the exact token "sku".
    // A plain substring test also matches wrappers such as "sku_wrapper", whose text
    // includes the label in front of the value; it is kept only as a fallback.
    $sku = xp_text($xp, '//*[contains(concat(" ", normalize-space(@class), " "), " sku ")]');
    if ($sku === null) $sku = xp_text($xp, '//*[contains(@class,"sku")]');

    // Brand
    $brand = xp_text($xp, '//span[contains(@class,"posted_in")]//a[contains(@href,"brand")]');
    if ($brand === null) $brand = xp_text($xp, '//span[contains(@class,"brand")]//a');

    // Categories from the breadcrumb (the home link 'خانه' is skipped)
    $categories = [];
    foreach ($xp->query('//nav[contains(@class,"woocommerce-breadcrumb")]//a') as $a) {
        $t = trim($a->textContent);
        if ($t !== '' && $t !== 'خانه') $categories[] = $t;
    }
    $categories = array_values(array_unique($categories));

    // JSON-LD fills whatever the markup did not provide
    $jsonld = extract_jsonld_product_myphone($xp);
    if ($jsonld) {
        if ($sku === null && isset($jsonld['sku'])) $sku = $jsonld['sku'];
        if ($brand === null && isset($jsonld['brand'])) $brand = $jsonld['brand'];
        if (empty($attributes) && !empty($jsonld['attributes'])) $attributes = $jsonld['attributes'];
        if (empty($images) && !empty($jsonld['images'])) $images = $jsonld['images'];
        if ($name === null && isset($jsonld['name'])) $name = $jsonld['name'];
        if ($description_html === null && isset($jsonld['description_html'])) $description_html = $jsonld['description_html'];
        if ($price_raw === null && isset($jsonld['price_raw'])) $price_raw = $jsonld['price_raw'];
        if ($currency === null && isset($jsonld['currency'])) $currency = $jsonld['currency'];
        if ($price_toman === null && $price_raw !== null) $price_toman = price_to_toman($price_raw, $currency);
    }

    // Derive the plain-text description only after the JSON-LD fallback, so a
    // description that came from JSON-LD gets a text version as well.
    $description_text = $description_html ? trim(strip_tags($description_html)) : null;

    $out = [
        'ok'               => true,
        'url'              => $inputUrl,
        'name'             => $name,
        'description_html' => $description_html,
        'description_text' => $description_text,
        'price_toman'      => $price_toman,          // int | null
        'currency_source'  => $currency,             // IRR/IRT when one was found
        'price_raw'        => $price_raw,
        'images'           => $images,
        'sku'              => $sku,
        'brand'            => $brand,
        'categories'       => $categories,
        'attributes'       => $attributes,
        'debug' => [
            'og_price_amount'   => xp_meta($xp, 'product:price:amount'),
            'og_price_currency' => xp_meta($xp, 'product:price:currency'),
            'twitter_data1'     => xp_meta($xp, 'twitter:data1'),
            'twitter_label1'    => xp_meta($xp, 'twitter:label1'),
        ],
    ];

    echo json_encode($out, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT);

} catch (Throwable $e) {
    // Report the failure as JSON (message, line and file basename) with status 500.
    http_response_code(500);
    echo json_encode([
        'ok' => false,
        'error' => 'exception',
        'message' => $e->getMessage(),
        'line' => $e->getLine(),
        'file' => basename($e->getFile())
    ], JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT);
}

/* ======================== Helpers (PHP 7) ======================== */

/**
 * Download a page over HTTP(S) and return its body.
 *
 * Uses cURL when the extension is loaded, otherwise falls back to
 * file_get_contents() with a stream context (needs allow_url_fopen).
 *
 * @param string $url     Absolute URL to fetch.
 * @param int    $timeout Overall timeout in seconds.
 * @return string|null Response body; null when the transfer fails, the body is
 *                     empty, or (cURL path) the HTTP status is >= 400.
 */
function fetch_url($url, $timeout = 25) {
    if (!function_exists('curl_init')) {
        // Simple fallback via file_get_contents (when allow_url_fopen is enabled)
        $ctx = stream_context_create([
            'http' => [
                'method' => 'GET',
                'header' => "User-Agent: Mozilla/5.0 (compatible; MyPhoneScraper/1.0)\r\nAccept-Language: fa-IR,fa;q=0.9,en-US;q=0.8\r\n",
                'timeout' => $timeout
            ]
        ]);
        $body = @file_get_contents($url, false, $ctx);
        // An empty document cannot be parsed by the caller; report it as a failed fetch.
        return ($body !== false && $body !== '') ? $body : null;
    }

    $ch = curl_init($url);
    if ($ch === false) return null;

    // The caller validates only the initial host; redirects are followed, so pin
    // the transfer (and every redirect hop) to HTTP/HTTPS to keep other schemes out.
    $webOnly = CURLPROTO_HTTP | CURLPROTO_HTTPS;
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 5,
        CURLOPT_PROTOCOLS      => $webOnly,
        CURLOPT_REDIR_PROTOCOLS => $webOnly,
        CURLOPT_TIMEOUT        => $timeout,
        CURLOPT_CONNECTTIMEOUT => 15,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36',
        CURLOPT_HTTPHEADER     => [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: fa-IR,fa;q=0.9,en-US;q=0.8,en;q=0.7'
        ],
        CURLOPT_REFERER        => 'https://myphone.ir/',
        CURLOPT_ENCODING       => '' // empty string: accept every encoding cURL supports (gzip/deflate)
    ]);
    $body = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($body === false || $body === '' || $code >= 400) return null;
    return $body;
}

/**
 * Trimmed text content of the first node matched by an XPath query.
 *
 * @param DOMXPath     $xp    XPath evaluator bound to the document.
 * @param string       $query XPath expression.
 * @param DOMNode|null $ctx   Optional context node for relative expressions.
 * @return string|null The text, or null when nothing matches or the text is empty.
 */
function xp_text(DOMXPath $xp, $query, DOMNode $ctx = null) {
    $matches = $ctx ? $xp->query($query, $ctx) : $xp->query($query);
    $first = $matches->item(0);

    if (!$first || !isset($first->textContent)) {
        return null;
    }

    $text = trim($first->textContent);
    return $text === '' ? null : $text;
}

/**
 * Value of an attribute on the first node matched by an XPath query.
 *
 * When the match is an element, its $attr attribute is read. For any other node
 * exposing a nodeValue property, the trimmed nodeValue is used instead.
 *
 * @param DOMXPath     $xp    XPath evaluator bound to the document.
 * @param string       $query XPath expression.
 * @param string       $attr  Attribute name to read from an element match.
 * @param DOMNode|null $ctx   Optional context node for relative expressions.
 * @return string|null The value, or null when nothing matches or the value is empty.
 */
function xp_attr(DOMXPath $xp, $query, $attr, DOMNode $ctx = null) {
    $matches = $ctx ? $xp->query($query, $ctx) : $xp->query($query);
    $node = $matches->item(0);

    if ($node instanceof DOMElement) {
        $raw = $node->getAttribute($attr);
    } elseif ($node && property_exists($node, 'nodeValue')) {
        $raw = trim((string)$node->nodeValue);
    } else {
        return null;
    }

    return $raw === '' ? null : $raw;
}

/**
 * Inner HTML (serialised child nodes) of the first node matched by an XPath query.
 *
 * @param DOMXPath     $xp    XPath evaluator bound to the document.
 * @param string       $query XPath expression.
 * @param DOMNode|null $ctx   Optional context node for relative expressions.
 * @return string|null Trimmed markup, or null when nothing matches or the markup is empty.
 */
function xp_inner_html(DOMXPath $xp, $query, DOMNode $ctx = null) {
    $matches = $ctx ? $xp->query($query, $ctx) : $xp->query($query);
    $target = $matches->item(0);
    if (!$target) {
        return null;
    }

    // Serialise each child through the owning document and join the pieces.
    $pieces = [];
    foreach ($target->childNodes as $child) {
        $pieces[] = $target->ownerDocument->saveHTML($child);
    }

    $markup = trim(implode('', $pieces));
    return $markup === '' ? null : $markup;
}

/**
 * Content of a <meta> tag, looked up by its property attribute first and by its
 * name attribute second.
 *
 * @param DOMXPath $xp             XPath evaluator bound to the document.
 * @param string   $propertyOrName Value of the property/name attribute to look for.
 * @return string|null Trimmed content of the first non-empty hit, or null.
 */
function xp_meta(DOMXPath $xp, $propertyOrName) {
    foreach (['property', 'name'] as $attribute) {
        $hit = $xp->query("//meta[@{$attribute}='{$propertyOrName}']/@content")->item(0);
        if (!$hit) {
            continue;
        }
        $content = trim((string)$hit->nodeValue);
        if ($content !== '') {
            return $content;
        }
    }
    return null;
}

/**
 * Reduce a price string to ASCII digits with at most one decimal point.
 *
 * Persian/Arabic digits and separators are converted first, then every character
 * other than 0-9 and '.' is dropped and leading/trailing dots are trimmed.
 * A value that still holds more than one dot (e.g. "1.250.000") cannot be a
 * decimal number, so its dots are treated as thousands separators and removed.
 *
 * @param string|null $s Raw price text.
 * @return string|null Normalised number, or null for null input or input without digits.
 */
function normalize_price_string($s) {
    if ($s === null) return null;

    $number = convert_persian_digits_to_latin((string)$s);
    // Byte-wise ASCII filter: with the /u modifier \d would also accept non-ASCII
    // digits that (float) cannot parse, and invalid UTF-8 would make preg_replace fail.
    $number = (string)preg_replace('/[^0-9.]/', '', $number);
    $number = trim($number, '.');

    if (substr_count($number, '.') > 1) {
        $number = str_replace('.', '', $number);
    }

    // After trimming dots, any non-empty remainder starts with a digit.
    return $number !== '' ? $number : null;
}

/**
 * Convert a raw price to whole Toman.
 *
 * IRR amounts are divided by 10. IRT/TOMAN/TOM amounts are kept as they are.
 * With any other or no currency, an amount above 1,000,000 is guessed to be
 * Rial and divided by 10.
 *
 * @param string|int|float|null $priceRaw Numeric price.
 * @param string|null           $currency Currency code (case-insensitive).
 * @return int|null Rounded Toman amount, or null when $priceRaw is null.
 */
function price_to_toman($priceRaw, $currency) {
    if ($priceRaw === null) {
        return null;
    }

    $amount = (float)$priceRaw;
    $code = $currency ? strtoupper($currency) : null;

    $isToman = in_array($code, ['IRT', 'TOMAN', 'TOM'], true);
    // Rial when declared as IRR, or guessed from the magnitude when the currency is unknown.
    $isRial = ($code === 'IRR') || (!$isToman && $amount > 1000000);

    if ($isRial) {
        $amount = $amount / 10.0;
    }

    return (int) round($amount);
}

/**
 * Pick the product price from the page metadata, in priority order:
 * JSON-LD Product, then Open Graph (product:price:*), then Twitter card data.
 *
 * @param DOMXPath $xp XPath evaluator bound to the document.
 * @return array Two-element list [price|null, currency|null].
 */
function extract_price_priority(DOMXPath $xp) {
    // 1) JSON-LD Product
    $product = extract_jsonld_product_myphone($xp);
    if (!empty($product['price_raw'])) {
        return [$product['price_raw'], $product['currency'] ?? null];
    }

    // 2) Open Graph
    $ogAmount = xp_meta($xp, 'product:price:amount');
    if ($ogAmount) {
        $ogCurrency = xp_meta($xp, 'product:price:currency');
        return [normalize_price_string($ogAmount), $ogCurrency ? strtoupper($ogCurrency) : null];
    }

    // 3) Twitter card: only when label1 mentions "price" and data1 is present
    $label = xp_meta($xp, 'twitter:label1');
    $data = xp_meta($xp, 'twitter:data1');
    if ($label && stripos($label, 'price') !== false && $data) {
        list($amount, $currency) = guess_price_currency_from_string($data);
        if ($amount) {
            return [normalize_price_string($amount), $currency ? strtoupper($currency) : null];
        }
    }

    return [null, null];
}

/**
 * Collect product data from every JSON-LD <script> block in the document.
 *
 * Handles a top-level object, a top-level list and objects nested in "@graph";
 * "@type" may be a string or a list containing "Product" (case-insensitive).
 *
 * @param DOMXPath $xp XPath evaluator bound to the document.
 * @return array Subset of the keys name, images (list of strings), sku, brand,
 *               description_html, price_raw, currency, attributes
 *               (list of ['name' => ..., 'value' => ...]); empty when no Product is found.
 */
function extract_jsonld_product_myphone(DOMXPath $xp) {
    $out = [];
    foreach ($xp->query("//script[@type='application/ld+json']") as $node) {
        /** @var DOMElement $node */
        $json = json_decode($node->nodeValue, true);
        if (!is_array($json) || !$json) continue;

        foreach (jsonld_nodes_myphone($json) as $obj) {
            if (!jsonld_is_product_myphone($obj)) continue;

            if (!empty($obj['name'])) $out['name'] = trim((string)$obj['name']);

            if (!empty($obj['image'])) {
                // "image" may be a URL, an ImageObject, or a list of either.
                $rawImages = $obj['image'];
                $candidates = (is_array($rawImages) && !is_assoc($rawImages)) ? $rawImages : [$rawImages];
                $images = [];
                foreach ($candidates as $img) {
                    if (is_array($img)) $img = $img['url'] ?? ($img['contentUrl'] ?? null);
                    if (is_string($img) && $img !== '') $images[] = $img;
                }
                if ($images) $out['images'] = $images;
            }

            if (!empty($obj['sku'])) $out['sku'] = (string)$obj['sku'];

            if (!empty($obj['brand'])) {
                $brand = $obj['brand'];
                // A list of brands: use the first entry.
                if (is_array($brand) && !is_assoc($brand)) $brand = reset($brand);
                if (is_array($brand)) {
                    $brand = (isset($brand['name']) && is_scalar($brand['name'])) ? (string)$brand['name'] : null;
                } elseif (is_scalar($brand)) {
                    $brand = (string)$brand;
                } else {
                    $brand = null;
                }
                if ($brand !== null && $brand !== '') $out['brand'] = $brand;
            }

            if (!empty($obj['description'])) {
                $out['description_html'] = (string)$obj['description'];
            }

            if (!empty($obj['offers']) && is_array($obj['offers'])) {
                $offers = is_assoc($obj['offers']) ? [$obj['offers']] : $obj['offers'];
                foreach ($offers as $offer) {
                    if (!is_array($offer)) continue;
                    list($price, $currency) = jsonld_offer_price_myphone($offer);
                    // The first usable price/currency wins, as before.
                    if ($price && empty($out['price_raw'])) $out['price_raw'] = $price;
                    if ($currency && empty($out['currency'])) $out['currency'] = $currency;
                }
            }

            if (!empty($obj['additionalProperty']) && is_array($obj['additionalProperty'])) {
                $attrs = [];
                foreach ($obj['additionalProperty'] as $p) {
                    if (!is_array($p)) continue;
                    $k = (isset($p['name']) && is_scalar($p['name'])) ? trim((string)$p['name']) : null;
                    $v = (isset($p['value']) && is_scalar($p['value'])) ? trim((string)$p['value']) : null;
                    if ($k && $v) $attrs[] = ['name'=>$k, 'value'=>$v];
                }
                if ($attrs) $out['attributes'] = $attrs;
            }
        }
    }
    return $out;
}

/** Flatten a decoded JSON-LD document into its top-level objects plus the members of any "@graph". */
function jsonld_nodes_myphone(array $json) {
    $nodes = [];
    foreach (is_assoc($json) ? [$json] : $json as $item) {
        if (!is_array($item)) continue;
        $nodes[] = $item;
        if (isset($item['@graph']) && is_array($item['@graph'])) {
            foreach ($item['@graph'] as $member) {
                if (is_array($member)) $nodes[] = $member;
            }
        }
    }
    return $nodes;
}

/** True when the node's "@type" (string or list of strings) names "Product", case-insensitively. */
function jsonld_is_product_myphone(array $obj) {
    if (!isset($obj['@type'])) return false;
    foreach ((array)$obj['@type'] as $type) {
        if (is_string($type) && strtolower($type) === 'product') return true;
    }
    return false;
}

/**
 * Read [price, currency] from one offer: the offer itself first, then its
 * priceSpecification (a single object or a list). "lowPrice" is used when "price" is absent.
 */
function jsonld_offer_price_myphone(array $offer) {
    $sources = [$offer];
    if (!empty($offer['priceSpecification']) && is_array($offer['priceSpecification'])) {
        $specs = $offer['priceSpecification'];
        foreach (is_assoc($specs) ? [$specs] : $specs as $spec) {
            if (is_array($spec)) $sources[] = $spec;
        }
    }

    $price = null;
    $currency = null;
    foreach ($sources as $src) {
        foreach (['price', 'lowPrice'] as $key) {
            if ($price !== null) break;
            if (!empty($src[$key]) && is_scalar($src[$key])) {
                $price = normalize_price_string((string)$src[$key]);
            }
        }
        if ($currency === null && !empty($src['priceCurrency']) && is_scalar($src['priceCurrency'])) {
            $currency = strtoupper((string)$src['priceCurrency']);
        }
    }
    return [$price, $currency];
}

/**
 * Guess the amount and currency from free text such as a Twitter card value.
 *
 * A Toman marker takes precedence over a Rial marker when both occur.
 *
 * @param string $txt Text holding a price.
 * @return array Two-element list [normalised number|null, 'IRR'|'IRT'|null].
 */
function guess_price_currency_from_string($txt) {
    $haystack = mb_strtolower(trim((string)$txt));
    $mentions = function ($needle) use ($haystack) {
        return strpos($haystack, $needle) !== false;
    };

    if ($mentions('irt') || $mentions('تومان') || $mentions('تومن')) {
        $currency = 'IRT';
    } elseif ($mentions('irr') || $mentions('ریال')) {
        $currency = 'IRR';
    } else {
        $currency = null;
    }

    return [normalize_price_string($txt), $currency];
}

/**
 * Replace Persian and Arabic-Indic digits with ASCII digits.
 *
 * The Arabic decimal separator becomes '.', and the Arabic thousands separator is removed.
 *
 * @param string $s Input text.
 * @return string Text with the digits and separators converted.
 */
function convert_persian_digits_to_latin($s) {
    $separators = ['٫' => '.', '٬' => ''];

    $persianMap = array_combine(['۰','۱','۲','۳','۴','۵','۶','۷','۸','۹'], ['0','1','2','3','4','5','6','7','8','9']) + $separators;
    $arabicMap  = array_combine(['٠','١','٢','٣','٤','٥','٦','٧','٨','٩'], ['0','1','2','3','4','5','6','7','8','9']) + $separators;

    // Two passes, Persian then Arabic, in the same order as the two replacement rounds before.
    $converted = strtr((string)$s, $persianMap);
    return strtr($converted, $arabicMap);
}

/**
 * Tell whether a value is an array whose keys are not exactly 0..n-1 in order.
 *
 * An empty array is a list, not an associative array. The previous comparison
 * against range(0, -1), which is [0, -1], wrongly reported it as associative.
 *
 * @param mixed $arr Value to inspect.
 * @return bool True for a non-empty array with non-sequential keys, false otherwise.
 */
function is_assoc($arr) {
    if (!is_array($arr)) return false;

    // Walk the keys against a running counter instead of materialising range().
    $expected = 0;
    foreach ($arr as $key => $unused) {
        if ($key !== $expected) return true;
        $expected++;
    }
    return false;
}

/**
 * Turn a possibly relative URL into an absolute one, using $base as reference.
 *
 * Handles protocol-relative ("//host/x"), already absolute, root-relative ("/x")
 * and document-relative ("x") references. Dot segments ("../") are not collapsed.
 *
 * @param string $maybe URL found in the page.
 * @param string $base  Absolute URL of the page.
 * @return string Absolute URL; $maybe unchanged when it is empty, already has a
 *                scheme, or $base lacks a scheme/host.
 */
function absolutize_url($maybe, $base) {
    if ($maybe === '') return $maybe;

    if (strpos($maybe, '//') === 0) {
        $scheme = parse_url($base, PHP_URL_SCHEME) ?: 'https';
        return $scheme . ':' . $maybe;
    }
    if (parse_url($maybe, PHP_URL_SCHEME)) return $maybe;

    $p = parse_url($base);
    if (!is_array($p) || empty($p['scheme']) || empty($p['host'])) {
        // Nothing to resolve against.
        return $maybe;
    }
    $root = $p['scheme'].'://'.$p['host'] . (isset($p['port']) ? ':'.$p['port'] : '');
    if (strpos($maybe, '/') === 0) return $root . $maybe;

    // The base directory is everything up to and including the last '/' of the
    // base path. dirname() would drop the final directory of a path that ends
    // in '/', e.g. "/product/foo/" would resolve against "/product/".
    $path = (isset($p['path']) && $p['path'] !== '') ? $p['path'] : '/';
    $lastSlash = strrpos($path, '/');
    $dir = $lastSlash === false ? '/' : substr($path, 0, $lastSlash + 1);

    return $root . $dir . $maybe;
}