<?php
/**
 * Ghabnaab Product Web Service (site-specific) – v1.2
 * PHP 7+ – Single file
 * Endpoint: product_api_ghabnaab.php?url=https://www.ghabnaab.com/R-993
 *
 * Improvements in v1.2:
 *  - Proper URL encoding for non-ASCII/space in image paths (fixes "incomplete/broken" image links)
 *  - Rich extraction of description (HTML + text)
 *  - Technical specs/features parsing (tables, lists, key:value blocks)
 *  - Deeper Livewire traversal to collect variants/options and (if present) stock/price per variant
 */

declare(strict_types=1);

// ===================== Utils / HTTP =====================
/**
 * Emit $data as a pretty-printed JSON HTTP response and terminate the script.
 *
 * The payload is encoded before any header is sent so that an encoding
 * failure can still be reported with a proper error status.
 *
 * @param mixed $data   Any JSON-encodable value.
 * @param int   $status HTTP status code to send (default 200).
 */
function respond_json($data, int $status = 200): void {
    $flags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT;
    $json  = json_encode($data, $flags);
    if ($json === false) {
        // json_encode() returns false on e.g. malformed UTF-8; retry and let
        // the encoder substitute null for the values it cannot represent.
        $json = json_encode($data, $flags | JSON_PARTIAL_OUTPUT_ON_ERROR);
    }
    if ($json === false) {
        // Still not encodable: report the failure instead of an empty body.
        $status = 500;
        $json = json_encode(
            ['ok' => false, 'error' => 'JSON encoding failed: ' . json_last_error_msg()],
            $flags
        );
    }
    if (!headers_sent()) {
        http_response_code($status);
        header('Content-Type: application/json; charset=utf-8');
        header('Access-Control-Allow-Origin: *');
        header('Access-Control-Allow-Methods: GET, OPTIONS');
        header('Access-Control-Allow-Headers: Content-Type');
    }
    echo $json;
    exit;
}

/**
 * Tell whether $url is a syntactically valid http(s) URL.
 *
 * @param string $url Candidate URL.
 * @return bool True when filter_var() accepts it and its scheme is http or https.
 */
function is_valid_url(string $url): bool {
    if (filter_var($url, FILTER_VALIDATE_URL) === false) return false;
    // parse_url() keeps the scheme's original case, so compare case-insensitively.
    $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));
    return $scheme === 'http' || $scheme === 'https';
}

/**
 * Download $url with cURL, following up to 5 redirects.
 *
 * @param string $url Absolute URL to fetch.
 * @return array [body, info, error]: the response body (false on failure),
 *               the curl_getinfo() array, and the curl_error() string.
 */
function fetch_url(string $url): array {
    $ch = curl_init($url);
    if ($ch === false) {
        // Keep the [body, info, error] tuple shape so callers can treat this like any other failure.
        return [false, [], 'Unable to initialise cURL'];
    }
    // Only plain web protocols, both for the first request and for any
    // redirect target (a redirect must not be able to switch to file://, gopher://, ...).
    $webOnly = CURLPROTO_HTTP | CURLPROTO_HTTPS;
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER  => true,
        CURLOPT_FOLLOWLOCATION  => true,
        CURLOPT_MAXREDIRS       => 5,
        CURLOPT_PROTOCOLS       => $webOnly,
        CURLOPT_REDIR_PROTOCOLS => $webOnly,
        CURLOPT_TIMEOUT         => 25,
        CURLOPT_CONNECTTIMEOUT  => 10,
        CURLOPT_ENCODING        => '', // empty string: accept every encoding cURL supports
        CURLOPT_USERAGENT       => 'Mozilla/5.0 (compatible; GhabnaabScraper/1.2)',
        CURLOPT_SSL_VERIFYPEER  => true,
        CURLOPT_SSL_VERIFYHOST  => 2,
        CURLOPT_HTTPHEADER      => ['Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'],
    ]);
    $body = curl_exec($ch);
    $info = curl_getinfo($ch);
    $err  = curl_error($ch);
    curl_close($ch);
    return [$body, $info, $err];
}

/**
 * Work out the character set of a fetched page.
 *
 * Order of precedence: the Content-Type response header, a <meta charset>
 * tag, a <meta http-equiv="content-type"> tag, and finally UTF-8.
 *
 * @param string $html     Raw response body.
 * @param array  $curlInfo curl_getinfo() array; only 'content_type' is read.
 * @return string Upper-cased charset name ('UTF8' is reported as 'UTF-8').
 */
function detect_charset(string $html, array $curlInfo): string {
    $normalize = static function (string $name): string {
        $name = strtoupper(trim($name, " \t\r\n\"'"));
        // Report the common alias under the canonical name so callers comparing against 'UTF-8' match.
        return $name === 'UTF8' ? 'UTF-8' : $name;
    };

    $ct = isset($curlInfo['content_type']) ? (string)$curlInfo['content_type'] : '';
    // The header value may be quoted: charset="windows-1256".
    if (preg_match('~charset\s*=\s*["\']?([\w\-]+)~i', $ct, $m)) return $normalize($m[1]);
    if (preg_match('~<meta[^>]+charset=["\']?\s*([a-zA-Z0-9\-\_]+)~i', $html, $m)) return $normalize($m[1]);
    if (preg_match('~<meta[^>]+http-equiv=["\']content-type["\'][^>]*content=["\'][^"]*charset=([^"\']+)~i', $html, $m)) return $normalize($m[1]);
    return 'UTF-8';
}

/**
 * Convert a fetched page to UTF-8 based on its detected charset.
 *
 * Conversion is best-effort: iconv is tried first, then mbstring; when neither
 * can convert (for instance an unknown charset name) the input is returned
 * unchanged.
 *
 * @param string $html     Raw response body.
 * @param array  $curlInfo curl_getinfo() array, forwarded to detect_charset().
 * @return string The UTF-8 text, or $html itself when no conversion succeeded.
 */
function to_utf8(string $html, array $curlInfo): string {
    $charset = detect_charset($html, $curlInfo);
    if ($charset === 'UTF-8' || $charset === 'UTF8') return $html;

    if (function_exists('iconv')) {
        $converted = @iconv($charset, 'UTF-8//IGNORE', $html);
        if ($converted !== false) return $converted;
    }

    if (function_exists('mb_convert_encoding')) {
        try {
            $converted = @mb_convert_encoding($html, 'UTF-8', $charset);
        } catch (\Throwable $e) {
            // PHP 8 throws ValueError for a charset name mbstring does not
            // know; "@" cannot silence an exception, so catch it here.
            $converted = false;
        }
        if (is_string($converted) && $converted !== '') return $converted;
    }
    return $html;
}

/**
 * Parse an HTML string (expected to be UTF-8 already) and return a DOMXPath for it.
 *
 * When the markup carries no <meta charset=...> tag, a UTF-8 Content-Type meta
 * is injected so the parser does not have to guess the encoding. libxml parse
 * errors are collected internally and discarded.
 *
 * @param string $html UTF-8 HTML markup.
 * @return DOMXPath XPath evaluator bound to the parsed document.
 */
function dom_xpath(string $html): DOMXPath {
    libxml_use_internal_errors(true);
    if (stripos($html, '<meta charset=') === false) {
        $meta = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">';
        $hits = 0;
        // "(\s[^>]*)?" keeps the pattern from matching <header ...>.
        $patched = preg_replace('~<head(\s[^>]*)?>~i', '<head$1>' . $meta, $html, 1, $hits);
        if (!is_string($patched) || $hits === 0) {
            // No <head>: create one right after the opening <html> tag.
            $patched = preg_replace('~<html(\s[^>]*)?>~i', '<html$1><head>' . $meta . '</head>', $html, 1, $hits);
        }
        // Bare fragment with neither tag: put the hint in front of everything
        // so it is seen before any non-ASCII byte.
        $html = (is_string($patched) && $hits > 0) ? $patched : $meta . $html;
    }
    $dom = new DOMDocument('1.0', 'UTF-8');
    $dom->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR);
    libxml_clear_errors();
    return new DOMXPath($dom);
}

/**
 * Tell whether $arr is something other than a plain 0..n-1 list.
 *
 * @param array $arr Array to inspect.
 * @return bool False for an empty array or a sequential zero-based list,
 *              true as soon as a key deviates from that sequence.
 */
function is_assoc(array $arr): bool {
    $expected = 0;
    foreach ($arr as $key => $_unused) {
        if ($key !== $expected) {
            return true;
        }
        $expected++;
    }
    return false;
}

// ---- URL helpers (with UTF-8 path encoding) ----
/**
 * Percent-encode the path segments of a URL while leaving the rest untouched.
 *
 * Every non-empty segment is decoded once and encoded again, so spaces and
 * non-ASCII characters get escaped and existing escapes are not doubled.
 * Query string and fragment are passed through as-is.
 *
 * @param string $url Absolute URL or path-only reference.
 * @return string The URL with an encoded path; the input unchanged when it
 *                cannot be parsed or has a scheme but no host (e.g. mailto:).
 */
function encode_url_path_preserving(string $url): string {
    $parts = parse_url($url);
    if ($parts === false) return $url;

    $host = $parts['host'] ?? '';
    if ($host === '' && isset($parts['scheme'])) {
        // Scheme without authority (mailto:, data:, ...): there is no path to fix here.
        return $url;
    }

    $segments = explode('/', $parts['path'] ?? '');
    foreach ($segments as $i => $seg) {
        if ($seg === '' || $seg === '.') continue;
        // Decode first so an already-encoded segment is not encoded twice.
        $segments[$i] = rawurlencode(rawurldecode($seg));
    }
    $tail = implode('/', $segments)
          . (isset($parts['query']) ? '?' . $parts['query'] : '')
          . (isset($parts['fragment']) ? '#' . $parts['fragment'] : '');

    if ($host === '') {
        // Path-only reference: keep it relative instead of inventing "https://".
        return $tail;
    }

    $scheme = $parts['scheme'] ?? 'https';
    $auth   = '';
    if (isset($parts['user'])) {
        $auth = $parts['user'] . (isset($parts['pass']) ? ':' . $parts['pass'] : '') . '@';
    }
    $port = isset($parts['port']) ? ':' . $parts['port'] : '';
    return $scheme . '://' . $auth . $host . $port . $tail;
}

/**
 * Resolve $rel against $base and return an absolute, path-encoded URL.
 *
 * Handled forms: absolute http(s) URLs, other absolute URIs (returned as-is),
 * protocol-relative "//host/..." references, root-relative and
 * document-relative paths, and query-only / fragment-only references.
 * "." and ".." segments are collapsed inside the path only.
 *
 * @param string $base URL of the document the reference was found in.
 * @param string $rel  Reference to resolve.
 * @return string The resolved URL; '' when $rel is ''.
 */
function make_absolute_url(string $base, string $rel): string {
    if ($rel === '') return $rel;
    if (preg_match('~^https?://~i', $rel)) return encode_url_path_preserving($rel);
    // data:, mailto:, tel:, javascript: ... are already absolute and have no
    // path to resolve against the base.
    if (preg_match('~^[a-z][a-z0-9+.\-]*:~i', $rel)) return $rel;
    if (strpos($rel, '//') === 0) {
        $scheme = parse_url($base, PHP_URL_SCHEME) ?: 'https';
        return encode_url_path_preserving($scheme . ':' . $rel);
    }

    $parsed = parse_url($base);
    if (!$parsed) return encode_url_path_preserving($rel);
    $scheme   = $parsed['scheme'] ?? 'https';
    $host     = $parsed['host'] ?? '';
    $port     = isset($parsed['port']) ? ':' . $parsed['port'] : '';
    $basePath = $parsed['path'] ?? '/';
    if ($basePath === '' || $basePath[0] !== '/') $basePath = '/' . $basePath;

    // Separate the path from "?query#fragment" so dot-segment handling never
    // touches the query string.
    $cut     = strcspn($rel, '?#');
    $relPath = (string)substr($rel, 0, $cut);
    $suffix  = (string)substr($rel, $cut);

    if ($relPath === '') {
        // "?x=1" or "#top": same document as the base.
        $path = $basePath;
        if ($suffix !== '' && $suffix[0] === '#' && isset($parsed['query'])) {
            $suffix = '?' . $parsed['query'] . $suffix;
        }
    } elseif ($relPath[0] === '/') {
        $path = $relPath;
    } else {
        $path = preg_replace('~/[^/]*$~', '/', $basePath) . $relPath;
    }

    // Collapse "." and ".." within the path; ".." never climbs above the root.
    $segments = explode('/', $path);
    $lastIdx  = count($segments) - 1;
    $kept     = [];
    foreach ($segments as $i => $seg) {
        if ($seg === '.' || $seg === '..') {
            if ($seg === '..' && count($kept) > 1) array_pop($kept);
            // A trailing dot segment refers to a directory: keep the final slash.
            if ($i === $lastIdx) $kept[] = '';
            continue;
        }
        $kept[] = $seg;
    }
    return encode_url_path_preserving("$scheme://$host$port" . implode('/', $kept) . $suffix);
}

// ===================== Metadata & JSON-LD =====================
/**
 * Collect the document's <meta> tags, grouped by family.
 *
 * @param DOMXPath $xp XPath evaluator for the page.
 * @return array ['og' => [...], 'twitter' => [...], 'misc' => [...]]; each
 *               bucket maps the tag's property/name (original case) to its
 *               content attribute.
 */
function extract_metas(DOMXPath $xp): array {
    $data = ['og' => [], 'twitter' => [], 'misc' => []];
    foreach ($xp->query('//meta') as $m) {
        /** @var DOMElement $m */
        $name  = $m->getAttribute('name');
        $prop  = $m->getAttribute('property');
        $value = $m->getAttribute('content');
        if ($prop && stripos($prop, 'og:') === 0) {
            $data['og'][$prop] = $value;
        } elseif ($name && stripos($name, 'twitter:') === 0) {
            $data['twitter'][$name] = $value;
        } elseif ($name) {
            $data['misc'][$name] = $value;
        } elseif ($prop && stripos($prop, 'twitter:') === 0) {
            // Twitter tags declared through property= with no name= attribute.
            $data['twitter'][$prop] = $value;
        }
    }
    return $data;
}

/**
 * Lower-case the keys of an Open Graph block and absolutize its URL fields.
 *
 * @param array  $og   Map of og:* property => content.
 * @param string $base URL used to resolve relative references.
 * @return array The block with lower-cased keys; og:image, og:image:url and
 *               og:url are run through make_absolute_url() when present.
 */
function normalize_og_block(array $og, string $base): array {
    $lowered = [];
    foreach (array_keys($og) as $key) {
        $lowered[strtolower($key)] = $og[$key];
    }

    foreach (['og:image', 'og:image:url', 'og:url'] as $field) {
        if (!isset($lowered[$field])) {
            continue;
        }
        $lowered[$field] = make_absolute_url($base, $lowered[$field]);
    }

    return $lowered;
}

/**
 * Lower-case the keys of a Twitter-card block and consolidate its images.
 *
 * Keys named twitter:image or twitter:image<N> are absolutized and removed;
 * the first one is re-published as twitter:image and, when several exist, the
 * full list as twitter:image:all. twitter:image:src is absolutized in place.
 * A missing twitter:card is filled in with 'summary_large_image'.
 *
 * @param array  $tw   Map of twitter:* name => content.
 * @param string $base URL used to resolve relative references.
 * @return array The normalized block.
 */
function normalize_twitter_block(array $tw, string $base): array {
    $out = [];
    foreach ($tw as $k => $v) $out[strtolower($k)] = $v;

    $images = [];
    foreach ($out as $k => $v) {
        if (preg_match('~^twitter:image(\d+)?$~', $k)) {
            $images[] = make_absolute_url($base, $v);
            unset($out[$k]);
        }
    }
    // The pattern above deliberately does not match this key, so it is still in $out.
    if (isset($out['twitter:image:src']) && is_string($out['twitter:image:src'])) {
        $out['twitter:image:src'] = make_absolute_url($base, $out['twitter:image:src']);
    }
    // No fallback for a leftover 'twitter:image' is needed: the loop above
    // has already consumed that key whenever it was present.
    if (!empty($images)) {
        $out['twitter:image'] = $images[0];
        if (count($images) > 1) $out['twitter:image:all'] = $images;
    }
    if (!isset($out['twitter:card'])) $out['twitter:card'] = 'summary_large_image';
    return $out;
}

/**
 * Read the page title, canonical link and the <html> lang/dir attributes.
 *
 * @param DOMXPath $xp XPath evaluator for the page.
 * @return array Keys 'title', 'canonical', 'lang', 'dir'. A key is null when
 *               its source element is missing; an element that exists but
 *               lacks the attribute yields ''.
 */
function extract_title_canonical_lang(DOMXPath $xp): array {
    $firstMatch = static function (string $expr) use ($xp) {
        return $xp->query($expr)->item(0);
    };
    $titleNode = $firstMatch('//title');
    $canonNode = $firstMatch('//link[@rel="canonical"]');
    $rootNode  = $firstMatch('/html');

    $result = ['title' => null, 'canonical' => null, 'lang' => null, 'dir' => null];
    if ($titleNode) {
        $result['title'] = trim($titleNode->textContent);
    }
    if ($canonNode) {
        $result['canonical'] = $canonNode->getAttribute('href');
    }
    if ($rootNode) {
        $result['lang'] = $rootNode->getAttribute('lang');
        $result['dir']  = $rootNode->getAttribute('dir');
    }
    return $result;
}

/**
 * Extract schema.org Product and WebPage nodes from the page's JSON-LD scripts.
 *
 * Supported layouts: a single typed object, an object with @graph, a list of
 * objects, and list items that wrap their nodes in @graph. @type may be a
 * string or a list and may be written as a bare name, a schema.org IRI or
 * with the "schema:" prefix; matching is case-insensitive.
 *
 * @param DOMXPath $xp XPath evaluator for the page.
 * @return array [products, webpages], each a list of decoded JSON-LD nodes.
 */
function parse_jsonld_products(DOMXPath $xp): array {
    $products = [];
    $webpages = [];

    $hasType = static function ($type, string $wanted): bool {
        foreach ((array)$type as $t) {
            if (!is_string($t)) continue;
            $short = preg_replace('~^(?:https?://schema\.org/|schema:)~i', '', $t);
            if (is_string($short) && strcasecmp($short, $wanted) === 0) return true;
        }
        return false;
    };

    foreach ($xp->query('//script[@type="application/ld+json"]') as $node) {
        /** @var DOMElement $node */
        $json = trim($node->textContent);
        if ($json === '') continue;
        $decoded = json_decode($json, true);
        if ($decoded === null) {
            // Retry after dropping control characters other than whitespace,
            // which json_decode() rejects inside strings.
            $clean = preg_replace('/[^\PC\s]/u', '', $json);
            if (is_string($clean)) $decoded = json_decode($clean, true);
        }
        if (!is_array($decoded)) continue;

        if (isset($decoded['@type'])) {
            $candidates = [$decoded];
        } elseif (isset($decoded['@graph']) && is_array($decoded['@graph'])) {
            $candidates = $decoded['@graph'];
        } else {
            $candidates = $decoded;
        }

        // Unwrap one more level: untyped items that carry their nodes in @graph.
        $nodes = [];
        foreach ($candidates as $obj) {
            if (is_array($obj) && !isset($obj['@type']) && isset($obj['@graph']) && is_array($obj['@graph'])) {
                foreach ($obj['@graph'] as $inner) $nodes[] = $inner;
            } else {
                $nodes[] = $obj;
            }
        }

        foreach ($nodes as $obj) {
            if (!is_array($obj)) continue;
            $type = $obj['@type'] ?? null;
            if ($hasType($type, 'Product')) {
                $products[] = $obj;
            } elseif ($hasType($type, 'WebPage')) {
                $webpages[] = $obj;
            }
        }
    }
    return [$products, $webpages];
}

/**
 * Expand the itemCondition / availability values of an offer to full
 * https://schema.org/ IRIs.
 *
 * Recognised values may be given as a bare name ("InStock") or with an
 * http(s)://schema.org/ prefix. Unrecognised or non-string values are left
 * untouched.
 *
 * @param array $offer Decoded schema.org Offer node.
 * @return array The offer with the two fields normalized where possible.
 */
function normalize_schema_offer(array $offer): array {
    $map = [
        'NewCondition' => 'https://schema.org/NewCondition',
        'UsedCondition'=> 'https://schema.org/UsedCondition',
        'InStock'      => 'https://schema.org/InStock',
        'OutOfStock'   => 'https://schema.org/OutOfStock',
        'PreOrder'     => 'https://schema.org/PreOrder',
    ];
    foreach (['itemCondition','availability'] as $k) {
        // Only strings can be looked up; an array here must not be used as an array offset.
        if (!isset($offer[$k]) || !is_string($offer[$k])) continue;
        $short = preg_replace('~^https?://schema\.org/~i', '', trim($offer[$k]));
        if (is_string($short) && isset($map[$short])) $offer[$k] = $map[$short];
    }
    return $offer;
}

/**
 * Make the URL-bearing fields of a JSON-LD Product node absolute.
 *
 * Covers the product's image / url / skuUrl fields (string, list, or a
 * structured node carrying url / contentUrl) and the image / url fields of
 * its offers, whether offers is a single node or a list. Offers are also
 * passed through normalize_schema_offer().
 *
 * @param array  $product Decoded Product node.
 * @param string $base    URL used to resolve relative references.
 * @return array The product with absolutized URLs.
 */
function absolutize_product_urls(array $product, string $base): array {
    $absolutize = null;
    $absolutize = static function ($value) use ($base, &$absolutize) {
        if (is_string($value)) return make_absolute_url($base, $value);
        if (!is_array($value)) return $value;
        if (is_assoc($value)) {
            // Structured node such as an ImageObject: keep its keys and only
            // touch the members that hold a URL.
            foreach (['url', 'contentUrl'] as $key) {
                if (isset($value[$key]) && is_string($value[$key])) {
                    $value[$key] = make_absolute_url($base, $value[$key]);
                }
            }
            return $value;
        }
        $out = [];
        foreach ($value as $item) $out[] = $absolutize($item);
        return $out;
    };

    foreach (['image', 'url', 'skuUrl'] as $field) {
        if (isset($product[$field])) {
            $product[$field] = $absolutize($product[$field]);
        }
    }

    $fixOffer = static function (array $offer) use ($base): array {
        $offer = normalize_schema_offer($offer);
        foreach (['image', 'url'] as $key) {
            if (isset($offer[$key]) && is_string($offer[$key])) {
                $offer[$key] = make_absolute_url($base, $offer[$key]);
            }
        }
        return $offer;
    };

    // A non-array "offers" value is left as it is.
    if (isset($product['offers']) && is_array($product['offers'])) {
        if (is_assoc($product['offers'])) {
            $product['offers'] = $fixOffer($product['offers']);
        } else {
            $new = [];
            foreach ($product['offers'] as $off) {
                $new[] = is_array($off) ? $fixOffer($off) : $off;
            }
            $product['offers'] = $new;
        }
    }
    return $product;
}

// ===================== Livewire & Site-Specific =====================
/**
 * Decode a JSON document taken from an HTML attribute.
 *
 * Attempts, in order: the string as given (values read through the DOM are
 * already entity-decoded), the string after HTML entity decoding, and the
 * entity-decoded string with control characters other than whitespace removed.
 *
 * @param string $s Attribute value.
 * @return array|null The decoded array, or null when no attempt yields an array.
 */
function decode_html_attr_json(string $s): ?array {
    $arr = json_decode($s, true);
    if (is_array($arr)) return $arr;

    $s = html_entity_decode($s, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    $arr = json_decode($s, true);
    if (is_null($arr)) {
        // preg_replace() returns null on malformed UTF-8; never hand that to json_decode().
        $clean = preg_replace('/[^\PC\s]/u', '', $s);
        if (is_string($clean)) $arr = json_decode($clean, true);
    }
    return is_array($arr) ? $arr : null;
}

/**
 * Collect the Livewire component payloads embedded in wire:initial-data attributes.
 *
 * @param DOMXPath $xp XPath evaluator for the page.
 * @return array List of blocks with keys 'component' (fingerprint name),
 *               'data', 'errors', 'children' (from serverMemo) and 'raw'
 *               (the whole decoded payload).
 */
function extract_livewire_blocks(DOMXPath $xp): array {
    $blocks = [];
    // "@wire:initial-data" would be read as namespace prefix "wire", which is
    // not registered, and the query would fail; match on the literal
    // attribute name instead.
    $attrs = $xp->query('//@*[name()="wire:initial-data"]');
    if ($attrs === false) return $blocks;

    foreach ($attrs as $attrNode) {
        /** @var DOMAttr $attrNode */
        $attr = $attrNode->value;
        if (!$attr) continue;
        $decoded = decode_html_attr_json($attr);
        if (!$decoded) continue;
        $serverMemo = $decoded['serverMemo'] ?? [];
        $blocks[] = [
            'component'   => $decoded['fingerprint']['name'] ?? null,
            'data'        => $serverMemo['data'] ?? [],
            'errors'      => $serverMemo['errors'] ?? [],
            'children'    => $serverMemo['children'] ?? [],
            'raw'         => $decoded,
        ];
    }
    return $blocks;
}

/**
 * Replace Persian and Arabic-Indic digits and numeric separators with their
 * ASCII counterparts.
 *
 * @param string $s Text that may contain localized digits.
 * @return string The text with digits mapped to 0-9, the thousands separator
 *                and the Arabic comma mapped to ',', and the Arabic decimal
 *                separator mapped to '.'.
 */
function fa_to_en_digits(string $s): string {
    $from = [
        '۰','۱','۲','۳','۴','۵','۶','۷','۸','۹', // Persian digits (U+06F0..U+06F9)
        '٠','١','٢','٣','٤','٥','٦','٧','٨','٩', // Arabic-Indic digits (U+0660..U+0669)
        '٬','،',                                 // thousands separator, Arabic comma
        '٫',                                     // decimal separator
    ];
    $to = [
        '0','1','2','3','4','5','6','7','8','9',
        '0','1','2','3','4','5','6','7','8','9',
        ',',',',
        '.',
    ];
    return str_replace($from, $to, $s);
}

/**
 * Walk a nested array and pick up variant-like and attribute-like sub-arrays.
 *
 * Keys are compared case-insensitively against fixed name lists. When several
 * non-empty matches exist, the one visited last wins; empty arrays never
 * replace a value that was already found.
 *
 * @param array $arr Arbitrary nested data (e.g. the Livewire blocks).
 * @return array ['attributes' => array, 'variants' => array]; each is []
 *               when nothing matched.
 */
function deep_collect_variants(array $arr): array {
    $variantKeys   = ['variants','variant','options','product_options','models','choices'];
    $attributeKeys = ['attributes','attrs','productattributes'];

    $variants = [];
    $attributes = [];
    $stack = [ $arr ];
    while ($stack) {
        $node = array_pop($stack);
        if (!is_array($node)) continue;
        foreach ($node as $k => $v) {
            if (!is_array($v)) continue;
            if ($v !== []) {
                // An empty placeholder must not wipe out data found elsewhere in the tree.
                $kk = strtolower((string)$k);
                if (in_array($kk, $variantKeys, true)) $variants = $v;
                if (in_array($kk, $attributeKeys, true)) $attributes = $v;
            }
            $stack[] = $v;
        }
    }
    return ['attributes'=>$attributes, 'variants'=>$variants];
}

/**
 * Heuristically pull price / currency / availability information out of the
 * Livewire blocks.
 *
 * The whole structure is walked; scalar values are classified by their
 * (lower-cased) key name, and any list of arrays whose rows expose a
 * name/title/label, price/finalPrice/amount or stock/qty/available member is
 * recorded as the per-variant table. Later matches replace earlier ones.
 *
 * @param array $blocks Blocks as returned by extract_livewire_blocks().
 * @return array Keys 'price', 'original' (digit strings or null), 'currency',
 *               'availability' (strings or null), 'per_variant' (list of
 *               ['name','price','stock'] rows) and 'source' => 'livewire'.
 */
function guess_price_from_livewire(array $blocks): array {
    $price = null; $original = null; $currency = null; $availability = null;
    $perVariant = []; // map option->price/stock if found

    $priceKeys    = ['price','finalprice','sellprice','currentprice','discountedprice','payable'];
    $originalKeys = ['originalprice','listprice','mrp','oldprice'];

    // Turn "1,250,000 <unit>" or localized digits into "1250000". Values with
    // fewer than four consecutive digits are not treated as prices.
    $toAmount = static function ($raw): ?string {
        $s = fa_to_en_digits((string)$raw);
        // Drop thousands separators first so grouped digits form one run.
        $s = preg_replace('~[,\s]+~u', '', $s);
        if (is_string($s) && preg_match('~\d{4,}~', $s, $m)) return $m[0];
        return null;
    };

    $stack = $blocks;
    while ($stack) {
        $node = array_pop($stack);
        if (!is_array($node)) continue;
        foreach ($node as $k => $v) {
            $kk = strtolower((string)$k);
            if (is_array($v)) {
                // Detect per-variant structure like [{name:..., price:..., stock:...}, ...]
                if (isset($v[0]) && is_array($v[0])) {
                    $rec = [];
                    foreach ($v as $row) {
                        $name = $row['name'] ?? ($row['title'] ?? ($row['label'] ?? null));
                        $p    = $row['price'] ?? ($row['finalPrice'] ?? ($row['amount'] ?? null));
                        $st   = $row['stock'] ?? ($row['qty'] ?? ($row['available'] ?? null));
                        if ($name !== null || $p !== null || $st !== null) {
                            $rec[] = ['name'=>$name,'price'=>$p,'stock'=>$st];
                        }
                    }
                    if ($rec) $perVariant = $rec;
                }
                $stack[] = $v;
                continue;
            }
            if (in_array($kk, $priceKeys, true)) {
                $amount = $toAmount($v);
                if ($amount !== null) $price = $amount;
            }
            if (in_array($kk, $originalKeys, true)) {
                $amount = $toAmount($v);
                if ($amount !== null) $original = $amount;
            }
            if (in_array($kk, ['currency','pricecurrency'], true)) $currency = (string)$v;
            if (in_array($kk, ['availability','instock','stockstatus'], true)) $availability = (string)$v;
        }
    }
    return ['price'=>$price,'original'=>$original,'currency'=>$currency,'availability'=>$availability,'per_variant'=>$perVariant,'source'=>'livewire'];
}

// ===================== DOM scraping (desc/specs/images/breadcrumbs) =====================
/**
 * Serialize the children of a node as an HTML string.
 *
 * @param \DOMNode $node Node whose content is wanted.
 * @return string Concatenated HTML of all child nodes ('' when there are none).
 */
function inner_html(\DOMNode $node): string {
    $owner  = $node->ownerDocument;
    $pieces = [];
    for ($child = $node->firstChild; $child !== null; $child = $child->nextSibling) {
        $pieces[] = $owner->saveHTML($child);
    }
    return implode('', $pieces);
}

/**
 * Scrape the product description and specification-like structures from the DOM.
 *
 * Description: the first element whose id/class/text hints at a description
 * and whose text is longer than 20 characters.
 * Specs: one entry per table (key => value rows), per definition list
 * (dt => dd pairs) and per bullet list with at least three items
 * (['_list' => items]).
 *
 * @param DOMXPath $xp XPath evaluator for the page.
 * @return array ['description_html' => ?string, 'description_text' => ?string, 'specs' => array]
 */
function collect_description_and_specs(DOMXPath $xp): array {
    $desc_html = null;
    $desc_text = null;
    $specs = [];

    // Collapse every whitespace run to a single space and trim the ends.
    $squash = static function (string $text): string {
        return trim((string)preg_replace('/\s+/u', ' ', $text));
    };

    // Description sections: ids/classes likely containing Persian words or 'description'
    $candidates = $xp->query('//*[contains(@id,"desc") or contains(@id,"description") or contains(@class,"description") or contains(@id,"توض") or contains(@class,"توض") or contains(text(),"توضیحات")]');
    foreach ($candidates as $el) {
        /** @var DOMElement $el */
        $text = $squash($el->textContent);
        if (mb_strlen($text) > 20) {
            $desc_html = trim(inner_html($el));
            $desc_text = $text;
            break;
        }
    }

    // Specs from tables (key/value). The key cell may be a <th> or a <td>.
    foreach ($xp->query('//table') as $table) {
        $rows = [];
        foreach ($table->getElementsByTagName('tr') as $tr) {
            $cells = [];
            $hasTd = false;
            foreach ($tr->childNodes as $cell) {
                if (!($cell instanceof DOMElement)) continue;
                $tag = strtolower($cell->nodeName);
                if ($tag !== 'th' && $tag !== 'td') continue;
                if ($tag === 'td') $hasTd = true;
                $cells[] = $cell;
            }
            // Rows made only of <th> cells are column headings, not data.
            if (!$hasTd || count($cells) < 2) continue;
            $k = $squash($cells[0]->textContent);
            $v = $squash($cells[1]->textContent);
            if ($k !== '' && $v !== '') $rows[$k] = $v;
        }
        if (!empty($rows)) $specs[] = $rows;
    }

    // Specs from definition lists (dt/dd are paired by position)
    foreach ($xp->query('//dl') as $dl) {
        $rows = [];
        $dts = $dl->getElementsByTagName('dt');
        $dds = $dl->getElementsByTagName('dd');
        $len = min($dts->length, $dds->length);
        for ($i = 0; $i < $len; $i++) {
            $k = $squash($dts->item($i)->textContent);
            $v = $squash($dds->item($i)->textContent);
            if ($k !== '' && $v !== '') $rows[$k] = $v;
        }
        if (!empty($rows)) $specs[] = $rows;
    }

    // Specs from bullet lists if they look like features
    foreach ($xp->query('//ul[li]') as $ul) {
        $items = [];
        foreach ($ul->getElementsByTagName('li') as $li) {
            $t = $squash($li->textContent);
            if ($t !== '') $items[] = $t;
        }
        if (count($items) >= 3) $specs[] = ['_list' => $items];
    }

    return ['description_html'=>$desc_html,'description_text'=>$desc_text,'specs'=>$specs];
}

/**
 * List the links found inside breadcrumb navigation containers.
 *
 * @param DOMXPath $xp XPath evaluator for the page.
 * @return array List of ['title' => trimmed link text, 'url' => href as written].
 */
function collect_breadcrumbs(DOMXPath $xp): array {
    $anchors = $xp->query('//nav[contains(@class,"breadcrumb")]//a | //ul[contains(@class,"breadcrumb")]//a');
    $trail = [];
    for ($i = 0; $i < $anchors->length; $i++) {
        /** @var DOMElement $link */
        $link = $anchors->item($i);
        $trail[] = [
            'title' => trim($link->textContent),
            'url'   => $link->getAttribute('href'),
        ];
    }
    return $trail;
}

/**
 * Collect absolute image URLs from the page, preferring the product's own gallery.
 *
 * Each <img> contributes one URL taken from src, or from data-src when src is
 * empty or an inline data: URI, or from the first srcset candidate. When the
 * JSON-LD product has an image, only URLs sharing that image's directory are
 * returned, provided at least one such URL exists.
 *
 * @param DOMXPath $xp      XPath evaluator for the page.
 * @param string   $base    URL used to resolve relative references.
 * @param array    $product JSON-LD Product node ([] when none was found).
 * @return array De-duplicated list of absolute image URLs in document order.
 */
function collect_gallery_images(DOMXPath $xp, string $base, array $product): array {
    $preferredPrefix = null;
    if (isset($product['image'])) {
        $first = $product['image'];
        if (is_array($first)) {
            // Either a list of images or one structured node carrying "url".
            $first = $first[0] ?? ($first['url'] ?? null);
            if (is_array($first)) $first = $first['url'] ?? null;
        }
        if (is_string($first) && $first !== '') {
            $first = make_absolute_url($base, $first);
            $preferredPrefix = preg_replace('~/[^/]*$~', '/', $first);
        }
    }

    $isInline = static function (string $u): bool {
        return stripos($u, 'data:') === 0;
    };

    $all = [];
    foreach ($xp->query('//img[@src or @srcset or @data-src]') as $img) {
        /** @var DOMElement $img */
        $src = trim($img->getAttribute('src'));
        if ($src === '' || $isInline($src)) {
            // Lazy-loaded images keep a placeholder in src and the real URL in data-src.
            $src = trim($img->getAttribute('data-src'));
        }
        if ($src === '' && $img->hasAttribute('srcset')) {
            // First candidate of the srcset list, without its width/density descriptor.
            $firstCandidate = trim(explode(',', $img->getAttribute('srcset'))[0]);
            $src = (string)(preg_split('~\s+~', $firstCandidate)[0] ?? '');
        }
        if ($src === '' || $isInline($src)) continue;
        $all[] = make_absolute_url($base, $src);
    }
    // make_absolute_url() already returns path-encoded URLs; only de-duplicate here.
    $all = array_values(array_unique($all));

    if ($preferredPrefix) {
        $filtered = array_values(array_filter($all, function ($u) use ($preferredPrefix) {
            return strpos($u, $preferredPrefix) === 0;
        }));
        if (!empty($filtered)) return $filtered;
    }
    return $all;
}

// ===================== Controller =====================
// Request flow: validate ?url= -> fetch -> parse -> assemble the JSON answer.
// Every respond_json() call ends the script.
$allowedHosts = ['www.ghabnaab.com', 'ghabnaab.com'];

// "?url[]=x" delivers an array; treat anything that is not a string as missing.
$rawUrl   = $_GET['url'] ?? '';
$inputUrl = is_string($rawUrl) ? trim($rawUrl) : '';
if ($inputUrl === '' || !is_valid_url($inputUrl)) {
    respond_json(['ok'=>false,'error'=>'Invalid or missing url. Example: ?url=https://www.ghabnaab.com/R-993'], 400);
}

// Host names are case-insensitive.
$host = strtolower((string)parse_url($inputUrl, PHP_URL_HOST));
if (!in_array($host, $allowedHosts, true)) {
    respond_json(['ok'=>false,'error'=>'This endpoint only accepts ghabnaab.com product URLs.'], 400);
}

[$body, $info, $err] = fetch_url($inputUrl);
$status   = (int)($info['http_code'] ?? 0);
$finalUrl = (string)($info['url'] ?? $inputUrl);
if ($err || $status < 200 || $status >= 400 || !$body) {
    respond_json(['ok'=>false,'fetched'=>['status'=>$status,'error'=>$err ?: 'HTTP error','final_url'=>$finalUrl]], 502);
}

// Redirects are followed, so re-check where the request actually ended up
// before any of the fetched content is parsed or returned.
$finalHost = strtolower((string)parse_url($finalUrl, PHP_URL_HOST));
if (!in_array($finalHost, $allowedHosts, true)) {
    respond_json(['ok'=>false,'fetched'=>['status'=>$status,'error'=>'Redirected outside ghabnaab.com','final_url'=>$finalUrl]], 502);
}

$html = to_utf8((string)$body, $info);
$xp   = dom_xpath($html);

// Base meta
$meta   = extract_metas($xp);
$og     = normalize_og_block($meta['og'], $finalUrl);
$tw     = normalize_twitter_block($meta['twitter'], $finalUrl);
$tc     = extract_title_canonical_lang($xp);

// JSON-LD
[$jsonldProducts, $jsonldWebpages] = parse_jsonld_products($xp);
$jsonldProductsAbs = [];
foreach ($jsonldProducts as $p) $jsonldProductsAbs[] = absolutize_product_urls($p, $finalUrl);
$productJsonLd = $jsonldProductsAbs[0] ?? null;

// Livewire
$livewireBlocks = extract_livewire_blocks($xp);
$lwPrice   = guess_price_from_livewire($livewireBlocks);
$lwDeep    = deep_collect_variants($livewireBlocks);

// Gallery
$gallery = collect_gallery_images($xp, $finalUrl, $productJsonLd ?: []);

// Desc & Specs
$descSpecs = collect_description_and_specs($xp);

// Breadcrumbs
$breadcrumbs = collect_breadcrumbs($xp);

// schema.org allows "brand" to be a plain string or a Brand/Organization node.
$brand = $productJsonLd['brand'] ?? null;
if (is_array($brand)) $brand = $brand['name'] ?? null;

// "offers" may be one Offer node or a list of them; availability is read from the first.
$offers = $productJsonLd['offers'] ?? null;
$primaryOffer = null;
if (is_array($offers)) {
    $primaryOffer = (isset($offers[0]) && is_array($offers[0])) ? $offers[0] : $offers;
}

// Compose product details
$core = [
    'name'        => $productJsonLd['name']        ?? ($og['og:title'] ?? null),
    'brand'       => $brand,
    'sku'         => $productJsonLd['sku']         ?? null,
    'description' => $productJsonLd['description'] ?? $descSpecs['description_text'],
    'categories'  => $productJsonLd['category']    ?? null,
];

$pricing = [
    'jsonld'  => $offers,
    'livewire'=> $lwPrice,
];

$ratings = $productJsonLd['aggregateRating'] ?? null;

$availability = [
    'jsonld_itemCondition' => $primaryOffer['itemCondition'] ?? null,
    'jsonld_availability'  => $primaryOffer['availability'] ?? null,
];

$variants = [
    'attributes' => $lwDeep['attributes'],
    'variants'   => $lwDeep['variants'],
];

$seo = [
    'title_tag'  => $tc['title'],
    'meta_desc'  => $meta['misc']['description'] ?? null,
    'canonical'  => $tc['canonical'] ? make_absolute_url($finalUrl, $tc['canonical']) : null,
    'og'         => $og,
    'twitter'    => $tw,
    'webpage_ld' => $jsonldWebpages,
];

$output = [
    'ok'       => true,
    'source'   => [
        'requested_url' => $inputUrl,
        'final_url'     => $finalUrl,
        'http_status'   => $status,
        'lang'          => $tc['lang'],
        'dir'           => $tc['dir'],
        'fetched_at'    => gmdate('c'),
    ],
    'product_raw' => [
        'jsonld'   => $productJsonLd,
        'livewire' => $livewireBlocks,
    ],
    'product' => [
        'core'         => $core,
        'pricing'      => $pricing,
        'availability' => $availability,
        'ratings'      => $ratings,
        'variants'     => $variants,
        'media'        => ['images' => $gallery],
        'breadcrumbs'  => $breadcrumbs,
        'seo'          => $seo,
        'description'  => ['html' => $descSpecs['description_html'], 'text' => $descSpecs['description_text']],
        'specs'        => $descSpecs['specs'],
    ],
];

respond_json($output, 200);
?>