<?php
/**
 * Woo Product Info Scraper → JSON API
 * GET: product_url=https://example.com/product/...
 *
 * Response:
 * {
 *   "url": "...",
 *   "name": "...",
 *   "price": {"number": 3445000, "currency": "IRR", "text": "3,445,000 تومان"},
 *   "availability": {"schema": "InStock", "stock_text": "1 در انبار"},
 *   "images": ["...","..."],
 *   "description_html": "<p>...</p>",
 *   "description_text": "...",
 *   "features": ["ویژگی ۱", "ویژگی ۲", ...],
 *   "attributes": {"برند":"...", "وزن":"..."},
 *   "warnings": []
 * }
 */

header('Content-Type: application/json; charset=utf-8');

/* ---------- Input & URL normalization (handles Persian paths) ---------- */
// Accept only a string value: PHP turns `product_url[]=x` into an array, and
// trim() on an array raises a TypeError on PHP 8 (a warning plus null on PHP 7).
// Any non-string value is treated as missing, which the validation further
// down answers with a 400 response.
$u = (isset($_GET['product_url']) && is_string($_GET['product_url']))
  ? trim($_GET['product_url'])
  : '';

/**
 * Normalize a user-supplied URL so it can be validated and fetched.
 *
 * - Prepends "https://" when the input has no scheme.
 * - Percent-encodes path segments, query keys/values and the fragment.
 *   Components that already contain a %XX escape keep their existing escapes;
 *   only raw bytes outside printable ASCII (spaces, control bytes, UTF-8
 *   bytes) left in them are escaped, so partially encoded input still
 *   produces an ASCII-only URL.
 * - Userinfo ("user:pass@") is not carried over to the result.
 *
 * @param string $u Raw URL, possibly scheme-less and containing raw UTF-8.
 * @return string The normalized URL; '' for empty input; the scheme-prefixed
 *                input unchanged when it cannot be parsed or has no host.
 */
function smart_normalize_url($u) {
  if ($u === '') return '';
  if (!parse_url($u, PHP_URL_SCHEME)) $u = 'https://' . ltrim($u, '/');
  $parts = parse_url($u);
  if (!$parts || empty($parts['host'])) return $u;

  // True when the component already carries at least one %XX escape.
  $hasEscape = function ($s) {
    return preg_match('/%[0-9A-Fa-f]{2}/', $s) === 1;
  };
  // Escape every byte outside printable, non-space ASCII. Existing escapes and
  // delimiters are printable ASCII and therefore left untouched. The pattern is
  // deliberately byte-wise (no /u) so each raw UTF-8 byte becomes one %XX.
  $escapeRaw = function ($s) {
    return preg_replace_callback('/[^\x21-\x7E]/', function ($m) {
      return rawurlencode($m[0]);
    }, $s);
  };

  $scheme = $parts['scheme'] ?? 'https';
  $host   = $parts['host'];
  $port   = isset($parts['port']) ? ':' . $parts['port'] : '';

  $path = '';
  if (!empty($parts['path'])) {
    $segments = array_map(function ($seg) use ($hasEscape, $escapeRaw) {
      // Already-encoded segments are preserved; only stray raw bytes are fixed.
      return $hasEscape($seg) ? $escapeRaw($seg) : rawurlencode($seg);
    }, explode('/', $parts['path']));
    $path = implode('/', $segments);
    if ($path && $path[0] !== '/') $path = '/' . $path;
  }

  $query = '';
  if (!empty($parts['query'])) {
    $encodedPairs = preg_replace_callback('/([^=&]+)=([^&]*)/', function ($m) use ($hasEscape) {
      $k = $hasEscape($m[1]) ? $m[1] : rawurlencode(urldecode($m[1]));
      $v = $hasEscape($m[2]) ? $m[2] : rawurlencode(urldecode($m[2]));
      return $k . '=' . $v;
    }, $parts['query']);
    // Second pass catches raw bytes in pre-escaped pairs and in flags without "=".
    $query = '?' . $escapeRaw($encodedPairs);
  }

  $frag = '';
  if (!empty($parts['fragment'])) {
    $frag = '#' . ($hasEscape($parts['fragment'])
      ? $escapeRaw($parts['fragment'])
      : rawurlencode(urldecode($parts['fragment'])));
  }

  return "{$scheme}://{$host}{$port}{$path}{$query}{$frag}";
}

$u = smart_normalize_url($u);
// Only plain web URLs may be fetched. FILTER_VALIDATE_URL on its own accepts
// any scheme (file://, ftp://, gopher://, ...), and this URL is handed to a
// server-side HTTP client, so the scheme is checked against an allow-list.
$uScheme = strtolower((string)parse_url($u, PHP_URL_SCHEME));
if ($u === '' || !filter_var($u, FILTER_VALIDATE_URL) || !in_array($uScheme, ['http', 'https'], true)) {
  http_response_code(400);
  echo json_encode(["error" => "Invalid or missing 'product_url'"], JSON_UNESCAPED_UNICODE);
  exit;
}

/* --------------------------- HTTP fetch helper -------------------------- */
/**
 * Fetch a URL with cURL and return the raw response.
 *
 * Redirects are followed (up to 5), TLS certificates are verified, and both
 * the initial request and any redirect target are restricted to HTTP/HTTPS so
 * a redirect cannot steer the request to another protocol handler.
 *
 * @param string $url Absolute URL to request.
 * @return array Numerically indexed: [0] response body (false on transport
 *               failure), [1] HTTP status code (0 when no response was
 *               received), [2] cURL error message ('' when there was none).
 */
function http_get($url) {
  $ch = curl_init($url);
  if ($ch === false) {
    // Without a handle every later curl_* call would fail on a non-handle.
    return [false, 0, 'curl_init failed'];
  }
  curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_MAXREDIRS      => 5,
    CURLOPT_TIMEOUT        => 25,
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_SSL_VERIFYHOST => 2,
    CURLOPT_SSL_VERIFYPEER => true,
    // The URL comes from the client: never let cURL speak anything but HTTP(S).
    CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    // Empty string = advertise and transparently decode every encoding cURL supports.
    CURLOPT_ENCODING       => '',
    CURLOPT_USERAGENT      => 'Mozilla/5.0 (compatible; WooProductInfoAPI/1.0)',
    CURLOPT_HTTPHEADER     => [
      'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Language: fa-IR,fa;q=0.9,en-US;q=0.8'
    ],
  ]);
  $body = curl_exec($ch);
  $err  = curl_error($ch);
  $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  curl_close($ch);
  return [$body, $code, $err];
}

/* ------------------------------- Helpers -------------------------------- */
/**
 * Decode HTML entities and collapse every run of whitespace to one space.
 *
 * @param mixed $s Text to clean. null and non-scalar values (e.g. arrays from
 *                 decoded JSON) yield '' instead of raising a TypeError.
 * @return string Cleaned text without leading or trailing whitespace.
 */
function clean_text($s) {
  if ($s === null || !is_scalar($s)) return '';
  $decoded = html_entity_decode((string)$s, ENT_QUOTES | ENT_HTML5, 'UTF-8');

  // Collapse first and trim afterwards: trim() only strips ASCII whitespace,
  // so a decoded &nbsp; at either end would otherwise survive as a space.
  $collapsed = preg_replace('/\s+/u', ' ', $decoded);
  if ($collapsed === null) {
    // The /u pattern fails on invalid UTF-8; fall back to byte-wise matching
    // rather than returning null.
    $collapsed = preg_replace('/\s+/', ' ', $decoded);
  }
  return trim((string)$collapsed);
}
/**
 * Serialize the children of a node, i.e. the node's "inner HTML".
 *
 * @param DOMNode $node Node whose child nodes are serialized.
 * @return string HTML of all child nodes concatenated in document order;
 *                '' when the node has no children.
 */
function inner_html(DOMNode $node) {
  $owner  = $node->ownerDocument;
  $pieces = [];
  // Walk the sibling chain instead of iterating the childNodes list.
  for ($child = $node->firstChild; $child !== null; $child = $child->nextSibling) {
    $pieces[] = $owner->saveHTML($child);
  }
  return implode('', $pieces);
}
/**
 * Resolve a possibly relative reference against a base URL.
 *
 * Absolute URLs are returned unchanged and protocol-relative ones ("//host/x")
 * inherit the base scheme. For relative references the path is merged with the
 * base directory, then "." / ".." segments and duplicate slashes are removed.
 * The query string and fragment of the reference are split off first, so URLs
 * embedded in them (e.g. "?u=http://x") are never rewritten.
 *
 * @param string $href Reference taken from the document (may be relative).
 * @param string $base Absolute URL of the document the reference came from.
 * @return string Absolute URL, or '' when $href is empty.
 */
function resolve_url($href, $base) {
  if (!$href) return '';
  if (parse_url($href, PHP_URL_SCHEME)) return $href;
  if (strpos($href, '//') === 0) {
    $scheme = parse_url($base, PHP_URL_SCHEME) ?: 'https';
    return $scheme . ':' . $href;
  }

  $p = parse_url($base);
  if (!is_array($p)) $p = [];
  $scheme   = $p['scheme'] ?? 'https';
  $host     = $p['host'] ?? '';
  $port     = isset($p['port']) ? ':' . $p['port'] : '';
  $basePath = (isset($p['path']) && $p['path'] !== '') ? $p['path'] : '/';

  // Separate "path" from "?query#fragment"; only the path is normalized.
  $cut     = strcspn($href, '?#');
  $refPath = substr($href, 0, $cut);
  $suffix  = (string)substr($href, $cut);

  if ($refPath === '') {
    // Query-only or fragment-only reference: it applies to the base document itself.
    $path = $basePath;
    if ($suffix !== '' && $suffix[0] === '#' && isset($p['query'])) {
      $suffix = '?' . $p['query'] . $suffix;
    }
  } elseif ($refPath[0] === '/') {
    $path = $refPath;
  } else {
    // Replace the last segment of the base path (the "file") with the reference.
    $path = preg_replace('#/[^/]*$#', '/', $basePath) . $refPath;
  }

  // Remove dot segments and empty segments; ".." never climbs above the root.
  $segments = explode('/', $path);
  $kept = [];
  foreach ($segments as $seg) {
    if ($seg === '' || $seg === '.') continue;
    if ($seg === '..') { array_pop($kept); continue; }
    $kept[] = $seg;
  }
  $normalized = '/' . implode('/', $kept);
  // Keep the trailing slash of directory-like paths ("/a/b/", "/a/b/.", "/a/b/c/..").
  if ($kept && in_array(end($segments), ['', '.', '..'], true)) $normalized .= '/';

  return $scheme . '://' . $host . $port . $normalized . $suffix;
}
/**
 * Pick the URL of the largest candidate in an <img srcset="..."> value.
 *
 * Candidates are tokenized the way browsers do it: a URL is a run of
 * non-whitespace characters, so commas inside a URL do not split it; a comma
 * only ends a candidate when it trails the URL or follows the descriptor.
 * Width descriptors ("600w") are ranked by width and density descriptors
 * ("2x") by density. Candidates without a recognized descriptor rank as 0.
 * On a tie the later candidate wins. Mixing "w" and "x" descriptors in one
 * srcset is not meaningful and is not specially handled.
 *
 * @param string $srcset Raw srcset attribute value.
 * @return string|null URL of the best candidate, or null when there is none.
 */
function take_largest_from_srcset($srcset) {
  $srcset = (string)$srcset;
  $ws  = " \t\r\n\f";
  $len = strlen($srcset);
  $best = null; $bestScore = -1;

  $i = 0;
  while ($i < $len) {
    // Skip separators in front of the next candidate.
    $i += strspn($srcset, $ws . ',', $i);
    if ($i >= $len) break;

    // The URL runs up to the next whitespace character.
    $n   = strcspn($srcset, $ws, $i);
    $url = substr($srcset, $i, $n);
    $i  += $n;

    $descriptor = '';
    $withoutCommas = rtrim($url, ',');
    if ($withoutCommas !== $url) {
      // A trailing comma closes the candidate: it has no descriptor.
      $url = $withoutCommas;
    } else {
      $n = strcspn($srcset, ',', $i);
      $descriptor = trim(substr($srcset, $i, $n), $ws);
      $i += $n;
    }

    $score = 0;
    if (preg_match('/^(\d+)w(?:[ \t\r\n\f]|$)/', $descriptor, $m)) {
      $score = (int)$m[1];
    } elseif (preg_match('/^(\d*\.?\d+)x(?:[ \t\r\n\f]|$)/', $descriptor, $m)) {
      $score = (float)$m[1];
    }
    if ($url && $score >= $bestScore) { $best = $url; $bestScore = $score; }
  }
  return $best ?: null;
}

/* -------------------------------- Fetch --------------------------------- */
list($html, $code, $err) = http_get($u);
if ($err || $code !== 200 || !$html) {
  // Name the actual cause: a non-200 response that does carry a body must not
  // be reported as "empty body".
  if ($err) {
    $fetchDetail = $err;
  } elseif ($code !== 200) {
    $fetchDetail = 'unexpected HTTP status';
  } else {
    $fetchDetail = 'empty body';
  }
  http_response_code(502);
  echo json_encode(["error" => "Fetch failed", "http_code" => $code, "detail" => $fetchDetail], JSON_UNESCAPED_UNICODE);
  exit;
}

/* --------------------------------- DOM ---------------------------------- */
// Buffer libxml's parse errors internally and discard them once the page is
// loaded; scraped markup is parsed on a best-effort basis.
$dom = new DOMDocument();
libxml_use_internal_errors(true);
// The XML declaration is prepended as an encoding hint — presumably so that
// libxml treats the markup as UTF-8 regardless of the page's own <meta> tags.
$domInput = '<?xml encoding="UTF-8">' . $html;
$dom->loadHTML($domInput);
libxml_clear_errors();

// XPath handle used by every extraction step on the page.
$xp = new DOMXPath($dom);

/* ------------------------------ Result base ----------------------------- */
// Response skeleton: every documented key exists from the start, so fields
// that no extraction step fills in are still emitted (as null or empty).
$result = [
  'url'              => $u,
  'name'             => null,
  'price'            => array_fill_keys(['number', 'currency', 'text'], null),
  'availability'     => array_fill_keys(['schema', 'stock_text'], null),
  'images'           => [],
  'description_html' => null,
  'description_text' => null,
  'features'         => [],
  // An object rather than an array, so the JSON output is {} when empty.
  'attributes'       => new stdClass(),
  'warnings'         => [],
];

/* ------------------------- Prefer JSON-LD Product ------------------------ */
/**
 * Find the first schema.org Product node in the page's JSON-LD blocks.
 *
 * Each <script type="application/ld+json"> is decoded and inspected in
 * document order. A block may be a single object, an object with an "@graph"
 * list, or a top-level list of such objects. A node counts as a product when
 * its "@type" (a string or a list of strings) contains "Product",
 * case-insensitively, so "https://schema.org/Product" and "ProductGroup" match.
 *
 * @param DOMXPath $xp XPath handle on the parsed page.
 * @return array|null The decoded product node, or null when none is found.
 */
function find_jsonld_product(DOMXPath $xp) {
  // One predicate shared by all lookups so string and list "@type" values are
  // judged by the same rule.
  $isProduct = function ($node) {
    if (!is_array($node) || !isset($node['@type'])) return false;
    $types = is_array($node['@type']) ? $node['@type'] : [$node['@type']];
    foreach ($types as $t) {
      if (is_string($t) && stripos($t, 'Product') !== false) return true;
    }
    return false;
  };

  foreach ($xp->query("//script[@type='application/ld+json']") as $script) {
    $data = json_decode($script->textContent, true);
    if (!is_array($data) || !$data) continue;

    // A JSON array decodes to sequential integer keys; treat it as several roots.
    $isList = array_keys($data) === range(0, count($data) - 1);
    $roots  = $isList ? $data : [$data];

    foreach ($roots as $root) {
      if (!is_array($root)) continue;

      // Direct Product object
      if ($isProduct($root)) return $root;

      // @graph
      if (isset($root['@graph']) && is_array($root['@graph'])) {
        foreach ($root['@graph'] as $node) {
          if ($isProduct($node)) return $node;
        }
      }
    }
  }
  return null;
}

$prd = find_jsonld_product($xp);
if ($prd) {
  // True for decoded JSON arrays (keys 0..n-1), false for decoded JSON objects.
  $isJsonList = function ($value) {
    return is_array($value) && ($value === [] || array_keys($value) === range(0, count($value) - 1));
  };

  if (!empty($prd['name'])) $result['name'] = clean_text($prd['name']);

  if (!empty($prd['description'])) {
    $desc = $prd['description'];
    $result['description_text'] = clean_text($desc);
    // NOTE: clean_text() collapses whitespace, so the text normally holds no
    // newlines by the time nl2br() runs; the HTML is the escaped plain text.
    $result['description_html'] = nl2br(htmlspecialchars($result['description_text'], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
  }

  if (!empty($prd['image'])) {
    // "image" may be a URL string, a single ImageObject, or a list of either.
    // A single object must be wrapped first: iterating it directly would walk
    // its property values and emit strings such as "ImageObject" as images.
    $images = $isJsonList($prd['image']) ? $prd['image'] : [$prd['image']];
    foreach ($images as $img) {
      if (is_array($img)) $img = $img['url'] ?? ($img['contentUrl'] ?? null);
      if (is_string($img) && $img !== '') $result['images'][] = $img;
    }
  }

  if (!empty($prd['offers']) && is_array($prd['offers'])) {
    $offers = $prd['offers'];
    // "offers" may be a list of Offer objects; use the first one.
    if ($isJsonList($offers)) $offers = is_array($offers[0]) ? $offers[0] : [];

    // The price can also sit in a nested priceSpecification (object or list).
    $spec = $offers['priceSpecification'] ?? null;
    if ($isJsonList($spec)) $spec = isset($spec[0]) ? $spec[0] : null;
    if (!is_array($spec)) $spec = [];

    // Fixed price first, then the low end of an AggregateOffer, then the spec.
    $priceCandidates = [$offers['price'] ?? null, $offers['lowPrice'] ?? null, $spec['price'] ?? null];
    foreach ($priceCandidates as $rawPrice) {
      if (is_scalar($rawPrice) && (string)$rawPrice !== '') {
        $result['price']['number'] = (float)preg_replace('/[^\d.]/', '', (string)$rawPrice);
        break;
      }
    }

    $currency = !empty($offers['priceCurrency']) ? $offers['priceCurrency'] : ($spec['priceCurrency'] ?? null);
    if (!empty($currency)) $result['price']['currency'] = clean_text($currency);

    if (!empty($offers['availability'])) {
      $av = $offers['availability'];
      if (is_string($av)) {
        // Reduce "https://schema.org/InStock" style values to the bare term.
        if (stripos($av, 'InStock') !== false) $av = 'InStock';
        elseif (stripos($av, 'OutOfStock') !== false) $av = 'OutOfStock';
        elseif (stripos($av, 'PreOrder') !== false) $av = 'PreOrder';
      }
      $result['availability']['schema'] = $av;
    }
  }
}

/* --------------------- Fallbacks: OpenGraph / Twitter ------------------- */
// Each fallback runs only when the field is still unset after the JSON-LD pass.
if (!$result['name']) {
  $n = $xp->query("//meta[@property='og:title' or @name='twitter:title']/@content")->item(0);
  if ($n) $result['name'] = clean_text($n->textContent);
}
if (!$result['price']['number']) {
  $v = $xp->query("//meta[@property='product:price:amount']/@content")->item(0);
  if ($v) {
    $digits = preg_replace('/[^\d.]/', '', $v->textContent);
    // An empty or non-numeric meta value must not be reported as a price of 0.
    if (preg_match('/\d/', $digits)) $result['price']['number'] = (float)$digits;
  }
}
if (!$result['price']['currency']) {
  $v = $xp->query("//meta[@property='product:price:currency']/@content")->item(0);
  if ($v) $result['price']['currency'] = clean_text($v->textContent);
}
if (!$result['availability']['schema']) {
  $v = $xp->query("//meta[@property='product:availability']/@content")->item(0);
  if ($v) {
    // Compare on letters only so "in stock", "in_stock" and "InStock" all
    // normalize the same way; "oos" is accepted as a short form of out of stock.
    $a = preg_replace('/[^a-z]/', '', strtolower($v->textContent));
    if (strpos($a, 'instock') !== false) {
      $result['availability']['schema'] = 'InStock';
    } elseif (strpos($a, 'out') !== false || $a === 'oos') {
      $result['availability']['schema'] = 'OutOfStock';
    } elseif (strpos($a, 'preorder') !== false) {
      $result['availability']['schema'] = 'PreOrder';
    } else {
      // Unknown vocabulary: pass the raw value through.
      $result['availability']['schema'] = $v->textContent;
    }
  }
}
if (empty($result['images'])) {
  foreach ($xp->query("//meta[@property='og:image' or @name='twitter:image']/@content") as $m) {
    $src = clean_text($m->textContent);
    if ($src !== '') $result['images'][] = $src;
  }
}

/* ------------------------ UI DOM: name/price/stock ---------------------- */
if (!$result['name']) {
  $h1 = $xp->query("//h1[contains(@class,'product_title')]");
  if ($h1->length) $result['name'] = clean_text($h1->item(0)->textContent);
}
if (!$result['price']['text']) {
  $priceBox = "//*[contains(@class,'summary')]//*[contains(@class,'price')]";
  // Prefer the <ins> element (the current price of a discounted product) so the
  // struck-through old price is not mashed into the text; otherwise take the
  // whole price element, which also keeps ranges such as "100 – 200" intact.
  foreach ([$priceBox . "//ins", $priceBox] as $expr) {
    foreach ($xp->query($expr) as $node) {
      $t = clean_text($node->textContent);
      if (preg_match('/\d/u', $t)) { $result['price']['text'] = $t; break 2; }
    }
  }
}
if (!$result['availability']['stock_text']) {
  // Match "stock" as a whole class token. A substring test also hits classes
  // such as "instock" / "outofstock" on wrapper elements, whose text content
  // would be far more than the stock label.
  $s = $xp->query("//*[contains(concat(' ', normalize-space(@class), ' '), ' stock ')]");
  if ($s->length) $result['availability']['stock_text'] = clean_text($s->item(0)->textContent);
}

/* ---------------------------- Gallery images ---------------------------- */
// Gather every attribute that may hold a gallery image URL, including the
// lazy-loading variants and the largest srcset candidate.
$imgCandidates = [];
foreach ($xp->query("//div[contains(@class,'woocommerce-product-gallery')]//img | //a[contains(@class,'woocommerce-main-image')]/img") as $img) {
  /** @var DOMElement $img */
  foreach (['data-src','data-large_image','data-lazy-src','src','data-zoom-image'] as $attr) {
    if ($img->hasAttribute($attr)) $imgCandidates[] = $img->getAttribute($attr);
  }
  if ($img->hasAttribute('srcset')) {
    $best = take_largest_from_srcset($img->getAttribute('srcset'));
    if ($best) $imgCandidates[] = $best;
  }
}
foreach ($imgCandidates as $href) {
  $href = trim($href);
  // Inline data: URIs are lazy-loading placeholders, not fetchable image URLs.
  if ($href === '' || stripos($href, 'data:') === 0) continue;
  $href = resolve_url($href, $u);
  if ($href && !in_array($href, $result['images'], true)) $result['images'][] = $href;
}

/* ------------------------- Description (HTML/text) ---------------------- */
// Runs only when the description is still incomplete. Among all matching
// containers the one with the most visible text wins.
if (!$result['description_html'] || !$result['description_text']) {
  // 'woocommerce-product-details__short-description' is the class WooCommerce's
  // default short-description template uses; the other three are kept as before.
  $descNodes = $xp->query("//*[contains(@class,'woocommerce-Tabs-panel--description') or contains(@id,'tab-description') or contains(@class,'product-short-description') or contains(@class,'woocommerce-product-details__short-description')]");
  $bestHtml = null; $bestLen = 0;
  foreach ($descNodes as $n) {
    // Serialize once and keep the winner's markup instead of re-serializing it.
    $htmlBlock = inner_html($n);
    $len = mb_strlen(strip_tags(trim($htmlBlock)), 'UTF-8');
    if ($len > $bestLen) { $bestHtml = $htmlBlock; $bestLen = $len; }
  }
  if ($bestHtml !== null) {
    $result['description_html'] = $bestHtml;
    $result['description_text'] = clean_text(strip_tags($bestHtml));
  }
}

/* -------------------------- Features (bulleted) ------------------------- */
// Adds the cleaned text of each list item to the feature list, skipping
// blanks and duplicates.
$collectFeatures = function ($items) use (&$result) {
  foreach ($items as $li) {
    $t = clean_text($li->textContent);
    if ($t && !in_array($t, $result['features'], true)) $result['features'][] = $t;
  }
};

if ($result['description_html']) {
  $tmpDom = new DOMDocument();
  libxml_use_internal_errors(true);
  $tmpDom->loadHTML('<?xml encoding="UTF-8">' . $result['description_html']);
  libxml_clear_errors();
  $tmpXp = new DOMXPath($tmpDom);
  $collectFeatures($tmpXp->query("//ul/li"));
}

// When the description came from JSON-LD its HTML is escaped plain text and
// holds no <ul>, so fall back to the bullet lists inside the page's own
// description containers.
if (empty($result['features'])) {
  $collectFeatures($xp->query("//*[contains(@class,'woocommerce-Tabs-panel--description') or contains(@id,'tab-description') or contains(@class,'product-short-description') or contains(@class,'woocommerce-product-details__short-description')]//ul/li"));
}

/* ----------------------- Attributes table (optional) -------------------- */
// Read the attributes table row by row: the first <th> of a row is the label,
// the first <td> its value. Rows missing either cell, or with a blank label or
// value, are skipped. A later row with the same label overwrites the earlier one.
$attrs = [];
foreach ($xp->query("//table[contains(@class,'shop_attributes')]//tr") as $row) {
  $labelCell = $xp->query(".//th", $row)->item(0);
  $valueCell = $xp->query(".//td", $row)->item(0);
  if (!$labelCell || !$valueCell) continue;

  $label = clean_text($labelCell->textContent);
  $value = clean_text($valueCell->textContent);
  if ($label === '' || $value === '') continue;

  $attrs[$label] = $value;
}
// Keep the empty-object default unless at least one attribute was found.
if ($attrs) $result['attributes'] = $attrs;

/* ---------------------------- Final response ---------------------------- */
$result['images'] = array_values(array_unique($result['images']));

$jsonFlags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT;
$json = json_encode($result, $jsonFlags);
if ($json === false) {
  // Typically invalid UTF-8 scraped from the page. Retry with partial output
  // (unencodable values become null) instead of sending an empty 200 response,
  // and tell the client about it.
  $result['warnings'][] = 'Some values could not be JSON-encoded (' . json_last_error_msg() . ') and were replaced with null';
  $json = json_encode($result, $jsonFlags | JSON_PARTIAL_OUTPUT_ON_ERROR);
}
if ($json === false) {
  http_response_code(500);
  $json = json_encode(["error" => "Failed to encode response", "detail" => json_last_error_msg()], JSON_UNESCAPED_UNICODE);
}
echo $json;