<?php
/**
 * masterkala.php (v1.2)
 * Detail Web Service for masterkala.com
 * - Robust description extraction via DOMXPath (multi-pattern)
 * - Extracts product options/variants (select/radio/checkbox)
 * - Flexible inline price detection + JSON-LD fallback
 * - Persian digits normalization
 * PHP 7+ compatible
 */

// Keep PHP diagnostics (warnings/notices) out of the response body.
ini_set('display_errors', 0);
// Every response of this endpoint is declared as UTF-8 JSON.
header('Content-Type: application/json; charset=utf-8');
// Any origin may call this endpoint from a browser (CORS wildcard).
header('Access-Control-Allow-Origin: *');
// Default encoding for the mb_* calls used by the helpers below.
mb_internal_encoding('UTF-8');

/**
 * Emit $arr as a pretty-printed JSON document and terminate the script.
 *
 * json_encode() returns false when the payload contains invalid UTF-8 (scraped
 * markup can), which previously produced an empty response body. Invalid bytes
 * are now substituted where the runtime supports it, a partial-output retry is
 * attempted, and as a last resort a fixed error document is sent so the caller
 * always receives parseable JSON.
 *
 * @param mixed $arr Payload to serialise (normally an associative array).
 * @return void Never returns; the script exits after output.
 */
function respond($arr){
  $flags = JSON_UNESCAPED_UNICODE|JSON_PRETTY_PRINT;
  // JSON_INVALID_UTF8_SUBSTITUTE only exists on PHP 7.2+; the file targets PHP 7+.
  if(defined('JSON_INVALID_UTF8_SUBSTITUTE')){
    $flags |= JSON_INVALID_UTF8_SUBSTITUTE;
  }
  $json = json_encode($arr, $flags);
  if($json === false){
    // Older runtimes: unencodable values are replaced instead of failing the whole document.
    $json = json_encode($arr, $flags|JSON_PARTIAL_OUTPUT_ON_ERROR);
  }
  if($json === false){
    $json = '{"ok":false,"error":"json_encode_failed"}';
  }
  echo $json;
  exit;
}
/**
 * Abort the request with a failure payload of the form {"ok": false, "error": $msg}.
 *
 * @param string $msg Machine-readable error code placed in the "error" field.
 * @return void Does not return: respond() terminates the script.
 */
function err($msg){
  $failure = [
    'ok'    => false,
    'error' => $msg,
  ];
  respond($failure);
}

/* ---------------- Input ---------------- */
// Only a plain string is accepted; ?url[]=... would otherwise reach trim() as an array.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
if(!$url) err('invalid_or_missing_url');
// Validate the *parsed* scheme and host. A pattern match on the raw string can be
// satisfied by text in the query or fragment (e.g. http://evil.test?masterkala.com/)
// or by a look-alike host (evilmasterkala.com), which would let callers make this
// server fetch arbitrary hosts.
$urlParts  = parse_url($url);
$urlScheme = (is_array($urlParts) && isset($urlParts['scheme'])) ? strtolower($urlParts['scheme']) : '';
$urlHost   = (is_array($urlParts) && isset($urlParts['host'])) ? strtolower(rtrim($urlParts['host'], '.')) : '';
// Exact domain or any subdomain of it ('.masterkala.com' is 15 bytes long).
$urlHostOk = ($urlHost === 'masterkala.com') || (substr($urlHost, -15) === '.masterkala.com');
if(preg_match('/[\x00-\x1F\x7F]/', $url) || ($urlScheme !== 'http' && $urlScheme !== 'https') || !$urlHostOk){
  err('invalid_host_only_masterkala.com');
}

/* ---------------- Fetch ---------------- */
$html = http_get_follow($url, 45);
if($html === null){
  err('fetch_failed');
}

/* ---------------- Parse DOM ---------------- */
// libxml errors are collected internally and discarded right after parsing.
libxml_use_internal_errors(true);
$dom = new DOMDocument();
// The XML-declaration prefix makes libxml treat the markup as UTF-8.
$dom->loadHTML('<?xml encoding="utf-8" ?>'.$html, LIBXML_NOWARNING | LIBXML_NOERROR);
libxml_clear_errors();
$xp = new DOMXPath($dom);

/* ---------------- Canonical ---------------- */
// Fall back to the requested URL when the page declares no canonical link.
$canonical = extract_canonical_dom($xp);
if(!$canonical){
  $canonical = $url;
}

/* ---------------- JSON-LD ---------------- */
// parse_jsonld_all() returns ['product'=>.., 'breadcrumbs'=>..]
$structured = parse_jsonld_all($xp);
$product    = isset($structured['product']) ? $structured['product'] : [];
$categories = isset($structured['breadcrumbs']) ? $structured['breadcrumbs'] : [];

/* ---------------- Inline Prices ---------------- */
$inlinePrices = extract_inline_prices_flex($html);

/* ---------------- Images ---------------- */
$gallery = extract_images_flex($html, $xp);

/* ---------------- Description ---------------- */
$descriptionHtml = extract_description_dom($xp, $html);
// Plain-text variant: collapse whitespace, drop tags, then decode entities.
$descriptionText = preg_replace('/\s+/u',' ', $descriptionHtml);
$descriptionText = strip_tags($descriptionText);
$descriptionText = html_entity_decode($descriptionText, ENT_QUOTES|ENT_SUBSTITUTE, 'UTF-8');
$descriptionText = trim($descriptionText);

/* ---------------- Specs / Attributes ---------------- */
$specs = extract_specs_dom($xp, $html);

/* ---------------- Options / Variants ---------------- */
$variants = extract_options_dom($xp);

/* ---------------- Stock Text ---------------- */
$stockLabel = extract_stock_text($html, $xp);

/* ---------------- Resolve Price ---------------- */
// Prefer the inline price (Toman); otherwise use the JSON-LD price (Rial) divided by 10.
$resolved   = resolve_price($inlinePrices, $product);
$priceToman = $resolved[0];
$priceRaw   = $resolved[1];
$currency   = $resolved[2];

/* ---------------- Output ---------------- */
// Brand may be an object carrying "name" or a bare value; use whichever is present.
if(isset($product['brand']['name'])){
  $brand = $product['brand']['name'];
} else {
  $brand = isset($product['brand']) ? $product['brand'] : null;
}

$payload = [
  'ok'               => true,
  'url'              => $canonical,
  'name'             => isset($product['name']) ? $product['name'] : null,
  'brand'            => $brand,
  'sku'              => isset($product['sku']) ? $product['sku'] : null,
  'product_id'       => isset($product['productID']) ? $product['productID'] : null,
  'price_toman'      => $priceToman,
  'currency_source'  => $currency,
  'price_raw'        => $priceRaw,
  'stock'            => $stockLabel ? $stockLabel : null,
  'images'           => $gallery,
  'categories'       => $categories,
  'attributes'       => $specs,       // [{name,value}]
  'options'          => $variants,    // [{name,type,required,values:[{id,label,price}] }]
  'description_html' => $descriptionHtml ? $descriptionHtml : null,
  'description_text' => $descriptionText ? $descriptionText : null,
  'debug' => [
    'inline_prices' => $inlinePrices,
    'jsonld_found'  => !empty($product),
  ],
];
respond($payload);

/* ================= Helpers ================= */

/**
 * Download $url with a GET request through PHP's http stream wrapper.
 *
 * 'ignore_errors' is enabled, so the body of a non-2xx response is returned as
 * well. Redirect handling is left at the wrapper's defaults (no follow_location
 * or max_redirects override is set here).
 * NOTE(review): redirect targets are not re-validated against the host allow-list.
 *
 * @param string $url     Absolute http(s) URL.
 * @param int    $timeout Read timeout in seconds.
 * @return string|null Body passed through to_utf8(), or null when the fetch failed.
 */
function http_get_follow($url, $timeout=40){
  $requestHeaders = "User-Agent: Mozilla/5.0\r\nAccept: text/html\r\n";
  $httpOptions = [
    'method'        => 'GET',
    'timeout'       => $timeout,
    'header'        => $requestHeaders,
    'ignore_errors' => true,
  ];
  $context = stream_context_create(['http' => $httpOptions]);

  $body = @file_get_contents($url, false, $context);
  if($body === false || $body === null){
    return null;
  }
  return to_utf8($body);
}
/**
 * Return $s as UTF-8, converting it from a legacy single-byte encoding if needed.
 *
 * The previous implementation passed "Windows-1256" to mb_detect_encoding().
 * That encoding is not in mbstring's supported-encodings list, so the call fails
 * (a ValueError on PHP 8, a warning plus false on PHP 7) and no conversion ever
 * took place. Detection is replaced by a validity check:
 *   1. valid UTF-8 is returned untouched;
 *   2. otherwise iconv converts from Windows-1256 (the legacy Arabic/Persian
 *      code page named by the original list) when iconv is available;
 *   3. otherwise mbstring converts from Windows-1252;
 *   4. if nothing is available the input is returned unchanged.
 *
 * @param string $s Raw bytes.
 * @return string UTF-8 text (best effort).
 */
function to_utf8($s){
  $s = (string)$s;
  if($s === '') return $s;

  // preg_match with the /u modifier fails on malformed UTF-8, so it doubles as a validator.
  $isUtf8 = function_exists('mb_check_encoding')
    ? mb_check_encoding($s, 'UTF-8')
    : (preg_match('//u', $s) === 1);
  if($isUtf8) return $s;

  if(function_exists('iconv')){
    $converted = @iconv('WINDOWS-1256', 'UTF-8//IGNORE', $s);
    if(is_string($converted) && $converted !== '') return $converted;
  }
  if(function_exists('mb_convert_encoding')){
    $converted = @mb_convert_encoding($s, 'UTF-8', 'Windows-1252');
    if(is_string($converted) && $converted !== '') return $converted;
  }
  return $s;
}
/**
 * Normalise a numeric string: map Persian and Arabic-Indic digits to ASCII and
 * strip grouping separators, zero-width characters and whitespace.
 *
 * Changes from the previous version:
 *  - the search and replacement lists had different lengths (26 vs 27 entries);
 *  - only the plain space was removed, although callers capture any `\s`
 *    (tabs, newlines) inside the number, which then truncated the (int) cast;
 *  - non-scalar input (e.g. an array from malformed JSON-LD) now yields ''
 *    instead of being passed through.
 *
 * @param mixed $str Raw value, normally a string or number.
 * @return string Cleaned text; not guaranteed to be purely numeric.
 */
function num_clean($str){
  if(!is_scalar($str)) return '';
  $digits = [
    '۰'=>'0','۱'=>'1','۲'=>'2','۳'=>'3','۴'=>'4','۵'=>'5','۶'=>'6','۷'=>'7','۸'=>'8','۹'=>'9',
    '٠'=>'0','١'=>'1','٢'=>'2','٣'=>'3','٤'=>'4','٥'=>'5','٦'=>'6','٧'=>'7','٨'=>'8','٩'=>'9',
  ];
  $str = strtr((string)$str, $digits);
  // Comma, Arabic decimal/thousands separators, ZWNJ, ZWSP, NBSP and ASCII whitespace.
  $noise = [',','٫','٬',"\xE2\x80\x8C","\xE2\x80\x8B","\xC2\xA0",' ',"\t","\n","\r","\x0B","\x0C"];
  return str_replace($noise, '', $str);
}

/* --- Canonical --- */
/**
 * Read the href of the first <link rel="canonical"> element.
 *
 * @param DOMXPath $xp XPath handle over the parsed page.
 * @return string|null Entity-decoded canonical URL, or null when the page has none.
 */
function extract_canonical_dom(DOMXPath $xp){
  $hrefs = $xp->query('//link[@rel="canonical"]/@href');
  if(!$hrefs || !$hrefs->length){
    return null;
  }
  $firstHref = $hrefs->item(0)->nodeValue;
  return html_entity_decode($firstHref, ENT_QUOTES|ENT_SUBSTITUTE, 'UTF-8');
}

/* --- JSON-LD robust --- */
/**
 * Collect Product and BreadcrumbList data from every JSON-LD <script> on the page.
 *
 * Script content is raw text, so it is decoded as-is first. The previous version
 * always ran html_entity_decode() beforehand, which turns a legitimate `&quot;`
 * inside a JSON string into a bare quote and makes the whole document unparseable.
 * Entity decoding is kept only as a fallback for pages that (incorrectly)
 * entity-encode their JSON-LD.
 *
 * @param DOMXPath $xp XPath handle over the parsed page.
 * @return array ['product' => array, 'breadcrumbs' => string[]]
 */
function parse_jsonld_all(DOMXPath $xp){
  $out = ['product'=>[], 'breadcrumbs'=>[]];
  $scripts = $xp->query('//script[@type="application/ld+json"]');
  if(!$scripts) return $out;

  foreach($scripts as $sc){
    $raw = trim((string)$sc->nodeValue);
    if($raw === '') continue;

    $json = json_decode($raw, true);
    if(!is_array($json)){
      $decoded = html_entity_decode($raw, ENT_QUOTES|ENT_SUBSTITUTE, 'UTF-8');
      $json = json_decode(trim($decoded), true);
    }
    // Scalars (a bare number or string at the root) and empty documents carry nothing useful.
    if(!is_array($json) || !$json) continue;

    // A single object is wrapped so both shapes are handled by the same loop.
    $nodes = is_assoc($json) ? [$json] : $json;
    foreach($nodes as $node){
      if(!is_array($node)) continue;
      if(isset($node['@graph']) && is_array($node['@graph'])){
        foreach($node['@graph'] as $g){ collect_ld_node2($g, $out); }
      } else {
        collect_ld_node2($node, $out);
      }
    }
  }
  return $out;
}
/**
 * Merge one JSON-LD node into the accumulator.
 *
 * - Nodes whose @type contains "Product" are merged into $out['product']
 *   (later nodes overwrite earlier keys).
 * - Nodes whose @type contains "BreadcrumbList" replace $out['breadcrumbs'] with
 *   the list of crumb names, minus generic entries (home / products / shop).
 *
 * A crumb name is read from item.name and, when that is absent, from the
 * ListItem's own "name". The previous version only looked at item.name, so
 * breadcrumbs written as {"name": "...", "item": "<url>"} were lost.
 *
 * @param mixed $it  Candidate node; anything that is not an array with @type is ignored.
 * @param array $out Accumulator with 'product' and 'breadcrumbs' keys (modified in place).
 * @return void
 */
function collect_ld_node2($it, array &$out){
  if(!is_array($it) || !isset($it['@type'])) return;

  // @type may be a string or a list of strings; non-string entries are ignored.
  $type = implode(',', array_filter((array)$it['@type'], 'is_string'));

  if(stripos($type, 'Product')!==false){
    $current = (isset($out['product']) && is_array($out['product'])) ? $out['product'] : [];
    $out['product'] = array_merge($current, $it);
  }

  if(stripos($type, 'BreadcrumbList')!==false && !empty($it['itemListElement']) && is_array($it['itemListElement'])){
    $cats = [];
    foreach($it['itemListElement'] as $li){
      if(!is_array($li)) continue;
      $name = '';
      if(isset($li['item']) && is_array($li['item']) && isset($li['item']['name'])){
        $name = $li['item']['name'];
      } elseif(isset($li['name'])){
        $name = $li['name'];
      }
      if(!is_string($name)) continue;
      $name = trim($name);
      if($name!=='') $cats[] = $name;
    }
    // Drop generic entries (home / products / shop).
    $generic = ['خانه','محصولات','فروشگاه'];
    $cats = array_values(array_filter($cats, function($x) use ($generic){
      return !in_array(mb_strtolower($x), $generic, true);
    }));
    $out['breadcrumbs'] = $cats;
  }
}
/**
 * Tell whether $arr is an array whose keys are NOT exactly 0..n-1 in order.
 *
 * The empty array is reported as non-associative. The previous comparison used
 * range(0, -1), which yields [0, -1] rather than [], so [] was wrongly classified
 * as associative.
 *
 * @param mixed $arr Value to inspect.
 * @return bool True for maps / sparse arrays, false for lists, [] and non-arrays.
 */
function is_assoc($arr){
  if(!is_array($arr) || $arr === []) return false;
  return array_keys($arr) !== range(0, count($arr)-1);
}

/* --- Inline Prices (Flexible) --- */
/**
 * Scan the raw page source for price fields embedded in inline script data.
 *
 * Three fields are looked up independently:
 *  - available_price:     `available : { ... price: N`
 *  - price_with_discount: `price_with_discount: N`
 *  - price:               `price: N, price_with_discount` (the older, pre-discount value)
 *
 * @param string $html Raw page source.
 * @return array Map of the three field names to an int, or null when not found.
 */
function extract_inline_prices_flex($html){
  $patterns = [
    'available_price'     => '#available\s*:\s*\{[^}]*?\bprice\s*:\s*([0-9,٫٬\x{200C}\x{200B}\s]+)#u',
    'price_with_discount' => '#price_with_discount\s*:\s*([0-9,٫٬\x{200C}\x{200B}\s]+)#u',
    'price'               => '#\bprice\s*:\s*([0-9,٫٬\x{200C}\x{200B}\s]+)\s*,\s*price_with_discount#u',
  ];

  $found = [];
  foreach($patterns as $field => $regex){
    if(preg_match($regex, $html, $hit)){
      $found[$field] = (int)num_clean($hit[1]);
    } else {
      $found[$field] = null;
    }
  }
  return $found;
}

/* --- Images --- */
/**
 * Gather product image URLs: every 600x600 cache image referenced anywhere in the
 * raw source, followed by the first og:image meta value, with duplicates removed.
 *
 * @param string   $html Raw page source.
 * @param DOMXPath $xp   XPath handle over the parsed page.
 * @return string[] Unique URLs in order of first appearance.
 */
function extract_images_flex($html, DOMXPath $xp){
  $found = [];

  // 600x600 gallery renditions
  $galleryPattern = '#https?://(?:www\.)?masterkala\.com/image/cache/catalog/[^"\s]+-600x600\.(?:webp|jpg|jpeg|png)#i';
  if(preg_match_all($galleryPattern, $html, $hits)){
    foreach($hits[0] as $imageUrl){
      $found[] = $imageUrl;
    }
  }

  // og:image
  $ogNodes = $xp->query('//meta[@property="og:image"]/@content');
  if($ogNodes && $ogNodes->length){
    $found[] = $ogNodes->item(0)->nodeValue;
  }

  // de-duplicate and reindex
  return array_values(array_unique($found));
}

/* --- Description (DOM, multi-pattern) --- */
/**
 * Locate the product description.
 *
 * A prioritised list of XPath queries is tried; for each query only the first
 * matching node is considered, and its cleaned inner HTML is returned as soon as
 * it contains visible text. When no query yields text, the description meta tag
 * and then og:description are used, wrapped in an escaped <p>.
 *
 * @param DOMXPath $xp   XPath handle over the parsed page.
 * @param string   $html Raw page source (not consulted by this function).
 * @return string Description HTML, or '' when nothing was found.
 */
function extract_description_dom(DOMXPath $xp, $html){
  $queries = [
    '//*[@id="tab-description"]',
    '//*[@id="description"]',
    '//*[contains(@class,"tab-description")]',
    '//*[contains(@class,"product-description")]',
    '//*[contains(@class,"post-content")]',
    '//*[contains(@class,"desc") and not(contains(@class,"short"))]',
    '//*[@itemprop="description"]',
    // some themes keep the content inside section/article
    '//article',
    '//section[contains(@class,"product") and contains(@class,"desc")]',
  ];
  foreach($queries as $query){
    $matches = $xp->query($query);
    if(!$matches || !$matches->length){
      continue;
    }
    $fragment = clean_html(inner_html($matches->item(0)));
    if(trim(strip_tags($fragment)) !== ''){
      return $fragment;
    }
  }

  // Fallbacks, in order: meta description, then og:description.
  $metaQueries = [
    '//meta[@name="description"]/@content',
    '//meta[@property="og:description"]/@content',
  ];
  foreach($metaQueries as $metaQuery){
    $attr = $xp->query($metaQuery);
    if($attr && $attr->length){
      $escaped = htmlspecialchars($attr->item(0)->nodeValue, ENT_QUOTES|ENT_SUBSTITUTE, 'UTF-8');
      return '<p>'.$escaped.'</p>';
    }
  }
  return '';
}
/**
 * Serialise the children of $node (not the node itself) back to HTML.
 *
 * @param DOMNode $node Node whose content is wanted.
 * @return string Concatenated markup of all child nodes; '' when there are none.
 */
function inner_html(DOMNode $node){
  $owner = $node->ownerDocument;
  $pieces = [];
  foreach($node->childNodes as $childNode){
    $pieces[] = $owner->saveHTML($childNode);
  }
  return implode('', $pieces);
}

/* --- Specs / Attributes (DOM) --- */
/**
 * Extract name/value specification pairs from the page.
 *
 * Strategies, each used only when the previous ones found nothing:
 *  1. table rows with at least two element cells (first = name, second = value);
 *  2. <dt> elements immediately followed by a <dd>;
 *  3. <li> items of the form "name: value".
 * Exact duplicates (case-insensitive) are removed, keeping first occurrences.
 *
 * Text is read from the DOM's textContent rather than strip_tags() on
 * re-serialised HTML, so entities such as &amp; or &nbsp; no longer leak into
 * names and values. Whitespace runs are collapsed in all three strategies
 * (previously only for list items).
 *
 * @param DOMXPath $xp   XPath handle over the parsed page.
 * @param string   $html Raw page source (not consulted by this function).
 * @return array List of ['name' => string, 'value' => string].
 */
function extract_specs_dom(DOMXPath $xp, $html){
  // Visible text of a node with whitespace collapsed.
  $text = function(DOMNode $n){
    return trim((string)preg_replace('/\s+/u', ' ', $n->textContent));
  };
  $attrs = [];

  // Two-column tables (name / value)
  foreach(($xp->query('//table//tr') ?: []) as $tr){
    $cells = [];
    foreach($tr->childNodes as $cell){
      if($cell->nodeType === XML_ELEMENT_NODE){
        $cells[] = $text($cell);
      }
    }
    if(count($cells) >= 2 && $cells[0] !== '' && $cells[1] !== ''){
      $attrs[] = ['name'=>$cells[0], 'value'=>$cells[1]];
    }
  }

  // dl/dt/dd
  if(empty($attrs)){
    foreach(($xp->query('//dl//dt') ?: []) as $dt){
      // Only a <dd> that directly follows the <dt> forms a pair.
      $next = $xp->query('following-sibling::*[1][self::dd]', $dt);
      $dd = $next ? $next->item(0) : null;
      if(!$dd) continue;
      $n = $text($dt); $v = $text($dd);
      if($n !== '' && $v !== '') $attrs[] = ['name'=>$n, 'value'=>$v];
    }
  }

  // ul/li : "name: value"
  if(empty($attrs)){
    foreach(($xp->query('//ul//li') ?: []) as $li){
      $line = $text($li);
      $pos = mb_strpos($line, ':');
      if($pos === false) continue;
      $n = trim(mb_substr($line, 0, $pos));
      $v = trim(mb_substr($line, $pos + 1));
      if($n !== '' && $v !== '') $attrs[] = ['name'=>$n, 'value'=>$v];
    }
  }

  // De-duplicate
  $seen = []; $final = [];
  foreach($attrs as $a){
    $key = mb_strtolower($a['name']).'|'.mb_strtolower($a['value']);
    if(isset($seen[$key])) continue;
    $seen[$key] = 1;
    $final[] = $a;
  }
  return $final;
}

/* --- Options / Variants (DOM) --- */
/**
 * Extract product options/variants from form controls whose name contains
 * "option" (the common OpenCart pattern, e.g. name="option[123]").
 *
 * Output order: all selects, then radio groups, then checkbox groups. Radio and
 * checkbox inputs are grouped by their name attribute.
 *
 * Fixes over the previous version:
 *  - "required" is detected with hasAttribute(); casting getAttribute() to bool
 *    reported false for the bare HTML5 form `<select required>` (empty value);
 *  - radio/checkbox groups report required when any of their inputs carries the
 *    attribute instead of a hard-coded false;
 *  - radio/checkbox labels come from textContent, so HTML entities are decoded.
 *
 * @param DOMXPath $xp XPath handle over the parsed page.
 * @return array List of ['name', 'type', 'required', 'values' => [['id', 'label', 'price']]].
 */
function extract_options_dom(DOMXPath $xp){
  $options = [];

  // SELECTs
  $selects = $xp->query('//form//select[contains(@name,"option")]');
  foreach(($selects ?: []) as $sel){
    $name = trim($sel->getAttribute('name'));
    // First <label> found under the select's parent element, if any.
    $parent = $sel->parentNode;
    $labelNode = ($parent instanceof DOMElement) ? $parent->getElementsByTagName('label')->item(0) : null;
    $label = $labelNode ? trim($labelNode->textContent) : '';
    if(!$label){
      // No label text: fall back to the element id, then to the field name.
      $label = trim($sel->getAttribute('id') ?: $name);
    }
    $vals = [];
    foreach($sel->getElementsByTagName('option') as $opt){
      $id = $opt->getAttribute('value');
      // Placeholder entries ("-- choose --") have an empty value.
      if($id === '') continue;
      $vals[] = ['id'=>$id, 'label'=>trim($opt->textContent), 'price'=>null];
    }
    if($vals){
      $options[] = [
        'name' => $label ?: $name,
        'type' => 'select',
        'required' => $sel->hasAttribute('required'),
        'values' => $vals
      ];
    }
  }

  // Shared handling for radio and checkbox inputs: group by name attribute.
  $collectGroups = function($inputs, $type) use (&$options){
    if(!$inputs || !$inputs->length) return;
    $groups = []; $required = [];
    foreach($inputs as $input){
      $nm = $input->getAttribute('name');
      if(!isset($groups[$nm])) $groups[$nm] = [];
      // The text of the enclosing element (typically the wrapping <label>) is used as the label.
      $label = $input->parentNode ? trim($input->parentNode->textContent) : '';
      $groups[$nm][] = ['id'=>$input->getAttribute('value'), 'label'=>$label, 'price'=>null];
      if($input->hasAttribute('required')) $required[$nm] = true;
    }
    foreach($groups as $nm => $vals){
      $options[] = ['name'=>$nm, 'type'=>$type, 'required'=>isset($required[$nm]), 'values'=>$vals];
    }
  };

  // RADIOs
  $collectGroups($xp->query('//form//input[@type="radio" and contains(@name,"option")]'), 'radio');

  // CHECKBOXes
  $collectGroups($xp->query('//form//input[@type="checkbox" and contains(@name,"option")]'), 'checkbox');

  return $options;
}

/* --- Stock Text --- */
/**
 * Find a human-readable stock/availability label.
 *
 * The first element whose class contains "stock" or "availability" wins if it has
 * text; otherwise the raw source is searched for one of a few fixed Persian
 * "out of stock" phrases appearing as the entire text between two tags.
 *
 * @param string   $html Raw page source.
 * @param DOMXPath $xp   XPath handle over the parsed page.
 * @return string|null Whitespace-normalised label, or null when nothing matched.
 */
function extract_stock_text($html, DOMXPath $xp){
  // DOM lookup first
  $hits = $xp->query('//*[contains(@class,"stock") or contains(@class,"availability")]');
  if($hits && $hits->length){
    $label = preg_replace('/\s+/u',' ', $hits->item(0)->textContent);
    $label = trim($label);
    if($label !== ''){
      return $label;
    }
  }

  // Regex fallback on the raw source
  $matched = preg_match('#>(ناموجود|اتمام موجودی|تمام شد|موجود نیست)<#u', $html, $phrase);
  return $matched ? $phrase[1] : null;
}

/* --- Price Resolver --- */
/**
 * Decide the final price from the inline prices and the JSON-LD product.
 *
 * Inline values win: available_price if truthy, else price_with_discount. They
 * are taken as Toman, and the raw value is reported as that amount times ten
 * with currency 'IRR'. Without an inline value the JSON-LD "offers" entry is
 * used (either a single offer or the first of a list): its price becomes the
 * raw value and one tenth of it the Toman value.
 *
 * @param array $prices        Result of extract_inline_prices_flex().
 * @param array $jsonldProduct Product node from JSON-LD (may be empty).
 * @return array [price_toman int|null, price_raw string|null, currency string|null]
 */
function resolve_price(array $prices, array $jsonldProduct){
  $hasInline = !empty($prices['available_price']) || !empty($prices['price_with_discount']);
  if($hasInline){
    $toman = (int)($prices['available_price'] ?: $prices['price_with_discount']);
    return [$toman, (string)($toman * 10), 'IRR'];
  }

  // JSON-LD: "offers" may be a single object or a list of them.
  $offers = $jsonldProduct['offers'] ?? [];
  $offer = null;
  if(isset($offers['price'])){
    $offer = $offers;
  } elseif(is_array($offers) && isset($offers[0]['price'])){
    $offer = $offers[0];
  }
  if($offer === null){
    return [null, null, null];
  }

  $raw      = (string)num_clean($offer['price']);
  $currency = $offer['priceCurrency'] ?? 'IRR';
  $toman    = (int)round(((int)$raw)/10);
  return [$toman, $raw, $currency];
}

/* --- Cleaning --- */
/**
 * Strip active content from an HTML fragment: <script> and <style> elements and
 * inline event-handler attributes (on*).
 *
 * Handler removal now works attribute by attribute inside each tag, so it also
 * catches unquoted values (onclick=alert(1)), whitespace around "=" and "/" used
 * as a separator (<img/onerror=...>). These all slipped through the previous
 * `\son\w+="..."` patterns. Because quoted attribute values are consumed as a
 * whole, text such as href="/online=1" is left intact, and plain text outside
 * tags is never modified.
 *
 * NOTE(review): this is a best-effort regex filter, not a full sanitiser
 * (e.g. javascript: URLs are not touched).
 *
 * @param string $h HTML fragment.
 * @return string Cleaned, trimmed fragment; '' if a regex step fails.
 */
function clean_html($h){
  $h = (string)$h;
  // preg_replace returns null on PCRE failure; the cast turns that into '' (fail closed).
  $h = (string)preg_replace('#<script\b[^<]*(?:(?!</script>)<[^<]*)*</script>#is', '', $h);
  $h = (string)preg_replace('#<style\b[^<]*(?:(?!</style>)<[^<]*)*</style>#is', '', $h);

  // An opening tag, allowing ">" inside quoted attribute values.
  $tagPattern  = '#<[a-z][^>"\']*(?:(?:"[^"]*"|\'[^\']*\')[^>"\']*)*>#i';
  // One attribute: separator, name, optional value (double-quoted, single-quoted or bare).
  $attrPattern = '#([\s/]+)([^\s/=>"\']+)(\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]*))?#';

  $h = (string)preg_replace_callback($tagPattern, function($tag) use ($attrPattern){
    $stripped = preg_replace_callback($attrPattern, function($attr){
      $isHandler = strlen($attr[2]) > 2 && stripos($attr[2], 'on') === 0;
      return $isHandler ? '' : $attr[0];
    }, $tag[0]);
    return $stripped === null ? '' : $stripped;
  }, $h);

  return trim($h);
}