<?php
declare(strict_types=1);
// Every response produced by this script (success or error) is JSON encoded as UTF-8.
header('Content-Type: application/json; charset=utf-8');
// Report every error level, but never print errors into the response body,
// where they would be mixed into the JSON output.
error_reporting(E_ALL);
ini_set('display_errors', '0');
// Allow up to 90 seconds of execution; the controller below may fetch several remote pages.
set_time_limit(90);

/* ---------- helpers ---------- */
/**
 * Send a JSON response with the given HTTP status code and terminate the script.
 *
 * If the payload cannot be encoded (for example because scraped text contains
 * invalid UTF-8), encoding is retried with JSON_PARTIAL_OUTPUT_ON_ERROR, which
 * replaces the unencodable values with null. If that also fails, a minimal
 * 500 error document is sent instead of an empty body.
 *
 * @param int   $status  HTTP status code to send.
 * @param array $payload Data to serialize as the response body.
 * @return void Never returns to the caller; the script exits after output.
 */
function respond($status, array $payload) {
  $flags = JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES|JSON_PRETTY_PRINT;

  $body = json_encode($payload, $flags);
  if ($body === false) {
    // Best effort: keep everything that can be encoded.
    $body = json_encode($payload, $flags|JSON_PARTIAL_OUTPUT_ON_ERROR);
  }
  if ($body === false) {
    $status = 500;
    $body   = '{"error":"Failed to encode response as JSON"}';
  }

  http_response_code((int)$status);
  echo $body;
  exit;
}

/**
 * Normalize a URL so that non-ASCII (e.g. Persian) paths/queries and IDN hosts
 * can be requested safely.
 *
 * - The scheme is lower-cased.
 * - The host is converted to its ASCII (punycode) form when the intl
 *   extension provides idn_to_ascii(); otherwise it is left untouched.
 * - Each path segment, query key/value and the fragment is decoded and then
 *   percent-encoded again, so already-encoded input is not double-encoded.
 * - In the query string "+" is treated as an encoded space (form encoding)
 *   and is emitted as "%20"; a literal plus must be written as "%2B".
 * - An empty path becomes "/".
 * - Any user-info part (user:pass@) of the input is not carried over.
 *
 * @param string $url URL to normalize.
 * @return string The normalized URL, or the trimmed input unchanged when it
 *                cannot be parsed or lacks a scheme or host.
 */
function normalizeUrl(string $url): string {
  $url = trim($url);
  $p = @parse_url($url);
  if (!$p || empty($p['scheme']) || empty($p['host'])) return $url;

  $scheme = strtolower($p['scheme']);
  $host   = $p['host'];

  // IDN host -> ASCII (punycode); keep the original host if conversion fails.
  if (function_exists('idn_to_ascii')) {
    $host_ascii = @idn_to_ascii($host, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
    if ($host_ascii) $host = $host_ascii;
  }

  // Path: re-encode every segment on its own so the "/" separators survive.
  // decode -> encode makes already-encoded and raw input come out the same.
  $path = '/';
  if (isset($p['path']) && $p['path'] !== '') {
    $segments = [];
    foreach (explode('/', $p['path']) as $seg) {
      $segments[] = ($seg === '') ? '' : rawurlencode(rawurldecode($seg));
    }
    $path = implode('/', $segments);
  }

  // Query: rebuild each key/value pair. urldecode() (not rawurldecode()) is
  // used so that "+" keeps meaning "space" instead of turning into "%2B".
  // The check is explicit rather than empty() so a query of "0" is kept.
  $query = '';
  if (isset($p['query']) && $p['query'] !== '') {
    $queryParts = [];
    foreach (explode('&', $p['query']) as $pair) {
      if ($pair === '') continue;
      $kv   = explode('=', $pair, 2);
      $part = rawurlencode(urldecode($kv[0]));
      // Keep the "=" whenever the input had one, even for an empty value.
      if (isset($kv[1])) $part .= '=' . rawurlencode(urldecode($kv[1]));
      $queryParts[] = $part;
    }
    $query = implode('&', $queryParts);
  }

  $port = isset($p['port']) ? ':' . (int)$p['port'] : '';
  $frag = isset($p['fragment']) ? '#' . rawurlencode(rawurldecode($p['fragment'])) : '';

  return $scheme . '://' . $host . $port . $path . ($query!=='' ? '?' . $query : '') . $frag;
}

/**
 * Fetch a URL with cURL using a GET request.
 *
 * The URL is passed through normalizeUrl() first. Up to 5 redirects are
 * followed. Both the initial request and any redirect are restricted to
 * http/https, so a user-supplied URL (or a redirect from a remote server)
 * cannot make cURL use other handlers such as file://, gopher:// or dict://.
 *
 * NOTE(review): this does not stop requests to internal/private hosts over
 * http(s); add host filtering if this endpoint is publicly reachable.
 *
 * @param string $url     URL to fetch.
 * @param int    $timeout Total transfer timeout in seconds.
 * @return array [int $httpCode, string|false $body, string $curlError];
 *               $body is false and $curlError is non-empty when the transfer failed.
 */
function httpGet($url, $timeout=25) {
  $url = normalizeUrl($url);
  $ch = curl_init($url);
  if ($ch === false) {
    return [0, false, 'Failed to initialize cURL'];
  }
  curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER  => true,
    CURLOPT_FOLLOWLOCATION  => true,
    CURLOPT_MAXREDIRS       => 5,
    CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    CURLOPT_CONNECTTIMEOUT  => 12,
    CURLOPT_TIMEOUT         => $timeout,
    CURLOPT_USERAGENT       => 'Mozilla/5.0 (compatible; StarPhoneListScraper/1.1)',
    CURLOPT_HTTPHEADER      => [
      'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Language: fa,en;q=0.9'
    ],
    CURLOPT_ENCODING        => '' // empty string = accept every encoding cURL supports (gzip/deflate/...)
  ]);
  $body = curl_exec($ch);
  $err  = curl_error($ch);
  $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
  curl_close($ch);
  return [$code,$body,$err];
}

/**
 * Resolve a (possibly relative) href against a base URL and normalize the result.
 *
 * Handled forms:
 * - absolute http(s) URLs are only normalized;
 * - hrefs with any other scheme (mailto:, javascript:, tel:, ...) yield '';
 * - protocol-relative hrefs ("//host/path") inherit the base scheme;
 * - root-relative ("/x"), document-relative ("x", "../x"), query-only ("?a=1")
 *   and fragment-only ("#top") hrefs are resolved against the base path.
 *
 * "." and ".." segments are removed from the path only (never from the query
 * or fragment), repeated slashes are collapsed, and a trailing slash is kept.
 *
 * @param string $base Absolute URL of the page the href was found on.
 * @param string $href Link target as written in the document.
 * @return string Normalized absolute URL, or '' when the href is empty or not an http(s) link.
 */
function absUrl($base, $href) {
  $href = is_string($href) ? trim($href) : '';
  if ($href === '') return '';
  if (preg_match('#^https?://#i', $href)) return normalizeUrl($href);
  // Any other explicit scheme is not a page we can fetch.
  if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $href)) return '';

  $pu = parse_url($base);
  if (!$pu) return normalizeUrl($href);
  $scheme = $pu['scheme'] ?? 'https';

  // Protocol-relative link: different host, same scheme as the base.
  if (strpos($href, '//') === 0) return normalizeUrl($scheme . ':' . $href);

  $host     = $pu['host'] ?? '';
  $port     = isset($pu['port']) ? ':'.$pu['port'] : '';
  $basePath = (isset($pu['path']) && $pu['path'] !== '') ? $pu['path'] : '/';

  // Split the href into its path part and the "?query#fragment" remainder so
  // that dot-segment removal below never touches the query or fragment.
  $cut      = strcspn($href, '?#');
  $hrefPath = substr($href, 0, $cut);
  $suffix   = (string)substr($href, $cut);

  if ($hrefPath === '') {
    // Query-only or fragment-only reference: stay on the base document.
    $path = $basePath;
    if ($suffix[0] === '#' && isset($pu['query']) && $pu['query'] !== '') {
      $suffix = '?' . $pu['query'] . $suffix;
    }
  } elseif ($hrefPath[0] === '/') {
    $path = $hrefPath;
  } else {
    // Relative to the directory of the base document.
    $path = preg_replace('#/[^/]*$#', '/', $basePath) . $hrefPath;
  }

  // Remove "." / ".." segments and empty segments.
  $segments = explode('/', $path);
  $last     = end($segments);
  $parts    = [];
  foreach ($segments as $part) {
    if ($part === '' || $part === '.') continue;
    if ($part === '..') { array_pop($parts); continue; }
    $parts[] = $part;
  }
  $path = '/' . implode('/', $parts);
  // A path that ended in "/", "/." or "/.." refers to a directory: keep the slash.
  if ($parts && ($last === '' || $last === '.' || $last === '..')) $path .= '/';

  return normalizeUrl($scheme.'://'.$host.$port.$path.$suffix);
}

/**
 * Build the URL of a given result page by setting the "page" query parameter.
 *
 * Page 1 (or lower) is the base URL itself. For later pages the existing
 * query string is edited pair by pair: the first "page" parameter is replaced
 * in place, further "page" parameters are dropped, and "page=N" is appended
 * when none existed. All other parameters keep their names, order and
 * repetitions (a parse_str()/http_build_query() round-trip would rename keys
 * containing "." or spaces and collapse repeated keys).
 *
 * Any fragment of the base URL is not carried over to pages > 1.
 *
 * @param string $base Listing URL.
 * @param int    $page 1-based page number.
 * @return string Normalized URL for the requested page.
 */
function makePageUrl($base, $page) {
  if ($page <= 1) return normalizeUrl($base);

  $p = parse_url($base);
  if (!is_array($p)) $p = [];

  $pageParam = 'page=' . rawurlencode((string)$page);
  $pairs     = [];
  $replaced  = false;

  $rawQuery = isset($p['query']) ? (string)$p['query'] : '';
  if ($rawQuery !== '') {
    foreach (explode('&', $rawQuery) as $pair) {
      if ($pair === '') continue;
      $kv  = explode('=', $pair, 2);
      $key = urldecode($kv[0]);
      if ($key === 'page') {
        if (!$replaced) { $pairs[] = $pageParam; $replaced = true; }
        continue;
      }
      // Re-encode here (treating "+" as space) so the result does not depend
      // on how the final normalization step handles "+".
      $encoded = rawurlencode($key);
      if (isset($kv[1])) $encoded .= '=' . rawurlencode(urldecode($kv[1]));
      $pairs[] = $encoded;
    }
  }
  if (!$replaced) $pairs[] = $pageParam;

  $scheme = $p['scheme'] ?? 'https';
  $host   = $p['host'] ?? '';
  $port   = isset($p['port']) ? ':' . (int)$p['port'] : '';
  $path   = $p['path'] ?? '/';

  return normalizeUrl($scheme.'://'.$host.$port.$path.'?'.implode('&', $pairs));
}

/**
 * Collapse all whitespace runs to single spaces and trim the result.
 *
 * @param string|object $s A string, or an object exposing a textContent
 *                         property (e.g. a DOM node).
 * @return string Cleaned text; '' for unsupported input types.
 */
function textify($s) {
  if (is_string($s)) {
    $text = $s;
  } elseif (is_object($s)) {
    $text = (string)$s->textContent;
  } elseif (is_scalar($s)) {
    $text = (string)$s;
  } else {
    return '';
  }

  $collapsed = preg_replace('/\s+/u', ' ', $text);
  if ($collapsed === null) {
    // The /u modifier rejects invalid UTF-8 and makes preg_replace() return
    // null; fall back to byte-wise matching instead of losing the text.
    $collapsed = preg_replace('/\s+/', ' ', $text);
  }
  return trim((string)$collapsed);
}

/* ---------- JSON-LD parser ---------- */
/**
 * Extract and decode every <script type="application/ld+json"> block of an HTML document.
 *
 * Each block is decoded as-is first. Only when that fails are clean-ups
 * applied, in this order: C-style block comments are removed, then ASCII
 * control characters (0x00-0x1F) are removed. Decoding the untouched text
 * first matters because valid JSON may contain comment-like sequences inside
 * string values (for example in URLs), which must not be stripped.
 *
 * @param string $html Raw HTML.
 * @return array List of decoded JSON values (associative arrays for objects);
 *               blocks that cannot be decoded are skipped.
 */
function extractJsonLdBlocks($html) {
  $out = [];
  if (!is_string($html) || $html === '') return $out;

  $pattern = '#<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>#is';
  if (!preg_match_all($pattern, $html, $m)) return $out;

  foreach ($m[1] as $raw) {
    $raw  = trim($raw);
    $json = json_decode($raw, true);

    if ($json === null) {
      // Second attempt: drop block comments some sites embed around the JSON.
      $stripped = preg_replace('#/\*.*?\*/#s', '', $raw);
      if (is_string($stripped)) $raw = trim($stripped);
      $json = json_decode($raw, true);
    }
    if ($json === null) {
      // Last attempt: raw control characters (e.g. newlines inside strings) are invalid JSON.
      $cleaned = preg_replace('/[\x00-\x1F]/', '', $raw);
      if (is_string($cleaned)) $json = json_decode($cleaned, true);
    }

    if ($json !== null) $out[] = $json;
  }
  return $out;
}

/**
 * Collect product names/URLs from JSON-LD "ProductCollection" nodes in a page.
 *
 * Every decoded JSON-LD block is walked breadth-first. For each node whose
 * @type contains "ProductCollection", the entries under "offers" (or under
 * "offers.offers") are read; name and URL are taken from the offer itself or
 * from its "itemOffered" object. A single offer object is accepted as well as
 * a list of offers. URLs are made absolute against $baseUrl.
 *
 * @param string $html    Raw HTML of the page.
 * @param string $baseUrl URL of the page, used to resolve relative links.
 * @return array List of ['name' => string, 'url' => string], de-duplicated by
 *               URL (or by name when the URL is empty).
 */
function parseProductsFromJsonLd($html, $baseUrl='') {
  $items = [];
  foreach (extractJsonLdBlocks($html) as $blk) {
    // Index-based FIFO queue: same visiting order as array_shift(), without
    // re-indexing the array on every step.
    $queue = [$blk];
    for ($i = 0; $i < count($queue); $i++) {
      $node = $queue[$i];
      if (!is_array($node)) continue;

      if (isset($node['@type'])) {
        $tp = is_string($node['@type'])
          ? $node['@type']
          : implode(',', array_filter((array)$node['@type'], 'is_string'));

        if (stripos($tp, 'ProductCollection') !== false) {
          $offers = $node['offers'] ?? null;
          if (is_array($offers) && isset($offers['offers']) && is_array($offers['offers'])) {
            $offers = $offers['offers'];
          }
          // A lone offer object (not a list): wrap it so the loop below sees it.
          if (is_array($offers) && (isset($offers['url']) || isset($offers['name']) || isset($offers['itemOffered']))) {
            $offers = [$offers];
          }
          if (is_array($offers)) {
            foreach ($offers as $of) {
              if (!is_array($of)) continue;
              $offered = (isset($of['itemOffered']) && is_array($of['itemOffered'])) ? $of['itemOffered'] : [];
              $url  = $of['url']  ?? ($offered['url']  ?? '');
              $name = $of['name'] ?? ($offered['name'] ?? '');
              // Guard against non-scalar values in malformed markup.
              $url  = is_string($url) ? trim($url) : '';
              $name = is_scalar($name) ? trim((string)$name) : '';
              if ($url !== '') $url = absUrl($baseUrl, $url);
              if ($name !== '' || $url !== '') $items[] = ['name'=>$name, 'url'=>$url];
            }
          }
        }
      }

      foreach ($node as $v) {
        if (is_array($v)) $queue[] = $v;
      }
    }
  }

  // Unique by URL, falling back to the name when there is no URL.
  $seen = [];
  $out  = [];
  foreach ($items as $it) {
    $key = $it['url'] ?: $it['name'];
    if ($key && !isset($seen[$key])) { $seen[$key] = 1; $out[] = $it; }
  }
  return $out;
}

/* ---------- DOM fallback ---------- */
/**
 * Fallback product extraction: read anchors whose class attribute contains "s-link".
 *
 * The anchor text becomes the product name and its href (resolved against
 * $baseUrl) the product URL.
 *
 * @param string $html    Raw HTML of the page.
 * @param string $baseUrl URL of the page, used to resolve relative links.
 * @return array List of ['name' => string, 'url' => string], de-duplicated by
 *               URL (or by name when the URL is empty).
 */
function parseProductsFromDom($html, $baseUrl='') {
  // DOMDocument::loadHTML() rejects an empty document (ValueError on PHP 8).
  if (!is_string($html) || trim($html) === '') return [];

  // Collect libxml parse errors internally instead of emitting warnings,
  // and restore the previous setting afterwards.
  $previous = libxml_use_internal_errors(true);
  $dom = new DOMDocument();
  // Without an encoding hint libxml may decode the bytes as ISO-8859-1 and
  // garble non-Latin text; the XML declaration prefix makes it read UTF-8.
  // NOTE(review): assumes the fetched pages are UTF-8 (the JSON-LD path
  // already relies on that, since json_decode() requires UTF-8) - confirm.
  $loaded = $dom->loadHTML('<?xml encoding="UTF-8">' . $html, LIBXML_NOERROR|LIBXML_NOWARNING);
  libxml_clear_errors();
  libxml_use_internal_errors($previous);
  if (!$loaded) return [];

  $xp    = new DOMXPath($dom);
  // NOTE(review): substring match - also matches classes such as "is-link"; kept as-is.
  $nodes = $xp->query("//a[contains(@class,'s-link')]");
  if (!$nodes || !$nodes->length) return [];

  // Build the unique list in one pass: key is the URL, or the name when there is no URL.
  $seen = [];
  $uniq = [];
  foreach ($nodes as $a) {
    /** @var DOMElement $a */
    $name = textify($a);
    $url  = absUrl($baseUrl, $a->getAttribute('href'));
    $key  = $url ?: $name;
    if ($key && !isset($seen[$key])) {
      $seen[$key] = 1;
      $uniq[] = ['name'=>$name, 'url'=>$url];
    }
  }
  return $uniq;
}

/* ---------- controller ---------- */
// Read the request parameters. Only string input is accepted for "url": an
// array-valued parameter (?url[]=x) would make trim() throw under strict_types.
$input = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
$pages = (isset($_GET['pages']) && is_scalar($_GET['pages'])) ? (int)$_GET['pages'] : 1;
if ($input==='') respond(400, ['error'=>'Missing required parameter: url']);

$base = normalizeUrl($input);
// Light validation: only scheme/host are checked so that UTF-8 URLs are not rejected.
$pu = @parse_url($base);
if (!$pu || empty($pu['scheme']) || empty($pu['host'])) {
  respond(400, ['error'=>'Invalid URL (parsed)']);
}
// The URL comes from the client: refuse everything except plain web URLs so the
// fetcher cannot be pointed at file://, gopher:// and similar handlers.
if (!in_array(strtolower($pu['scheme']), ['http', 'https'], true)) {
  respond(400, ['error'=>'Unsupported URL scheme (only http and https are allowed)']);
}

// Clamp the number of pages to the range 1..100.
$pages = max(1, min($pages, 100));

$all = [];
$errors = [];
for ($i=1; $i<=$pages; $i++) {
  $url = makePageUrl($base, $i);
  list($code,$html,$err) = httpGet($url);
  if ($err || $code>=400 || !$html) {
    // A failed page is recorded and skipped; the remaining pages are still fetched.
    $errors[] = ['page'=>$i, 'url'=>$url, 'error'=>$err ?: ('HTTP '.$code)];
    continue;
  }
  // Prefer structured data; fall back to scanning the DOM for product links.
  $items = parseProductsFromJsonLd($html, $url);
  if (empty($items)) $items = parseProductsFromDom($html, $url);
  if (empty($items)) {
    $errors[] = ['page'=>$i, 'url'=>$url, 'warning'=>'No products detected'];
  } else {
    $all = array_merge($all, $items);
  }
}

// De-duplicate across pages by URL. Items without a URL get a key made of
// their name plus the current result count, so they are never merged.
$seen=[]; $products=[];
foreach ($all as $it) {
  $key = $it['url'] ?: ($it['name'].'#'.count($products));
  if ($key && !isset($seen[$key])) { $seen[$key]=1; $products[]=$it; }
}

$out = [
  'source' => $base,
  'pages_requested' => $pages,
  'count' => count($products),
  'products' => $products,
  'errors' => $errors
];

respond(200, $out);