<?php
/**
 * Saalambabaa Category Product List Webservice
 *
 * Usage example:
 *   sbaba_category.php?url=https://saalambabaa.com/96-samsung-tv&pages=3
 *
 * Output: JSON with product name + url for all pages.
 */

// Declare the response as UTF-8 JSON up front; every exit path below emits its
// payload through cat_json_response(), which prints json_encode() output.
header('Content-Type: application/json; charset=utf-8');

/* ===================== Helpers ===================== */

/**
 * Send a JSON response with the given HTTP status code and terminate the script.
 *
 * If $data cannot be encoded (json_encode() returns false, e.g. because a string
 * contains invalid UTF-8), a 500 response with a JSON error payload is sent instead
 * of an empty body.
 *
 * @param mixed $data Value to serialise as the response body.
 * @param int   $code HTTP status code to send (default 200).
 * @return void This function does not return; it calls exit.
 */
function cat_json_response($data, $code = 200) {
    $flags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT;

    $body = json_encode($data, $flags);
    if ($body === false) {
        // Encoding failed: report it explicitly rather than echoing `false`,
        // which would produce an empty body under the caller's status code.
        $code = 500;
        $body = json_encode([
            'success' => false,
            'error'   => 'Failed to encode response as JSON (json_last_error code ' . json_last_error() . ')',
        ], $flags);
    }

    http_response_code($code);
    echo $body;
    exit;
}

/**
 * Tell whether a host name belongs to one of the shop's domains.
 *
 * Accepts the bare domain or any subdomain of it; the comparison is
 * case-insensitive and ignores a trailing root dot.
 *
 * @param string $host Host name as returned by parse_url().
 * @return bool True when the host is allowed.
 */
function cat_is_allowed_host($host) {
    $host = rtrim(strtolower($host), '.');

    foreach (['saalambabaa.com', 'salambabaa.com'] as $domain) {
        $suffix = '.' . $domain;
        if ($host === $domain || substr($host, -strlen($suffix)) === $suffix) {
            return true;
        }
    }

    return false;
}

/**
 * Fetch the HTML of a shop page.
 *
 * The URL is trimmed and given an "https://" scheme when it has no http(s) scheme.
 * It is rejected unless its host is one of the allowed shop domains (or a subdomain).
 * cURL is used when the extension is available; otherwise the request goes through
 * file_get_contents() with an HTTP stream context.
 *
 * @param string $url Page URL, with or without scheme.
 * @return array Two-element list: [html, null] on success, [null, error message] on failure.
 */
function cat_fetch_url($url) {
    $url = trim($url);

    if (!preg_match('~^https?://~i', $url)) {
        $url = 'https://' . ltrim($url, '/');
    }

    $parts = parse_url($url);
    if (!$parts || empty($parts['host'])) {
        return [null, 'Invalid URL'];
    }

    // Lock to the shop's domains. The whole host (or a dot-delimited suffix) must
    // match: a substring test would also accept look-alikes such as
    // "saalambabaa.com.evil.tld" or "notsaalambabaa.com".
    if (!cat_is_allowed_host($parts['host'])) {
        return [null, 'This API is limited to saalambabaa.com'];
    }

    if (function_exists('curl_init')) {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL             => $url,
            CURLOPT_RETURNTRANSFER  => true,
            CURLOPT_FOLLOWLOCATION  => true,
            CURLOPT_MAXREDIRS       => 5,
            // Never let the initial request or a redirect leave HTTP(S).
            CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_TIMEOUT         => 20,
            CURLOPT_CONNECTTIMEOUT  => 10,
            CURLOPT_USERAGENT       => 'Mozilla/5.0 (compatible; sbaba-category-bot/1.0)',
            // Verify the server certificate and that it matches the host name;
            // with these disabled any on-path party could substitute the response.
            CURLOPT_SSL_VERIFYPEER  => true,
            CURLOPT_SSL_VERIFYHOST  => 2,
        ]);

        $html     = curl_exec($ch);
        $err      = curl_error($ch);
        $code     = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $finalUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        curl_close($ch);

        if ($html === false || $code >= 400) {
            return [null, "HTTP error: $code / $err"];
        }

        // Redirects were followed, so re-apply the domain lock to the final URL
        // before handing any content back.
        $finalHost = parse_url((string) $finalUrl, PHP_URL_HOST);
        if (!is_string($finalHost) || !cat_is_allowed_host($finalHost)) {
            return [null, 'This API is limited to saalambabaa.com'];
        }

        return [$html, null];
    }

    // Fallback without cURL
    $context = stream_context_create([
        'http' => [
            'method'  => 'GET',
            'timeout' => 20,
            'header'  => "User-Agent: Mozilla/5.0 (compatible; sbaba-category-bot/1.0)\r\n",
        ]
    ]);

    $html = @file_get_contents($url, false, $context);
    if ($html === false) {
        return [null, 'Failed to fetch HTML (file_get_contents)'];
    }

    return [$html, null];
}

/**
 * Build the URL for a specific listing page.
 *
 * - page 1 (or lower) => base URL returned as-is
 * - page 2+           => "page" query parameter added, or replaced if already present
 *
 * All other components of the base URL (scheme, credentials, host, port, path,
 * remaining query parameters, fragment) are carried over. If the base URL is so
 * malformed that parse_url() cannot split it, it is returned unchanged.
 *
 * @param string $baseUrl Category URL supplied by the caller.
 * @param int    $page    1-based page number.
 * @return string URL for the requested page.
 */
function cat_build_page_url($baseUrl, $page) {
    if ($page <= 1) {
        return $baseUrl;
    }

    $parts = parse_url($baseUrl);
    if ($parts === false) {
        // Nothing sensible can be rebuilt from an unparsable URL; returning it
        // untouched keeps it recognisable in any later error report.
        return $baseUrl;
    }

    $query = [];
    if (isset($parts['query'])) {
        parse_str($parts['query'], $query);
    }

    // Set / override page
    $query['page'] = $page;

    // Keep "//" for scheme-less URLs that still carry a host.
    if (isset($parts['scheme'])) {
        $scheme = $parts['scheme'] . '://';
    } else {
        $scheme = isset($parts['host']) ? '//' : '';
    }

    // Credentials embedded in the URL must survive the rebuild.
    $auth = '';
    if (isset($parts['user'])) {
        $auth = $parts['user'];
        if (isset($parts['pass'])) {
            $auth .= ':' . $parts['pass'];
        }
        $auth .= '@';
    }

    $host     = isset($parts['host']) ? $parts['host'] : '';
    $port     = isset($parts['port']) ? ':' . $parts['port'] : '';
    $path     = isset($parts['path']) ? $parts['path'] : '';
    // Explicit '&' so the result does not depend on the arg_separator.output ini setting.
    $queryStr = '?' . http_build_query($query, '', '&');
    $fragment = isset($parts['fragment']) ? '#' . $parts['fragment'] : '';

    return $scheme . $auth . $host . $port . $path . $queryStr . $fragment;
}

/**
 * Gather every node typed as "Product" from decoded JSON-LD data.
 *
 * The structure is walked depth-first in document order. Each array whose "@type"
 * is, or contains, the exact string "Product" is appended to $products; its own
 * children are still visited afterwards. Non-array input is ignored.
 *
 * @param mixed $node     Decoded JSON-LD value (normally an array).
 * @param array $products Output list, extended in place.
 * @return void
 */
function cat_collect_products_from_node($node, &$products) {
    // Explicit work stack instead of recursion; the top of the stack is visited next.
    $stack = [$node];

    while ($stack) {
        $current = array_pop($stack);
        if (!is_array($current)) {
            continue;
        }

        if (isset($current['@type'])) {
            $declared = $current['@type'];
            if (!is_array($declared)) {
                $declared = [$declared];
            }
            if (in_array('Product', $declared, true)) {
                $products[] = $current;
            }
        }

        // Push children back-to-front so they are popped in their original order.
        foreach (array_reverse($current) as $child) {
            if (is_array($child)) {
                $stack[] = $child;
            }
        }
    }
}

/**
 * Extract a minimal product list (name + url) from a page's JSON-LD blocks.
 *
 * Every <script type="application/ld+json"> block is decoded and searched for
 * Product nodes via cat_collect_products_from_node(). A product is kept only when
 * it has a non-empty scalar "name" and a non-empty string "url" (or, failing that,
 * "@id"). Duplicates (same name and url) within the page are dropped.
 *
 * @param string $html       Full HTML of the page.
 * @param int    $pageNumber Page number recorded on each returned entry.
 * @return array List of ['name' => string, 'url' => string, 'page' => int].
 */
function cat_extract_products_from_html($html, $pageNumber) {
    $result = [];

    if (!preg_match_all('~<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>~is', $html, $matches)) {
        return $result;
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;
    $seen = [];

    foreach ($matches[1] as $block) {
        $raw = trim($block);
        if ($raw === '') {
            continue;
        }

        // Parse the block as written first. Decoding HTML entities before parsing
        // would turn an escaped quote inside a string value (e.g. `55&quot; TV`)
        // into a bare `"` and invalidate the whole block.
        // A possible trailing semicolon is removed before decoding.
        $decodeFields = true;
        $data = json_decode(preg_replace('~;\s*$~', '', $raw), true);

        if (!is_array($data)) {
            // Fall back to the entity-decoded text for blocks that were
            // entity-encoded as a whole.
            $decoded = trim(html_entity_decode($raw, $entityFlags, 'UTF-8'));
            $data = json_decode(preg_replace('~;\s*$~', '', $decoded), true);
            $decodeFields = false;
        }

        if (!is_array($data)) {
            continue;
        }

        $blockProducts = [];
        cat_collect_products_from_node($data, $blockProducts);

        foreach ($blockProducts as $p) {
            // "name" may legally be a structured value; only scalars can be used
            // here (passing an array to trim() is a TypeError on PHP 8).
            $name = (isset($p['name']) && is_scalar($p['name'])) ? (string) $p['name'] : '';

            // URL can be in "url" or "@id"
            $url = '';
            if (isset($p['url']) && is_string($p['url'])) {
                $url = $p['url'];
            } elseif (isset($p['@id']) && is_string($p['@id'])) {
                $url = $p['@id'];
            }

            // Entities are still present in the values when the block was parsed raw.
            if ($decodeFields) {
                $name = html_entity_decode($name, $entityFlags, 'UTF-8');
                $url  = html_entity_decode($url, $entityFlags, 'UTF-8');
            }

            $name = trim($name);
            $url  = trim($url);

            if ($name === '' || $url === '') {
                continue;
            }

            // Deduplicate by name+url
            $key = $name . '|' . $url;
            if (isset($seen[$key])) {
                continue;
            }
            $seen[$key] = true;

            $result[] = [
                'name' => $name,
                'url'  => $url,
                'page' => $pageNumber,
            ];
        }
    }

    return $result;
}

/* ===================== Main ===================== */

// Upper bound on pages scanned per request. Each page is a blocking outbound HTTP
// fetch, so an unbounded caller-supplied count could tie up this script and hammer
// the shop.
$maxPages = 50;

// Accept only a plain string: "?url[]=x" makes $_GET['url'] an array, which trim()
// rejects with a TypeError on PHP 8.
$baseUrl = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
$pages   = isset($_GET['pages']) ? (int)$_GET['pages'] : 1;

if ($baseUrl === '') {
    cat_json_response([
        'success' => false,
        'error'   => 'Missing "url" parameter. Example: ?url=https://saalambabaa.com/96-samsung-tv&pages=2',
    ], 400);
}

// Clamp the page count to [1, $maxPages]; the clamped value is what gets reported
// back as "pages_requested".
$pages = max(1, min($pages, $maxPages));

$allProducts   = [];
$pagesScanned  = [];
$globalSeen    = [];

// Loop over pages
for ($page = 1; $page <= $pages; $page++) {
    $pageUrl = cat_build_page_url($baseUrl, $page);
    $pagesScanned[] = $pageUrl;

    list($html, $err) = cat_fetch_url($pageUrl);
    if ($html === null) {
        // Skip this page but note the error inline in the product list.
        $allProducts[] = [
            '_error_page' => $pageUrl,
            '_error_msg'  => $err,
        ];
        continue;
    }

    $products = cat_extract_products_from_html($html, $page);

    // Global deduplicate by name+url across pages
    foreach ($products as $prod) {
        $key = $prod['name'] . '|' . $prod['url'];
        if (isset($globalSeen[$key])) {
            continue;
        }
        $globalSeen[$key] = true;
        $allProducts[] = $prod;
    }
}

// Final JSON output
cat_json_response([
    'success'       => true,
    'source_url'    => $baseUrl,
    'pages_requested' => $pages,
    'pages_scanned' => $pagesScanned,
    'products'      => $allProducts,
]);
