/**
 * AskEngine — Content Extraction Engine
 * 
 * EXTRACTION STRATEGY (priority order):
 * 1. JSON-LD structured data (schema.org) — most reliable, pre-structured
 * 2. Open Graph / meta tags — good summary data
 * 3. Microdata (itemscope/itemprop) — inline structured data
 * 4. Platform-specific adapters (Amazon, eBay, Shopify, etc.)
 * 5. Semantic HTML heuristics (article, main, product cards, listings)
 * 6. Generic DOM text extraction — fallback
 * 
 * EDGE CASES HANDLED:
 * - SPAs (React, Vue, Angular) with virtual DOM / lazy loading
 * - Infinite scroll / paginated content
 * - Shadow DOM components
 * - iframe-embedded content
 * - Dynamically loaded content (MutationObserver)
 * - Anti-scraping obfuscation (CSS hiding, font-based encoding)
 * - Cookie walls / consent banners (skip, don't extract)
 * - Image-heavy pages with alt text as primary data
 * - Tables (product comparisons, pricing tables)
 * - Multi-language sites
 * - Minified / obfuscated class names (Tailwind, CSS modules, etc.)
 */

class AskEngineExtractor {
  constructor() {
    this.siteType = null;
    this.extractedData = null;
    this.lastExtractionTime = 0;
    this.mutationDebounce = null;
    this.observer = null;
  }

  // =========================================================================
  // MAIN EXTRACTION PIPELINE
  // =========================================================================

  async extractPageContent() {
    const startTime = performance.now();
    const url = window.location.href;
    const hostname = window.location.hostname;

    // Detect site type for specialized extraction
    this.siteType = this.detectSiteType(hostname, url);

    // Build extraction result with all available methods
    const result = {
      url,
      hostname,
      siteType: this.siteType,
      title: document.title,
      timestamp: Date.now(),
      structured: null,   // JSON-LD / microdata
      meta: null,          // OG tags, meta description
      items: [],           // Extracted product/listing items
      content: null,       // Main text content
      navigation: null,    // Site navigation structure
      extractionMs: 0
    };

    // Layer 1: Structured data (highest confidence)
    result.structured = this.extractStructuredData();

    // Layer 2: Meta tags
    result.meta = this.extractMetaTags();

    // Layer 3: Platform-specific adapter
    const adapterItems = this.runPlatformAdapter();
    if (adapterItems && adapterItems.length > 0) {
      result.items = adapterItems;
    }

    // Layer 4: Semantic HTML extraction (if adapter didn't find enough)
    if (result.items.length < 3) {
      const semanticItems = this.extractSemanticItems();
      // Merge, deduplicating by text similarity
      result.items = this.mergeAndDedupe(result.items, semanticItems);
    }

    // Layer 5: If still not much, try generic card/list detection
    if (result.items.length < 3) {
      const genericItems = this.extractGenericListings();
      result.items = this.mergeAndDedupe(result.items, genericItems);
    }

    // Layer 6: Main content text (for non-listing pages like articles, docs)
    result.content = this.extractMainContent();

    // Layer 7: Navigation context
    result.navigation = this.extractNavigation();

    // Check for Shadow DOM content
    const shadowItems = this.extractFromShadowDOM(document.body);
    if (shadowItems.length > 0) {
      result.items = this.mergeAndDedupe(result.items, shadowItems);
    }

    result.extractionMs = Math.round(performance.now() - startTime);
    this.extractedData = result;
    this.lastExtractionTime = Date.now();

    return result;
  }

  // =========================================================================
  // LAYER 1: JSON-LD & STRUCTURED DATA
  // =========================================================================

  extractStructuredData() {
    const results = [];

    // JSON-LD scripts
    const jsonLdScripts = document.querySelectorAll('script[type="application/ld+json"]');
    jsonLdScripts.forEach(script => {
      try {
        const data = JSON.parse(script.textContent);
        // Handle both single objects and arrays
        const items = Array.isArray(data) ? data : 
                      data['@graph'] ? data['@graph'] : [data];
        
        items.forEach(item => {
          if (item['@type']) {
            results.push({
              type: item['@type'],
              source: 'json-ld',
              data: this.flattenJsonLd(item)
            });
          }
        });
      } catch (e) {
        // Malformed JSON-LD, skip
      }
    });

    // Microdata (itemscope/itemprop)
    const microdataItems = document.querySelectorAll('[itemscope]');
    microdataItems.forEach(el => {
      const type = el.getAttribute('itemtype') || '';
      const props = {};
      el.querySelectorAll('[itemprop]').forEach(prop => {
        const name = prop.getAttribute('itemprop');
        const value = prop.getAttribute('content') || 
                      prop.getAttribute('href') ||
                      prop.textContent?.trim();
        if (name && value) {
          props[name] = value.substring(0, 500);
        }
      });
      if (Object.keys(props).length > 0) {
        results.push({
          type: type.split('/').pop(),
          source: 'microdata',
          data: props
        });
      }
    });

    // RDFa
    const rdfaItems = document.querySelectorAll('[typeof]');
    rdfaItems.forEach(el => {
      const type = el.getAttribute('typeof');
      const props = {};
      el.querySelectorAll('[property]').forEach(prop => {
        const name = prop.getAttribute('property');
        const value = prop.getAttribute('content') || prop.textContent?.trim();
        if (name && value) {
          props[name] = value.substring(0, 500);
        }
      });
      if (Object.keys(props).length > 0) {
        results.push({ type, source: 'rdfa', data: props });
      }
    });

    return results.length > 0 ? results : null;
  }

  flattenJsonLd(obj, prefix = '', depth = 0) {
    if (depth > 5) return {};
    const flat = {};
    for (const [key, value] of Object.entries(obj)) {
      if (key.startsWith('@') && key !== '@type') continue;
      const fullKey = prefix ? `${prefix}.${key}` : key;
      if (value && typeof value === 'object' && !Array.isArray(value)) {
        Object.assign(flat, this.flattenJsonLd(value, fullKey, depth + 1));
      } else if (Array.isArray(value)) {
        flat[fullKey] = value.map(v => 
          typeof v === 'object' ? JSON.stringify(v).substring(0, 200) : String(v)
        ).join(', ');
      } else {
        flat[fullKey] = String(value).substring(0, 500);
      }
    }
    return flat;
  }

  // =========================================================================
  // LAYER 2: META TAGS
  // =========================================================================

  extractMetaTags() {
    const meta = {};
    
    // Standard meta
    const desc = document.querySelector('meta[name="description"]');
    if (desc) meta.description = desc.getAttribute('content');

    const keywords = document.querySelector('meta[name="keywords"]');
    if (keywords) meta.keywords = keywords.getAttribute('content');

    // Open Graph
    document.querySelectorAll('meta[property^="og:"]').forEach(el => {
      const prop = el.getAttribute('property').replace('og:', '');
      meta[`og_${prop}`] = el.getAttribute('content');
    });

    // Twitter Card
    document.querySelectorAll('meta[name^="twitter:"]').forEach(el => {
      const prop = el.getAttribute('name').replace('twitter:', '');
      meta[`twitter_${prop}`] = el.getAttribute('content');
    });

    // Article-specific
    document.querySelectorAll('meta[property^="article:"]').forEach(el => {
      const prop = el.getAttribute('property').replace('article:', '');
      meta[`article_${prop}`] = el.getAttribute('content');
    });

    return Object.keys(meta).length > 0 ? meta : null;
  }

  // =========================================================================
  // LAYER 3: PLATFORM-SPECIFIC ADAPTERS
  // =========================================================================

  detectSiteType(hostname, url) {
    const h = hostname.toLowerCase();
    
    // E-commerce platforms
    if (h.includes('amazon.')) return 'amazon';
    if (h.includes('ebay.')) return 'ebay';
    if (h.includes('etsy.')) return 'etsy';
    if (h.includes('walmart.')) return 'walmart';
    if (h.includes('target.')) return 'target';
    if (h.includes('bestbuy.')) return 'bestbuy';
    
    // Shopify detection (check for Shopify-specific globals)
    if (document.querySelector('meta[name="shopify-checkout-api-token"]') ||
        document.querySelector('link[href*="cdn.shopify.com"]') ||
        window.Shopify) return 'shopify';
    
    // WooCommerce
    if (document.querySelector('body.woocommerce') ||
        document.querySelector('.woocommerce-page') ||
        document.querySelector('link[href*="woocommerce"]')) return 'woocommerce';
    
    // Real estate
    if (h.includes('zillow.')) return 'zillow';
    if (h.includes('realtor.')) return 'realtor';
    if (h.includes('redfin.')) return 'redfin';
    
    // Jobs
    if (h.includes('indeed.')) return 'indeed';
    if (h.includes('linkedin.')) return 'linkedin';
    if (h.includes('glassdoor.')) return 'glassdoor';
    
    // Food / restaurants
    if (h.includes('yelp.')) return 'yelp';
    if (h.includes('doordash.')) return 'doordash';
    if (h.includes('ubereats.')) return 'ubereats';
    if (h.includes('grubhub.')) return 'grubhub';
    
    // Content
    if (h.includes('reddit.')) return 'reddit';
    if (h.includes('wikipedia.')) return 'wikipedia';
    if (h.includes('medium.')) return 'medium';
    if (h.includes('github.')) return 'github';
    if (h.includes('stackoverflow.')) return 'stackoverflow';

    // Booking/travel
    if (h.includes('booking.com')) return 'booking';
    if (h.includes('airbnb.')) return 'airbnb';
    
    // Generic CMS detection
    if (document.querySelector('meta[name="generator"][content*="WordPress"]')) return 'wordpress';
    if (document.querySelector('meta[name="generator"][content*="Squarespace"]')) return 'squarespace';
    if (document.querySelector('meta[name="generator"][content*="Wix"]')) return 'wix';
    
    return 'generic';
  }

  runPlatformAdapter() {
    const adapters = {
      'shopify': () => this.extractShopify(),
      'woocommerce': () => this.extractWooCommerce(),
      'amazon': () => this.extractAmazon(),
      'ebay': () => this.extractEbay(),
      'zillow': () => this.extractGenericListings(), // Zillow is heavily JS-rendered
      'indeed': () => this.extractIndeed(),
      'yelp': () => this.extractYelp(),
      'github': () => this.extractGitHub(),
      'reddit': () => this.extractReddit(),
      'wikipedia': () => this.extractWikipedia(),
    };

    const adapter = adapters[this.siteType];
    if (adapter) {
      try {
        return adapter();
      } catch (e) {
        console.warn('[AskEngine] Adapter failed, falling back:', e);
        return [];
      }
    }
    return [];
  }

  // --- Shopify ---
  extractShopify() {
    const items = [];
    
    // Shopify product JSON (often embedded in page)
    const productJson = document.querySelector('script[data-product-json], script#ProductJson');
    if (productJson) {
      try {
        const product = JSON.parse(productJson.textContent);
        items.push(this.normalizeItem({
          name: product.title,
          price: product.price ? (product.price / 100).toFixed(2) : null,
          description: product.description?.replace(/<[^>]*>/g, ''),
          image: product.featured_image,
          variants: product.variants?.map(v => v.title).join(', '),
          available: product.available,
          vendor: product.vendor,
          type: product.type
        }));
      } catch (e) {}
    }

    // Collection/listing pages - product cards
    document.querySelectorAll('.product-card, .grid-product, .product-item, .product, [class*="ProductCard"], [class*="product-card"]').forEach(card => {
      items.push(this.extractCardData(card));
    });

    return items.filter(Boolean);
  }

  // --- WooCommerce ---
  extractWooCommerce() {
    const items = [];
    document.querySelectorAll('.product, .type-product, li.product').forEach(card => {
      const name = card.querySelector('.woocommerce-loop-product__title, .product-title, h2, h3')?.textContent?.trim();
      const price = card.querySelector('.price, .woocommerce-Price-amount')?.textContent?.trim();
      const image = card.querySelector('img')?.src;
      const link = card.querySelector('a[href]')?.href;
      const rating = card.querySelector('.star-rating')?.getAttribute('aria-label');
      
      if (name) {
        items.push(this.normalizeItem({ name, price, image, link, rating }));
      }
    });
    return items;
  }

  // --- Amazon ---
  extractAmazon() {
    const items = [];
    
    // Search results
    document.querySelectorAll('[data-component-type="s-search-result"], .s-result-item').forEach(card => {
      const name = card.querySelector('h2 a span, .a-text-normal')?.textContent?.trim();
      const price = card.querySelector('.a-price .a-offscreen')?.textContent?.trim();
      const rating = card.querySelector('.a-icon-alt')?.textContent?.trim();
      const image = card.querySelector('.s-image, img[data-image-latency]')?.src;
      const link = card.querySelector('h2 a')?.href;
      const reviews = card.querySelector('.a-size-base.s-underline-text')?.textContent?.trim();
      const prime = !!card.querySelector('[aria-label*="Prime"], .a-icon-prime');
      
      if (name) {
        items.push(this.normalizeItem({ name, price, rating, image, link, reviews, prime: prime ? 'Prime eligible' : null }));
      }
    });

    // Product detail page
    if (items.length === 0) {
      const name = document.querySelector('#productTitle')?.textContent?.trim();
      const price = document.querySelector('#priceblock_ourprice, #priceblock_dealprice, .a-price .a-offscreen')?.textContent?.trim();
      if (name) {
        items.push(this.normalizeItem({
          name,
          price,
          rating: document.querySelector('#acrPopover')?.getAttribute('title'),
          image: document.querySelector('#landingImage, #imgBlkFront')?.src,
          description: document.querySelector('#feature-bullets')?.textContent?.trim()?.substring(0, 500)
        }));
      }
    }
    return items;
  }

  // --- eBay ---
  extractEbay() {
    const items = [];
    document.querySelectorAll('.s-item, .srp-results .s-item__wrapper').forEach(card => {
      const name = card.querySelector('.s-item__title')?.textContent?.trim();
      const price = card.querySelector('.s-item__price')?.textContent?.trim();
      const image = card.querySelector('.s-item__image img')?.src;
      const link = card.querySelector('.s-item__link')?.href;
      const condition = card.querySelector('.SECONDARY_INFO')?.textContent?.trim();
      const shipping = card.querySelector('.s-item__shipping')?.textContent?.trim();
      
      if (name && name !== 'Shop on eBay') {
        items.push(this.normalizeItem({ name, price, image, link, condition, shipping }));
      }
    });
    return items;
  }

  // --- Indeed ---
  extractIndeed() {
    const items = [];
    document.querySelectorAll('.job_seen_beacon, .jobsearch-ResultsList > li, [class*="jobCard"]').forEach(card => {
      const title = card.querySelector('.jobTitle, [class*="jobTitle"] a, h2')?.textContent?.trim();
      const company = card.querySelector('.companyName, [data-testid="company-name"]')?.textContent?.trim();
      const location = card.querySelector('.companyLocation, [data-testid="text-location"]')?.textContent?.trim();
      const salary = card.querySelector('.salary-snippet, [class*="salary"]')?.textContent?.trim();
      const snippet = card.querySelector('.job-snippet, [class*="job-snippet"]')?.textContent?.trim();
      const link = card.querySelector('a[href*="/viewjob"], a[data-jk]')?.href;
      
      if (title) {
        items.push(this.normalizeItem({ name: title, company, location, salary, description: snippet, link }));
      }
    });
    return items;
  }

  // --- Yelp ---
  extractYelp() {
    const items = [];
    document.querySelectorAll('[class*="businessName"], [data-testid="serp-ia-card"]').forEach((card) => {
      const container = card.closest('li, [class*="container"]') || card.parentElement?.parentElement;
      if (!container) return;
      
      const name = card.textContent?.trim();
      const rating = container.querySelector('[aria-label*="star rating"]')?.getAttribute('aria-label');
      const category = container.querySelector('[class*="categoryList"]')?.textContent?.trim();
      const priceRange = container.querySelector('[class*="priceRange"]')?.textContent?.trim();
      const neighborhood = container.querySelector('[class*="neighborhood"]')?.textContent?.trim();
      
      if (name) {
        items.push(this.normalizeItem({ name, rating, category, priceRange, neighborhood }));
      }
    });
    return items;
  }

  // --- GitHub ---
  extractGitHub() {
    const items = [];
    // Repository list (search results or user profile)
    document.querySelectorAll('.repo-list-item, [class*="Box-row"], article.border').forEach(card => {
      const name = card.querySelector('a[href*="/"]')?.textContent?.trim();
      const description = card.querySelector('p, [class*="description"]')?.textContent?.trim();
      const language = card.querySelector('[itemprop="programmingLanguage"], [class*="language"]')?.textContent?.trim();
      const stars = card.querySelector('[href*="stargazers"], [class*="star"]')?.textContent?.trim();
      const link = card.querySelector('a[href*="/"]')?.href;
      
      if (name) {
        items.push(this.normalizeItem({ name, description, language, stars, link }));
      }
    });
    return items;
  }

  // --- Reddit ---
  extractReddit() {
    const items = [];
    // New and old Reddit
    document.querySelectorAll('shreddit-post, .Post, .thing.link, [data-testid="post-container"]').forEach(post => {
      const title = post.querySelector('a[slot="title"], [data-testid="post-title"], a.title, h3')?.textContent?.trim() ||
                    post.getAttribute('post-title');
      const author = post.querySelector('[data-testid="post-author"], .author, [slot="authorName"]')?.textContent?.trim() ||
                     post.getAttribute('author');
      const score = post.querySelector('[data-testid="post-score"], .score, [slot="post-score"]')?.textContent?.trim() ||
                    post.getAttribute('score');
      const subreddit = post.querySelector('[data-testid="subreddit-name"], .subreddit, [slot="subreddit-info"]')?.textContent?.trim();
      const comments = post.querySelector('[data-testid="post-comment-count"], .comments, [slot="comment-count"]')?.textContent?.trim() ||
                       post.getAttribute('comment-count');
      
      if (title) {
        items.push(this.normalizeItem({ name: title, author, score, subreddit, comments }));
      }
    });
    return items;
  }

  // --- Wikipedia ---
  extractWikipedia() {
    const items = [];
    const content = document.querySelector('#mw-content-text .mw-parser-output');
    if (!content) return items;

    // Extract article sections
    const sections = [];
    let currentSection = { heading: 'Introduction', text: '' };
    
    for (const child of content.children) {
      if (['H2', 'H3'].includes(child.tagName)) {
        if (currentSection.text.trim()) {
          sections.push({ ...currentSection });
        }
        currentSection = {
          heading: child.textContent?.replace('[edit]', '').trim(),
          text: ''
        };
      } else if (child.tagName === 'P') {
        currentSection.text += child.textContent?.trim() + ' ';
      }
    }
    if (currentSection.text.trim()) sections.push(currentSection);

    // Infobox
    const infobox = {};
    document.querySelectorAll('.infobox th, .infobox td').forEach((cell, i, cells) => {
      if (cell.tagName === 'TH' && cells[i + 1]?.tagName === 'TD') {
        infobox[cell.textContent.trim()] = cells[i + 1].textContent.trim().substring(0, 200);
      }
    });

    items.push(this.normalizeItem({
      name: document.querySelector('#firstHeading')?.textContent?.trim(),
      description: sections.slice(0, 3).map(s => `${s.heading}: ${s.text.substring(0, 300)}`).join('\n'),
      infobox: Object.keys(infobox).length > 0 ? JSON.stringify(infobox) : null
    }));

    return items;
  }

  // =========================================================================
  // LAYER 4: SEMANTIC HTML HEURISTICS
  // =========================================================================

  extractSemanticItems() {
    const items = [];
    
    // Look for common product/card patterns
    const cardSelectors = [
      // Semantic
      'article', '[role="article"]', '[role="listitem"]',
      // Common card patterns
      '.card', '.item', '.listing', '.result', '.product',
      '[class*="card"]', '[class*="Card"]',
      '[class*="item"]', '[class*="Item"]',
      '[class*="listing"]', '[class*="Listing"]',
      '[class*="result"]', '[class*="Result"]',
      '[class*="product"]', '[class*="Product"]',
      '[class*="tile"]', '[class*="Tile"]',
    ];

    // Find the selector that returns the most consistent-looking set of items
    let bestSelector = null;
    let bestCount = 0;

    for (const selector of cardSelectors) {
      try {
        const elements = document.querySelectorAll(selector);
        // Filter to only visible elements with some content
        const visible = Array.from(elements).filter(el => {
          const rect = el.getBoundingClientRect();
          return rect.width > 50 && rect.height > 50 && el.textContent.trim().length > 10;
        });
        
        // Prefer selectors that return 3-50 similar items (typical listing pages)
        if (visible.length >= 3 && visible.length <= 100 && visible.length > bestCount) {
          // Check structural consistency — siblings should look alike
          if (this.areStructurallySimilar(visible.slice(0, 5))) {
            bestSelector = selector;
            bestCount = visible.length;
          }
        }
      } catch (e) {}
    }

    if (bestSelector) {
      document.querySelectorAll(bestSelector).forEach(card => {
        const item = this.extractCardData(card);
        if (item) items.push(item);
      });
    }

    return items;
  }

  areStructurallySimilar(elements) {
    if (elements.length < 2) return true;
    
    // Compare child tag signatures
    const signatures = elements.map(el => {
      return Array.from(el.children).map(c => c.tagName).join(',');
    });
    
    // At least 60% should share the same structure
    const freq = {};
    signatures.forEach(s => { freq[s] = (freq[s] || 0) + 1; });
    const maxFreq = Math.max(...Object.values(freq));
    return maxFreq / signatures.length >= 0.6;
  }

  extractCardData(card) {
    if (!card) return null;
    
    const data = {};
    
    // Name: first heading, or first link text, or first strong text
    data.name = card.querySelector('h1, h2, h3, h4, h5, h6')?.textContent?.trim() ||
                card.querySelector('a[href]')?.textContent?.trim() ||
                card.querySelector('strong, b, [class*="title"], [class*="name"]')?.textContent?.trim();
    
    if (!data.name || data.name.length < 2) return null;
    
    // Price: look for currency patterns
    const priceEl = card.querySelector('[class*="price"], [class*="Price"], [data-price], .amount');
    if (priceEl) {
      data.price = priceEl.textContent?.trim();
    } else {
      // Try to find price via regex in card text
      const priceMatch = card.textContent.match(/[\$€£¥₹]\s?\d[\d,]*\.?\d*/);
      if (priceMatch) data.price = priceMatch[0];
    }
    
    // Image
    const img = card.querySelector('img[src], img[data-src], img[data-lazy-src]');
    if (img) {
      data.image = img.src || img.dataset.src || img.dataset.lazySrc;
    }
    
    // Link
    const link = card.querySelector('a[href]');
    if (link) data.link = link.href;
    
    // Rating
    const ratingEl = card.querySelector('[class*="rating"], [class*="Rating"], [class*="star"], [aria-label*="star"], [aria-label*="rating"]');
    if (ratingEl) {
      data.rating = ratingEl.getAttribute('aria-label') || ratingEl.textContent?.trim();
    }
    
    // Description: paragraph text or secondary text
    const desc = card.querySelector('p, [class*="description"], [class*="desc"], [class*="snippet"], [class*="subtitle"]');
    if (desc) {
      data.description = desc.textContent?.trim()?.substring(0, 300);
    }
    
    // Additional attributes: anything else that looks like metadata
    card.querySelectorAll('[class*="tag"], [class*="badge"], [class*="label"], [class*="category"]').forEach(tag => {
      const text = tag.textContent?.trim();
      if (text && text.length < 50) {
        if (!data.tags) data.tags = [];
        data.tags.push(text);
      }
    });
    
    // Location (for listings, jobs, restaurants)
    const locationEl = card.querySelector('[class*="location"], [class*="Location"], [class*="address"], address');
    if (locationEl) {
      data.location = locationEl.textContent?.trim();
    }

    return this.normalizeItem(data);
  }

  // =========================================================================
  // LAYER 5: GENERIC LISTING DETECTION (AI-like heuristic)
  // =========================================================================

  extractGenericListings() {
    const items = [];
    
    // Strategy: Find repeated sibling elements that look like a list
    const containers = document.querySelectorAll('ul, ol, [role="list"], .grid, [class*="grid"], [class*="Grid"], [class*="list"], [class*="List"], main, [role="main"]');
    
    let bestContainer = null;
    let bestChildCount = 0;
    
    containers.forEach(container => {
      const children = Array.from(container.children).filter(c => {
        const rect = c.getBoundingClientRect();
        return rect.width > 50 && rect.height > 30 && c.textContent.trim().length > 10;
      });
      
      if (children.length >= 3 && children.length > bestChildCount && this.areStructurallySimilar(children.slice(0, 8))) {
        bestContainer = container;
        bestChildCount = children.length;
      }
    });
    
    if (bestContainer) {
      Array.from(bestContainer.children).forEach(child => {
        const item = this.extractCardData(child);
        if (item) items.push(item);
      });
    }
    
    // Fallback: try extracting from tables
    if (items.length < 3) {
      const tableItems = this.extractFromTables();
      items.push(...tableItems);
    }
    
    return items;
  }

  extractFromTables() {
    const items = [];
    
    document.querySelectorAll('table').forEach(table => {
      const headers = Array.from(table.querySelectorAll('thead th, tr:first-child th')).map(th => th.textContent?.trim());
      if (headers.length < 2) return;
      
      table.querySelectorAll('tbody tr, tr:not(:first-child)').forEach(row => {
        const cells = Array.from(row.querySelectorAll('td'));
        if (cells.length < 2) return;
        
        const rowData = {};
        cells.forEach((cell, i) => {
          const key = headers[i] || `column_${i}`;
          rowData[key] = cell.textContent?.trim()?.substring(0, 300);
        });
        
        // Use first column as name
        rowData.name = rowData.name || Object.values(rowData)[0];
        items.push(this.normalizeItem(rowData));
      });
    });
    
    return items;
  }

  // =========================================================================
  // LAYER 6: MAIN CONTENT EXTRACTION (for articles, docs, etc.)
  // =========================================================================

  extractMainContent() {
    // Priority: main > article > #content > .content > largest text block
    const mainEl = document.querySelector('main, [role="main"], article, #content, .content, .post-content, .entry-content, .article-body');
    
    if (mainEl) {
      return this.getCleanText(mainEl, 3000);
    }
    
    // Fallback: find the element with the most paragraph text
    let bestBlock = null;
    let bestLength = 0;
    
    document.querySelectorAll('div, section').forEach(el => {
      const paragraphs = el.querySelectorAll('p');
      const totalText = Array.from(paragraphs).reduce((sum, p) => sum + p.textContent.length, 0);
      if (totalText > bestLength && totalText > 200) {
        bestBlock = el;
        bestLength = totalText;
      }
    });
    
    if (bestBlock) {
      return this.getCleanText(bestBlock, 3000);
    }
    
    // Last resort: body text
    return this.getCleanText(document.body, 2000);
  }

  getCleanText(element, maxLength) {
    // Remove script, style, nav, header, footer, aside content
    const clone = element.cloneNode(true);
    clone.querySelectorAll('script, style, nav, header, footer, aside, [role="navigation"], [role="banner"], [role="complementary"], .nav, .header, .footer, .sidebar, .menu, .ad, .advertisement, [class*="cookie"], [class*="consent"], [class*="popup"], [class*="modal"]').forEach(el => el.remove());
    
    const text = clone.textContent
      ?.replace(/\s+/g, ' ')
      ?.trim()
      ?.substring(0, maxLength);
    
    return text || null;
  }

  // =========================================================================
  // LAYER 7: NAVIGATION EXTRACTION
  // =========================================================================

  extractNavigation() {
    const nav = {};
    
    // Breadcrumbs
    const breadcrumbs = document.querySelector('[class*="breadcrumb"], [class*="Breadcrumb"], [aria-label="breadcrumb"], nav[class*="bread"]');
    if (breadcrumbs) {
      nav.breadcrumbs = Array.from(breadcrumbs.querySelectorAll('a, span, li')).map(el => el.textContent?.trim()).filter(Boolean);
    }
    
    // Category / section
    const h1 = document.querySelector('h1');
    if (h1) nav.pageHeading = h1.textContent?.trim();
    
    // Active filters
    const activeFilters = [];
    document.querySelectorAll('[class*="active-filter"], [class*="activeFilter"], [class*="selected-filter"], .chip, [class*="applied"]').forEach(el => {
      const text = el.textContent?.trim();
      if (text && text.length < 50) activeFilters.push(text);
    });
    if (activeFilters.length > 0) nav.activeFilters = activeFilters;
    
    // Pagination info
    const pagination = document.querySelector('[class*="pagination"], [class*="Pagination"], [role="navigation"][aria-label*="page"]');
    if (pagination) {
      const current = pagination.querySelector('.active, [aria-current="page"], .current')?.textContent?.trim();
      nav.currentPage = current;
    }
    
    // Result count
    const resultCount = document.body.textContent.match(/(\d[\d,]*)\s*(?:results?|items?|products?|listings?|found)/i);
    if (resultCount) nav.totalResults = resultCount[0];
    
    return Object.keys(nav).length > 0 ? nav : null;
  }

  // =========================================================================
  // SHADOW DOM SUPPORT
  // =========================================================================

  extractFromShadowDOM(root, depth = 0) {
    if (depth > 5) return [];
    const items = [];
    
    const walk = (element) => {
      if (element.shadowRoot) {
        // Found a shadow root — extract from it
        const shadowItems = this.extractSemanticItemsFromRoot(element.shadowRoot);
        items.push(...shadowItems);
        // Recurse into shadow DOM children
        element.shadowRoot.querySelectorAll('*').forEach(child => {
          if (child.shadowRoot) {
            items.push(...this.extractFromShadowDOM(child, depth + 1));
          }
        });
      }
    };
    
    if (root.querySelectorAll) {
      root.querySelectorAll('*').forEach(walk);
    }
    
    return items;
  }

  extractSemanticItemsFromRoot(root) {
    const items = [];
    root.querySelectorAll('article, [role="article"], .card, .item, .product, [class*="card"]').forEach(card => {
      const item = this.extractCardData(card);
      if (item) items.push(item);
    });
    return items;
  }

  // =========================================================================
  // SPA / DYNAMIC CONTENT SUPPORT
  // =========================================================================

  watchForDynamicContent(callback) {
    // Debounced mutation observer for SPAs
    if (this.observer) this.observer.disconnect();
    
    this.observer = new MutationObserver((mutations) => {
      // Only re-extract if significant DOM changes occurred
      const significant = mutations.some(m => {
        return m.addedNodes.length > 0 && 
               Array.from(m.addedNodes).some(n => 
                 n.nodeType === 1 && n.textContent?.trim().length > 50
               );
      });
      
      if (significant) {
        clearTimeout(this.mutationDebounce);
        this.mutationDebounce = setTimeout(() => {
          callback();
        }, 800); // Wait for renders to settle
      }
    });
    
    this.observer.observe(document.body, {
      childList: true,
      subtree: true
    });
  }

  // Watch for URL changes (SPA navigation)
  watchForNavigation(callback) {
    let lastUrl = window.location.href;
    
    // pushState/replaceState interception
    const originalPushState = history.pushState;
    const originalReplaceState = history.replaceState;
    
    history.pushState = function(...args) {
      originalPushState.apply(this, args);
      if (window.location.href !== lastUrl) {
        lastUrl = window.location.href;
        setTimeout(callback, 500);
      }
    };
    
    history.replaceState = function(...args) {
      originalReplaceState.apply(this, args);
      if (window.location.href !== lastUrl) {
        lastUrl = window.location.href;
        setTimeout(callback, 500);
      }
    };
    
    // popstate for back/forward
    window.addEventListener('popstate', () => {
      if (window.location.href !== lastUrl) {
        lastUrl = window.location.href;
        setTimeout(callback, 500);
      }
    });
  }

  // =========================================================================
  // UTILITIES
  // =========================================================================

  normalizeItem(data) {
    if (!data || !data.name) return null;
    
    // Clean up whitespace
    for (const key of Object.keys(data)) {
      if (typeof data[key] === 'string') {
        data[key] = data[key].replace(/\s+/g, ' ').trim();
        if (data[key].length === 0) delete data[key];
      }
    }
    
    // Ensure name isn't too long
    if (data.name && data.name.length > 200) {
      data.name = data.name.substring(0, 200) + '...';
    }
    
    return data;
  }

  mergeAndDedupe(existing, incoming) {
    const seen = new Set(existing.map(i => i?.name?.toLowerCase()?.substring(0, 50)));
    const merged = [...existing];
    
    for (const item of incoming) {
      if (item && item.name) {
        const key = item.name.toLowerCase().substring(0, 50);
        if (!seen.has(key)) {
          seen.add(key);
          merged.push(item);
        }
      }
    }
    
    return merged.filter(Boolean);
  }

  // Compress extraction to fit within token limits for API calls
  compressForAPI(maxTokenEstimate = 4000) {
    if (!this.extractedData) return null;
    
    const data = this.extractedData;
    const compressed = {
      url: data.url,
      site: data.siteType,
      title: data.title,
    };
    
    // Add meta context
    if (data.meta?.description) compressed.pageDescription = data.meta.description;
    if (data.navigation?.pageHeading) compressed.heading = data.navigation.pageHeading;
    if (data.navigation?.breadcrumbs) compressed.breadcrumbs = data.navigation.breadcrumbs.join(' > ');
    if (data.navigation?.totalResults) compressed.totalResults = data.navigation.totalResults;
    
    // Add items (truncated to fit)
    if (data.items.length > 0) {
      compressed.items = data.items.slice(0, 30).map(item => {
        // Only keep the most important fields per item
        const slim = { name: item.name };
        if (item.price) slim.price = item.price;
        if (item.rating) slim.rating = item.rating;
        if (item.description) slim.desc = item.description.substring(0, 150);
        if (item.location) slim.location = item.location;
        if (item.category) slim.category = item.category;
        if (item.company) slim.company = item.company;
        if (item.salary) slim.salary = item.salary;
        if (item.link) slim.link = item.link;
        if (item.tags) slim.tags = item.tags.slice(0, 3);
        return slim;
      });
      compressed.itemCount = data.items.length;
    }
    
    // Add structured data summary
    if (data.structured && data.structured.length > 0) {
      compressed.structuredTypes = data.structured.map(s => s.type);
      // Include first structured item's data
      const firstStructured = data.structured[0]?.data;
      if (firstStructured) {
        compressed.structuredData = Object.fromEntries(
          Object.entries(firstStructured).slice(0, 10)
        );
      }
    }
    
    // Add main content if no items (article/doc page)
    if (data.items.length === 0 && data.content) {
      compressed.content = data.content.substring(0, 2000);
    }
    
    // Rough token estimate check
    const jsonStr = JSON.stringify(compressed);
    if (jsonStr.length > maxTokenEstimate * 4) {
      // Trim content and items
      if (compressed.content) {
        compressed.content = compressed.content.substring(0, 1000);
      }
      if (compressed.items) {
        compressed.items = compressed.items.slice(0, 15);
      }
    }
    
    return compressed;
  }

  // Destroy watchers
  destroy() {
    if (this.observer) {
      this.observer.disconnect();
      this.observer = null;
    }
  }
}

// Export for content script
// Publish the class as a property of `window`; environments that have no
// `window` global skip this entirely.
if (typeof window !== 'undefined') window.AskEngineExtractor = AskEngineExtractor;
