#!/usr/bin/env python3
"""
Debug script for analyzing febest.com.au structure and fixing scraping issues
"""

import asyncio
import logging
import json
import time
from bs4 import BeautifulSoup
from browser_utils import create_browser
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Root logging setup: INFO and above, each record prefixed with time and level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-scoped logger shared by the analysis coroutines in this file.
logger = logging.getLogger(__name__)

# Scheme and host of the site under inspection; URL paths are appended to it.
BASE_URL = "https://febest.com.au"

async def analyze_search_page():
    """Load the "brake pad" search results page and log clues about its markup.

    The rendered HTML is written to ``debug_search_page.html``. The parsed page
    is then probed with a fixed list of CSS selectors, scanned for anchors
    whose href contains a product-like keyword, and checked for script tags
    whose text mentions one of a few dynamic-loading keywords.

    Returns:
        True when at least one product-like link was found. False when none
        were found, or when any exception was raised along the way (the
        exception is logged, not re-raised).
    """
    logger.info("=== ANALYZING SEARCH PAGE ===")

    # CSS selectors tried against the page, in this order.
    candidate_selectors = (
        ".products-grid",
        ".product-items",
        ".product-item",
        ".item",
        ".product",
        '[class*="product"]',
        ".catalog-product-view",
        ".category-products",
        ".search-results",
        ".results",
        ".listing",
        ".grid",
        'li[class*="item"]',
        'div[class*="item"]',
    )
    # Substrings that mark an href as a possible product link.
    link_keywords = (".html", "product", "item", "part")
    # Substrings that hint a script may load products dynamically.
    js_markers = ("ajax", "json", "product", "catalog", "search")

    # The object returned by create_browser() is driven through goto(),
    # content() and close() below.
    page = None
    try:
        page = await create_browser()
        target = f"{BASE_URL}/catalogsearch/result/?q=brake+pad"

        logger.info(f"Loading search URL: {target}")
        await page.goto(target, wait_until="networkidle")

        # Extra pause on top of the network-idle wait.
        await asyncio.sleep(3)

        markup = await page.content()
        dom = BeautifulSoup(markup, "html.parser")

        # Keep a copy of the HTML on disk for manual inspection.
        with open("debug_search_page.html", "w", encoding="utf-8") as dump:
            dump.write(markup)
        logger.info("Search page HTML saved to debug_search_page.html")

        logger.info("=== PAGE STRUCTURE ANALYSIS ===")

        for css in candidate_selectors:
            hits = dom.select(css)
            if not hits:
                continue
            logger.info(f"Found {len(hits)} elements with selector: {css}")
            # Per-element details are only logged for small result sets.
            if len(hits) > 5:
                continue
            for position, node in enumerate(hits[:3], start=1):
                css_classes = node.get("class", [])
                logger.info(f"  Element {position}: {node.name} with classes: {css_classes}")

        # Collect hrefs that contain any of the product-like keywords.
        hrefs = (anchor.get("href", "") for anchor in dom.find_all("a", href=True))
        candidates = [
            href
            for href in hrefs
            if any(word in href.lower() for word in link_keywords)
        ]

        logger.info(f"Found {len(candidates)} potential product links")
        if candidates:
            logger.info("Sample product links:")
            for href in candidates[:5]:
                logger.info(f"  {href}")

        # A single matching script is enough; any() stops at the first one.
        script_bodies = (
            tag.get_text() if tag.string else ""
            for tag in dom.find_all("script")
        )
        if any(
            any(marker in body.lower() for marker in js_markers)
            for body in script_bodies
        ):
            logger.info("Found JavaScript that might load products dynamically")

        return bool(candidates)

    except Exception as exc:
        logger.error(f"Error analyzing search page: {exc!s}")
        return False
    finally:
        if page:
            await page.close()

def _log_container_sample(position, node):
    """Log the tag, classes, and first two links and images of one matched element."""
    logger.info(f"  Element {position}:")
    logger.info(f"    Tag: {node.name}")
    node_classes = node.get("class", [])
    logger.info(f"    Classes: {node_classes}")

    anchors = node.find_all("a", href=True)
    if anchors:
        logger.info(f"    Links found: {len(anchors)}")
        for anchor in anchors[:2]:
            destination = anchor.get("href")
            logger.info(f"      {destination}")

    pictures = node.find_all("img")
    if pictures:
        logger.info(f"    Images found: {len(pictures)}")
        for picture in pictures[:2]:
            # Fall back to data-src, then to a placeholder, when src is absent.
            fallback = picture.get("data-src", "No src")
            shown = picture.get("src", fallback)
            logger.info(f"      {shown}")


def _log_frequent_div_classes(dom):
    """Log the ten CSS classes that occur most often on div elements."""
    logger.info("Looking for any structured content...")
    classed_divs = dom.find_all("div", class_=True)
    logger.info(f"Found {len(classed_divs)} divs with classes")

    # Tally every class name across all divs that carry a class attribute.
    tally = {}
    for div in classed_divs:
        for css_class in div.get("class", []):
            if css_class in tally:
                tally[css_class] += 1
            else:
                tally[css_class] = 1

    # Stable sort: classes with equal counts keep first-seen order.
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    logger.info("Most common CSS classes:")
    for css_class, occurrences in ranked[:10]:
        logger.info(f"  .{css_class}: {occurrences} occurrences")


async def analyze_category_page():
    """Load the ``/subaru.html`` category page and log clues about its markup.

    The rendered HTML is written to ``debug_category_page.html``. The page
    title is logged, then a fixed list of CSS selectors is tried; for each
    selector that matches, the first three elements are described. When no
    selector matches at all, the most common div class names are logged
    instead.

    Returns:
        True when at least one selector matched. False when none matched, or
        when any exception was raised along the way (the exception is logged,
        not re-raised).
    """
    logger.info("=== ANALYZING CATEGORY PAGE ===")

    # CSS selectors tried against the page, in this order.
    candidate_selectors = (
        ".products-grid",
        ".product-items",
        ".product-item",
        ".item",
        ".product",
        '[class*="product"]',
        ".category-products",
        ".listing",
        ".grid",
        'li[class*="item"]',
        'div[class*="item"]',
    )

    # The object returned by create_browser() is driven through goto(),
    # content() and close() below.
    page = None
    try:
        page = await create_browser()
        target = f"{BASE_URL}/subaru.html"

        logger.info(f"Loading category URL: {target}")
        await page.goto(target, wait_until="networkidle")

        # Extra pause on top of the network-idle wait.
        await asyncio.sleep(3)

        markup = await page.content()
        dom = BeautifulSoup(markup, "html.parser")

        # Keep a copy of the HTML on disk for manual inspection.
        with open("debug_category_page.html", "w", encoding="utf-8") as dump:
            dump.write(markup)
        logger.info("Category page HTML saved to debug_category_page.html")

        heading = dom.find("title")
        if heading:
            logger.info(f"Page title: {heading.get_text()}")

        any_selector_matched = False
        for css in candidate_selectors:
            hits = dom.select(css)
            if not hits:
                continue
            logger.info(f"Found {len(hits)} elements with selector: {css}")
            any_selector_matched = True
            for position, node in enumerate(hits[:3], start=1):
                _log_container_sample(position, node)

        if not any_selector_matched:
            logger.warning("No product containers found with standard selectors!")
            _log_frequent_div_classes(dom)

        return any_selector_matched

    except Exception as exc:
        logger.error(f"Error analyzing category page: {exc!s}")
        return False
    finally:
        if page:
            await page.close()

async def test_direct_requests():
    """Probe a few site URLs with plain HTTP GETs and log what comes back.

    For every URL the status code and Content-Type header are logged. For
    200 responses the body length is logged as well, and when the
    Content-Type mentions ``application/json`` the body is parsed and its
    size (or scalar type) is reported.

    A failure on one URL is logged and does not stop the remaining probes.
    Nothing is returned; all findings go to the module logger.
    """
    logger.info("=== TESTING DIRECT REQUESTS ===")

    # Imported here rather than at module level; only this probe uses aiohttp.
    import aiohttp

    urls_to_test = [
        f"{BASE_URL}/",
        f"{BASE_URL}/subaru.html",
        f"{BASE_URL}/catalogsearch/result/?q=brake+pad",
        f"{BASE_URL}/api/products",  # Common API endpoint
        f"{BASE_URL}/rest/V1/products",  # Magento API
    ]

    async with aiohttp.ClientSession() as session:
        for url in urls_to_test:
            try:
                logger.info(f"Testing URL: {url}")
                async with session.get(url) as response:
                    logger.info(f"  Status: {response.status}")
                    logger.info(f"  Content-Type: {response.headers.get('content-type', 'Unknown')}")

                    if response.status != 200:
                        continue

                    content = await response.text()
                    logger.info(f"  Content length: {len(content)} characters")

                    if 'application/json' not in response.headers.get('content-type', ''):
                        continue

                    # Parse the text already read instead of decoding the body
                    # a second time, and catch only genuine parse failures so
                    # cancellation and interrupts are not swallowed.
                    try:
                        json_data = json.loads(content)
                    except json.JSONDecodeError:
                        logger.info("  Invalid JSON response")
                        continue

                    if isinstance(json_data, (list, dict)):
                        logger.info(f"  JSON response with {len(json_data)} items")
                    else:
                        # Valid JSON scalars (number, string, bool, null) have
                        # no meaningful item count; report the type instead of
                        # mislabelling the payload as invalid.
                        logger.info(f"  JSON response is a scalar of type {type(json_data).__name__}")

            except Exception as e:
                # Best-effort probe: report the failure and move on to the next URL.
                logger.error(f"  Error: {str(e)}")

async def main():
    """Run the three analyses in sequence and log a short summary.

    The search-page and category-page analyses each yield a truthy/falsy
    result that is reported as SUCCESS or FAILED; the direct-request probe
    only logs. When both page analyses fail, extra error lines point to the
    saved HTML files.
    """
    logger.info("Starting febest.com.au debug analysis...")

    # Steps run one after another, never concurrently.
    search_ok = await analyze_search_page()
    category_ok = await analyze_category_page()
    await test_direct_requests()

    search_verdict = "SUCCESS" if search_ok else "FAILED"
    category_verdict = "SUCCESS" if category_ok else "FAILED"

    logger.info("=== SUMMARY ===")
    logger.info(f"Search page analysis: {search_verdict}")
    logger.info(f"Category page analysis: {category_verdict}")

    # Escalate only when neither page analysis succeeded.
    if not (search_ok or category_ok):
        for message in (
            "CRITICAL: Both search and category pages failed analysis!",
            "This suggests the site structure is different than expected.",
            "Check the saved HTML files for manual analysis.",
        ):
            logger.error(message)

    logger.info("Debug analysis complete. Check the generated HTML files for detailed inspection.")

# Script entry point: run the full analysis on a fresh event loop when this
# file is executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())