from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from typing import List, Dict, Any
from urllib.parse import urljoin
import logging
import time
import re

from browser_utils import create_browser

# Module-level logger, named after this module so handlers can filter on it.
logger = logging.getLogger(__name__)

# Site root that relative product links and image sources are resolved against.
BASE_URL = "https://febest.com.au"

# CSS selectors whose presence signals that listing markup has been rendered.
_LISTING_READY_CSS = (
    ".product-item",
    ".product-item-info",
    ".item",
    ".products",
    ".category-products",
    "[class*='product']",
)

# Container selectors, tried in order; the first one that matches anything wins.
_CONTAINER_SELECTORS = (
    '.product-item-info',                # Standard Magento product item
    '.product-item',                     # Alternative Magento selector
    '.product-items .product-item',      # Nested product items
    '.products .product-item',           # Products container items
    '.products-grid .product-item',      # Grid layout items
    '.category-products .product-item',  # Category page items
    '.item.product',                     # Combined item and product classes
    '.product',                          # Generic product class
    '.item',                             # Generic item class
    '.products .item',                   # Products container items
    '.product-items .item',              # Product items container
    '.search-results .item',             # Search results
    '.catalog-product-view',             # Single product view
    '.products-list .item',              # List layout items
    '[class*="product-item"]',           # Any class containing product-item
    '[class*="product"]',                # Any class containing product
    'div[data-product-id]',              # Data attribute selectors
    'li[data-product-id]',
    'article[data-product-id]',
)

# Link selectors, most specific first.
_LINK_SELECTORS = (
    'a.product-item-link',      # Standard Magento product link
    'a[href*=".html"]',         # Any link containing .html
    'h2 a[href*=".html"]',      # H2 with product link
    'h3 a[href*=".html"]',      # H3 with product link
    '.product-name a',          # Product name link
    '.product-item-name a',     # Product item name link
    '.title a',                 # Title link
    '.name a',                  # Name link
    'a[href]',                  # Any link
)

# Name selectors, most specific first.
_NAME_SELECTORS = (
    '.product-item-name a',     # Magento standard
    '.product-item-name',       # Magento container
    '.product-name a',          # Product name link
    '.product-name',            # Product name container
    'h2.product-name',          # H2 product name
    'h3.product-name',          # H3 product name
    'h2 a',                     # H2 with link
    'h3 a',                     # H3 with link
    'h4 a',                     # H4 with link
    '.product-item-link',       # Product item link
    '.title a',                 # Title link
    '.name a',                  # Name link
    '.product-title',           # Product title
    'a[class*="name"]',         # Any link with name in class
    'a[class*="title"]',        # Any link with title in class
    '.item-title',              # Item title
    '.item-name',               # Item name
)

# Image selectors, most specific first.
_IMG_SELECTORS = (
    '.product-image-main img',  # Main product image
    '.product-item-photo img',  # Product item photo
    '.product-image img',       # Product image
    '.product-photo img',       # Product photo
    '.item-image img',          # Item image
    '.product-item-img img',    # Product item image
    'img.product-image-photo',  # Product image photo class
    'img[class*="product"]',    # Any img with product in class
    '.image img',               # Generic image container
    '.photo img',               # Photo container
    'img[alt*="product"]',      # Image with product in alt
    'img[src*="product"]',      # Image with product in src
    'img',                      # Any image
)

# Attributes that may carry an image source, in lookup order.
_IMG_SOURCE_ATTRS = ('src', 'data-src', 'data-lazy', 'data-original', 'data-srcset', 'data-image')

# Substrings that mark an image URL as a stand-in rather than a real product photo.
_PLACEHOLDER_MARKERS = ('default', 'placeholder', 'no-image', 'noimage')

# Price selectors, most specific first.
_PRICE_SELECTORS = (
    '.price-final_price .price',
    '.price-box .price',
    '.regular-price .price',
    '.special-price .price',
    '.price',
    '.regular-price',
    '.special-price',
    '.price-final_price',
    '[class*="price"]',
    '.cost',
    '.amount',
)

_WHITESPACE_RE = re.compile(r'\s+')
_UI_NOISE_RE = re.compile(r'(add to cart|quick view|compare|wishlist)', flags=re.IGNORECASE)


def _wait_for_listing(browser) -> None:
    """Wait for the document to finish loading and for listing markup to appear.

    Any failure (including a timeout) is logged as a warning and swallowed so
    the caller can still try to parse whatever was rendered.
    """
    try:
        WebDriverWait(browser, 20).until(
            lambda driver: driver.execute_script("return document.readyState") == "complete"
        )

        # Built here rather than at import time so the module-level constants stay plain strings.
        conditions = [
            EC.presence_of_element_located((By.CSS_SELECTOR, css))
            for css in _LISTING_READY_CSS
        ]
        conditions.append(EC.presence_of_element_located((By.TAG_NAME, "main")))
        WebDriverWait(browser, 15).until(EC.any_of(*conditions))
    except Exception as e:
        logger.warning(f"Timeout waiting for product elements: {str(e)}, proceeding anyway...")


def _dump_debug_page(html_content: str) -> None:
    """Best-effort write of the page HTML to debug_no_products.html."""
    try:
        with open('debug_no_products.html', 'w', encoding='utf-8') as f:
            f.write(html_content)
        logger.info("Page content saved to debug_no_products.html")
    except (OSError, UnicodeError) as save_error:
        # Never let a failed debug dump mask the parse result.
        logger.error(f"Failed to save debug file: {str(save_error)}")


def _find_product_containers(soup) -> list:
    """Return the elements to treat as product containers, or an empty list.

    The first selector in _CONTAINER_SELECTORS that matches anything supplies
    the result. If none match, parents of product-looking ``.html`` links are
    used instead (capped at 20).
    """
    for selector in _CONTAINER_SELECTORS:
        containers = soup.select(selector)
        if containers:
            logger.info(f"Found {len(containers)} product containers using selector: {selector}")
            return containers

    logger.warning("No product containers found with standard selectors, trying alternative approach...")

    potential_products = []
    for link in soup.find_all('a', href=True):
        href = link.get('href', '')
        lowered = href.lower()
        if (href.endswith('.html') and
                any(keyword in lowered for keyword in ('product', 'item', 'part')) and
                not any(skip in lowered for skip in ('category', 'cms', 'contact', 'about'))):
            potential_products.append(link.parent if link.parent else link)

    if potential_products:
        potential_products = potential_products[:20]  # Limit to avoid too many
        logger.info(f"Found {len(potential_products)} potential products from links")
    return potential_products


def _find_product_link(container):
    """Return ``(link_element, href)`` for the container's product link, or ``(None, "")``."""
    for selector in _LINK_SELECTORS:
        for candidate in container.select(selector):
            href = candidate.get('href', '').strip()
            if href and (href.endswith('.html') or 'product' in href.lower() or '/item/' in href.lower()):
                return candidate, href

    # The container itself may be the anchor (link-derived containers).
    if container.name == 'a':
        href = container.get('href', '').strip()
        if href and (href.endswith('.html') or 'product' in href.lower()):
            return container, href

    return None, ""


def _extract_name(container, link) -> str:
    """Return the cleaned product name, falling back to the link text; "" if none is usable."""
    name = ""
    for selector in _NAME_SELECTORS:
        name_elem = container.select_one(selector)
        if name_elem:
            name_text = name_elem.get_text(strip=True)
            if name_text and len(name_text) > 2 and not name_text.lower().startswith(('add to', 'quick view')):
                name = name_text
                break

    if not name:
        link_text = link.get_text(strip=True)
        if link_text and len(link_text) > 2:
            name = link_text

    if name:
        name = _WHITESPACE_RE.sub(' ', name).strip()
        # Remove common unwanted text
        name = _UI_NOISE_RE.sub('', name).strip()
    return name


def _absolute_url(src: str) -> str:
    """Resolve an image source against BASE_URL; protocol-relative sources get https."""
    if src.startswith('//'):
        return 'https:' + src
    if src.startswith('http'):
        return src
    return urljoin(BASE_URL, src)


def _first_usable_image(root) -> str:
    """Return the first non-placeholder image URL found under ``root``, or ""."""
    for selector in _IMG_SELECTORS:
        img_elem = root.select_one(selector)
        if not img_elem:
            continue
        for attr in _IMG_SOURCE_ATTRS:
            raw = img_elem.get(attr)
            if not raw:
                continue
            src = raw.strip()
            if not src or src.startswith('data:'):
                continue
            if attr == 'data-srcset':
                # Keep only the URL of the first candidate, dropping any "2x"/"300w" descriptor.
                first_candidate = src.split(',')[0].split()
                src = first_candidate[0] if first_candidate else ''
                if not src:
                    continue
            candidate = _absolute_url(src)
            # A placeholder is rejected outright so it can never be returned as the result.
            if any(marker in candidate.lower() for marker in _PLACEHOLDER_MARKERS):
                continue
            return candidate
    return ""


def _extract_image(container) -> str:
    """Return the product image URL from the container, else from its parent, else ""."""
    img_url = _first_usable_image(container)
    if not img_url:
        parent = container.parent
        if parent:
            # NOTE(review): the parent may also hold sibling products, so this can
            # pick up a neighbour's image; kept because link-derived containers rely on it.
            img_url = _first_usable_image(parent)
    return img_url


def _extract_price(container) -> str:
    """Return the first price-like text that contains a digit, or ""."""
    for selector in _PRICE_SELECTORS:
        price_elem = container.select_one(selector)
        if price_elem:
            price_text = price_elem.get_text(strip=True)
            if price_text and any(char.isdigit() for char in price_text):
                return price_text
    return ""


def _parse_container(container, position: int) -> Dict[str, Any]:
    """Build the product dict for one container; return {} when link or name is missing."""
    link, href = _find_product_link(container)
    if link is None:
        logger.debug(f"No valid product link found in container {position}")
        return {}

    # urljoin handles root-relative, relative and protocol-relative hrefs alike.
    if href.startswith(('http://', 'https://')):
        product_url = href
    else:
        product_url = urljoin(BASE_URL, href)

    name = _extract_name(container, link)
    if not name:
        logger.debug(f"Skipped product - missing essential data. Name: {bool(name)}, URL: {bool(product_url)}")
        return {}

    return {
        "name": name,
        "url": product_url,  # Full URL, not the slug
        "img": _extract_image(container),
        "price": _extract_price(container),
        "description": extract_description(container, name),
    }


def parse_products_with_browser(url: str, max_products: int = 30) -> List[Dict[str, Any]]:
    """
    Parse products from a page using browser automation with enhanced Magento support

    Opens ``url`` in a browser from ``create_browser()``, waits for the page,
    scrolls down and back up to trigger lazy content, then parses the rendered
    HTML with BeautifulSoup.

    Args:
        url: Page to load.
        max_products: Maximum number of containers to process (default 30;
            negative values are treated as 0).

    Returns:
        A list of dicts with keys "name", "url" (absolute), "img", "price" and
        "description". The list is empty when the browser cannot be created,
        nothing is found, or an unexpected error occurs. Errors are logged,
        not raised.

    Side effects:
        Writes debug_no_products.html to the working directory when no product
        could be extracted. The browser is always quit before returning.
    """
    products: List[Dict[str, Any]] = []
    browser = None

    try:
        logger.info(f"Starting to parse products from: {url}")
        browser = create_browser()
        if not browser:
            logger.error("Failed to create browser instance")
            return products

        logger.info("Loading page...")
        browser.get(url)
        _wait_for_listing(browser)

        # Scroll to the bottom and back so lazily loaded content gets a chance to render.
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        browser.execute_script("window.scrollTo(0, 0);")
        time.sleep(2)

        html_content = browser.page_source
        soup = BeautifulSoup(html_content, 'html.parser')

        logger.info(f"Page loaded, HTML content length: {len(html_content)}")
        logger.info(f"Page title: {soup.title.string if soup.title else 'No title'}")

        product_containers = _find_product_containers(soup)
        if not product_containers:
            logger.error("No products found at all!")
            _dump_debug_page(html_content)
            return products

        batch = product_containers[:max(max_products, 0)]
        logger.info(f"Processing {len(batch)} product containers...")

        for position, container in enumerate(batch, start=1):
            try:
                logger.debug(f"Processing container {position}/{len(batch)}")
                product_data = _parse_container(container, position)
            except Exception as e:
                # One malformed container must not abort the whole page.
                logger.error(f"Error parsing product container {position}: {str(e)}")
                continue

            if product_data:
                products.append(product_data)
                logger.debug(f"Added product: {product_data['name'][:50]}...")

        logger.info(f"Successfully parsed {len(products)} products from {url}")

        if not products:
            logger.warning("No products found! Saving page content for debugging...")
            _dump_debug_page(html_content)

        return products

    except Exception as e:
        logger.error(f"Error parsing products with browser: {str(e)}")
        return []

    finally:
        if browser:
            try:
                browser.quit()
            except Exception as quit_error:
                # A failing shutdown must not replace the value being returned.
                logger.warning(f"Failed to quit browser cleanly: {str(quit_error)}")

def extract_description(container, name: str) -> str:
    """
    Find a short description for a product element.

    Four strategies are tried in order and the first hit is returned:
      1. text of a descendant matching a known description selector
         (longer than 15 characters and not starting with a UI label);
      2. text of the first ``p``/``div`` descendant longer than 20
         characters that is neither UI noise nor the product name
         (cut to 200 characters plus "...");
      3. text of the parent's match for the first three description selectors;
      4. a ``title``/``alt``/``data-*``/``aria-label`` attribute value longer
         than 20 characters that differs from the name (cut to 150 plus "...").

    Returns an empty string when nothing qualifies.
    """
    selector_chain = (
        '.product-item-description',
        '.product-description',
        '.product-summary',
        '.product-excerpt',
        '.product-short-description',
        '.item-description',
        '.description',
        '.summary',
        '.excerpt',
        '.product-info',
        '.product-details',
        '.product-content',
        '[class*="description"]',
        '[class*="summary"]',
        '[class*="excerpt"]',
        '[class*="info"]',
    )
    ui_prefixes = ('add to', 'quick view', 'compare')
    ui_phrases = ('add to cart', 'quick view', 'compare', 'wishlist')

    # 1) Dedicated description elements inside the container.
    for css in selector_chain:
        node = container.select_one(css)
        if not node:
            continue
        candidate = node.get_text(strip=True)
        if len(candidate) <= 15:
            continue
        if candidate.lower().startswith(ui_prefixes):
            continue
        return candidate

    # 2) Any reasonably long text block that is not a price/button/action area.
    text_blocks = container.select(
        'p, div:not([class*="price"]):not([class*="button"]):not([class*="action"])'
    )
    for block in text_blocks:
        body = block.get_text(strip=True)
        if len(body) <= 20:
            continue
        lowered = body.lower()
        if any(phrase in lowered for phrase in ui_phrases):
            continue
        if lowered == name.lower():
            continue
        if len(body) > 200:
            return body[:200] + '...'
        return body

    # 3) The enclosing element, limited to the three most specific selectors.
    wrapper = container.parent
    if wrapper:
        for css in selector_chain[:3]:
            node = wrapper.select_one(css)
            if node:
                candidate = node.get_text(strip=True)
                if len(candidate) > 15:
                    return candidate

    # 4) Descriptive attributes on links, images and generic wrappers.
    for node in container.find_all(['a', 'img', 'div', 'span']):
        for attr_name in ('title', 'alt', 'data-description', 'data-summary', 'aria-label'):
            value = node.get(attr_name, '')
            if value and len(value) > 20 and value.lower() != name.lower():
                if len(value) > 150:
                    return value[:150] + '...'
                return value

    return ""

def parse_product_item(product_elem, base_url: str = BASE_URL) -> Dict[str, Any]:
    """
    Parse individual product item from HTML element

    Args:
        product_elem: Parsed HTML element for one product; it must support
            ``select_one`` (CSS selectors), as BeautifulSoup tags do.
        base_url: Base that image sources are resolved against.

    Returns:
        A dict with keys "name", "url", "img", "price" and "description".
        "url" is the link's last path segment without ".html" when the href
        ends in ".html", otherwise the href as found; it is "" when no link
        exists. "name" is "Error parsing product" when no usable text is
        found. An empty dict is returned if parsing raises.
    """
    def _slug_from(href: str) -> str:
        # Strip only the trailing extension; ".html" elsewhere in the segment is kept.
        href = href.strip()
        if href.endswith('.html'):
            return href.split('/')[-1][:-len('.html')]
        return href

    try:
        name_selectors = [
            '.product-item-name',
            '.product-name',
            'h2.product-name',
            'h3.product-name',
            '.product-item-link',
            'h2 a',
            'h3 a',
            'a'
        ]

        name = ""
        href = ""

        for selector in name_selectors:
            name_elem = product_elem.select_one(selector)
            if not name_elem:
                continue
            text = name_elem.get_text(strip=True)
            if not text:
                # e.g. an image-only anchor: keep looking instead of settling on "".
                continue
            name = text
            href = name_elem.get('href') or ""
            if not href:
                # Name wrappers such as .product-item-name usually contain the
                # anchor rather than being one.
                nested_link = name_elem.select_one('a[href]')
                if nested_link:
                    href = nested_link.get('href') or ""
            break

        if not name:
            alt_selectors = ['.product-item-description', '.description', 'span', 'div']
            for selector in alt_selectors:
                elem = product_elem.select_one(selector)
                if not elem:
                    continue
                text = elem.get_text(strip=True)
                if text and len(text) > 3:
                    name = text
                    if elem.name == 'a':
                        href = elem.get('href') or ""
                    break

        if not href:
            # Last resort: the first link anywhere in the item (often the photo link).
            any_link = product_elem.select_one('a[href]')
            if any_link:
                href = any_link.get('href') or ""

        result: Dict[str, Any] = {
            "name": name or "Error parsing product",
            "url": _slug_from(href) if href else "",
        }

        img_selectors = [
            '.product-image-main img',
            '.product-item-photo img',
            '.product-image img',
            'img'
        ]

        img_url = ""
        for selector in img_selectors:
            img_elem = product_elem.select_one(selector)
            if not img_elem:
                continue
            # Lazy-loading attributes are checked after the plain src.
            for attr in ['src', 'data-src', 'data-lazy', 'data-original']:
                img_src = img_elem.get(attr)
                if img_src and img_src.strip() and not img_src.strip().startswith('data:'):
                    img_url = urljoin(base_url, img_src.strip())
                    break
            if img_url:
                break

        result["img"] = img_url

        price_selectors = [
            '.price',
            '.price-final_price',
            '.regular-price',
            '[class*="price"]'
        ]

        price = ""
        for selector in price_selectors:
            price_elem = product_elem.select_one(selector)
            if price_elem:
                price_text = price_elem.get_text(strip=True)
                # Only accept text that actually contains a number.
                if price_text and any(char.isdigit() for char in price_text):
                    price = price_text
                    break

        result["price"] = price
        result["description"] = extract_description(product_elem, result["name"])

        return result

    except Exception as e:
        logger.error(f"Error parsing product item: {str(e)}")
        return {}