from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from typing import List, Dict, Any
import requests
from bs4 import BeautifulSoup
import logging
from datetime import datetime
import re
from urllib.parse import urljoin, quote
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
import time

# Configure logging: INFO level on the root logger, plus a module-level logger
# that the scraping helpers and endpoints below write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The FastAPI application object; every endpoint below registers on it.
app = FastAPI(
    title="Febest Auto Parts API",
    description="API for scraping auto parts from febest.com.au",
    version="2.0.0"
)

# Configure CORS: every origin, method and header is allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive policy - confirm this is intended outside of local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Site root used to build request URLs and to resolve relative links/images.
BASE_URL = "https://febest.com.au"

def create_browser():
    """
    Build a headless Chrome WebDriver configured for scraping

    The driver binary is resolved through ChromeDriverManager. Returns the
    driver on success, or None (after logging the error) when Chrome or its
    driver cannot be started, so callers must check the result.
    """
    browser_flags = (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--window-size=1920,1080",
        "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    )

    chrome_options = Options()
    for flag in browser_flags:
        chrome_options.add_argument(flag)

    try:
        driver_path = ChromeDriverManager().install()
        chrome_service = webdriver.chrome.service.Service(driver_path)
        return webdriver.Chrome(service=chrome_service, options=chrome_options)
    except Exception as e:
        logger.error(f"Failed to create browser: {str(e)}")
        return None

def analyze_page_structure(url: str) -> Dict[str, Any]:
    """
    Analyze the actual HTML structure of a page to find correct selectors

    The page is loaded in a headless browser, parsed with BeautifulSoup and
    scanned for elements that look like product containers (class names
    containing "product", "item" or "result" that hold both an image and a
    link) and for product-looking images among the first ten <img> tags.

    Args:
        url: Absolute http(s) URL of the page to inspect.

    Returns:
        A dict with the keys "url", "title", "product_containers",
        "image_selectors", "name_selectors", "price_selectors" and
        "description_selectors" (the last three are never filled in by this
        function), or {"error": message} when the URL is rejected, the browser
        cannot be started, or loading/parsing fails. Nothing is raised.
    """
    # Local import: the module header only imports urljoin and quote.
    from urllib.parse import urlparse

    # The URL is handed to the browser verbatim, so refuse anything that is
    # not an absolute http(s) address (file://, chrome://, javascript:, ...).
    try:
        parsed = urlparse(url) if isinstance(url, str) else None
    except ValueError:
        parsed = None
    if parsed is None or parsed.scheme not in ("http", "https") or not parsed.netloc:
        return {"error": "Invalid URL: only absolute http(s) URLs can be analyzed"}

    driver = None
    try:
        driver = create_browser()
        if not driver:
            return {"error": "Failed to create browser"}

        logger.info(f"Analyzing page structure: {url}")
        driver.get(url)

        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        # Fixed pause after <body> is present, before the DOM is captured.
        time.sleep(5)

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        analysis = {
            "url": url,
            "title": soup.title.string if soup.title else "No title",
            "product_containers": [],
            "image_selectors": [],
            "name_selectors": [],
            "price_selectors": [],
            "description_selectors": []
        }

        for container in soup.find_all(['div', 'li', 'article'], class_=True):
            classes = ' '.join(container.get('class', []))
            lowered_classes = classes.lower()
            if not any(keyword in lowered_classes for keyword in ('product', 'item', 'result')):
                continue

            img = container.find('img')
            link = container.find('a', href=True)
            if img is None or link is None:
                continue

            # Compute the text once; it is truncated to 100 characters below.
            text = container.get_text(strip=True)
            analysis["product_containers"].append({
                "selector": f".{classes.replace(' ', '.')}",
                "classes": classes,
                "has_image": True,
                "has_link": True,
                "image_src": img.get('src'),
                "link_href": link.get('href'),
                "text_content": text[:100] + "..." if len(text) > 100 else text
            })

        # Only the first ten images of the document are inspected.
        for img in soup.find_all('img')[:10]:
            src = img.get('src') or img.get('data-src')
            if not src or not any(keyword in src.lower() for keyword in ('product', 'item', 'catalog')):
                continue

            # Walk up the ancestors until at least three class names are collected.
            parent_classes = []
            parent = img.parent
            while parent is not None and len(parent_classes) < 3:
                if parent.get('class'):
                    parent_classes.extend(parent.get('class'))
                parent = parent.parent

            # NOTE(review): the first two collected classes may belong to
            # different ancestors, so the compound selector is only a hint.
            if parent_classes:
                selector_path = f".{'.'.join(parent_classes[:2])} img"
            else:
                selector_path = "img"

            analysis["image_selectors"].append({
                "img_src": src,
                "img_alt": img.get('alt', ''),
                "parent_classes": parent_classes,
                "selector_path": selector_path
            })

        return analysis

    except Exception as e:
        logger.error(f"Error analyzing page structure: {str(e)}")
        return {"error": str(e)}

    finally:
        if driver:
            driver.quit()

def extract_photos_with_browser(url: str) -> List[str]:
    """
    Collect product photo URLs from a page rendered in a headless browser

    Three strategies are tried in order, each only when the previous ones found
    nothing: images inside the horizontal Fotorama navigation strip, a list of
    common gallery selectors (keeping sources that contain "product"), and
    finally every <img> whose source contains "product", "item" or "catalog".

    Returns absolute, de-duplicated URLs in discovery order. The list is empty
    when the browser cannot be created; errors are logged rather than raised.
    """
    photos = []
    driver = None

    def source_of(image):
        # Fall back to data-src when src is missing or empty.
        return image.get_attribute("src") or image.get_attribute("data-src")

    def keep(src):
        # Resolve against the site root and skip URLs already collected.
        absolute = urljoin(BASE_URL, src)
        if absolute not in photos:
            photos.append(absolute)

    try:
        driver = create_browser()
        if not driver:
            return photos

        logger.info(f"Loading page with browser: {url}")
        driver.get(url)

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        time.sleep(3)

        # Strategy 1: the Fotorama thumbnail strip.
        try:
            nav_strip = driver.find_element(By.CSS_SELECTOR, ".fotorama__nav-wrap.fotorama__nav-wrap--horizontal")
            for thumb in nav_strip.find_elements(By.TAG_NAME, "img"):
                thumb_src = source_of(thumb)
                if thumb_src:
                    keep(thumb_src)
        except NoSuchElementException:
            logger.info("Fotorama gallery not found, trying alternative selectors")

        # Strategy 2: generic gallery selectors; stop at the first that yields photos.
        if not photos:
            for selector in (
                ".fotorama img",
                ".product-gallery img",
                ".gallery img",
                ".product-image-gallery img",
                "[class*='gallery'] img",
                "[class*='fotorama'] img",
            ):
                try:
                    for gallery_img in driver.find_elements(By.CSS_SELECTOR, selector):
                        gallery_src = source_of(gallery_img)
                        if gallery_src and "product" in gallery_src.lower():
                            keep(gallery_src)
                    if photos:
                        break
                except Exception:
                    continue

        # Strategy 3: any image on the page with a product-like source.
        if not photos:
            for page_img in driver.find_elements(By.TAG_NAME, "img"):
                page_src = source_of(page_img)
                if not page_src:
                    continue
                lowered = page_src.lower()
                if any(keyword in lowered for keyword in ["product", "item", "catalog"]):
                    keep(page_src)

        logger.info(f"Extracted {len(photos)} photos using browser")

    except Exception as e:
        logger.error(f"Error extracting photos with browser: {str(e)}")

    finally:
        if driver:
            driver.quit()

    return photos

# Selectors tried in order to locate the listing image inside a container.
_LISTING_IMAGE_SELECTORS = (
    '.product-image-main img',
    '.product-item-photo img',
    '.product-image img',
    '.product-photo img',
    '.item-image img',
    '.product-item-img img',
    'img.product-image-photo',
    'img[class*="product"]',
    'img',
)

# Attributes probed on an <img>, plain src first.
_LISTING_IMAGE_ATTRS = ('src', 'data-src', 'data-lazy', 'data-original', 'data-srcset')

# Extensions that mark a URL as a real image file.
_LISTING_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.webp')

# Selectors tried in order to locate a short description inside a container.
_LISTING_DESCRIPTION_SELECTORS = (
    '.product-item-description',
    '.product-description',
    '.product-summary',
    '.product-excerpt',
    '.product-short-description',
    '.item-description',
    '.description',
    '.summary',
    '.excerpt',
    '.product-info',
    '.product-details',
    '.product-content',
    '[class*="description"]',
    '[class*="summary"]',
    '[class*="excerpt"]',
    '[class*="info"]',
)


def _first_selector_text(scope, selectors) -> str:
    """Return the stripped text of the first selector matching inside scope, or ""."""
    for selector in selectors:
        elem = scope.select_one(selector)
        if elem is not None:
            return elem.get_text(strip=True)
    return ""


def _find_listing_link(container):
    """Return the first anchor in container whose href ends with ".html", or None."""
    link_selectors = (
        'a.product-item-link',
        'a[href*=".html"]',
        'h2 a',
        'h3 a',
        '.product-name a',
        '.product-item-name a',
        'a',
    )
    for selector in link_selectors:
        candidate = container.select_one(selector)
        if candidate is not None and candidate.get('href', '').endswith('.html'):
            return candidate
    return None


def _absolute_listing_image_url(img_src: str) -> str:
    """Turn a protocol-relative, root-relative or relative image source into an absolute URL."""
    if img_src.startswith('//'):
        return 'https:' + img_src
    if img_src.startswith('/'):
        return BASE_URL + img_src
    if not img_src.startswith('http'):
        return urljoin(BASE_URL, img_src)
    return img_src


def _pick_listing_image(scope) -> str:
    """
    Return the best image URL found inside scope, or "" when there is none.

    The first element matched by the image selectors that offers a usable URL
    wins. URLs containing "placeholder" or "loading" are never returned. A URL
    with a known image extension (query string and case ignored) is returned
    immediately; otherwise the last extension-less candidate of that element
    is used.
    """
    for selector in _LISTING_IMAGE_SELECTORS:
        img_elem = scope.select_one(selector)
        if img_elem is None:
            continue

        element_fallback = ""
        for attr in _LISTING_IMAGE_ATTRS:
            raw = img_elem.get(attr)
            if not raw or not raw.strip():
                continue
            raw = raw.strip()
            if raw.startswith('data:'):
                continue

            if attr == 'data-srcset':
                # Keep only the URL of the first srcset candidate ("url 2x, ...").
                first_candidate = raw.split(',')[0].split()
                if not first_candidate:
                    continue
                raw = first_candidate[0]

            candidate = _absolute_listing_image_url(raw)
            lowered = candidate.lower()
            if 'placeholder' in lowered or 'loading' in lowered:
                continue

            path_only = lowered.split('?', 1)[0].split('#', 1)[0]
            if path_only.endswith(_LISTING_IMAGE_EXTENSIONS):
                return candidate
            element_fallback = candidate

        if element_fallback:
            return element_fallback

    return ""


def _find_listing_description(container, name: str) -> str:
    """
    Return a short description for a listing container, or "".

    Tries, in order: dedicated description elements, any paragraph/div text,
    the first three description selectors on the parent element, and finally
    descriptive attributes (title, alt, ...) of elements in the container.
    """
    lowered_name = name.lower()

    for selector in _LISTING_DESCRIPTION_SELECTORS:
        elem = container.select_one(selector)
        if elem is None:
            continue
        text = elem.get_text(strip=True)
        if len(text) > 15 and not text.lower().startswith(('add to', 'quick view', 'compare')):
            return text

    generic_blocks = container.select(
        'p, div:not([class*="price"]):not([class*="button"]):not([class*="action"])'
    )
    for elem in generic_blocks:
        text = elem.get_text(strip=True)
        lowered = text.lower()
        if (len(text) > 20
                and not any(keyword in lowered for keyword in ('add to cart', 'quick view', 'compare', 'wishlist'))
                and lowered != lowered_name):
            return text[:200] + ('...' if len(text) > 200 else '')

    parent = container.parent
    if parent is not None:
        for selector in _LISTING_DESCRIPTION_SELECTORS[:3]:
            elem = parent.select_one(selector)
            if elem is None:
                continue
            text = elem.get_text(strip=True)
            if len(text) > 15:
                return text

    for elem in container.find_all(['a', 'img', 'div', 'span']):
        for attr in ('title', 'alt', 'data-description', 'data-summary', 'aria-label'):
            value = elem.get(attr, '')
            if value and len(value) > 20 and value.lower() != lowered_name:
                return value[:150] + ('...' if len(value) > 150 else '')

    return ""


def _parse_listing_container(container):
    """Build the product dict for one listing container, or return None to skip it."""
    link = _find_listing_link(container)
    if link is None:
        return None

    # The slug is the last path segment of the product link without ".html".
    href = link.get('href', '')
    slug = href.split('/')[-1].replace('.html', '')

    name = _first_selector_text(container, (
        '.product-item-name',
        '.product-name',
        'h2.product-name',
        'h3.product-name',
        '.product-item-link',
        'h2 a',
        'h3 a',
        'h4 a',
    ))
    if not name:
        name = link.get_text(strip=True)

    img_url = _pick_listing_image(container)
    if not img_url and container.parent is not None:
        # Last resort: look for an image on the enclosing element.
        img_url = _pick_listing_image(container.parent)

    price = _first_selector_text(container, (
        '.price',
        '.price-final_price',
        '.regular-price',
        '.special-price',
        '.price-box .price',
        '[class*="price"]',
    ))

    description = _find_listing_description(container, name)

    if not (name and slug):
        return None

    return {
        "name": name,
        "url": slug,
        "img": img_url,
        "price": price,
        "description": description
    }


def parse_products_with_browser(url: str) -> List[Dict[str, Any]]:
    """
    Parse products from a page using browser automation with enhanced Magento support

    The page is rendered in a headless browser, then the first container
    selector that matches anything decides which elements are treated as
    products. At most the first 25 containers are parsed.

    Args:
        url: Listing page to load.

    Returns:
        A list of dicts with the keys "name", "url" (the product slug, i.e. the
        last path segment of the product link without ".html"), "img", "price"
        and "description". The list is empty when the browser cannot be
        created or any page-level error occurs; errors are logged, not raised.
    """
    products = []
    browser = None

    try:
        browser = create_browser()
        if not browser:
            # create_browser() returns None on failure instead of raising.
            logger.error(f"Error parsing products with browser: failed to create browser for {url}")
            return products

        browser.get(url)

        WebDriverWait(browser, 15).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        # Fixed pause after <body> is present, before the DOM is captured.
        time.sleep(8)

        soup = BeautifulSoup(browser.page_source, 'html.parser')

        container_selectors = [
            '.product-item-info',
            '.product-item',
            '.item',
            '.product',
            '.products-grid .item',
            '.category-products .item',
            '[class*="product-item"]',
            '[class*="product"]'
        ]

        product_containers = []
        for selector in container_selectors:
            containers = soup.select(selector)
            if containers:
                product_containers = containers
                logger.info(f"Found {len(containers)} products using selector: {selector}")
                break

        for container in product_containers[:25]:
            # One malformed container must not abort the whole page.
            try:
                product = _parse_listing_container(container)
            except Exception as e:
                logger.error(f"Error parsing product container: {str(e)}")
                continue
            if product:
                products.append(product)

        logger.info(f"Parsed {len(products)} products from {url}")
        return products

    except Exception as e:
        logger.error(f"Error parsing products with browser: {str(e)}")
        return []

    finally:
        if browser:
            browser.quit()

def parse_product_item(product_elem, base_url: str = BASE_URL) -> Dict[str, Any]:
    """
    Parse individual product item from HTML element

    Args:
        product_elem: Parsed HTML element of one product; it must support
            select_one(), select() and the usual tag accessors.
        base_url: Base used to resolve relative image URLs.

    Returns:
        A dict with the keys "name", "url", "img", "price" and "description".
        "name" defaults to "Error parsing product" and the other keys default
        to "" when nothing is found. "url" is the last path segment without
        ".html" for links ending in ".html", otherwise the raw href. An empty
        dict is returned when parsing raises; the error is logged.
    """
    result = {}

    try:
        name_selectors = [
            '.product-item-name',
            '.product-name',
            'h2.product-name',
            'h3.product-name',
            '.product-item-link',
            'h2 a',
            'h3 a',
            'a'
        ]

        for selector in name_selectors:
            name_elem = product_elem.select_one(selector)
            if name_elem is None:
                continue

            result["name"] = name_elem.get_text(strip=True)

            # The matched element is often a wrapper around the anchor rather
            # than the anchor itself, so look inside it and then at the whole
            # item before giving up on the link.
            href = name_elem.get('href')
            if not href:
                link_elem = name_elem.select_one('a[href]')
                if link_elem is None:
                    link_elem = product_elem.select_one('a[href]')
                href = link_elem.get('href') if link_elem is not None else None

            if href:
                result["url"] = href.split('/')[-1].replace('.html', '') if href.endswith('.html') else href
            break

        if "name" not in result:
            # Reached only when the item has no anchor descendant at all (the
            # bare 'a' selector above failed), so no URL can be derived here.
            for selector in ['.product-item-description', '.description', 'span', 'div']:
                elem = product_elem.select_one(selector)
                if elem is None:
                    continue
                text = elem.get_text(strip=True)
                if text and len(text) > 3:
                    result["name"] = text
                    break

        result["name"] = result.get("name", "Error parsing product")
        result["url"] = result.get("url", "")

        img_selectors = [
            '.product-image-main img',
            '.product-item-photo img',
            '.product-image img',
            '.product-photo img',
            '.item-image img',
            '.product-item-img img',
            'img.product-image-photo',
            'img[class*="product"]',
            'img'
        ]

        for selector in img_selectors:
            img_elem = product_elem.select_one(selector)
            if img_elem is None:
                continue
            # Plain src first, then the alternative data-* attributes.
            for attr in ['src', 'data-src', 'data-lazy', 'data-original']:
                img_src = img_elem.get(attr)
                if img_src and img_src.strip() and not img_src.startswith('data:'):
                    result["img"] = urljoin(base_url, img_src)
                    break
            if "img" in result:
                break

        result["img"] = result.get("img", "")

        price_selectors = [
            '.price',
            '.price-final_price',
            '.regular-price',
            '[class*="price"]'
        ]

        for selector in price_selectors:
            price_elem = product_elem.select_one(selector)
            if price_elem is None:
                continue
            price_text = price_elem.get_text(strip=True)
            # Only accept text that actually contains a number.
            if price_text and any(char.isdigit() for char in price_text):
                result["price"] = price_text
                break

        result["price"] = result.get("price", "")

        desc_selectors = [
            '.product-item-description',
            '.product-description',
            '.description',
            '.summary',
            '.product-info'
        ]

        for selector in desc_selectors:
            desc_elem = product_elem.select_one(selector)
            if desc_elem is None:
                continue
            desc_text = desc_elem.get_text(strip=True)
            if (len(desc_text) > 15 and
                    not desc_text.lower().startswith(('add to', 'quick view', 'compare', 'wishlist'))):
                result["description"] = desc_text
                break

        if not result.get("description"):
            # Fall back to the first sufficiently long paragraph/div text that
            # is neither a UI label nor a repeat of the product name.
            text_elements = product_elem.select('p, div:not([class*="price"]):not([class*="button"]):not([class*="action"])')
            for elem in text_elements:
                text = elem.get_text(strip=True)
                if (len(text) > 20 and
                        not any(keyword in text.lower() for keyword in ['add to cart', 'quick view', 'compare', 'wishlist']) and
                        text.lower() != result.get("name", "").lower()):
                    result["description"] = text[:200] + ('...' if len(text) > 200 else '')
                    break

        result["description"] = result.get("description", "")

        return result

    except Exception as e:
        logger.error(f"Error parsing product item: {str(e)}")
        return {}

def create_session():
    """
    Build a requests session that sends browser-like default headers

    Returns a new requests.Session whose default headers carry a desktop
    Chrome User-Agent plus Accept, language, encoding and connection headers.
    """
    browser_like_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }

    http_session = requests.Session()
    http_session.headers.update(browser_like_headers)
    return http_session

def fetch_page(session: requests.Session, url: str) -> str:
    """
    Download a page and return its body text

    The request uses a 30 second timeout. Returns the response text for HTTP
    200; any other status or any exception is logged and yields "".
    """
    try:
        reply = session.get(url, timeout=30)
        if reply.status_code != 200:
            logger.error(f"HTTP {reply.status_code} for URL: {url}")
            return ""
        return reply.text
    except Exception as e:
        logger.error(f"Error fetching {url}: {str(e)}")
        return ""

@app.get("/search")
def search_products(query: str = Query(..., description="Search query")) -> Dict[str, Any]:
    """
    Search the site for products matching a query

    The query is percent-encoded into the site's search URL and the resulting
    page is parsed with parse_products_with_browser. The response carries the
    original query, the product list and its length, and an ISO timestamp.
    Any exception is logged and reported as HTTP 500.
    """
    try:
        encoded_query = quote(query)
        search_url = f"{BASE_URL}/search?q={encoded_query}"
        logger.info(f"Searching for products with query: {query}")

        found = parse_products_with_browser(search_url)

        payload = {"success": True, "query": query}
        payload["total_products"] = len(found)
        payload["products"] = found
        payload["timestamp"] = datetime.now().isoformat()
        return payload

    except Exception as e:
        logger.error(f"Error in search_products: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Search failed: {str(e)}")

@app.get("/shop-by-mark")
def shop_by_mark(mark: str = Query(..., description="Car mark/brand")) -> Dict[str, Any]:
    """
    Get products by car mark/brand

    Args:
        mark: Brand name as supplied by the client. It is trimmed, lower-cased
            and percent-encoded before being used as a single path segment.

    Returns:
        A dict with "success", the original "mark", "total_products",
        "products" and an ISO "timestamp".

    Raises:
        HTTPException: 500 when an unexpected error occurs.
    """
    try:
        # The value comes straight from the query string: encode everything,
        # including "/", so it cannot add path segments, a query or a fragment.
        mark_segment = quote(mark.strip().lower(), safe="")
        mark_url = f"{BASE_URL}/shop-by-mark/{mark_segment}"
        logger.info(f"Fetching products for mark: {mark}")

        products = parse_products_with_browser(mark_url)

        return {
            "success": True,
            "mark": mark,
            "total_products": len(products),
            "products": products,
            "timestamp": datetime.now().isoformat()
        }

    except Exception as e:
        logger.error(f"Error in shop_by_mark: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to fetch products for mark {mark}: {str(e)}")

@app.get("/product-details")
def get_product_details(url: str = Query(..., description="Product URL or slug")) -> Dict[str, Any]:
    """
    Get detailed information about a specific product

    Args:
        url: Absolute product URL on the BASE_URL host, a site-relative path,
            or a bare slug as returned in the "url" field of the listing
            endpoints.

    Returns:
        A dict with "success", a "product" dict ("name", "url", "price",
        "description", "photos") and an ISO "timestamp".

    Raises:
        HTTPException: 400 when an absolute URL points to another host; 500
            when the browser cannot be started or scraping fails.
    """
    # Local import: the module header only imports urljoin and quote.
    from urllib.parse import urlparse

    try:
        target = url.strip()

        if target.lower().startswith(('http://', 'https://')):
            # The server-side browser must not be steerable to arbitrary
            # hosts, so absolute URLs are limited to the scraped site.
            base_host = urlparse(BASE_URL).hostname
            try:
                target_host = urlparse(target).hostname
            except ValueError:
                target_host = None
            if target_host not in (base_host, f"www.{base_host}"):
                raise HTTPException(
                    status_code=400,
                    detail=f"Only product URLs on {base_host} are supported"
                )
            product_url = target
        elif target.startswith('/'):
            product_url = f"{BASE_URL}{target}"
        else:
            # The listing endpoints strip ".html" from product links when they
            # build the slug, so restore it for bare slugs.
            # NOTE(review): assumes product pages live at the site root - confirm.
            if target and not any(ch in target for ch in '/.?#'):
                target = f"{target}.html"
            product_url = f"{BASE_URL}/{target}"

        logger.info(f"Fetching product details from: {product_url}")

        # extract_photos_with_browser starts and closes its own browser.
        photos = extract_photos_with_browser(product_url)

        browser = create_browser()
        if not browser:
            # create_browser() returns None on failure instead of raising.
            raise HTTPException(
                status_code=500,
                detail="Failed to fetch product details: Failed to create browser"
            )

        try:
            browser.get(product_url)

            WebDriverWait(browser, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )

            soup = BeautifulSoup(browser.page_source, 'html.parser')
        finally:
            # The DOM snapshot is all that is needed from here on.
            browser.quit()

        name = ""
        price = ""
        description = ""

        name_selectors = [
            'h1.product-title',
            'h1.product-name',
            '.product-title',
            '.product-name',
            'h1',
            '.page-title'
        ]

        for selector in name_selectors:
            name_elem = soup.select_one(selector)
            if name_elem is not None:
                name = name_elem.get_text(strip=True)
                break

        price_selectors = [
            '.price',
            '.product-price',
            '.current-price',
            '[class*="price"]'
        ]

        for selector in price_selectors:
            price_elem = soup.select_one(selector)
            if price_elem is None:
                continue
            # Keep only the numeric part, with an optional currency symbol.
            price_match = re.search(r'[\$€£¥₽]?[\d,]+\.?\d*', price_elem.get_text(strip=True))
            if price_match:
                price = price_match.group()
                break

        desc_selectors = [
            '.product-description',
            '.product-summary',
            '.description',
            '.summary',
            '.product-details',
            '.product-info'
        ]

        for selector in desc_selectors:
            desc_elem = soup.select_one(selector)
            if desc_elem is not None:
                description = desc_elem.get_text(strip=True)
                break

        return {
            "success": True,
            "product": {
                "name": name,
                "url": product_url,
                "price": price,
                "description": description,
                "photos": photos
            },
            "timestamp": datetime.now().isoformat()
        }

    except HTTPException:
        # Deliberate HTTP errors raised above must not be re-wrapped as 500.
        raise
    except Exception as e:
        logger.error(f"Error in get_product_details: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to fetch product details: {str(e)}")

@app.get("/analyze-structure")
def analyze_structure(url: str = Query(..., description="URL to analyze")) -> Dict[str, Any]:
    """
    Analyze page structure to understand selectors

    Args:
        url: URL of the page to analyze.

    Returns:
        A dict with "success", "url", "structure" (the result of
        analyze_page_structure) and an ISO "timestamp". "success" is False
        when the analysis reported an error.

    Raises:
        HTTPException: 500 when an unexpected error occurs.
    """
    try:
        logger.info(f"Analyzing page structure for: {url}")
        structure = analyze_page_structure(url)

        # analyze_page_structure reports failures as {"error": ...} instead of
        # raising, so the flag has to be derived from the result.
        succeeded = not (isinstance(structure, dict) and "error" in structure)

        return {
            "success": succeeded,
            "url": url,
            "structure": structure,
            "timestamp": datetime.now().isoformat()
        }

    except Exception as e:
        logger.error(f"Error in analyze_structure: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to analyze structure: {str(e)}")

@app.get("/health")
def health_check():
    """
    Report that the service is up

    Returns a dict with a fixed "healthy" status, the current local time as an
    ISO string and the API version.
    """
    checked_at = datetime.now().isoformat()
    report = {"status": "healthy"}
    report["timestamp"] = checked_at
    report["version"] = "2.0.0"
    return report

# Serve the app with uvicorn when this file is executed as a script.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when running the file directly.
    import uvicorn
    # Host 0.0.0.0 listens on all network interfaces; the port is 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)