#!/usr/bin/env python3
"""
Restaurant Photo Downloader
Downloads photos for each restaurant and stores them in database and local storage
"""

import requests
import os
import uuid
import time
import re
from datetime import datetime
from urllib.parse import quote_plus, urlparse
from sqlalchemy import create_engine, text
from PIL import Image
import urllib.request
from typing import List, Dict, Optional, Tuple
import random

class RestaurantPhotoDownloader:
    """Downloads and manages restaurant photos"""
    
    def __init__(self):
        # Database connection
        DATABASE_URL = f"postgresql://postgres:{quote_plus('F@f@k0s!!')}@localhost:5432/bookbeach"
        self.engine = create_engine(DATABASE_URL)
        
        # Photo storage directory (following memory requirement)
        self.photo_dir = "restaurant_photos"
        os.makedirs(self.photo_dir, exist_ok=True)
        
        # Request session with proper headers
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Connection': 'keep-alive',
        })
        
        # Photo sources - using free image services
        self.photo_sources = [
            "https://source.unsplash.com/800x600/?restaurant,{cuisine}",
            "https://source.unsplash.com/800x600/?{name},restaurant",
            "https://source.unsplash.com/800x600/?greek,taverna,{cuisine}",
            "https://source.unsplash.com/800x600/?mediterranean,food,{cuisine}",
            "https://picsum.photos/800/600?random={random_id}",
            "https://source.unsplash.com/800x600/?athens,restaurant,{cuisine}",
            "https://source.unsplash.com/800x600/?{cuisine},food,restaurant"
        ]
    
    def get_restaurants_without_photos(self) -> List:
        """Get restaurants that don't have photos yet"""
        with self.engine.connect() as db:
            result = db.execute(text("""
                SELECT r.restaurant_id, r.restaurant_name, r.cuisine_type, r.description
                FROM restaurants r
                LEFT JOIN restaurant_photos rp ON r.restaurant_id = rp.restaurant_id
                WHERE rp.photo_id IS NULL
                ORDER BY r.restaurant_name
            """)).fetchall()
            
            return list(result)
    
    def clean_text_for_url(self, text: str) -> str:
        """Clean text to make it suitable for URL usage"""
        if not text:
            return ""
        
        # Remove special characters and normalize
        cleaned = re.sub(r'[^\w\s-]', '', text.lower())
        cleaned = re.sub(r'[-\s]+', '+', cleaned.strip())
        return cleaned
    
    def generate_photo_urls(self, restaurant_name: str, cuisine_type: str) -> List[str]:
        """Generate potential photo URLs for a restaurant"""
        urls = []
        
        # Clean inputs
        clean_name = self.clean_text_for_url(restaurant_name)
        clean_cuisine = self.clean_text_for_url(cuisine_type) if cuisine_type else "restaurant"
        
        # Generate URLs from different sources
        for source_template in self.photo_sources:
            try:
                if "{name}" in source_template:
                    url = source_template.format(name=clean_name, cuisine=clean_cuisine)
                elif "{cuisine}" in source_template:
                    url = source_template.format(cuisine=clean_cuisine)
                elif "{random_id}" in source_template:
                    url = source_template.format(random_id=random.randint(1, 10000))
                else:
                    url = source_template
                
                urls.append(url)
            except:
                continue
        
        return urls
    
    def download_photo(self, url: str, restaurant_name: str, photo_type: str = 'general') -> Optional[Dict]:
        """Download a single photo and return metadata"""
        try:
            # Clean restaurant name for filename
            safe_name = re.sub(r'[^\w\s-]', '', restaurant_name)
            safe_name = re.sub(r'[-\s]+', '_', safe_name.strip())[:50]
            
            # Generate unique filename
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            random_suffix = random.randint(1000, 9999)
            filename = f"{safe_name}_{photo_type}_{timestamp}_{random_suffix}.jpg"
            filepath = os.path.join(self.photo_dir, filename)
            
            # Download image
            print(f"  📷 Downloading from: {url}")
            response = self.session.get(url, timeout=15, stream=True)
            response.raise_for_status()
            
            # Save image
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            
            # Get image dimensions and file size
            try:
                with Image.open(filepath) as img:
                    width, height = img.size
                file_size = os.path.getsize(filepath)
            except:
                width = height = None
                file_size = os.path.getsize(filepath)
            
            metadata = {
                'photo_path': filepath,
                'photo_url': url,
                'photo_type': photo_type,
                'file_size': file_size,
                'width': width,
                'height': height,
                'filename': filename
            }
            
            print(f"  ✅ Downloaded: {filename} ({file_size} bytes, {width}x{height})")
            return metadata
            
        except Exception as e:
            print(f"  ❌ Download failed: {e}")
            return None
    
    def save_photo_to_database(self, restaurant_id: str, photo_metadata: Dict, is_primary: bool = False) -> bool:
        """Save photo metadata to database"""
        try:
            with self.engine.connect() as db:
                photo_id = str(uuid.uuid4())
                
                db.execute(text("""
                    INSERT INTO restaurant_photos (
                        photo_id, restaurant_id, photo_path, photo_url, photo_type,
                        file_size, width, height, is_primary, uploaded_at, created_at
                    ) VALUES (
                        :photo_id, :restaurant_id, :photo_path, :photo_url, :photo_type,
                        :file_size, :width, :height, :is_primary, NOW(), NOW()
                    )
                """), {
                    'photo_id': photo_id,
                    'restaurant_id': restaurant_id,
                    'photo_path': photo_metadata['photo_path'],
                    'photo_url': photo_metadata['photo_url'],
                    'photo_type': photo_metadata['photo_type'],
                    'file_size': photo_metadata['file_size'],
                    'width': photo_metadata['width'],
                    'height': photo_metadata['height'],
                    'is_primary': is_primary
                })
                
                db.commit()
                print(f"  💾 Saved to database: {photo_id}")
                return True
                
        except Exception as e:
            print(f"  ❌ Database save failed: {e}")
            return False
    
    def download_photos_for_restaurant(self, restaurant_id: str, restaurant_name: str, cuisine_type: str, max_photos: int = 3) -> int:
        """Download multiple photos for a single restaurant"""
        print(f"\n🍽️ Processing: {restaurant_name} ({cuisine_type})")
        
        # Generate photo URLs
        photo_urls = self.generate_photo_urls(restaurant_name, cuisine_type)
        
        downloaded_count = 0
        photo_types = ['general', 'interior', 'food']
        
        for i, url in enumerate(photo_urls[:max_photos]):
            if downloaded_count >= max_photos:
                break
            
            photo_type = photo_types[downloaded_count] if downloaded_count < len(photo_types) else 'general'
            is_primary = (downloaded_count == 0)  # First photo is primary
            
            # Download photo
            photo_metadata = self.download_photo(url, restaurant_name, photo_type)
            
            if photo_metadata:
                # Save to database
                if self.save_photo_to_database(restaurant_id, photo_metadata, is_primary):
                    downloaded_count += 1
                else:
                    # Clean up file if database save failed
                    try:
                        os.remove(photo_metadata['photo_path'])
                    except:
                        pass
            
            # Be polite to image services
            time.sleep(1)
        
        print(f"  🎯 Downloaded {downloaded_count} photos for {restaurant_name}")
        return downloaded_count
    
    def download_all_restaurant_photos(self, max_restaurants: Optional[int] = None) -> Dict:
        """Download photos for all restaurants without photos"""
        print("🚀 Starting restaurant photo download process...")
        
        # Get restaurants without photos
        restaurants = self.get_restaurants_without_photos()
        
        if max_restaurants:
            restaurants = restaurants[:max_restaurants]
        
        print(f"📊 Found {len(restaurants)} restaurants without photos")
        
        if not restaurants:
            print("✅ All restaurants already have photos!")
            return {'processed': 0, 'successful': 0, 'failed': 0}
        
        successful = 0
        failed = 0
        
        for i, (restaurant_id, restaurant_name, cuisine_type, description) in enumerate(restaurants, 1):
            print(f"\n📍 Progress: {i}/{len(restaurants)}")
            
            try:
                photos_downloaded = self.download_photos_for_restaurant(
                    str(restaurant_id), 
                    restaurant_name, 
                    cuisine_type or 'restaurant'
                )
                
                if photos_downloaded > 0:
                    successful += 1
                else:
                    failed += 1
                    print(f"  ⚠️ No photos downloaded for {restaurant_name}")
                
            except Exception as e:
                failed += 1
                print(f"  ❌ Failed to process {restaurant_name}: {e}")
            
            # Progress checkpoint every 10 restaurants
            if i % 10 == 0:
                print(f"\n📊 Checkpoint: {i}/{len(restaurants)} processed (✅ {successful} successful, ❌ {failed} failed)")
        
        return {
            'processed': len(restaurants),
            'successful': successful,
            'failed': failed
        }
    
    def get_photo_statistics(self) -> Dict:
        """Get statistics about restaurant photos"""
        with self.engine.connect() as db:
            # Total counts
            total_result = db.execute(text("SELECT COUNT(*) FROM restaurants")).fetchone()
            total_restaurants = total_result[0] if total_result else 0
            
            with_photos_result = db.execute(text("""
                SELECT COUNT(DISTINCT restaurant_id) FROM restaurant_photos
            """)).fetchone()
            restaurants_with_photos = with_photos_result[0] if with_photos_result else 0
            
            total_photos_result = db.execute(text("SELECT COUNT(*) FROM restaurant_photos")).fetchone()
            total_photos = total_photos_result[0] if total_photos_result else 0
            
            # Average photos per restaurant
            avg_result = db.execute(text("""
                SELECT ROUND(AVG(photo_count), 2) 
                FROM (
                    SELECT COUNT(*) as photo_count 
                    FROM restaurant_photos 
                    GROUP BY restaurant_id
                ) as counts
            """)).fetchone()
            avg_photos = avg_result[0] if avg_result else 0
            
            # Photo types distribution
            type_distribution = db.execute(text("""
                SELECT photo_type, COUNT(*) 
                FROM restaurant_photos 
                GROUP BY photo_type 
                ORDER BY COUNT(*) DESC
            """)).fetchall()
            
            return {
                'total_restaurants': total_restaurants,
                'restaurants_with_photos': restaurants_with_photos,
                'restaurants_without_photos': total_restaurants - restaurants_with_photos,
                'total_photos': total_photos,
                'avg_photos_per_restaurant': float(avg_photos) if avg_photos else 0,
                'photo_types': {row[0]: row[1] for row in type_distribution}
            }

def main():
    """Main execution function"""
    downloader = RestaurantPhotoDownloader()
    
    try:
        # Show initial statistics
        print("📊 Initial Statistics:")
        stats = downloader.get_photo_statistics()
        print(f"   🏪 Total restaurants: {stats['total_restaurants']}")
        print(f"   📷 Restaurants with photos: {stats['restaurants_with_photos']}")
        print(f"   📷 Restaurants without photos: {stats['restaurants_without_photos']}")
        print(f"   🖼️ Total photos: {stats['total_photos']}")
        
        if stats['restaurants_without_photos'] == 0:
            print("\n✅ All restaurants already have photos!")
            return
        
        # Download photos
        print(f"\n🔄 Starting photo download for {stats['restaurants_without_photos']} restaurants...")
        results = downloader.download_all_restaurant_photos()
        
        # Show final statistics
        print(f"\n🎉 Photo download completed!")
        print(f"   📊 Processed: {results['processed']} restaurants")
        print(f"   ✅ Successful: {results['successful']} restaurants")
        print(f"   ❌ Failed: {results['failed']} restaurants")
        
        # Updated statistics
        final_stats = downloader.get_photo_statistics()
        print(f"\n📊 Final Statistics:")
        print(f"   🏪 Total restaurants: {final_stats['total_restaurants']}")
        print(f"   📷 Restaurants with photos: {final_stats['restaurants_with_photos']}")
        print(f"   🖼️ Total photos: {final_stats['total_photos']}")
        print(f"   📊 Average photos per restaurant: {final_stats['avg_photos_per_restaurant']}")
        
        if final_stats['photo_types']:
            print(f"   🏷️ Photo types:")
            for photo_type, count in final_stats['photo_types'].items():
                print(f"      {photo_type}: {count} photos")
        
        print(f"\n📁 Photos stored in: {downloader.photo_dir}")
        
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()