#!/usr/bin/env python3
"""
Download Unique Beach Photos Script
Downloads unique, location-specific photos for beaches by scraping DuckDuckGo search results
No API keys required!
"""

import os
import requests
import json
import time
import uuid
from bs4 import BeautifulSoup, Tag
from urllib.parse import quote_plus

def search_duckduckgo_images(beach_name, city):
    """Search DuckDuckGo's HTML results page for beach-related image URLs.

    Up to three query variants are tried in order. The first query that
    yields at least one matching image determines the result.

    Args:
        beach_name: Name of the beach; used in the queries and as a keyword
            when filtering image URLs.
        city: City or area of the beach; used in the queries.

    Returns:
        A list of at most five dicts with the keys ``'url'``, ``'source'``
        and ``'title'``, or ``None`` when no query produced a usable image.
        Every returned ``'url'`` is an absolute http(s) URL.
    """
    search_queries = [
        f'"{beach_name}" "{city}" Greece beach',
        f'{beach_name} {city} Greece παραλία',
        f'{beach_name} beach {city} Greece'
    ]

    # Headers to avoid being blocked
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # An image is kept when its URL mentions any of these (case-insensitive).
    keywords = ('beach', 'greece', str(beach_name).lower())

    for search_query in search_queries:
        try:
            # Percent-encode the query: names containing '&', '#', '+' or
            # quotes would otherwise be cut off or misread as URL syntax.
            search_url = f"https://duckduckgo.com/html/?q={quote_plus(search_query)}"
            response = requests.get(search_url, headers=headers, timeout=10)

            if response.status_code != 200:
                continue

            soup = BeautifulSoup(response.content, 'html.parser')
            results = soup.find_all('img')
            if not results:
                continue

            print(f"✅ Found {len(results)} potential images for '{search_query}'")

            beach_images = []
            for img in results[:15]:  # Only inspect the top 15 <img> tags
                # Ensure img is a Tag object before accessing attributes
                if not isinstance(img, Tag):
                    continue

                img_src = img.get('src', '')
                if not img_src or not isinstance(img_src, str):
                    continue

                # Make the URL absolute so it can be fetched directly:
                # protocol-relative and root-relative sources have no scheme.
                img_src = img_src.strip()
                if img_src.startswith('//'):
                    img_src = 'https:' + img_src
                elif img_src.startswith('/'):
                    img_src = 'https://duckduckgo.com' + img_src

                # Skip data: URIs and anything else that is not downloadable
                # over HTTP, so they do not occupy slots in the top five.
                if not img_src.startswith(('http://', 'https://')):
                    continue

                lowered_src = img_src.lower()
                if any(keyword in lowered_src for keyword in keywords):
                    beach_images.append({
                        'url': img_src,
                        'source': 'DuckDuckGo Search',
                        'title': search_query
                    })

            if beach_images:
                return beach_images[:5]  # Return top 5

        except Exception as e:
            # Deliberate best-effort boundary: a failed query (network or
            # parsing) is reported and the next query variant is tried.
            print(f"❌ Search failed for '{search_query}': {str(e)}")
            continue

    return None

def _safe_filename_part(text):
    """Return *text* lowercased and reduced to alphanumerics, '-' and '_'."""
    kept = "".join(c for c in str(text) if c.isalnum() or c in (' ', '-', '_')).strip()
    return kept.replace(' ', '_').lower()


def download_image_from_url(image_url, beach_name, city, upload_dir):
    """Download an image from a direct URL into *upload_dir*.

    Args:
        image_url: URL of the image. Protocol-relative URLs (``//host/...``)
            are fetched over https.
        beach_name: Beach name, used (sanitised) in the file name.
        city: City name, used (sanitised) in the file name.
        upload_dir: Existing directory in which the file is stored.

    Returns:
        The generated file name (not the full path) on success, or ``None``
        when the download fails or the file is smaller than 5000 bytes.
    """
    file_path = None
    try:
        # A scheme-less URL would be rejected by requests; assume https.
        if image_url.startswith('//'):
            image_url = 'https:' + image_url

        # Sanitise both parts so that characters such as '/' can neither
        # break the path nor escape upload_dir.
        safe_beach_name = _safe_filename_part(beach_name)
        safe_city = _safe_filename_part(city)

        # Take the extension from the URL path only; dots inside the query
        # string or fragment must not be mistaken for the extension.
        url_path = image_url.split('?', 1)[0].split('#', 1)[0]
        ext = os.path.splitext(url_path)[1].lower()
        if ext not in ('.jpg', '.jpeg', '.png', '.gif'):
            ext = '.jpg'

        filename = f"{safe_beach_name}_{safe_city}_duckduckgo_{uuid.uuid4().hex[:8]}{ext}"
        file_path = os.path.join(upload_dir, filename)

        # Set headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # The context manager releases the streamed connection even when
        # writing fails part-way through.
        with requests.get(image_url, headers=headers, stream=True, timeout=30) as response:
            response.raise_for_status()

            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Verify file size (at least 5KB)
        if os.path.getsize(file_path) < 5000:
            os.remove(file_path)
            return None

        print(f"✅ Downloaded: {filename}")
        return filename

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller try the next candidate. Remove any partially written file
        # first; the random suffix makes the path unique to this call.
        if file_path is not None and os.path.exists(file_path):
            try:
                os.remove(file_path)
            except OSError:
                pass
        print(f"❌ Failed to download image from {image_url}: {str(e)}")
        return None

def main():
    """Download one photo per beach listed in ``greek_beaches_export.json``.

    Reads the JSON file from the current working directory and creates
    ``backend/uploads/beach_photos`` if needed. For each entry it searches
    for candidate images and stores the first one that downloads, then
    prints a summary. Returns ``None`` in all cases; a missing or invalid
    JSON file is reported and ends the run early.
    """
    # Load beaches from JSON file
    json_file_path = "greek_beaches_export.json"

    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            beaches = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        print(f"❌ Failed to load JSON file: {str(e)}")
        return

    total = len(beaches)
    print(f"🏖️ Found {total} beaches in the database")

    # Create upload directory if it doesn't exist
    upload_dir = "backend/uploads/beach_photos"
    os.makedirs(upload_dir, exist_ok=True)

    success_count = 0
    failed_count = 0

    print("📸 Starting unique photo download process...")
    print("=" * 60)

    # True once a beach has triggered network requests; the pause is taken
    # at the top of the next iteration so it applies on every path,
    # including failed searches, and never after the final beach.
    needs_pause = False

    for i, beach in enumerate(beaches, 1):
        if needs_pause:
            # Rate limiting - be respectful to servers
            time.sleep(2)  # 2 second delay between beaches
            needs_pause = False

        try:
            beach_name = beach['name']
            city = beach['city']
        except (KeyError, TypeError):
            # One malformed entry must not abort the whole run.
            print(f"\n[{i}/{total}] ❌ Skipping entry without 'name'/'city' fields")
            failed_count += 1
            continue

        print(f"\n[{i}/{total}] Processing: {beach_name}, {city}")

        # Search for photos using DuckDuckGo
        needs_pause = True
        photos = search_duckduckgo_images(beach_name, city)

        if not photos:
            print(f"❌ No unique photos found for {beach_name}")
            failed_count += 1
            continue

        # Try candidates in order and keep the first that downloads
        photo_downloaded = False
        for photo_data in photos:
            filename = download_image_from_url(photo_data['url'], beach_name, city, upload_dir)

            if filename:
                photo_downloaded = True
                success_count += 1
                break

        if not photo_downloaded:
            print(f"❌ Failed to download unique photo for {beach_name}")
            failed_count += 1

    # Guard against an empty beach list, which would divide by zero.
    success_rate = success_count / total * 100 if total else 0.0

    print("\n📊 DOWNLOAD COMPLETE:")
    print(f"✅ Successful: {success_count} beaches")
    print(f"❌ Failed: {failed_count} beaches")
    print(f"📈 Success rate: {success_rate:.1f}%")

    if success_count > 0:
        print(f"\n🎉 {success_count} unique beach photos successfully downloaded!")
        print(f"📁 Photos saved to: {upload_dir}")

# Run the download process only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()