"""
Analyzes images and selects the highest-resolution version of each one.
Leaves lower-resolution duplicates out of the result and writes an optimized
image list (JSON manifest); no files are deleted.
"""
import os
from pathlib import Path
from PIL import Image
import json

def get_image_info(image_path):
    """Read an image's pixel dimensions and its size on disk.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        A dict with the keys ``width``, ``height``, ``file_size`` (as
        reported by ``os.path.getsize``) and ``resolution``
        (``width * height``), or ``None`` if reading the file failed for
        any reason. Failures are reported on stdout.
    """
    try:
        with Image.open(image_path) as picture:
            w, h = picture.size
            details = dict(
                width=w,
                height=h,
                file_size=os.path.getsize(image_path),
                resolution=w * h,
            )
    except Exception as err:
        # Best-effort: report the problem and let the caller skip this file.
        print(f"Error reading {image_path}: {err}")
        return None
    return details

def group_similar_images(folder_path):
    """Bucket the image files of a folder by their shared name prefix.

    Only entries ending in .jpg, .jpeg or .png (case-insensitive) are
    considered. The group key is the part of the file name that precedes
    the first '.', '_' or '-', so "01.jpg", "01_1.jpg" and "01-2.jpg" all
    land in group "01".

    Args:
        folder_path: Directory whose entries are listed.

    Returns:
        A dict mapping each group key to a list of dicts, one per readable
        image, holding 'filename', 'path' and the fields returned by
        get_image_info. A key whose files could not be read maps to an
        empty list.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    groups = {}

    for entry in os.listdir(folder_path):
        if not entry.lower().endswith(image_suffixes):
            continue

        stem = entry.split('.')[0]
        base_name = stem.split('_')[0].split('-')[0]
        # The bucket is registered before the file is read, so unreadable
        # files still leave their (possibly empty) group behind.
        bucket = groups.setdefault(base_name, [])

        full_path = os.path.join(folder_path, entry)
        details = get_image_info(full_path)
        if details:
            bucket.append({'filename': entry, 'path': full_path, **details})

    return groups

def select_best_images(groups):
    """Pick the winning filename for every non-empty group.

    Args:
        groups: Mapping of base name to a list of image dicts, each with
            at least 'filename', 'resolution' and 'file_size'.

    Returns:
        A dict mapping base name to the filename of the image with the
        largest 'resolution'; equal resolutions are decided by the larger
        'file_size', and a full tie keeps the earliest entry in the list.
        Groups with no images are left out.
    """
    def quality(candidate):
        # Pixel count decides; byte size only breaks ties.
        return candidate['resolution'], candidate['file_size']

    return {
        base_name: max(candidates, key=quality)['filename']
        for base_name, candidates in groups.items()
        if candidates
    }

def analyze_all_folders(base_path='downloaded_images'):
    """Run the grouping and selection steps on every subfolder of base_path.

    Non-directory entries directly under base_path are ignored. Per-folder
    statistics are printed as each folder is processed.

    Args:
        base_path: Directory whose immediate subdirectories are analyzed.

    Returns:
        A dict mapping folder name to a dict with 'total_files' (number of
        readable images), 'unique_images' (number of groups that produced
        a winner) and 'best_images' (base name -> chosen filename).
    """
    report = {}

    for entry in os.listdir(base_path):
        candidate_dir = os.path.join(base_path, entry)
        if os.path.isdir(candidate_dir):
            print(f"\nAnalyzing: {entry}")
            grouped = group_similar_images(candidate_dir)
            winners = select_best_images(grouped)

            file_count = sum(map(len, grouped.values()))
            unique_count = len(winners)
            report[entry] = {
                'total_files': file_count,
                'unique_images': unique_count,
                'best_images': winners,
            }

            print(f"  Total files: {file_count}")
            print(f"  Unique images: {unique_count}")
            print(f"  Duplicates removed: {file_count - unique_count}")

    return report

def create_image_manifest(results, output_file='image_manifest.json'):
    """Write a JSON manifest listing the chosen images of each folder.

    Args:
        results: Mapping of folder name to a dict whose 'best_images'
            entry maps base name -> chosen filename.
        output_file: Path of the JSON file to write (UTF-8; an existing
            file is overwritten).

    Returns:
        The manifest that was written: a dict mapping folder name to the
        list of chosen filenames, ordered by the number formed from the
        decimal digits in each base name. Base names without digits count
        as 0, and equal numbers keep their original relative order.
    """
    def numeric_order(item):
        # str.isdecimal, not str.isdigit: isdigit also accepts characters
        # such as superscript digits that int() cannot parse, which made
        # the sort raise ValueError on such base names.
        digits = ''.join(filter(str.isdecimal, item[0]))
        return int(digits or '0')

    manifest = {}

    for folder, data in results.items():
        sorted_images = sorted(data['best_images'].items(), key=numeric_order)
        manifest[folder] = [filename for _, filename in sorted_images]

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(manifest, f, indent=2, ensure_ascii=False)

    print(f"\n✓ Image manifest created: {output_file}")
    return manifest

if __name__ == "__main__":
    # Script entry point: analyze every folder under the default base path,
    # write the manifest, then print overall totals.
    print("="*70)
    print("IMAGE ANALYSIS - Selecting Best Resolution Images")
    print("="*70)

    results = analyze_all_folders()
    manifest = create_image_manifest(results)

    # Print summary
    total_original = sum(r['total_files'] for r in results.values())
    total_unique = sum(r['unique_images'] for r in results.values())
    total_duplicates = total_original - total_unique

    # When no readable image was found there is nothing to take a ratio
    # of; report 0.0% instead of failing with ZeroDivisionError.
    # NOTE: this percentage is a share of the file count, not of bytes.
    if total_original:
        saved_percent = total_duplicates / total_original * 100
    else:
        saved_percent = 0.0

    print("\n" + "="*70)
    print("SUMMARY")
    print("="*70)
    print(f"Total original files: {total_original}")
    print(f"Unique images (best resolution): {total_unique}")
    print(f"Duplicates/lower quality removed: {total_duplicates}")
    print(f"Space saved: {saved_percent:.1f}%")
    print("="*70)

