#!/usr/bin/env python3
"""
Oprava formátu pro správné rozdělení sloupců v Google Sheets
"""

import requests

def create_csv_format():
    """Vytvoř CSV formát který Google Sheets správně rozdělí"""
    
    print("📥 Stahuji data a připravuji CSV formát...")
    
    url = "https://download.geonames.org/export/dump/countryInfo.txt"
    
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        
        lines = response.text.strip().split('\n')
        
        # Headers s lepšími názvy
        headers = [
            "ISO2",
            "ISO3", 
            "Numeric",
            "FIPS",
            "Country",
            "Capital",
            "Area_km2",
            "Population",
            "Continent",
            "Domain",
            "Currency_Code",
            "Currency_Name",
            "Phone",
            "Postal_Format",
            "Postal_Regex",
            "Languages",
            "GeoName_ID",
            "Neighbours"
        ]
        
        countries_data = []
        
        for line in lines:
            if line.startswith('#') or not line.strip():
                continue
            
            fields = line.split('\t')
            if len(fields) >= 17:
                country_row = []
                for i, field in enumerate(fields[:18]):
                    clean_field = field.strip()
                    
                    # Remove commas and quotes that could break CSV
                    clean_field = clean_field.replace(',', ' ')
                    clean_field = clean_field.replace('"', "'")
                    
                    # Format numbers
                    if i == 6 or i == 7:  # Area or Population
                        if clean_field and clean_field.isdigit():
                            clean_field = f"{int(clean_field):,}".replace(',', ' ')
                    elif i == 8:  # Continent
                        continent_map = {
                            'AF': 'Africa', 'AS': 'Asia', 'EU': 'Europe',
                            'NA': 'North America', 'SA': 'South America', 
                            'OC': 'Oceania', 'AN': 'Antarctica'
                        }
                        clean_field = continent_map.get(clean_field, clean_field)
                    
                    country_row.append(clean_field)
                
                while len(country_row) < len(headers):
                    country_row.append('')
                
                countries_data.append(country_row[:len(headers)])
        
        countries_data.sort(key=lambda x: x[4])  # Sort by country name
        
        print(f"✅ Zpracováno {len(countries_data)} zemí")
        return headers, countries_data
        
    except Exception as e:
        print(f"❌ Chyba: {e}")
        return None, None

def create_multiple_formats(headers, countries):
    """Vytvoř několik formátů pro různé způsoby kopírování"""
    
    # Format 1: CSV s čárkami (nejspolehlivější)
    print("💾 Vytvářím CSV formát...")
    csv_lines = []
    csv_lines.append(','.join(f'"{header}"' for header in headers))
    
    for country in countries:
        csv_row = []
        for cell in country:
            # Escape quotes and wrap in quotes
            escaped_cell = str(cell).replace('"', '""')
            csv_row.append(f'"{escaped_cell}"')
        csv_lines.append(','.join(csv_row))
    
    with open('countries_CSV_format.csv', 'w', encoding='utf-8') as f:
        f.write('\n'.join(csv_lines))
    
    # Format 2: Semicolon separated (evropský standard)
    print("💾 Vytvářím SSV formát (středníky)...")
    ssv_lines = []
    ssv_lines.append(';'.join(headers))
    
    for country in countries:
        ssv_lines.append(';'.join(str(cell) for cell in country))
    
    with open('countries_SSV_format.csv', 'w', encoding='utf-8') as f:
        f.write('\n'.join(ssv_lines))
    
    # Format 3: Pipe separated (backup)
    print("💾 Vytvářím PSV formát (|)...")
    psv_lines = []
    psv_lines.append('|'.join(headers))
    
    for country in countries:
        psv_lines.append('|'.join(str(cell) for cell in country))
    
    with open('countries_PSV_format.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(psv_lines))
    
    return csv_lines, ssv_lines, psv_lines

def create_google_sheets_ready():
    """Vytvoř speciální formát optimalizovaný pro Google Sheets"""
    
    print("🎯 Vytvářím Google Sheets optimalizovaný formát...")
    
    url = "https://download.geonames.org/export/dump/countryInfo.txt"
    response = requests.get(url, timeout=30)
    lines = response.text.strip().split('\n')
    
    # Simplified headers (short, no special chars)
    simple_headers = [
        "ISO2", "ISO3", "Number", "FIPS", "Country", "Capital", 
        "Area", "Population", "Continent", "Domain", "Currency", 
        "CurrencyName", "Phone", "PostalFormat", "Languages", "Neighbours"
    ]
    
    sheets_data = []
    sheets_data.append('\t'.join(simple_headers))  # Use tabs
    
    for line in lines:
        if line.startswith('#') or not line.strip():
            continue
        
        fields = line.split('\t')
        if len(fields) >= 17:
            # Take only the most important fields
            row = [
                fields[0].strip(),  # ISO2
                fields[1].strip(),  # ISO3
                fields[2].strip(),  # Number
                fields[3].strip(),  # FIPS
                fields[4].strip(),  # Country
                fields[5].strip(),  # Capital
                fields[6].strip(),  # Area
                fields[7].strip(),  # Population
                fields[8].strip(),  # Continent
                fields[9].strip(),  # Domain
                fields[10].strip(), # Currency
                fields[11].strip(), # Currency Name
                fields[12].strip(), # Phone
                fields[13].strip(), # Postal Format
                fields[15].strip() if len(fields) > 15 else '', # Languages
                fields[17].strip() if len(fields) > 17 else ''  # Neighbours
            ]
            
            # Clean special characters that might break parsing
            cleaned_row = []
            for cell in row:
                clean = cell.replace('\r', '').replace('\n', ' ')
                # Remove any remaining tabs that could mess up formatting
                clean = clean.replace('\t', ' ')
                cleaned_row.append(clean)
            
            sheets_data.append('\t'.join(cleaned_row))
    
    # Save with different encoding to ensure compatibility
    with open('countries_GOOGLE_SHEETS_ready.txt', 'w', encoding='utf-8-sig') as f:
        f.write('\n'.join(sheets_data))
    
    return sheets_data

def main():
    """Hlavní funkce s několika formáty"""
    
    print("🔧 OPRAVA FORMÁTU PRO GOOGLE SHEETS")
    print("=" * 50)
    
    # Download data
    headers, countries = create_csv_format()
    
    if not countries:
        print("❌ Nepodařilo se získat data")
        return
    
    # Create multiple formats
    csv_lines, ssv_lines, psv_lines = create_multiple_formats(headers, countries)
    
    # Create Google Sheets optimized format
    sheets_data = create_google_sheets_ready()
    
    print("\n" + "=" * 50)
    print("✅ VYTVOŘENO NĚKOLIK FORMÁTŮ!")
    print("=" * 50)
    
    print("\n📊 VYTVOŘENÉ SOUBORY:")
    print("1. 🥇 countries_GOOGLE_SHEETS_ready.txt  (DOPORUČENO)")
    print("2. 📊 countries_CSV_format.csv           (CSV s čárkami)")
    print("3. 🇪🇺 countries_SSV_format.csv           (evropské středníky)")
    print("4. 🔧 countries_PSV_format.txt           (pipe separated)")
    
    print(f"\n📈 STATISTIKY:")
    print(f"   Zemí: {len(countries)}")
    print(f"   Sloupců: {len(headers)}")
    
    print(f"\n🎯 DOPORUČENÝ POSTUP:")
    print("1. ⭐ Zkus první: countries_GOOGLE_SHEETS_ready.txt")
    print("   - Otevři soubor")
    print("   - Ctrl+A → Ctrl+C")
    print("   - Google Sheets → A1 → Ctrl+V")
    print()
    print("2. 🔄 Pokud nefunguje, zkus: countries_CSV_format.csv")
    print("   - Otevři v Excelu nebo Notepadu")
    print("   - Ctrl+A → Ctrl+C")
    print("   - Google Sheets → A1 → Ctrl+V")
    print()
    print("3. 🇪🇺 Pro evropské nastavení: countries_SSV_format.csv")
    
    print(f"\n📋 NÁHLED prvních 3 zemí (GOOGLE_SHEETS formát):")
    print("=" * 50)
    for i, line in enumerate(sheets_data[:4]):
        if i == 0:
            print(f"HLAVIČKY: {line}")
            print("-" * 30)
        else:
            fields = line.split('\t')
            print(f"{i}. {fields[4]} ({fields[0]}) - {fields[5]}")
    
    print("\n🚀 ZKUS GOOGLE_SHEETS_ready.txt JAKO PRVNÍ!")

if __name__ == "__main__":
    main()