#!/usr/bin/env python3
"""
Formátování US států pro Google Sheets
"""

def format_us_states():
    """Zpracuj a formátuj US státy pro Google Sheets"""
    
    raw_data = """US,AE,"Armed Forces Europe, Middle East, & Canada"
US,AK,"Alaska"
US,AL,"Alabama"
US,AP,"Armed Forces Pacific"
US,AR,"Arkansas"
US,AS,"American Samoa"
US,AZ,"Arizona"
US,CA,"California"
US,CO,"Colorado"
US,CT,"Connecticut"
US,DC,"District of Columbia"
US,DE,"Delaware"
US,FL,"Florida"
US,FM,"Federated States of Micronesia"
US,GA,"Georgia"
US,GU,"Guam"
US,HI,"Hawaii"
US,IA,"Iowa"
US,ID,"Idaho"
US,IL,"Illinois"
US,IN,"Indiana"
US,KS,"Kansas"
US,KY,"Kentucky"
US,LA,"Louisiana"
US,MA,"Massachusetts"
US,MD,"Maryland"
US,ME,"Maine"
US,MH,"Marshall Islands"
US,MI,"Michigan"
US,MN,"Minnesota"
US,MO,"Missouri"
US,MP,"Northern Mariana Islands"
US,MS,"Mississippi"
US,MT,"Montana"
US,NC,"North Carolina"
US,ND,"North Dakota"
US,NE,"Nebraska"
US,NH,"New Hampshire"
US,NJ,"New Jersey"
US,NM,"New Mexico"
US,NV,"Nevada"
US,NY,"New York"
US,OH,"Ohio"
US,OK,"Oklahoma"
US,OR,"Oregon"
US,PA,"Pennsylvania"
US,PW,"Palau"
US,RI,"Rhode Island"
US,SC,"South Carolina"
US,SD,"South Dakota"
US,TN,"Tennessee"
US,TX,"Texas"
US,UT,"Utah"
US,VA,"Virginia"
US,VI,"Virgin Islands"
US,VT,"Vermont"
US,WA,"Washington"
US,WI,"Wisconsin"
US,WV,"West Virginia"
US,WY,"Wyoming" """
    
    print("🇺🇸 Zpracovávám US státy a teritoria...")
    
    lines = raw_data.strip().split('\n')
    processed_states = []
    
    # Headers for the table
    headers = ["Country", "State_Code", "State_Name", "Type", "Region"]
    
    # Helper function to determine type and region
    def get_state_info(code, name):
        # Determine type
        if code in ['AE', 'AP']:
            state_type = "Armed Forces"
            region = "Military"
        elif code in ['AS', 'FM', 'GU', 'MH', 'MP', 'PW', 'VI']:
            state_type = "Territory"
            region = "Territory"
        elif code == 'DC':
            state_type = "Federal District"
            region = "Mid-Atlantic"
        else:
            state_type = "State"
            # Assign regions based on common US regional divisions
            northeast = ['CT', 'ME', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT']
            midwest = ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI']
            south = ['AL', 'AR', 'DE', 'FL', 'GA', 'KY', 'LA', 'MD', 'MS', 'NC', 'OK', 'SC', 'TN', 'TX', 'VA', 'WV']
            west = ['AK', 'AZ', 'CA', 'CO', 'HI', 'ID', 'MT', 'NV', 'NM', 'OR', 'UT', 'WA', 'WY']
            
            if code in northeast:
                region = "Northeast"
            elif code in midwest:
                region = "Midwest"
            elif code in south:
                region = "South"
            elif code in west:
                region = "West"
            else:
                region = "Other"
        
        return state_type, region
    
    # Process each line
    for line in lines:
        # Parse CSV-like format
        parts = line.split(',')
        if len(parts) >= 3:
            country = parts[0].strip()
            state_code = parts[1].strip()
            state_name = parts[2].strip().strip('"')  # Remove quotes
            
            state_type, region = get_state_info(state_code, state_name)
            
            processed_states.append([
                country,
                state_code,
                state_name,
                state_type,
                region
            ])
    
    print(f"✅ Zpracováno {len(processed_states)} států a teritorií")
    return headers, processed_states

def create_ssv_format(headers, states):
    """Vytvoř SSV formát (středníky) pro Google Sheets"""
    
    print("💾 Vytvářím SSV formát pro Google Sheets...")
    
    ssv_lines = []
    
    # Header row
    ssv_lines.append(';'.join(headers))
    
    # Data rows
    for state in states:
        # Clean data for SSV format
        clean_state = []
        for cell in state:
            clean_cell = str(cell).replace(';', ',')  # Replace semicolons with commas
            clean_state.append(clean_cell)
        
        ssv_lines.append(';'.join(clean_state))
    
    # Save to file
    with open('us_states_SSV_ready.csv', 'w', encoding='utf-8') as f:
        f.write('\n'.join(ssv_lines))
    
    print(f"💾 Uloženo do: us_states_SSV_ready.csv")
    return ssv_lines

def create_enhanced_format(headers, states):
    """Vytvoř rozšířený formát s dodatečnými informacemi"""
    
    print("🔧 Vytvářím rozšířený formát...")
    
    # Enhanced headers
    enhanced_headers = [
        "Country",
        "State_Code", 
        "State_Name",
        "State_Type",
        "Region",
        "Capital",
        "Nickname",
        "Statehood_Year"
    ]
    
    # Additional data for major states (sample)
    additional_info = {
        'AL': ('Montgomery', 'Heart of Dixie', '1819'),
        'AK': ('Juneau', 'Last Frontier', '1959'),
        'AZ': ('Phoenix', 'Grand Canyon State', '1912'),
        'AR': ('Little Rock', 'Natural State', '1836'),
        'CA': ('Sacramento', 'Golden State', '1850'),
        'CO': ('Denver', 'Centennial State', '1876'),
        'CT': ('Hartford', 'Constitution State', '1788'),
        'DE': ('Dover', 'First State', '1787'),
        'FL': ('Tallahassee', 'Sunshine State', '1845'),
        'GA': ('Atlanta', 'Peach State', '1788'),
        'HI': ('Honolulu', 'Aloha State', '1959'),
        'ID': ('Boise', 'Gem State', '1890'),
        'IL': ('Springfield', 'Prairie State', '1818'),
        'IN': ('Indianapolis', 'Hoosier State', '1816'),
        'IA': ('Des Moines', 'Hawkeye State', '1846'),
        'KS': ('Topeka', 'Sunflower State', '1861'),
        'KY': ('Frankfort', 'Bluegrass State', '1792'),
        'LA': ('Baton Rouge', 'Pelican State', '1812'),
        'ME': ('Augusta', 'Pine Tree State', '1820'),
        'MD': ('Annapolis', 'Old Line State', '1788'),
        'MA': ('Boston', 'Bay State', '1788'),
        'MI': ('Lansing', 'Great Lakes State', '1837'),
        'MN': ('Saint Paul', 'Land of 10,000 Lakes', '1858'),
        'MS': ('Jackson', 'Magnolia State', '1817'),
        'MO': ('Jefferson City', 'Show Me State', '1821'),
        'MT': ('Helena', 'Big Sky Country', '1889'),
        'NE': ('Lincoln', 'Cornhusker State', '1867'),
        'NV': ('Carson City', 'Silver State', '1864'),
        'NH': ('Concord', 'Live Free or Die', '1788'),
        'NJ': ('Trenton', 'Garden State', '1787'),
        'NM': ('Santa Fe', 'Land of Enchantment', '1912'),
        'NY': ('Albany', 'Empire State', '1788'),
        'NC': ('Raleigh', 'Tar Heel State', '1789'),
        'ND': ('Bismarck', 'Peace Garden State', '1889'),
        'OH': ('Columbus', 'Buckeye State', '1803'),
        'OK': ('Oklahoma City', 'Sooner State', '1907'),
        'OR': ('Salem', 'Beaver State', '1859'),
        'PA': ('Harrisburg', 'Keystone State', '1787'),
        'RI': ('Providence', 'Ocean State', '1790'),
        'SC': ('Columbia', 'Palmetto State', '1788'),
        'SD': ('Pierre', 'Mount Rushmore State', '1889'),
        'TN': ('Nashville', 'Volunteer State', '1796'),
        'TX': ('Austin', 'Lone Star State', '1845'),
        'UT': ('Salt Lake City', 'Beehive State', '1896'),
        'VT': ('Montpelier', 'Green Mountain State', '1791'),
        'VA': ('Richmond', 'Old Dominion', '1788'),
        'WA': ('Olympia', 'Evergreen State', '1889'),
        'WV': ('Charleston', 'Mountain State', '1863'),
        'WI': ('Madison', 'Badger State', '1848'),
        'WY': ('Cheyenne', 'Equality State', '1890'),
        'DC': ('Washington', 'District of Columbia', '1790')
    }
    
    enhanced_states = []
    
    for state in states:
        country, code, name, state_type, region = state
        
        # Get additional info
        if code in additional_info:
            capital, nickname, year = additional_info[code]
        else:
            capital = name  # For territories, capital same as name
            nickname = state_type
            year = 'N/A'
        
        enhanced_states.append([
            country, code, name, state_type, region, capital, nickname, year
        ])
    
    # Create SSV format
    enhanced_lines = []
    enhanced_lines.append(';'.join(enhanced_headers))
    
    for state in enhanced_states:
        clean_state = []
        for cell in state:
            clean_cell = str(cell).replace(';', ',')
            clean_state.append(clean_cell)
        enhanced_lines.append(';'.join(clean_state))
    
    with open('us_states_ENHANCED_SSV.csv', 'w', encoding='utf-8') as f:
        f.write('\n'.join(enhanced_lines))
    
    print(f"💾 Rozšířená verze: us_states_ENHANCED_SSV.csv")
    return enhanced_lines

def main():
    """Hlavní funkce"""
    
    print("🇺🇸 FORMÁTOVÁNÍ US STÁTŮ PRO GOOGLE SHEETS")
    print("=" * 50)
    
    # Process the raw data
    headers, states = format_us_states()
    
    # Create basic SSV format
    ssv_lines = create_ssv_format(headers, states)
    
    # Create enhanced version with more info
    enhanced_lines = create_enhanced_format(headers, states)
    
    print("\n" + "=" * 50)
    print("✅ HOTOVO!")
    print("=" * 50)
    
    print("\n📊 VYTVOŘENÉ SOUBORY:")
    print("1. 🥇 us_states_SSV_ready.csv        (základní verze)")
    print("2. ⭐ us_states_ENHANCED_SSV.csv     (rozšířená s hlavními městy)")
    
    print(f"\n📈 STATISTIKY:")
    print(f"   Států/teritorií: {len(states)}")
    print(f"   Skutečné státy: {len([s for s in states if s[3] == 'State'])}")
    print(f"   Teritoria: {len([s for s in states if s[3] == 'Territory'])}")
    print(f"   Ostatní: {len([s for s in states if s[3] not in ['State', 'Territory']])}")
    
    print(f"\n🎯 KOPÍROVÁNÍ DO GOOGLE SHEETS:")
    print("1. Otevři: us_states_SSV_ready.csv (nebo ENHANCED verzi)")
    print("2. Ctrl+A → Ctrl+C")
    print("3. Google Sheets → A1 → Ctrl+V")
    print("4. ✅ Sloupce se automaticky rozdělí!")
    
    print(f"\n📋 NÁHLED základní verze:")
    print("=" * 30)
    for i, line in enumerate(ssv_lines[:6]):
        if i == 0:
            print(f"HLAVIČKY: {line.replace(';', ' | ')}")
            print("-" * 30)
        else:
            parts = line.split(';')
            print(f"{i}. {parts[2]} ({parts[1]}) - {parts[3]} - {parts[4]}")
    
    print(f"\n... a dalších {len(ssv_lines) - 6} položek")
    
    print("\n🌟 DOPORUČUJI ENHANCED VERZI s hlavními městy a přezdívkami!")

if __name__ == "__main__":
    main()