#!/usr/bin/env python3
"""
GeoNames to Google Sheets Data Synchronizer
============================================

This script downloads country data from GeoNames.org and uploads it to a Google Sheets document.
Data source: https://download.geonames.org/export/dump/countryInfo.txt
Target: Google Sheets spreadsheet

Author: Claude Code Assistant
Created: 2025-01-13
"""

import requests
import csv
import io
import logging
from datetime import datetime
from typing import List, Dict, Optional
import json

# Configure logging: INFO level, timestamped lines, written both to a
# log file (for post-mortem inspection of scheduled runs) and to the
# console (for interactive use).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Persist all log output to a file in the working directory.
        logging.FileHandler('geonames_sync.log'),
        # Mirror the same output to the terminal.
        logging.StreamHandler()
    ]
)
# Module-level logger shared by every class and function in this script.
logger = logging.getLogger(__name__)

class GeoNamesDownloader:
    """Downloads and parses the GeoNames countryInfo.txt country dump.

    Typical use: call download_country_data() to fetch and parse the
    data, then save_to_csv() to write a local CSV backup.
    """

    # Column names of the GeoNames countryInfo.txt dump, in file order.
    # The dump has 19 tab-separated columns; the last one
    # (equivalent_fips_code) is frequently empty/absent.
    _FIELD_NAMES = (
        'iso_alpha2', 'iso_alpha3', 'iso_numeric', 'fips_code',
        'country_name', 'capital', 'area_km2', 'population', 'continent',
        'tld', 'currency_code', 'currency_name', 'phone',
        'postal_code_format', 'postal_code_regex', 'languages',
        'geoname_id', 'neighbours', 'equivalent_fips_code',
    )

    def __init__(self):
        # URL of the tab-delimited country dump published by GeoNames.
        self.country_info_url = "https://download.geonames.org/export/dump/countryInfo.txt"
        # Parsed rows (list of dicts); populated by download_country_data().
        self.data = []

    def _parse_country_lines(self, lines: List[str]) -> List[Dict]:
        """Parse tab-delimited dump lines into per-country dicts.

        Comment lines (starting with '#') and blank lines are skipped;
        the comment line containing 'ISO' (the column header) is logged
        for reference. Rows with fewer than 18 fields are ignored.
        """
        processed = []
        for line in lines:
            if line.startswith('#'):
                if 'ISO' in line:
                    # This is the header comment describing the columns.
                    logger.info(f"Header info: {line}")
                continue
            if not line.strip():
                continue

            fields = line.split('\t')
            if len(fields) >= 18:
                # Pad to the full 19 columns so equivalent_fips_code
                # defaults to '' when the trailing field is missing.
                fields += [''] * (len(self._FIELD_NAMES) - len(fields))
                # zip() truncates any surplus columns beyond the known 19.
                processed.append(dict(zip(self._FIELD_NAMES, fields)))
        return processed

    def download_country_data(self) -> List[Dict]:
        """Download and parse country information from GeoNames.

        Returns:
            A list of dicts, one per country, keyed by the names in
            _FIELD_NAMES. Also stored in self.data.

        Raises:
            requests.RequestException: on network or HTTP failure.
        """
        try:
            logger.info("Downloading country data from GeoNames...")
            response = requests.get(self.country_info_url, timeout=30)
            response.raise_for_status()

            processed_data = self._parse_country_lines(
                response.text.strip().split('\n'))

            logger.info(f"Successfully downloaded {len(processed_data)} countries")
            self.data = processed_data
            return processed_data

        except requests.RequestException as e:
            logger.error(f"Error downloading data: {e}")
            raise
        except Exception as e:
            logger.error(f"Error processing data: {e}")
            raise

    def save_to_csv(self, filename: str = "countries_data.csv") -> str:
        """Save downloaded data to a CSV file.

        Args:
            filename: Destination path for the CSV file.

        Returns:
            The path the file was written to.

        Raises:
            ValueError: if download_country_data() has not been run yet.
        """
        if not self.data:
            raise ValueError("No data to save. Run download_country_data() first.")

        # BUG FIX: this previously wrote to a hard-coded server path that
        # even contained the literal string "(unknown)", silently ignoring
        # the filename argument. Honor the caller-supplied path instead.
        filepath = filename

        try:
            with open(filepath, 'w', newline='', encoding='utf-8') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=self.data[0].keys())
                writer.writeheader()
                writer.writerows(self.data)

            logger.info(f"Data saved to {filepath}")
            return filepath

        except Exception as e:
            logger.error(f"Error saving CSV: {e}")
            raise

class GoogleSheetsUploader:
    """Pushes tabular country data into a Google Sheets spreadsheet.

    The actual Google API integration is a placeholder; see the
    commented-out code and the setup instructions it references.
    """

    def __init__(self, spreadsheet_id: str):
        # Target spreadsheet and the credentials file the API setup expects.
        self.spreadsheet_id = spreadsheet_id
        self.credentials_file = "google_credentials.json"
        # Will hold the Google Sheets API service object once configured.
        self.service = None

    def setup_credentials(self):
        """Prepare Google Sheets API credentials (placeholder).

        Returns True when setup succeeds, False on error.
        """
        try:
            logger.info("Setting up Google Sheets API credentials...")
            logger.warning("Google API credentials setup required - see setup instructions")

            # Placeholder for actual Google API setup
            # from google.oauth2.credentials import Credentials
            # from googleapiclient.discovery import build

            return True

        except Exception as e:
            logger.error(f"Error setting up credentials: {e}")
            return False

    def upload_data(self, data: List[Dict], sheet_name: str = "Sheet1") -> bool:
        """Upload rows of country data to the configured spreadsheet.

        Args:
            data: Records to upload; the first record's keys become the
                header row.
            sheet_name: Tab within the spreadsheet to write to.

        Returns:
            True on success, False on any failure (never raises).
        """
        try:
            # Guard clauses: credentials first, then non-empty input.
            if not self.setup_credentials():
                logger.error("Failed to setup credentials")
                return False

            logger.info(f"Uploading {len(data)} rows to Google Sheets...")

            if not data:
                logger.warning("No data to upload")
                return False

            # Header row followed by one stringified row per record;
            # missing keys become empty cells.
            columns = list(data[0].keys())
            payload = [columns] + [
                [str(record.get(col, '')) for col in columns]
                for record in data
            ]

            logger.info(f"Prepared {len(payload)} rows (including header) for upload")

            # Placeholder for actual Google Sheets API call
            # sheet = self.service.spreadsheets()
            # result = sheet.values().update(
            #     spreadsheetId=self.spreadsheet_id,
            #     range=f"{sheet_name}!A1",
            #     valueInputOption="RAW",
            #     body={"values": values}
            # ).execute()

            logger.info("Data upload completed successfully")
            return True

        except Exception as e:
            logger.error(f"Error uploading data: {e}")
            return False

def main():
    """Entry point: download GeoNames country data, back it up to CSV,
    and push it to the configured Google Sheets document."""
    logger.info("Starting GeoNames to Google Sheets synchronization")

    # Target spreadsheet (configuration).
    GOOGLE_SHEETS_ID = "1b4GKYaRVh_V3dN4Ok01EiUHdpoQxr0a_6ptoSZWsj_o"

    try:
        # Step 1: fetch and parse the country dump.
        fetcher = GeoNamesDownloader()
        countries = fetcher.download_country_data()

        # Step 2: keep a local CSV copy for backup/inspection.
        backup_path = fetcher.save_to_csv()
        logger.info(f"Backup CSV created: {backup_path}")

        # Step 3: push the parsed rows to Google Sheets.
        uploaded = GoogleSheetsUploader(GOOGLE_SHEETS_ID).upload_data(countries)

        if uploaded:
            logger.info("Synchronization completed successfully!")
        else:
            logger.error("Synchronization failed during upload")

    except Exception as e:
        logger.error(f"Synchronization failed: {e}")
        raise

if __name__ == "__main__":
    main()