#!/usr/bin/env python3
"""
GEO URL Data Analyzer
Analyzes CSV data to find positions of moy.auraodin.com URLs within country groups
"""

import csv
import sys
import os
from collections import defaultdict
import logging

# Log INFO and above both to a file in the working directory and to the console.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
_LOG_HANDLERS = [
    logging.FileHandler('analyzer.log'),  # persistent log for later inspection
    logging.StreamHandler(),              # live console output
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_LOG_HANDLERS)
# Module-level logger used throughout this script.
logger = logging.getLogger(__name__)

class GeoUrlAnalyzer:
    """Analyze a CSV export to find, per country, the best (lowest) ranked
    moy.auraodin.com URL that does not carry the 'bid=1650' parameter.

    Typical usage:
        analyzer = GeoUrlAnalyzer('data.csv')
        analyzer.load_csv()
        analyzer.analyze_data()
        analyzer.save_results()
        analyzer.print_summary()
    """

    def __init__(self, csv_file_path):
        """
        Args:
            csv_file_path: Path to the CSV file to analyze.
        """
        self.csv_file_path = csv_file_path
        self.data = []      # parsed rows (list of dicts), filled by load_csv()
        self.headers = []   # CSV column names, filled by load_csv()
        self.results = []   # per-country findings, filled by analyze_data()

    @staticmethod
    def _is_target_url(url_value):
        """Return True if url_value is a moy.auraodin.com URL without bid=1650."""
        return 'moy.auraodin.com' in url_value and 'bid=1650' not in url_value

    def load_csv(self):
        """Load and parse the CSV file into self.data / self.headers.

        The delimiter (',' vs ';') is auto-detected from a 1 KiB sample.

        Returns:
            True on success, False on any error (logged, not raised).
        """
        try:
            logger.info(f"Loading CSV file: {self.csv_file_path}")

            # utf-8-sig transparently strips a UTF-8 BOM (common in Excel
            # exports) that would otherwise pollute the first header name.
            with open(self.csv_file_path, 'r', encoding='utf-8-sig') as file:
                # Try to detect delimiter
                sample = file.read(1024)
                file.seek(0)

                # More semicolons than commas implies a ';'-delimited file
                # (typical for European-locale exports).
                delimiter = ';' if sample.count(';') > sample.count(',') else ','

                reader = csv.DictReader(file, delimiter=delimiter)
                self.headers = reader.fieldnames

                # An empty file yields fieldnames == None; bail out early
                # instead of crashing later when iterating self.headers.
                if not self.headers:
                    logger.error("CSV file is empty or has no header row")
                    return False

                logger.info(f"Detected columns: {self.headers}")

                self.data = list(reader)

            logger.info(f"Loaded {len(self.data)} rows")
            return True

        except Exception as e:
            logger.error(f"Error loading CSV: {e}")
            return False

    def _sample_values(self, header):
        """Return up to 10 non-empty sample values from the given column."""
        return [row[header] for row in self.data[:10] if row[header]]

    def identify_columns(self):
        """Identify the country, URL and order columns.

        First matches header names against known keywords ('zeme'/'poradi'
        are Czech for country/order); for anything still unidentified, falls
        back to guessing from the shape of the data itself.

        Returns:
            Tuple (country_column, url_column, order_column); each element
            is a header name or None if it could not be identified.
        """
        country_column = None
        url_column = None
        order_column = None

        # Pass 1: keyword match on header names.
        for header in self.headers:
            header_lower = header.lower()

            # Country column
            if any(term in header_lower for term in ['zeme', 'country', 'geo']):
                country_column = header
                logger.info(f"Identified country column: {header}")

            # URL column
            if any(term in header_lower for term in ['url', 'link']):
                url_column = header
                logger.info(f"Identified URL column: {header}")

            # Order column
            if any(term in header_lower for term in ['poradi', 'order', 'position']):
                order_column = header
                logger.info(f"Identified order column: {header}")

        # Pass 2: heuristics on the data for anything not found by name.
        if not country_column:
            # Country codes are short ('CZ', 'DE', ...): all values <= 5 chars.
            for header in self.headers:
                sample_values = self._sample_values(header)
                if sample_values and all(len(val) <= 5 for val in sample_values):
                    country_column = header
                    logger.info(f"Guessed country column: {header}")
                    break

        if not url_column:
            # URL-like data contains 'http' or at least a dot.
            for header in self.headers:
                sample_values = self._sample_values(header)
                if sample_values and any('http' in val or '.' in val for val in sample_values):
                    url_column = header
                    logger.info(f"Guessed URL column: {header}")
                    break

        if not order_column:
            # Order columns hold purely numeric strings. isdigit() exists
            # only on str; guard explicitly instead of the previous bare
            # except, which could hide real bugs.
            for header in self.headers:
                sample_values = self._sample_values(header)
                if sample_values and all(
                        isinstance(val, str) and val.isdigit()
                        for val in sample_values):
                    order_column = header
                    logger.info(f"Guessed order column: {header}")
                    break

        return country_column, url_column, order_column

    def analyze_data(self):
        """Run the analysis and populate self.results.

        For each country, rows are sorted by the order column and the best
        (lowest-numbered) position of a matching URL is recorded.

        Returns:
            True on success; False if the required columns could not be
            identified or no matching rows exist.
        """
        logger.info("Starting data analysis...")

        country_col, url_col, order_col = self.identify_columns()

        if not all([country_col, url_col, order_col]):
            logger.error("Could not identify all required columns:")
            logger.error(f"  Country: {country_col}")
            logger.error(f"  URL: {url_col}")
            logger.error(f"  Order: {order_col}")
            return False

        # Find rows with moy.auraodin.com BUT NOT bid=1650
        moy_rows = [row for row in self.data
                    if self._is_target_url(str(row.get(url_col, '')))]

        logger.info(f"Found {len(moy_rows)} rows with 'moy.auraodin.com' AND NOT 'bid=1650'")

        if not moy_rows:
            logger.warning("No rows with 'moy.auraodin.com' AND NOT 'bid=1650' found!")
            return False

        # Group data by country
        countries_data = defaultdict(list)
        for row in self.data:
            # DictReader fills missing trailing fields with None, so guard
            # against None before calling .strip() (was a crash before).
            country = (row.get(country_col) or '').strip()
            if country:
                countries_data[country].append(row)

        logger.info(f"Found {len(countries_data)} unique countries")

        # Process each country
        for country, rows in countries_data.items():
            # Sort rows by order
            try:
                sorted_rows = sorted(rows, key=lambda x: int(x.get(order_col) or 0))
            except ValueError:
                # Non-numeric order values: fall back to lexicographic sort
                # (note: '10' sorts before '2' in that case).
                sorted_rows = sorted(rows, key=lambda x: str(x.get(order_col, '')))

            # All 1-based positions (in sorted order) of matching rows.
            moy_positions = [
                {
                    'position': i,
                    'url': row.get(url_col, ''),
                    'original_order': row.get(order_col, ''),
                }
                for i, row in enumerate(sorted_rows, 1)
                if self._is_target_url(str(row.get(url_col, '')))
            ]

            if moy_positions:
                # Positions are generated in ascending order, so the first
                # entry is the best (lowest-numbered) one for this country.
                best_position = moy_positions[0]

                result = {
                    'country': country,
                    'position': best_position['position'],
                    'total_rows': len(sorted_rows),
                    'moy_url': best_position['url'],
                    'original_order': best_position['original_order']
                }

                self.results.append(result)
                logger.info(f"Country {country}: BEST moy.auraodin.com-NOT-bid=1650 at position {best_position['position']}/{len(sorted_rows)} (order: {best_position['original_order']}) - found {len(moy_positions)} total matches")

        logger.info(f"Analysis complete. Found positions for {len(self.results)} countries")
        return True

    def save_results(self):
        """Save self.results to 'analysis_results.csv' for further processing.

        Returns:
            The output file name on success, None on error (logged).
        """
        results_file = 'analysis_results.csv'

        try:
            with open(results_file, 'w', newline='', encoding='utf-8') as file:
                fieldnames = ['country', 'position', 'total_rows', 'moy_url', 'original_order']
                writer = csv.DictWriter(file, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(self.results)

            logger.info(f"Results saved to {results_file}")
            return results_file

        except Exception as e:
            logger.error(f"Error saving results: {e}")
            return None

    def print_summary(self):
        """Print a human-readable summary of self.results to stdout."""
        print("\n" + "="*60)
        print("📊 ANALYSIS SUMMARY")
        print("="*60)

        print(f"Total rows processed: {len(self.data)}")
        print(f"Countries with moy.auraodin.com: {len(self.results)}")

        print(f"\n📋 Results by country:")
        print("-" * 40)

        for result in sorted(self.results, key=lambda x: x['country']):
            print(f"{result['country']:8} | Position: {result['position']:2}/{result['total_rows']:2} | Order: {result['original_order']}")

        print("\n✅ Analysis completed successfully!")

def main():
    """Command-line entry point.

    Expects exactly one argument (path to the CSV file), runs the full
    load -> analyze -> save -> summarize pipeline, and exits with status 1
    on any failure.
    """
    args = sys.argv[1:]
    if len(args) != 1:
        print("Usage: python3 analyzer.py <csv_file_path>")
        sys.exit(1)

    csv_file = args[0]
    if not os.path.exists(csv_file):
        print(f"Error: File {csv_file} not found")
        sys.exit(1)

    analyzer = GeoUrlAnalyzer(csv_file)

    # Each pipeline stage reports failure via its boolean/None return value.
    if not analyzer.load_csv():
        print("Error: Could not load CSV file")
        sys.exit(1)
    if not analyzer.analyze_data():
        print("Error: Analysis failed")
        sys.exit(1)

    results_file = analyzer.save_results()
    analyzer.print_summary()

    if results_file:
        print(f"\n💾 Results saved to: {results_file}")
        print("🌐 Ready for Google Sheets upload!")

if __name__ == "__main__":
    main()