""" Airport data resolution by country. Handles loading and filtering airport data from OpenFlights dataset. """ import json import csv from pathlib import Path from typing import Optional import urllib.request # Try to import pycountry, fall back to manual mapping if not available try: import pycountry HAS_PYCOUNTRY = True except ImportError: HAS_PYCOUNTRY = False AIRPORTS_JSON_PATH = Path(__file__).parent / "data" / "airports_by_country.json" OPENFLIGHTS_URL = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat" # Manual mapping for common countries (fallback if pycountry not available) COUNTRY_NAME_TO_ISO = { "Germany": "DE", "United States": "US", "United Kingdom": "GB", "France": "FR", "Spain": "ES", "Italy": "IT", "Netherlands": "NL", "Belgium": "BE", "Austria": "AT", "Switzerland": "CH", "Poland": "PL", "Czech Republic": "CZ", "Denmark": "DK", "Sweden": "SE", "Norway": "NO", "Finland": "FI", "Ireland": "IE", "Portugal": "PT", "Greece": "GR", "Turkey": "TR", "Japan": "JP", "China": "CN", "South Korea": "KR", "India": "IN", "Australia": "AU", "New Zealand": "NZ", "Canada": "CA", "Mexico": "MX", "Brazil": "BR", "Argentina": "AR", "Chile": "CL", "Colombia": "CO", "Peru": "PE", "South Africa": "ZA", "Egypt": "EG", "United Arab Emirates": "AE", "Thailand": "TH", "Singapore": "SG", "Malaysia": "MY", "Indonesia": "ID", "Philippines": "PH", "Vietnam": "VN", } def country_name_to_iso_code(country_name: str) -> Optional[str]: """ Convert country name to ISO 2-letter code. Args: country_name: Full country name Returns: ISO 2-letter code or None if not found """ if HAS_PYCOUNTRY: try: country = pycountry.countries.search_fuzzy(country_name)[0] return country.alpha_2 except (LookupError, AttributeError): pass # Fallback to manual mapping return COUNTRY_NAME_TO_ISO.get(country_name) def download_and_build_airport_data(force_rebuild: bool = False) -> None: """ Download OpenFlights dataset and build airports_by_country.json. Filters to airports with valid IATA codes only. Groups by ISO 2-letter country code. Args: force_rebuild: If True, rebuild even if file exists """ if AIRPORTS_JSON_PATH.exists() and not force_rebuild: return print(f"Downloading OpenFlights airport data from {OPENFLIGHTS_URL}...") # Download the data response = urllib.request.urlopen(OPENFLIGHTS_URL) data = response.read().decode('utf-8') # Parse CSV # Format: AirportID,Name,City,Country,IATA,ICAO,Lat,Lon,Alt,Timezone,DST,Tz,Type,Source airports_by_country = {} for line in data.strip().split('\n'): # Use csv reader to handle quoted fields properly row = next(csv.reader([line])) if len(row) < 5: continue airport_id = row[0] name = row[1] city = row[2] country_name = row[3] iata = row[4] icao = row[5] if len(row) > 5 else "" # Skip if no valid IATA code if not iata or iata == "\\N" or len(iata) != 3: continue # Skip if country name is missing if not country_name or country_name == "\\N": continue # Convert country name to ISO code country_code = country_name_to_iso_code(country_name) if not country_code: # Skip if we can't map the country continue # Build airport entry airport = { "iata": iata, "name": name, "city": city, "icao": icao if icao != "\\N" else "" } # Group by country ISO code if country_code not in airports_by_country: airports_by_country[country_code] = [] airports_by_country[country_code].append(airport) # Ensure data directory exists AIRPORTS_JSON_PATH.parent.mkdir(parents=True, exist_ok=True) # Write to JSON file with open(AIRPORTS_JSON_PATH, 'w', encoding='utf-8') as f: json.dump(airports_by_country, f, indent=2, ensure_ascii=False) total_airports = sum(len(v) for v in airports_by_country.values()) print(f"✓ Built airport data: {len(airports_by_country)} countries, {total_airports} airports") def get_airports_for_country(country_code: str) -> list[dict]: """ Get list of airports for a given country code. Args: country_code: ISO 2-letter country code (e.g., "DE", "US") Returns: List of airport dicts with keys: iata, name, city, icao Raises: FileNotFoundError: If airports data file doesn't exist ValueError: If country code not found """ # Ensure data file exists if not AIRPORTS_JSON_PATH.exists(): download_and_build_airport_data() # Load from JSON with open(AIRPORTS_JSON_PATH, 'r', encoding='utf-8') as f: airports_by_country = json.load(f) country_code = country_code.upper() if country_code not in airports_by_country: available = sorted(airports_by_country.keys())[:10] raise ValueError( f"Country code '{country_code}' not found. " f"Available codes (sample): {', '.join(available)}..." ) return airports_by_country[country_code] def resolve_airport_list(country: Optional[str], from_airports: Optional[str]) -> list[dict]: """ Resolve the final list of origin airports to scan. Args: country: ISO 2-letter country code (if --from not provided) from_airports: Comma-separated IATA codes (overrides country) Returns: List of airport dicts with keys: iata, name, city Raises: ValueError: If neither country nor from_airports provided, or if invalid """ if from_airports: # Parse custom airport list iata_codes = [code.strip().upper() for code in from_airports.split(',')] # Create minimal airport dicts return [{"iata": code, "name": code, "city": ""} for code in iata_codes] if country: return get_airports_for_country(country) raise ValueError("Either --country or --from must be provided") if __name__ == "__main__": # Build the dataset if run directly download_and_build_airport_data(force_rebuild=True) print("\nSample data for Germany (DE):") de_airports = get_airports_for_country("DE") for airport in de_airports[:5]: print(f" {airport['iata']} - {airport['name']} ({airport['city']})") print(f" ... and {len(de_airports) - 5} more")