Files
ciaovolo/flight-comparator/airports.py
domverse ef5a27097d fix: enrich route destination names from airport DB when not stored
Specific-airports mode scans never resolved full airport names — they
stored the IATA code as destination_name. Fixed in two places, backed by a new shared helper:

- airports.py: add lookup_airport(iata) cached helper
- api_server.py: enrich destination_name/city on the fly in the routes
  endpoint when the stored value equals the IATA code (fixes all past scans)
- scan_processor.py: resolve airport names at scan time in specific-airports
  mode using lookup_airport (fixes future scans at the DB level)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 21:04:46 +01:00

256 lines
7.2 KiB
Python

"""
Airport data resolution by country.
Handles loading and filtering airport data from OpenFlights dataset.
"""
import json
import csv
from functools import lru_cache
from pathlib import Path
from typing import Optional
import urllib.request
# Try to import pycountry, fall back to manual mapping if not available
try:
import pycountry
HAS_PYCOUNTRY = True
except ImportError:
HAS_PYCOUNTRY = False
# Location of the generated lookup file: data/airports_by_country.json next to
# this module.
AIRPORTS_JSON_PATH = Path(__file__).parent / "data" / "airports_by_country.json"
# Raw OpenFlights airports.dat file that the lookup file is built from.
OPENFLIGHTS_URL = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat"
# Manual mapping for common countries (fallback if pycountry not available,
# or if the pycountry lookup fails). Keys are looked up verbatim, so the match
# is exact and case-sensitive; values are ISO 2-letter country codes.
COUNTRY_NAME_TO_ISO = {
    "Germany": "DE",
    "United States": "US",
    "United Kingdom": "GB",
    "France": "FR",
    "Spain": "ES",
    "Italy": "IT",
    "Netherlands": "NL",
    "Belgium": "BE",
    "Austria": "AT",
    "Switzerland": "CH",
    "Poland": "PL",
    "Czech Republic": "CZ",
    "Denmark": "DK",
    "Sweden": "SE",
    "Norway": "NO",
    "Finland": "FI",
    "Ireland": "IE",
    "Portugal": "PT",
    "Greece": "GR",
    "Turkey": "TR",
    "Japan": "JP",
    "China": "CN",
    "South Korea": "KR",
    "India": "IN",
    "Australia": "AU",
    "New Zealand": "NZ",
    "Canada": "CA",
    "Mexico": "MX",
    "Brazil": "BR",
    "Argentina": "AR",
    "Chile": "CL",
    "Colombia": "CO",
    "Peru": "PE",
    "South Africa": "ZA",
    "Egypt": "EG",
    "United Arab Emirates": "AE",
    "Thailand": "TH",
    "Singapore": "SG",
    "Malaysia": "MY",
    "Indonesia": "ID",
    "Philippines": "PH",
    "Vietnam": "VN",
}
@lru_cache(maxsize=1024)
def country_name_to_iso_code(country_name: str) -> Optional[str]:
    """
    Convert country name to ISO 2-letter code.

    Results are memoized: the dataset builder calls this once per airport
    row, and the same few hundred country names repeat thousands of times,
    so re-running the fuzzy search for each row is wasted work.

    Args:
        country_name: Full country name

    Returns:
        ISO 2-letter code or None if not found
    """
    if HAS_PYCOUNTRY:
        try:
            country = pycountry.countries.search_fuzzy(country_name)[0]
            return country.alpha_2
        except (LookupError, AttributeError):
            # No match (or unusable input): fall through to the manual table.
            pass
    # Fallback to manual mapping (exact, case-sensitive key match)
    return COUNTRY_NAME_TO_ISO.get(country_name)
def _parse_openflights_line(line: str) -> Optional[tuple]:
    """Parse one airports.dat line into (country_name, airport_dict), or None if the row is unusable."""
    # Format: AirportID,Name,City,Country,IATA,ICAO,Lat,Lon,Alt,Timezone,DST,Tz,Type,Source
    # A dedicated csv reader per line handles quoted fields (commas inside
    # names) while keeping a malformed line from affecting its neighbours.
    row = next(csv.reader([line]), [])
    if len(row) < 5:
        return None
    name, city, country_name, iata = row[1:5]
    icao = row[5] if len(row) > 5 else ""
    # "\N" is the placeholder the dataset uses for a missing value.
    if not iata or iata == "\\N" or len(iata) != 3:
        return None
    if not country_name or country_name == "\\N":
        return None
    airport = {
        "iata": iata,
        "name": name,
        "city": city,
        "icao": icao if icao != "\\N" else "",
    }
    return country_name, airport


def download_and_build_airport_data(force_rebuild: bool = False) -> None:
    """
    Download OpenFlights dataset and build airports_by_country.json.

    Filters to airports with valid IATA codes only.
    Groups by ISO 2-letter country code. Rows whose country name cannot be
    mapped to an ISO code are skipped.

    Args:
        force_rebuild: If True, rebuild even if file exists
    """
    if AIRPORTS_JSON_PATH.exists() and not force_rebuild:
        return
    print(f"Downloading OpenFlights airport data from {OPENFLIGHTS_URL}...")
    # Close the HTTP response deterministically and do not hang forever on a
    # stalled connection.
    with urllib.request.urlopen(OPENFLIGHTS_URL, timeout=30) as response:
        data = response.read().decode('utf-8')

    airports_by_country = {}
    # Country names repeat on thousands of rows; resolve each distinct name
    # only once (None is remembered too, so unmappable names are not retried).
    iso_by_country_name = {}
    for line in data.strip().split('\n'):
        parsed = _parse_openflights_line(line)
        if parsed is None:
            continue
        country_name, airport = parsed
        if country_name not in iso_by_country_name:
            iso_by_country_name[country_name] = country_name_to_iso_code(country_name)
        country_code = iso_by_country_name[country_name]
        if not country_code:
            # Skip if we can't map the country
            continue
        # Group by country ISO code
        airports_by_country.setdefault(country_code, []).append(airport)

    # Ensure data directory exists
    AIRPORTS_JSON_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Write to a temporary sibling and rename it into place: readers only
    # check that the final file exists, so an interrupted write must never
    # leave a truncated JSON file under the real name.
    tmp_path = AIRPORTS_JSON_PATH.with_name(AIRPORTS_JSON_PATH.name + ".tmp")
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(airports_by_country, f, indent=2, ensure_ascii=False)
    tmp_path.replace(AIRPORTS_JSON_PATH)

    total_airports = sum(len(v) for v in airports_by_country.values())
    print(f"✓ Built airport data: {len(airports_by_country)} countries, {total_airports} airports")
def get_airports_for_country(country_code: str) -> list[dict]:
    """
    Return the airports recorded for one country.

    If the airport data file is missing, it is built first via
    download_and_build_airport_data(). The lookup is case-insensitive: the
    code is upper-cased before matching.

    Args:
        country_code: ISO 2-letter country code (e.g., "DE", "US")

    Returns:
        List of airport dicts with keys: iata, name, city, icao

    Raises:
        ValueError: If country code not found
    """
    # Build the dataset on first use.
    if not AIRPORTS_JSON_PATH.exists():
        download_and_build_airport_data()

    by_country = json.loads(AIRPORTS_JSON_PATH.read_text(encoding='utf-8'))

    wanted = country_code.upper()
    if wanted in by_country:
        return by_country[wanted]

    # Unknown code: show a handful of valid ones to help the caller.
    sample = ', '.join(sorted(by_country)[:10])
    raise ValueError(
        f"Country code '{wanted}' not found. "
        f"Available codes (sample): {sample}..."
    )
def resolve_airport_list(country: Optional[str], from_airports: Optional[str]) -> list[dict]:
    """
    Resolve the final list of origin airports to scan.

    Args:
        country: ISO 2-letter country code (if --from not provided)
        from_airports: Comma-separated IATA codes (overrides country).
            Codes are trimmed and upper-cased; empty entries (e.g. from a
            trailing or doubled comma) are ignored.

    Returns:
        List of airport dicts with keys: iata, name, city

    Raises:
        ValueError: If neither country nor from_airports provided, or if
            from_airports contains no usable code
    """
    if from_airports:
        # Parse custom airport list, dropping blank segments so we never
        # emit an airport with an empty IATA code.
        iata_codes = [
            code
            for code in (part.strip().upper() for part in from_airports.split(','))
            if code
        ]
        if not iata_codes:
            raise ValueError(f"No valid IATA codes found in --from value: {from_airports!r}")
        # Create minimal airport dicts (name defaults to the code itself)
        return [{"iata": code, "name": code, "city": ""} for code in iata_codes]
    if country:
        return get_airports_for_country(country)
    raise ValueError("Either --country or --from must be provided")
@lru_cache(maxsize=1)
def _all_airports_by_iata() -> dict:
    """Build the {iata: airport_dict} index over every country; the result is cached after the first call."""
    # Build the dataset on first use.
    if not AIRPORTS_JSON_PATH.exists():
        download_and_build_airport_data()

    with open(AIRPORTS_JSON_PATH, 'r', encoding='utf-8') as handle:
        grouped = json.load(handle)

    index = {}
    for country_airports in grouped.values():
        for entry in country_airports:
            # If a code appears more than once, the last occurrence wins.
            index[entry['iata']] = entry
    return index
def lookup_airport(iata: str) -> dict | None:
"""Look up a single airport by IATA code. Returns None if not found."""
return _all_airports_by_iata().get(iata.upper())
if __name__ == "__main__":
    # Build the dataset if run directly
    download_and_build_airport_data(force_rebuild=True)
    print("\nSample data for Germany (DE):")
    de_airports = get_airports_for_country("DE")
    for airport in de_airports[:5]:
        print(f" {airport['iata']} - {airport['name']} ({airport['city']})")
    # Only mention the remainder when there is one; otherwise the count
    # would be zero or negative.
    remaining = len(de_airports) - 5
    if remaining > 0:
        print(f" ... and {remaining} more")