All checks were successful
Deploy / deploy (push) Successful in 21s
OpenFlights dataset predates BER's 2020 opening. The patch already existed in api_server.py for the search UI, but scan_processor.py uses airports.py directly, so Germany scans silently skipped BER. Added _MISSING_AIRPORTS to airports.py, patched both get_airports_for_country() and _all_airports_by_iata() to inject the extras, making BER available to scans and lookups. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
273 lines
8.0 KiB
Python
273 lines
8.0 KiB
Python
"""
|
|
Airport data resolution by country.

Handles loading and filtering airport data from the OpenFlights dataset.
|
|
"""
|
|
|
|
import json
|
|
import csv
|
|
from functools import lru_cache
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
import urllib.request
|
|
|
|
# pycountry is an optional dependency. HAS_PYCOUNTRY records whether the
# import succeeded so that country-name resolution can fall back to the
# manual COUNTRY_NAME_TO_ISO table when it did not.
try:
    import pycountry
except ImportError:
    HAS_PYCOUNTRY = False
else:
    HAS_PYCOUNTRY = True
|
|
|
|
|
|
# Cached per-country airport index, kept in a "data" directory next to this module.
AIRPORTS_JSON_PATH = Path(__file__).parent.joinpath("data", "airports_by_country.json")

# Upstream OpenFlights airport table that the index is built from.
OPENFLIGHTS_URL = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat"
|
|
|
|
# Hand-maintained country name -> ISO 2-letter code table for common
# countries. Used as the fallback when pycountry is not installed or its
# lookup fails.
COUNTRY_NAME_TO_ISO: dict[str, str] = {
    # Europe
    "Germany": "DE",
    "United States": "US",
    "United Kingdom": "GB",
    "France": "FR",
    "Spain": "ES",
    "Italy": "IT",
    "Netherlands": "NL",
    "Belgium": "BE",
    "Austria": "AT",
    "Switzerland": "CH",
    "Poland": "PL",
    "Czech Republic": "CZ",
    "Denmark": "DK",
    "Sweden": "SE",
    "Norway": "NO",
    "Finland": "FI",
    "Ireland": "IE",
    "Portugal": "PT",
    "Greece": "GR",
    "Turkey": "TR",
    # Asia-Pacific
    "Japan": "JP",
    "China": "CN",
    "South Korea": "KR",
    "India": "IN",
    "Australia": "AU",
    "New Zealand": "NZ",
    # Americas
    "Canada": "CA",
    "Mexico": "MX",
    "Brazil": "BR",
    "Argentina": "AR",
    "Chile": "CL",
    "Colombia": "CO",
    "Peru": "PE",
    # Africa and Middle East
    "South Africa": "ZA",
    "Egypt": "EG",
    "United Arab Emirates": "AE",
    # South-East Asia
    "Thailand": "TH",
    "Singapore": "SG",
    "Malaysia": "MY",
    "Indonesia": "ID",
    "Philippines": "PH",
    "Vietnam": "VN",
}
|
|
|
|
|
|
# Supplementary airports that the OpenFlights dataset lacks (opened or
# renamed after the dataset was last updated).
# Outer key: ISO country code. Each entry follows the
# airports_by_country.json schema: iata / name / city / icao.
_MISSING_AIRPORTS: dict[str, list[dict]] = {
    "DE": [
        {
            "iata": "BER",
            "name": "Berlin Brandenburg Airport",
            "city": "Berlin",
            "icao": "EDDB",
        },
    ],
}
|
|
|
|
|
|
def country_name_to_iso_code(country_name: str) -> Optional[str]:
    """
    Resolve a country name to its ISO 2-letter code.

    When pycountry is available its fuzzy search is consulted first and the
    top match wins. If pycountry is not installed, or the search fails, the
    name is looked up verbatim in COUNTRY_NAME_TO_ISO.

    Args:
        country_name: Full country name (e.g. "Germany")

    Returns:
        ISO 2-letter code, or None if the name cannot be resolved
    """
    if not HAS_PYCOUNTRY:
        return COUNTRY_NAME_TO_ISO.get(country_name)

    try:
        matches = pycountry.countries.search_fuzzy(country_name)
        return matches[0].alpha_2
    except (LookupError, AttributeError):
        # pycountry produced no usable match; use the manual table instead.
        return COUNTRY_NAME_TO_ISO.get(country_name)
|
|
|
|
|
|
def download_and_build_airport_data(force_rebuild: bool = False) -> None:
    """
    Download the OpenFlights dataset and build airports_by_country.json.

    Only airports with a 3-character IATA code and a country name that can
    be mapped to an ISO code are kept. Entries are grouped by ISO 2-letter
    country code and written as JSON to AIRPORTS_JSON_PATH.

    Args:
        force_rebuild: If True, rebuild even if the file already exists

    Raises:
        urllib.error.URLError: If the dataset cannot be fetched
    """
    if AIRPORTS_JSON_PATH.exists() and not force_rebuild:
        return

    print(f"Downloading OpenFlights airport data from {OPENFLIGHTS_URL}...")

    # Close the HTTP response deterministically, and bound the wait so a
    # stalled connection cannot hang the caller forever.
    with urllib.request.urlopen(OPENFLIGHTS_URL, timeout=60) as response:
        data = response.read().decode('utf-8')

    airports_by_country: dict[str, list[dict]] = {}
    # The dataset repeats each country name on many rows; resolve every
    # distinct name only once (the pycountry fuzzy search is not cheap).
    iso_by_country_name: dict[str, Optional[str]] = {}

    # Format: AirportID,Name,City,Country,IATA,ICAO,Lat,Lon,Alt,Timezone,DST,Tz,Type,Source
    for line in data.strip().split('\n'):
        # Parse each line on its own with the csv module so quoted fields
        # (names containing commas) are handled properly.
        row = next(csv.reader([line]))

        if len(row) < 5:
            continue

        name, city, country_name, iata = row[1:5]
        icao = row[5] if len(row) > 5 else ""

        # Skip rows without a valid IATA code ("\N" marks a missing value).
        if not iata or iata == "\\N" or len(iata) != 3:
            continue

        # Skip rows without a country name.
        if not country_name or country_name == "\\N":
            continue

        if country_name not in iso_by_country_name:
            iso_by_country_name[country_name] = country_name_to_iso_code(country_name)
        country_code = iso_by_country_name[country_name]
        if not country_code:
            # Skip if we can't map the country
            continue

        airports_by_country.setdefault(country_code, []).append({
            "iata": iata,
            "name": name,
            "city": city,
            "icao": icao if icao != "\\N" else "",
        })

    # Ensure data directory exists
    AIRPORTS_JSON_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Write to a sibling temp file and rename it into place, so an
    # interrupted run never leaves a truncated JSON file that later calls
    # would accept as an existing cache.
    tmp_path = AIRPORTS_JSON_PATH.with_name(AIRPORTS_JSON_PATH.name + ".tmp")
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(airports_by_country, f, indent=2, ensure_ascii=False)
    tmp_path.replace(AIRPORTS_JSON_PATH)

    total_airports = sum(len(v) for v in airports_by_country.values())
    print(f"✓ Built airport data: {len(airports_by_country)} countries, {total_airports} airports")
|
|
|
|
|
|
def get_airports_for_country(country_code: str) -> list[dict]:
    """
    Get list of airports for a given country code.

    The result combines the airports stored in airports_by_country.json
    with any supplementary entries from _MISSING_AIRPORTS whose IATA code
    is not already present. The data file is built first if it is missing.

    Args:
        country_code: ISO 2-letter country code (e.g., "DE", "US");
            case and surrounding whitespace are ignored

    Returns:
        List of airport dicts with keys: iata, name, city, icao

    Raises:
        FileNotFoundError: If the airports data file cannot be opened
        ValueError: If the country code is found neither in the data file
            nor in _MISSING_AIRPORTS
    """
    # Ensure data file exists
    if not AIRPORTS_JSON_PATH.exists():
        download_and_build_airport_data()

    # Load from JSON
    with open(AIRPORTS_JSON_PATH, 'r', encoding='utf-8') as f:
        airports_by_country = json.load(f)

    country_code = country_code.strip().upper()
    extras = _MISSING_AIRPORTS.get(country_code, [])

    # A country may be known only through the supplementary table, so it is
    # "not found" only when both sources lack it.
    if country_code not in airports_by_country and not extras:
        available = sorted(airports_by_country.keys())[:10]
        raise ValueError(
            f"Country code '{country_code}' not found. "
            f"Available codes (sample): {', '.join(available)}..."
        )

    result = list(airports_by_country.get(country_code, []))
    existing_iatas = {a['iata'] for a in result}
    for extra in extras:
        if extra['iata'] not in existing_iatas:
            # Hand out a copy so callers cannot mutate the module-level table.
            result.append(dict(extra))
            existing_iatas.add(extra['iata'])
    return result
|
|
|
|
|
|
def resolve_airport_list(country: Optional[str], from_airports: Optional[str]) -> list[dict]:
    """
    Resolve the final list of origin airports to scan.

    Args:
        country: ISO 2-letter country code (if --from not provided)
        from_airports: Comma-separated IATA codes (overrides country).
            Codes are trimmed and upper-cased; blank segments such as
            those produced by a trailing comma are ignored.

    Returns:
        List of airport dicts. For an explicit --from list each dict has
        the keys iata, name, city (name is set to the code and city is
        empty); for a country, the dicts returned by
        get_airports_for_country() are passed through unchanged.

    Raises:
        ValueError: If neither country nor from_airports is provided, or
            if from_airports contains no airport code at all
    """
    if from_airports:
        # Parse the custom airport list, dropping blank segments so that
        # input like "BER,,MUC," never yields an airport with an empty code.
        iata_codes = [
            code.strip().upper()
            for code in from_airports.split(',')
            if code.strip()
        ]
        if not iata_codes:
            raise ValueError(f"No airport codes found in --from value: {from_airports!r}")
        # Create minimal airport dicts
        return [{"iata": code, "name": code, "city": ""} for code in iata_codes]

    if country:
        return get_airports_for_country(country)

    raise ValueError("Either --country or --from must be provided")
|
|
|
|
|
|
@lru_cache(maxsize=1)
def _all_airports_by_iata() -> dict:
    """Build the {iata: airport_dict} index over every known airport.

    The data file is built first if it does not exist. The result is
    memoised by lru_cache, so a successful load is reused by later calls.
    """
    if not AIRPORTS_JSON_PATH.exists():
        download_and_build_airport_data()

    with open(AIRPORTS_JSON_PATH, 'r', encoding='utf-8') as f:
        by_country = json.load(f)

    index = {}
    for country_airports in by_country.values():
        for airport in country_airports:
            index[airport['iata']] = airport

    # Supplementary airports only fill gaps; they never override an entry
    # that came from the data file.
    for supplements in _MISSING_AIRPORTS.values():
        for supplement in supplements:
            index.setdefault(supplement['iata'], supplement)

    return index
|
|
|
|
|
|
def lookup_airport(iata: str) -> dict | None:
|
|
"""Look up a single airport by IATA code. Returns None if not found."""
|
|
return _all_airports_by_iata().get(iata.upper())
|
|
|
|
|
|
if __name__ == "__main__":
    # Build the dataset if run directly, then print a short sample.
    download_and_build_airport_data(force_rebuild=True)
    print("\nSample data for Germany (DE):")
    de_airports = get_airports_for_country("DE")
    for airport in de_airports[:5]:
        print(f" {airport['iata']} - {airport['name']} ({airport['city']})")
    # Only mention a remainder when there is one; avoids a negative count
    # if fewer than five airports were found.
    remaining = len(de_airports) - 5
    if remaining > 0:
        print(f" ... and {remaining} more")
|