Files
ciaovolo/flight-comparator/airports.py
domverse 6421f83ca7 Add flight comparator web app with full scan pipeline
Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass):

Backend (FastAPI + SQLite):
- REST API with rate limiting, Pydantic v2 validation, paginated responses
- Scan pipeline: resolves airports, queries every day in the window, saves
  individual flights + aggregate route stats to SQLite
- Background async scan processor with real-time progress tracking
- Airport search endpoint backed by OpenFlights dataset
- Daily scan window (all dates, not monthly samples)

Frontend (React 19 + TypeScript + Tailwind CSS v4):
- Dashboard with live scan status and recent scans
- Create scan form: country mode or specific airports (searchable dropdown)
- Scan detail page with expandable route rows showing individual flights
  (date, airline, departure, arrival, price) loaded on demand
- AirportSearch component with debounced live search and multi-select

Database:
- scans → routes → flights schema with FK cascade and auto-update triggers
- Migrations for schema evolution (relaxed country constraint)

Tests:
- 74 tests: unit + integration, isolated per-test SQLite DB
- Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights,
  BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026)
- Integration tests parametrized from confirmed routes

Docker:
- Multi-stage builds, Compose orchestration, Nginx reverse proxy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 17:11:51 +01:00

236 lines
6.6 KiB
Python

"""
Airport data resolution by country.
Handles loading and filtering airport data from the OpenFlights dataset.
"""
import json
import csv
from pathlib import Path
from typing import Optional
import urllib.request
# Try to import pycountry, fall back to manual mapping if not available
try:
import pycountry
HAS_PYCOUNTRY = True
except ImportError:
HAS_PYCOUNTRY = False
# Generated cache file (ISO country code -> list of airports), kept in a
# "data" directory next to this module.
AIRPORTS_JSON_PATH = Path(__file__).parent.joinpath("data", "airports_by_country.json")

# Raw OpenFlights airports table (comma-separated, one airport per line).
OPENFLIGHTS_URL = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat"

# Hand-maintained country name -> ISO 3166-1 alpha-2 table.
# Used as the fallback when pycountry is not installed or finds no match.
COUNTRY_NAME_TO_ISO = {
    # Europe and North Atlantic
    "Germany": "DE",
    "United States": "US",
    "United Kingdom": "GB",
    "France": "FR",
    "Spain": "ES",
    "Italy": "IT",
    "Netherlands": "NL",
    "Belgium": "BE",
    "Austria": "AT",
    "Switzerland": "CH",
    "Poland": "PL",
    "Czech Republic": "CZ",
    "Denmark": "DK",
    "Sweden": "SE",
    "Norway": "NO",
    "Finland": "FI",
    "Ireland": "IE",
    "Portugal": "PT",
    "Greece": "GR",
    "Turkey": "TR",
    # Asia-Pacific
    "Japan": "JP",
    "China": "CN",
    "South Korea": "KR",
    "India": "IN",
    "Australia": "AU",
    "New Zealand": "NZ",
    # Americas
    "Canada": "CA",
    "Mexico": "MX",
    "Brazil": "BR",
    "Argentina": "AR",
    "Chile": "CL",
    "Colombia": "CO",
    "Peru": "PE",
    # Africa and Middle East
    "South Africa": "ZA",
    "Egypt": "EG",
    "United Arab Emirates": "AE",
    # Southeast Asia
    "Thailand": "TH",
    "Singapore": "SG",
    "Malaysia": "MY",
    "Indonesia": "ID",
    "Philippines": "PH",
    "Vietnam": "VN",
}
def country_name_to_iso_code(country_name: str) -> Optional[str]:
    """
    Convert country name to ISO 2-letter code.

    Lookup order, from most to least reliable:
      1. the curated COUNTRY_NAME_TO_ISO table (exact, then case-insensitive),
      2. pycountry's exact ``lookup`` (only if pycountry is installed),
      3. pycountry's fuzzy search as a last resort.

    Args:
        country_name: Full country name

    Returns:
        ISO 2-letter code or None if not found
    """
    if not isinstance(country_name, str):
        return None

    name = country_name.strip()
    if not name:
        # Nothing to look up; avoid sending an empty query to the fuzzy search.
        return None

    # Curated names are authoritative, so they are checked before any
    # best-effort matching is attempted.
    code = COUNTRY_NAME_TO_ISO.get(name)
    if code is not None:
        return code

    folded = name.casefold()
    for known_name, known_code in COUNTRY_NAME_TO_ISO.items():
        if known_name.casefold() == folded:
            return known_code

    if HAS_PYCOUNTRY:
        # Prefer an exact match over the fuzzy search, whose first hit is
        # only a best guess.
        try:
            return pycountry.countries.lookup(name).alpha_2
        except (LookupError, AttributeError):
            pass

        try:
            return pycountry.countries.search_fuzzy(name)[0].alpha_2
        except (LookupError, AttributeError, IndexError):
            pass

    return None
def _parse_openflights_airports(data: str) -> dict[str, list[dict]]:
    """Group the IATA-coded airports in raw OpenFlights text by ISO country code."""
    airports_by_country: dict[str, list[dict]] = {}
    # Country-name resolution can be slow and the same name repeats on many
    # rows, so each distinct name is resolved only once.
    iso_by_country_name: dict[str, Optional[str]] = {}

    # Format: AirportID,Name,City,Country,IATA,ICAO,Lat,Lon,Alt,Timezone,DST,Tz,Type,Source
    for line in data.strip().split('\n'):
        # Use csv reader to handle quoted fields (e.g. commas inside names)
        row = next(csv.reader([line]), [])
        if len(row) < 5:
            continue

        name, city, country_name, iata = row[1:5]
        icao = row[5] if len(row) > 5 else ""

        # Skip if no valid IATA code ("\N" is the dataset's null marker)
        if not iata or iata == "\\N" or len(iata) != 3:
            continue

        # Skip if country name is missing
        if not country_name or country_name == "\\N":
            continue

        if country_name not in iso_by_country_name:
            iso_by_country_name[country_name] = country_name_to_iso_code(country_name)
        country_code = iso_by_country_name[country_name]
        if not country_code:
            # Skip if we can't map the country
            continue

        airports_by_country.setdefault(country_code, []).append({
            "iata": iata,
            "name": name,
            "city": city,
            "icao": icao if icao != "\\N" else "",
        })

    return airports_by_country


def download_and_build_airport_data(force_rebuild: bool = False, timeout: float = 30.0) -> None:
    """
    Download OpenFlights dataset and build airports_by_country.json.

    Filters to airports with valid IATA codes only.
    Groups by ISO 2-letter country code.

    Args:
        force_rebuild: If True, rebuild even if file exists
        timeout: Seconds to wait on the download before giving up
    """
    if AIRPORTS_JSON_PATH.exists() and not force_rebuild:
        return

    print(f"Downloading OpenFlights airport data from {OPENFLIGHTS_URL}...")

    # The context manager closes the connection even if reading or decoding fails.
    with urllib.request.urlopen(OPENFLIGHTS_URL, timeout=timeout) as response:
        data = response.read().decode('utf-8')

    airports_by_country = _parse_openflights_airports(data)

    # Ensure data directory exists
    AIRPORTS_JSON_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Write to a sibling temp file and rename it into place, so an interrupted
    # write never leaves a truncated JSON file that later passes the
    # exists() check above.
    tmp_path = AIRPORTS_JSON_PATH.with_name(AIRPORTS_JSON_PATH.name + ".tmp")
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(airports_by_country, f, indent=2, ensure_ascii=False)
    tmp_path.replace(AIRPORTS_JSON_PATH)

    total_airports = sum(len(v) for v in airports_by_country.values())
    print(f"✓ Built airport data: {len(airports_by_country)} countries, {total_airports} airports")
def get_airports_for_country(country_code: str) -> list[dict]:
    """
    Return the airports stored for one country.

    If the airports JSON file is not on disk yet, it is built first via
    download_and_build_airport_data().

    Args:
        country_code: ISO 2-letter country code (e.g., "DE", "US");
            upper-cased before the lookup

    Returns:
        List of airport dicts with keys: iata, name, city, icao

    Raises:
        ValueError: If country code not found
    """
    # Build the data file on first use
    if not AIRPORTS_JSON_PATH.exists():
        download_and_build_airport_data()

    with open(AIRPORTS_JSON_PATH, 'r', encoding='utf-8') as handle:
        by_country = json.load(handle)

    country_code = country_code.upper()
    if country_code in by_country:
        return by_country[country_code]

    # Unknown code: show a few valid ones to help the caller
    available = sorted(by_country.keys())[:10]
    raise ValueError(
        f"Country code '{country_code}' not found. "
        f"Available codes (sample): {', '.join(available)}..."
    )
def resolve_airport_list(country: Optional[str], from_airports: Optional[str]) -> list[dict]:
    """
    Resolve the final list of origin airports to scan.

    Args:
        country: ISO 2-letter country code (if --from not provided)
        from_airports: Comma-separated IATA codes (overrides country).
            Codes are trimmed and upper-cased; blank entries (e.g. from a
            trailing comma) are ignored and duplicates are dropped, keeping
            first-seen order.

    Returns:
        List of airport dicts with keys: iata, name, city

    Raises:
        ValueError: If neither country nor from_airports provided, or if
            from_airports contains no airport codes at all
    """
    if from_airports:
        # Parse custom airport list. dict.fromkeys removes duplicates while
        # preserving the order the user typed them in.
        iata_codes = list(dict.fromkeys(
            code.strip().upper()
            for code in from_airports.split(',')
            if code.strip()
        ))
        if not iata_codes:
            # e.g. "," or " , " is non-empty but names no airport
            raise ValueError(f"No airport codes found in --from value: {from_airports!r}")

        # Create minimal airport dicts
        return [{"iata": code, "name": code, "city": ""} for code in iata_codes]

    if country:
        return get_airports_for_country(country)

    raise ValueError("Either --country or --from must be provided")
if __name__ == "__main__":
    # Running this module as a script always rebuilds the dataset, then
    # prints a short preview of the German airports as a sanity check.
    download_and_build_airport_data(force_rebuild=True)

    print("\nSample data for Germany (DE):")
    german_airports = get_airports_for_country("DE")
    preview = german_airports[:5]
    for entry in preview:
        print(f" {entry['iata']} - {entry['name']} ({entry['city']})")
    remaining = len(german_airports) - 5
    print(f" ... and {remaining} more")