Add flight comparator web app with full scan pipeline

Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass):

Backend (FastAPI + SQLite):
- REST API with rate limiting, Pydantic v2 validation, paginated responses
- Scan pipeline: resolves airports, queries every day in the window, saves
  individual flights + aggregate route stats to SQLite
- Background async scan processor with real-time progress tracking
- Airport search endpoint backed by OpenFlights dataset
- Daily scan window (all dates, not monthly samples)

Frontend (React 19 + TypeScript + Tailwind CSS v4):
- Dashboard with live scan status and recent scans
- Create scan form: country mode or specific airports (searchable dropdown)
- Scan detail page with expandable route rows showing individual flights
  (date, airline, departure, arrival, price) loaded on demand
- AirportSearch component with debounced live search and multi-select

Database:
- scans → routes → flights schema with FK cascade and auto-update triggers
- Migrations for schema evolution (relaxed country constraint)

Tests:
- 74 tests: unit + integration, isolated per-test SQLite DB
- Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights,
  BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026)
- Integration tests parametrized from confirmed routes

Docker:
- Multi-stage builds, Compose orchestration, Nginx reverse proxy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 17:11:51 +01:00
parent aea7590874
commit 6421f83ca7
67 changed files with 37173 additions and 0 deletions

View File

@@ -0,0 +1,235 @@
"""
Airport data resolution by country.
Handles loading and filtering airport data from OpenFlights dataset.
"""
import json
import csv
from pathlib import Path
from typing import Optional
import urllib.request
# pycountry is an optional dependency. Record whether it could be imported so
# that country_name_to_iso_code() knows when to rely on the static table only.
try:
    import pycountry
except ImportError:
    HAS_PYCOUNTRY = False
else:
    HAS_PYCOUNTRY = True
# Generated airport cache, stored in a "data" directory next to this module.
AIRPORTS_JSON_PATH = Path(__file__).parent / "data" / "airports_by_country.json"
# Raw OpenFlights airports.dat file fetched by download_and_build_airport_data().
OPENFLIGHTS_URL = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat"
# Manual mapping for common countries (fallback if pycountry not available).
# Maps an English country name to its ISO 2-letter code; consulted by
# country_name_to_iso_code() when pycountry is missing or finds no match.
COUNTRY_NAME_TO_ISO = {
    "Germany": "DE",
    "United States": "US",
    "United Kingdom": "GB",
    "France": "FR",
    "Spain": "ES",
    "Italy": "IT",
    "Netherlands": "NL",
    "Belgium": "BE",
    "Austria": "AT",
    "Switzerland": "CH",
    "Poland": "PL",
    "Czech Republic": "CZ",
    "Denmark": "DK",
    "Sweden": "SE",
    "Norway": "NO",
    "Finland": "FI",
    "Ireland": "IE",
    "Portugal": "PT",
    "Greece": "GR",
    "Turkey": "TR",
    "Japan": "JP",
    "China": "CN",
    "South Korea": "KR",
    "India": "IN",
    "Australia": "AU",
    "New Zealand": "NZ",
    "Canada": "CA",
    "Mexico": "MX",
    "Brazil": "BR",
    "Argentina": "AR",
    "Chile": "CL",
    "Colombia": "CO",
    "Peru": "PE",
    "South Africa": "ZA",
    "Egypt": "EG",
    "United Arab Emirates": "AE",
    "Thailand": "TH",
    "Singapore": "SG",
    "Malaysia": "MY",
    "Indonesia": "ID",
    "Philippines": "PH",
    "Vietnam": "VN",
}
def country_name_to_iso_code(country_name: str) -> Optional[str]:
    """
    Convert a country name to its ISO 2-letter code.

    When pycountry is installed its fuzzy search is tried first. If it is
    unavailable or finds nothing, the static COUNTRY_NAME_TO_ISO table is
    consulted: first verbatim, then ignoring letter case.

    Args:
        country_name: Full country name (surrounding whitespace is ignored)

    Returns:
        ISO 2-letter code, or None if the name is empty or cannot be mapped
    """
    # Nothing to look up for None, "" or whitespace-only input.
    if not country_name or not country_name.strip():
        return None
    name = country_name.strip()

    if HAS_PYCOUNTRY:
        try:
            return pycountry.countries.search_fuzzy(name)[0].alpha_2
        except (LookupError, AttributeError):
            # No fuzzy match (or a record without alpha_2): try the table.
            pass

    # Fallback to manual mapping: exact spelling first so existing callers
    # get the same answer as before, then a case-insensitive comparison.
    code = COUNTRY_NAME_TO_ISO.get(name)
    if code is not None:
        return code
    folded = name.casefold()
    for known_name, known_code in COUNTRY_NAME_TO_ISO.items():
        if known_name.casefold() == folded:
            return known_code
    return None
def _group_airports_by_country(csv_text: str) -> dict:
    """Parse OpenFlights airports.dat text into {ISO code: [airport dict, ...]}."""
    # Column layout:
    # AirportID,Name,City,Country,IATA,ICAO,Lat,Lon,Alt,Timezone,DST,Tz,Type,Source
    airports_by_country: dict = {}
    # The same country name appears on many rows; resolve each name once,
    # since the lookup may involve a fuzzy search.
    iso_by_name: dict = {}

    # A single reader over all lines (line endings kept) lets the csv module
    # handle quoted fields correctly, including ones that span lines.
    for row in csv.reader(csv_text.splitlines(keepends=True)):
        if len(row) < 5:
            continue

        name, city, country_name, iata = row[1:5]
        icao = row[5] if len(row) > 5 else ""

        # Skip if no valid IATA code (this also rejects "" and the "\N" null marker)
        if len(iata) != 3:
            continue

        # Skip if country name is missing
        if not country_name or country_name == "\\N":
            continue

        # Convert country name to ISO code, skipping rows we cannot map
        if country_name not in iso_by_name:
            iso_by_name[country_name] = country_name_to_iso_code(country_name)
        country_code = iso_by_name[country_name]
        if not country_code:
            continue

        airports_by_country.setdefault(country_code, []).append({
            "iata": iata,
            "name": name,
            "city": city,
            "icao": icao if icao != "\\N" else ""
        })

    return airports_by_country


def download_and_build_airport_data(force_rebuild: bool = False, timeout: float = 30.0) -> None:
    """
    Download OpenFlights dataset and build airports_by_country.json.

    Filters to airports with valid IATA codes only.
    Groups by ISO 2-letter country code.

    Args:
        force_rebuild: If True, rebuild even if file exists
        timeout: Seconds to wait on the network before giving up
    """
    if AIRPORTS_JSON_PATH.exists() and not force_rebuild:
        return

    print(f"Downloading OpenFlights airport data from {OPENFLIGHTS_URL}...")

    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(OPENFLIGHTS_URL, timeout=timeout) as response:
        data = response.read().decode('utf-8')

    airports_by_country = _group_airports_by_country(data)

    # Ensure data directory exists
    AIRPORTS_JSON_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Write to a sibling temp file and rename it into place, so an interrupted
    # run never leaves a truncated JSON file that later calls would treat as
    # a complete cache.
    tmp_path = AIRPORTS_JSON_PATH.with_name(AIRPORTS_JSON_PATH.name + ".tmp")
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(airports_by_country, f, indent=2, ensure_ascii=False)
    tmp_path.replace(AIRPORTS_JSON_PATH)

    total_airports = sum(len(v) for v in airports_by_country.values())
    print(f"✓ Built airport data: {len(airports_by_country)} countries, {total_airports} airports")
def get_airports_for_country(country_code: str) -> list[dict]:
    """
    Look up the airports recorded for one country.

    The airport data file is built on first use: when it does not exist,
    download_and_build_airport_data() is called before the file is read.

    Args:
        country_code: ISO 2-letter country code (e.g., "DE", "US");
            it is upper-cased before the lookup

    Returns:
        List of airport dicts with keys: iata, name, city, icao

    Raises:
        ValueError: If the country code is not present in the data file
    """
    # Build the data file on demand.
    if not AIRPORTS_JSON_PATH.exists():
        download_and_build_airport_data()

    # Read the whole file and decode it in one step.
    by_country = json.loads(AIRPORTS_JSON_PATH.read_text(encoding='utf-8'))

    country_code = country_code.upper()
    if country_code in by_country:
        return by_country[country_code]

    # Unknown code: show the first few known codes to help the caller.
    available = sorted(by_country)[:10]
    raise ValueError(
        f"Country code '{country_code}' not found. "
        f"Available codes (sample): {', '.join(available)}..."
    )
def resolve_airport_list(country: Optional[str], from_airports: Optional[str]) -> list[dict]:
    """
    Resolve the final list of origin airports to scan.

    Args:
        country: ISO 2-letter country code (if --from not provided)
        from_airports: Comma-separated IATA codes (overrides country).
            Blank entries, e.g. from a trailing or doubled comma, are ignored.

    Returns:
        List of airport dicts with keys: iata, name, city
        (entries resolved from a country also carry icao)

    Raises:
        ValueError: If neither country nor from_airports is provided, if
            from_airports contains no codes at all, or if the country is unknown
    """
    if from_airports:
        # Parse custom airport list, dropping empty segments so that input
        # such as "BER,,MUC," cannot produce an airport with an empty code.
        iata_codes = [
            code
            for code in (part.strip().upper() for part in from_airports.split(','))
            if code
        ]
        if not iata_codes:
            raise ValueError(
                f"No airport codes found in --from value: {from_airports!r}"
            )
        # Create minimal airport dicts
        return [{"iata": code, "name": code, "city": ""} for code in iata_codes]

    if country:
        return get_airports_for_country(country)

    raise ValueError("Either --country or --from must be provided")
if __name__ == "__main__":
    # Run as a script: force a fresh build of the dataset, then print a short
    # preview of the German entries as a quick check of the result.
    download_and_build_airport_data(force_rebuild=True)

    print("\nSample data for Germany (DE):")
    german_airports = get_airports_for_country("DE")
    preview_size = 5
    for entry in german_airports[:preview_size]:
        print(f" {entry['iata']} - {entry['name']} ({entry['city']})")
    print(f" ... and {len(german_airports) - preview_size} more")