Add flight comparator web app with full scan pipeline

Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass):

Backend (FastAPI + SQLite):
- REST API with rate limiting, Pydantic v2 validation, paginated responses
- Scan pipeline: resolves airports, queries every day in the window, saves
  individual flights + aggregate route stats to SQLite
- Background async scan processor with real-time progress tracking
- Airport search endpoint backed by OpenFlights dataset
- Daily scan window (all dates, not monthly samples)

Frontend (React 19 + TypeScript + Tailwind CSS v4):
- Dashboard with live scan status and recent scans
- Create scan form: country mode or specific airports (searchable dropdown)
- Scan detail page with expandable route rows showing individual flights
  (date, airline, departure, arrival, price) loaded on demand
- AirportSearch component with debounced live search and multi-select

Database:
- scans → routes → flights schema with FK cascade and auto-update triggers
- Migrations for schema evolution (relaxed country constraint)

Tests:
- 74 tests: unit + integration, isolated per-test SQLite DB
- Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights,
  BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026)
- Integration tests parametrized from confirmed routes

Docker:
- Multi-stage builds, Compose orchestration, Nginx reverse proxy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 17:11:51 +01:00
parent aea7590874
commit 6421f83ca7
67 changed files with 37173 additions and 0 deletions

View File

@@ -0,0 +1,281 @@
#!/usr/bin/env python3
"""
Route Discovery Tool
Phase 1: Quickly discover which routes have direct flights
- Scans one sample date per month across the window
- Identifies which destination airports have ANY flights
- Saves results to discovered_routes.json
Phase 2: Targeted daily scans (use main.py)
- Run detailed daily scans only on discovered routes
- Much faster than scanning all airports
Example workflow:
# Phase 1: Discover routes (fast)
python discover_routes.py --from BDS --to-country DE --window 3
# Phase 2: Daily scan each discovered route (targeted)
python main.py --from BDS --to DUS --daily-scan --window 3
python main.py --from BDS --to FMM --daily-scan --window 3
...
"""
import asyncio
import json
import sys
from datetime import date
from dateutil.relativedelta import relativedelta
from typing import Optional
try:
import click
except ImportError:
print("Error: click library not installed. Install with: pip install click")
sys.exit(1)
from airports import resolve_airport_list, download_and_build_airport_data
try:
from searcher_v3 import search_multiple_routes
except ImportError:
print("Error: searcher_v3 not found")
sys.exit(1)
def generate_discovery_dates(window_months: int) -> list[str]:
    """
    Generate sample dates for route discovery.

    Uses one date per month (the 15th) to quickly check which routes exist.
    Sampling starts with the month after the current one.

    Args:
        window_months: Number of months to check. Values <= 0 yield an
            empty list.

    Returns:
        List of date strings (YYYY-MM-DD) in chronological order.
    """
    today = date.today()
    dates = []
    for offset in range(1, window_months + 1):
        # Work on a zero-based month index so the year carry falls out of
        # divmod. Every month has a 15th, so constructing the date directly
        # can never fail and no end-of-month fallback is needed.
        year_carry, month_index = divmod(today.month - 1 + offset, 12)
        sample = date(today.year + year_carry, month_index + 1, 15)
        dates.append(sample.strftime('%Y-%m-%d'))
    return dates
def _summarize_destinations(results):
    """
    Aggregate per-query scan results into one summary per destination.

    Iterates ``results`` as a mapping of ``(origin, destination, date)``
    tuples to a list of flight dicts (read via ``.get("airline")`` and
    ``.get("price")``). NOTE(review): that shape is implied by how the
    results are consumed here - confirm against search_multiple_routes.

    Returns:
        Dict keyed by destination code. Only destinations with at least one
        flight are present. Each value holds the flight count, a sorted list
        of airline names, the sample dates that had flights and the min/max
        price seen (None when no flight carried a usable price).
    """
    summaries = {}
    for (_origin, dest, query_date), flights in results.items():
        if not flights:
            continue
        if dest not in summaries:
            summaries[dest] = {
                "iata": dest,
                "flights_found": 0,
                "airlines": set(),
                "sample_dates": [],
                "price_range": {"min": None, "max": None},
            }
        summary = summaries[dest]
        summary["flights_found"] += len(flights)
        summary["sample_dates"].append(query_date)
        bounds = summary["price_range"]
        for flight in flights:
            # Fall back to "Unknown" for a missing OR empty/None airline so
            # the set stays all-strings and can be sorted below.
            summary["airlines"].add(flight.get("airline") or "Unknown")
            price = flight.get("price")
            if not price:
                # Missing or zero prices are ignored for the range.
                continue
            if bounds["min"] is None:
                bounds["min"] = bounds["max"] = price
            else:
                bounds["min"] = min(bounds["min"], price)
                bounds["max"] = max(bounds["max"], price)
    # Sets are not JSON serializable; emit deterministic sorted lists.
    for summary in summaries.values():
        summary["airlines"] = sorted(summary["airlines"])
    return summaries


def _format_price_range(price_range):
    """Return '€min-€max' for display, or '' when no price was recorded."""
    low = price_range["min"]
    high = price_range["max"]
    if low is None or high is None:
        return ""
    return f"€{low}-€{high}"


# CLI entry point. Steps: build airport data, resolve the destination
# airports for the country, query every (airport, sample date) pair through
# search_multiple_routes, aggregate the hits per destination, write them as
# JSON to --output and print a summary table plus follow-up commands.
# Exits with status 1 if airport data, airport resolution or the scan fails.
# The docstring below is kept short because click renders it as --help text.
@click.command()
@click.option('--from', 'origin', required=True, help='Origin airport IATA code (e.g., BDS)')
@click.option('--to-country', 'country', required=True, help='Destination country ISO code (e.g., DE)')
@click.option('--window', default=3, type=int, help='Months to scan (default: 3)')
@click.option('--output', default='discovered_routes.json', help='Output file (default: discovered_routes.json)')
@click.option('--workers', default=5, type=int, help='Concurrency level (default: 5)')
def discover(origin: str, country: str, window: int, output: str, workers: int):
    """
    Discover which routes have direct flights.

    Quickly scans sample dates to find which destination airports have ANY flights.
    Much faster than daily scanning all airports.

    Example:
        python discover_routes.py --from BDS --to-country DE --window 3
    """
    print()
    print("=" * 70)
    print("ROUTE DISCOVERY SCAN")
    print("=" * 70)
    print(f"Origin: {origin}")
    print(f"Destinations: All airports in {country}")
    print(f"Strategy: Sample one date per month for {window} months")
    print()

    # Ensure airport data exists
    try:
        download_and_build_airport_data()
    except Exception as e:
        click.echo(f"Error building airport data: {e}", err=True)
        sys.exit(1)

    # Get all destination airports
    try:
        destination_airports = resolve_airport_list(country, None)
    except ValueError as e:
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)
    total_airports = len(destination_airports)
    print(f"Found {total_airports} airports in {country}")

    # Generate sample dates (one per month)
    sample_dates = generate_discovery_dates(window)
    print(f"Sample dates: {', '.join(sample_dates)}")
    print()

    # Build routes to scan: every destination airport on every sample date
    routes = [
        (origin, airport['iata'], sample_date)
        for airport in destination_airports
        for sample_date in sample_dates
    ]
    print(f"Scanning {len(routes)} routes ({total_airports} airports × {len(sample_dates)} dates)...")
    print()

    # Execute discovery scan
    try:
        results = asyncio.run(
            search_multiple_routes(
                routes,
                seat_class="economy",
                adults=1,
                max_workers=workers,
                cache_threshold_hours=24,
                use_cache=True,
                progress_callback=None,  # Suppress detailed progress
            )
        )
    except Exception as e:
        click.echo(f"Error during scan: {e}", err=True)
        sys.exit(1)

    # Analyze results to find which destinations have flights
    destination_details = _summarize_destinations(results)
    found_count = len(destination_details)

    # Get airport names
    airport_map = {ap['iata']: ap for ap in destination_airports}

    # Prepare output
    discovered_routes = {
        "scan_date": date.today().strftime('%Y-%m-%d'),
        "origin": origin,
        "country": country,
        "window_months": window,
        "total_airports_scanned": total_airports,
        "destinations_with_flights": found_count,
        "sample_dates": sample_dates,
        "routes": [],
    }
    for dest in sorted(destination_details):
        details = destination_details[dest]
        airport_info = airport_map.get(dest, {})
        discovered_routes["routes"].append({
            "destination": dest,
            "destination_name": airport_info.get('name', 'Unknown'),
            "destination_city": airport_info.get('city', ''),
            "flights_found": details["flights_found"],
            "airlines": details["airlines"],
            "dates_with_flights": sorted(details["sample_dates"]),
            "price_range": details["price_range"],
        })

    # Save to file
    with open(output, 'w', encoding='utf-8') as f:
        json.dump(discovered_routes, f, indent=2)

    # Display results
    # Guard the ratio: an empty airport list must not crash the summary.
    success_rate = found_count / total_airports * 100 if total_airports else 0.0
    print()
    print("=" * 70)
    print("DISCOVERY RESULTS")
    print("=" * 70)
    print(f"Total airports scanned: {total_airports}")
    print(f"Destinations with flights: {found_count}")
    print(f"Success rate: {success_rate:.1f}%")
    print()

    if destination_details:
        print("Routes with direct flights:")
        print()
        print(f"{'IATA':<6} {'City':<25} {'Airlines':<30} {'Flights':<8} {'Price Range'}")
        print("-" * 90)
        for route in discovered_routes["routes"]:
            airlines_str = ", ".join(route["airlines"][:3])  # Show up to 3 airlines
            if len(route["airlines"]) > 3:
                airlines_str += f" +{len(route['airlines']) - 3}"
            price_range = _format_price_range(route["price_range"])
            print(f"{route['destination']:<6} {route['destination_city'][:24]:<25} "
                  f"{airlines_str[:29]:<30} {route['flights_found']:<8} {price_range}")
        print()
        print(f"✅ Saved to: {output}")
        print()
        print("=" * 70)
        print("NEXT STEP: Targeted Daily Scans")
        print("=" * 70)
        print("Run detailed daily scans on discovered routes:")
        print()
        for route in discovered_routes["routes"][:5]:  # Show first 5 examples
            dest = route['destination']
            print(f"python main.py --from {origin} --to {dest} --daily-scan --window {window}")
        if len(discovered_routes["routes"]) > 5:
            print(f"... and {len(discovered_routes['routes']) - 5} more routes")
        print()
        print("Or use the automated batch script:")
        print(f"python scan_discovered_routes.py {output}")
    else:
        print("⚠️ No routes with direct flights found")
        print()
        print("This could mean:")
        print(" - No direct flights exist for these routes")
        print(" - API errors prevented detection")
        print(" - Try expanding the date range with --window")
    print()
# Run the click command only when executed as a script, not on import.
if __name__ == "__main__":
    discover()