Files
ciaovolo/flight-comparator/discover_routes.py
domverse 6421f83ca7 Add flight comparator web app with full scan pipeline
Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass):

Backend (FastAPI + SQLite):
- REST API with rate limiting, Pydantic v2 validation, paginated responses
- Scan pipeline: resolves airports, queries every day in the window, saves
  individual flights + aggregate route stats to SQLite
- Background async scan processor with real-time progress tracking
- Airport search endpoint backed by OpenFlights dataset
- Daily scan window (all dates, not monthly samples)

Frontend (React 19 + TypeScript + Tailwind CSS v4):
- Dashboard with live scan status and recent scans
- Create scan form: country mode or specific airports (searchable dropdown)
- Scan detail page with expandable route rows showing individual flights
  (date, airline, departure, arrival, price) loaded on demand
- AirportSearch component with debounced live search and multi-select

Database:
- scans → routes → flights schema with FK cascade and auto-update triggers
- Migrations for schema evolution (relaxed country constraint)

Tests:
- 74 tests: unit + integration, isolated per-test SQLite DB
- Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights,
  BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026)
- Integration tests parametrized from confirmed routes

Docker:
- Multi-stage builds, Compose orchestration, Nginx reverse proxy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 17:11:51 +01:00

282 lines
9.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Route Discovery Tool
Phase 1: Quickly discover which routes have direct flights
- Scans one sample date per month across the window
- Identifies which destination airports have ANY flights
- Saves results to discovered_routes.json
Phase 2: Targeted daily scans (use main.py)
- Run detailed daily scans only on discovered routes
- Much faster than scanning all airports
Example workflow:
# Phase 1: Discover routes (fast)
python discover_routes.py --from BDS --to-country DE --window 3
# Phase 2: Daily scan each discovered route (targeted)
python main.py --from BDS --to DUS --daily-scan --window 3
python main.py --from BDS --to FMM --daily-scan --window 3
...
"""
import asyncio
import json
import sys
from datetime import date
from dateutil.relativedelta import relativedelta
from typing import Optional
try:
import click
except ImportError:
print("Error: click library not installed. Install with: pip install click")
sys.exit(1)
from airports import resolve_airport_list, download_and_build_airport_data
try:
from searcher_v3 import search_multiple_routes
except ImportError:
print("Error: searcher_v3 not found")
sys.exit(1)
def generate_discovery_dates(window_months: int) -> list[str]:
    """
    Generate sample dates for route discovery.

    Uses one date per month (the 15th) for each of the next
    ``window_months`` months, starting with the month after the current one.

    Args:
        window_months: Number of months to check. Zero or a negative value
            yields an empty list.

    Returns:
        List of ISO date strings (YYYY-MM-DD), in chronological order.
    """
    today = date.today()
    # Zero-based month counter for the current month; adding an offset and
    # splitting with divmod handles the December -> January rollover.
    current_month_index = today.year * 12 + (today.month - 1)

    dates = []
    for offset in range(1, window_months + 1):
        year, month_zero_based = divmod(current_month_index + offset, 12)
        # Every month has a 15th, so this construction cannot fail and no
        # end-of-month fallback is required.
        dates.append(date(year, month_zero_based + 1, 15).isoformat())
    return dates
def _collect_destination_details(results: dict) -> dict:
    """Aggregate scan results into per-destination statistics.

    Args:
        results: Mapping of ``(origin, destination, query_date)`` to the list
            of flight dicts returned for that query.

    Returns:
        Dict keyed by destination code. Only destinations with at least one
        flight appear. Each value holds the flight count, the sorted airline
        names, the query dates that returned flights, and the min/max price.
    """
    details = {}
    for (_origin, dest, query_date), flights in results.items():
        if not flights:
            continue
        entry = details.setdefault(dest, {
            "iata": dest,
            "flights_found": 0,
            "airlines": set(),
            "sample_dates": [],
            "price_range": {"min": None, "max": None},
        })
        entry["flights_found"] += len(flights)
        entry["sample_dates"].append(query_date)
        price_range = entry["price_range"]
        for flight in flights:
            entry["airlines"].add(flight.get("airline", "Unknown"))
            price = flight.get("price")
            if not price:
                # Missing or zero prices do not contribute to the range.
                continue
            if price_range["min"] is None:
                price_range["min"] = price
                price_range["max"] = price
            else:
                price_range["min"] = min(price_range["min"], price)
                price_range["max"] = max(price_range["max"], price)

    # Sets are not JSON serializable; store airlines as sorted lists.
    for entry in details.values():
        entry["airlines"] = sorted(entry["airlines"])
    return details


def _format_price_range(price_range: dict) -> str:
    """Render a min/max price pair for the results table ('' when unknown)."""
    price_min = price_range["min"]
    price_max = price_range["max"]
    if price_min and price_max:
        return f"€{price_min}-€{price_max}"
    return ""


@click.command()
@click.option('--from', 'origin', required=True, help='Origin airport IATA code (e.g., BDS)')
@click.option('--to-country', 'country', required=True, help='Destination country ISO code (e.g., DE)')
@click.option('--window', default=3, type=int, help='Months to scan (default: 3)')
@click.option('--output', default='discovered_routes.json', help='Output file (default: discovered_routes.json)')
@click.option('--workers', default=5, type=int, help='Concurrency level (default: 5)')
def discover(origin: str, country: str, window: int, output: str, workers: int):
    """
    Discover which routes have direct flights.

    Quickly scans sample dates to find which destination airports have ANY flights.
    Much faster than daily scanning all airports.

    Example:
    python discover_routes.py --from BDS --to-country DE --window 3
    """
    # NOTE: click renders the docstring above as the --help text, so
    # maintainer notes are kept in comments.
    #
    # Flow: resolve the country's airports, query every (airport, sample
    # date) pair, aggregate the hits per destination, write the summary as
    # JSON to `output`, then print a table and suggested follow-up commands.
    # Exits with status 1 if airport data, airport resolution, or the scan
    # itself fails.
    print()
    print("=" * 70)
    print("ROUTE DISCOVERY SCAN")
    print("=" * 70)
    print(f"Origin: {origin}")
    print(f"Destinations: All airports in {country}")
    print(f"Strategy: Sample one date per month for {window} months")
    print()

    # Ensure airport data exists
    try:
        download_and_build_airport_data()
    except Exception as e:
        click.echo(f"Error building airport data: {e}", err=True)
        sys.exit(1)

    # Get all destination airports
    try:
        destination_airports = resolve_airport_list(country, None)
    except ValueError as e:
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)
    airports_scanned = len(destination_airports)
    print(f"Found {airports_scanned} airports in {country}")

    # Generate sample dates (one per month)
    sample_dates = generate_discovery_dates(window)
    print(f"Sample dates: {', '.join(sample_dates)}")
    print()

    # Build routes to scan: every destination airport on every sample date
    routes = [
        (origin, airport['iata'], sample_date)
        for airport in destination_airports
        for sample_date in sample_dates
    ]
    print(f"Scanning {len(routes)} routes ({airports_scanned} airports × {len(sample_dates)} dates)...")
    print()

    # Execute discovery scan
    try:
        results = asyncio.run(
            search_multiple_routes(
                routes,
                seat_class="economy",
                adults=1,
                max_workers=workers,
                cache_threshold_hours=24,
                use_cache=True,
                progress_callback=None,  # Suppress detailed progress
            )
        )
    except Exception as e:
        click.echo(f"Error during scan: {e}", err=True)
        sys.exit(1)

    # Analyze results to find which destinations have flights
    destination_details = _collect_destination_details(results)
    destinations_found = len(destination_details)

    # Used to attach airport name/city to each discovered destination
    airport_map = {ap['iata']: ap for ap in destination_airports}

    # Prepare output
    discovered_routes = {
        "scan_date": date.today().strftime('%Y-%m-%d'),
        "origin": origin,
        "country": country,
        "window_months": window,
        "total_airports_scanned": airports_scanned,
        "destinations_with_flights": destinations_found,
        "sample_dates": sample_dates,
        "routes": [],
    }
    for dest in sorted(destination_details):
        details = destination_details[dest]
        airport_info = airport_map.get(dest, {})
        discovered_routes["routes"].append({
            "destination": dest,
            "destination_name": airport_info.get('name', 'Unknown'),
            "destination_city": airport_info.get('city', ''),
            "flights_found": details["flights_found"],
            "airlines": details["airlines"],
            "dates_with_flights": sorted(details["sample_dates"]),
            "price_range": details["price_range"],
        })

    # Save to file
    with open(output, 'w', encoding='utf-8') as f:
        json.dump(discovered_routes, f, indent=2)

    # An empty airport list must not crash the summary with a division by zero.
    success_rate = destinations_found / airports_scanned * 100 if airports_scanned else 0.0

    # Display results
    print()
    print("=" * 70)
    print("DISCOVERY RESULTS")
    print("=" * 70)
    print(f"Total airports scanned: {airports_scanned}")
    print(f"Destinations with flights: {destinations_found}")
    print(f"Success rate: {success_rate:.1f}%")
    print()

    if destination_details:
        print("Routes with direct flights:")
        print()
        print(f"{'IATA':<6} {'City':<25} {'Airlines':<30} {'Flights':<8} {'Price Range'}")
        print("-" * 90)
        for route in discovered_routes["routes"]:
            airlines_str = ", ".join(route["airlines"][:3])  # Show up to 3 airlines
            if len(route["airlines"]) > 3:
                airlines_str += f" +{len(route['airlines']) - 3}"
            price_range = _format_price_range(route["price_range"])
            print(f"{route['destination']:<6} {route['destination_city'][:24]:<25} "
                  f"{airlines_str[:29]:<30} {route['flights_found']:<8} {price_range}")
        print()
        print(f"✅ Saved to: {output}")
        print()
        print("=" * 70)
        print("NEXT STEP: Targeted Daily Scans")
        print("=" * 70)
        print("Run detailed daily scans on discovered routes:")
        print()
        for route in discovered_routes["routes"][:5]:  # Show first 5 examples
            dest = route['destination']
            print(f"python main.py --from {origin} --to {dest} --daily-scan --window {window}")
        if len(discovered_routes["routes"]) > 5:
            print(f"... and {len(discovered_routes['routes']) - 5} more routes")
        print()
        print("Or use the automated batch script:")
        print(f"python scan_discovered_routes.py {output}")
    else:
        print("⚠️ No routes with direct flights found")
        print()
        print("This could mean:")
        print(" - No direct flights exist for these routes")
        print(" - API errors prevented detection")
        print(" - Try expanding the date range with --window")
    print()
# Script entry point: hand control to the click command, which parses argv.
if __name__ == "__main__":
    discover()