Add flight comparator web app with full scan pipeline
Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass):

Backend (FastAPI + SQLite):
- REST API with rate limiting, Pydantic v2 validation, paginated responses
- Scan pipeline: resolves airports, queries every day in the window, saves individual flights + aggregate route stats to SQLite
- Background async scan processor with real-time progress tracking
- Airport search endpoint backed by OpenFlights dataset
- Daily scan window (all dates, not monthly samples)

Frontend (React 19 + TypeScript + Tailwind CSS v4):
- Dashboard with live scan status and recent scans
- Create scan form: country mode or specific airports (searchable dropdown)
- Scan detail page with expandable route rows showing individual flights (date, airline, departure, arrival, price) loaded on demand
- AirportSearch component with debounced live search and multi-select

Database:
- scans → routes → flights schema with FK cascade and auto-update triggers
- Migrations for schema evolution (relaxed country constraint)

Tests:
- 74 tests: unit + integration, isolated per-test SQLite DB
- Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights, BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026)
- Integration tests parametrized from confirmed routes

Docker:
- Multi-stage builds, Compose orchestration, Nginx reverse proxy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
281
flight-comparator/discover_routes.py
Normal file
281
flight-comparator/discover_routes.py
Normal file
@@ -0,0 +1,281 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Route Discovery Tool
|
||||
|
||||
Phase 1: Quickly discover which routes have direct flights
|
||||
- Scans one sample date per month across the window
|
||||
- Identifies which destination airports have ANY flights
|
||||
- Saves results to discovered_routes.json
|
||||
|
||||
Phase 2: Targeted daily scans (use main.py)
|
||||
- Run detailed daily scans only on discovered routes
|
||||
- Much faster than scanning all airports
|
||||
|
||||
Example workflow:
|
||||
# Phase 1: Discover routes (fast)
|
||||
python discover_routes.py --from BDS --to-country DE --window 3
|
||||
|
||||
# Phase 2: Daily scan each discovered route (targeted)
|
||||
python main.py --from BDS --to DUS --daily-scan --window 3
|
||||
python main.py --from BDS --to FMM --daily-scan --window 3
|
||||
...
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from datetime import date
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from typing import Optional
|
||||
|
||||
try:
|
||||
import click
|
||||
except ImportError:
|
||||
print("Error: click library not installed. Install with: pip install click")
|
||||
sys.exit(1)
|
||||
|
||||
from airports import resolve_airport_list, download_and_build_airport_data
|
||||
try:
|
||||
from searcher_v3 import search_multiple_routes
|
||||
except ImportError:
|
||||
print("Error: searcher_v3 not found")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def generate_discovery_dates(window_months: int) -> list[str]:
    """
    Generate sample dates for route discovery.

    Uses one date per month (the 15th) to quickly check which routes exist.
    Sampling starts with the month after the current one, so the current
    month is never part of the result.

    Args:
        window_months: Number of months to check. Zero or a negative value
            yields an empty list.

    Returns:
        List of date strings (YYYY-MM-DD), one per month, in chronological
        order.
    """
    today = date.today()

    # Place the current month on a single linear axis (months since year 0)
    # so that year rollover (e.g. December + 1) is plain integer arithmetic.
    current_month_index = today.year * 12 + (today.month - 1)

    dates = []
    for offset in range(1, window_months + 1):
        year, month_zero_based = divmod(current_month_index + offset, 12)
        # Every calendar month has a 15th, so no end-of-month fallback
        # is needed here.
        sample_date = date(year, month_zero_based + 1, 15)
        dates.append(sample_date.strftime('%Y-%m-%d'))

    return dates
|
||||
|
||||
|
||||
def _summarize_flights_by_destination(results):
    """
    Fold raw scan results into one summary record per destination.

    Args:
        results: Mapping of (origin, destination, date) tuples to the
            collection of flights found for that query. Each flight is read
            with ``.get`` for the "airline" and "price" keys.

    Returns:
        Dict keyed by destination code. Only destinations with at least one
        flight appear. Each value holds "iata", "flights_found", "airlines"
        (sorted list), "sample_dates" (dates that returned flights) and
        "price_range" ({"min", "max"}, both None when no flight had a
        truthy price).
    """
    summary = {}

    for (_origin, dest, query_date), flights in results.items():
        if not flights:
            continue

        details = summary.get(dest)
        if details is None:
            details = summary[dest] = {
                "iata": dest,
                "flights_found": 0,
                "airlines": set(),
                "sample_dates": [],
                "price_range": {"min": None, "max": None},
            }

        details["flights_found"] += len(flights)
        details["sample_dates"].append(query_date)

        price_range = details["price_range"]
        for flight in flights:
            details["airlines"].add(flight.get("airline", "Unknown"))

            price = flight.get("price")
            if not price:
                # Missing or falsy prices do not contribute to the range.
                continue

            if price_range["min"] is None:
                price_range["min"] = price
                price_range["max"] = price
            else:
                price_range["min"] = min(price_range["min"], price)
                price_range["max"] = max(price_range["max"], price)

    # Sets are not JSON serializable; emit a stable, sorted list instead.
    for details in summary.values():
        details["airlines"] = sorted(details["airlines"])

    return summary


# Command-line entry point for phase 1 (route discovery).
#
# Steps: build airport data, resolve every airport in the destination
# country, query one sample date per month for each airport, aggregate the
# results per destination, write them to the --output JSON file, and print a
# summary table plus suggested follow-up commands.
#
# The process exits with status 1 when airport data cannot be built, the
# country cannot be resolved (ValueError), or the scan raises.
#
# NOTE: the docstring below is rendered by click as the --help text, so it is
# user-facing output; maintainer documentation belongs in comments like these.
@click.command()
@click.option('--from', 'origin', required=True, help='Origin airport IATA code (e.g., BDS)')
@click.option('--to-country', 'country', required=True, help='Destination country ISO code (e.g., DE)')
@click.option('--window', default=3, type=int, help='Months to scan (default: 3)')
@click.option('--output', default='discovered_routes.json', help='Output file (default: discovered_routes.json)')
@click.option('--workers', default=5, type=int, help='Concurrency level (default: 5)')
def discover(origin: str, country: str, window: int, output: str, workers: int):
    """
    Discover which routes have direct flights.

    Quickly scans sample dates to find which destination airports have ANY flights.
    Much faster than daily scanning all airports.

    Example:
        python discover_routes.py --from BDS --to-country DE --window 3
    """
    print()
    print("=" * 70)
    print("ROUTE DISCOVERY SCAN")
    print("=" * 70)
    print(f"Origin: {origin}")
    print(f"Destinations: All airports in {country}")
    print(f"Strategy: Sample one date per month for {window} months")
    print()

    # Ensure airport data exists
    try:
        download_and_build_airport_data()
    except Exception as e:
        click.echo(f"Error building airport data: {e}", err=True)
        sys.exit(1)

    # Get all destination airports
    try:
        destination_airports = resolve_airport_list(country, None)
    except ValueError as e:
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)

    total_airports = len(destination_airports)
    print(f"Found {total_airports} airports in {country}")

    # Generate sample dates (one per month)
    sample_dates = generate_discovery_dates(window)
    print(f"Sample dates: {', '.join(sample_dates)}")
    print()

    # Build routes to scan: every destination airport on every sample date
    routes = [
        (origin, airport['iata'], sample_date)
        for airport in destination_airports
        for sample_date in sample_dates
    ]

    total_routes = len(routes)
    print(f"Scanning {total_routes} routes ({total_airports} airports × {len(sample_dates)} dates)...")
    print()

    # Execute discovery scan
    try:
        results = asyncio.run(
            search_multiple_routes(
                routes,
                seat_class="economy",
                adults=1,
                max_workers=workers,
                cache_threshold_hours=24,
                use_cache=True,
                progress_callback=None,  # Suppress detailed progress
            )
        )
    except Exception as e:
        click.echo(f"Error during scan: {e}", err=True)
        sys.exit(1)

    # Analyze results to find which destinations have flights. The summary's
    # keys are exactly the destinations that returned at least one flight.
    destination_details = _summarize_flights_by_destination(results)
    destinations_found = len(destination_details)

    # Get airport names
    airport_map = {ap['iata']: ap for ap in destination_airports}

    # Prepare output
    discovered_routes = {
        "scan_date": date.today().strftime('%Y-%m-%d'),
        "origin": origin,
        "country": country,
        "window_months": window,
        "total_airports_scanned": total_airports,
        "destinations_with_flights": destinations_found,
        "sample_dates": sample_dates,
        "routes": [],
    }

    for dest in sorted(destination_details):
        details = destination_details[dest]
        airport_info = airport_map.get(dest, {})

        discovered_routes["routes"].append({
            "destination": dest,
            "destination_name": airport_info.get('name', 'Unknown'),
            "destination_city": airport_info.get('city', ''),
            "flights_found": details["flights_found"],
            "airlines": details["airlines"],
            "dates_with_flights": sorted(details["sample_dates"]),
            "price_range": details["price_range"],
        })

    # Save to file
    with open(output, 'w', encoding='utf-8') as f:
        json.dump(discovered_routes, f, indent=2)

    # An empty airport list must not crash the summary with a division by
    # zero after the results file has already been written.
    if total_airports:
        success_rate = destinations_found / total_airports * 100
    else:
        success_rate = 0.0

    # Display results
    print()
    print("=" * 70)
    print("DISCOVERY RESULTS")
    print("=" * 70)
    print(f"Total airports scanned: {total_airports}")
    print(f"Destinations with flights: {destinations_found}")
    print(f"Success rate: {success_rate:.1f}%")
    print()

    if destination_details:
        print("Routes with direct flights:")
        print()
        print(f"{'IATA':<6} {'City':<25} {'Airlines':<30} {'Flights':<8} {'Price Range'}")
        print("-" * 90)

        for route in discovered_routes["routes"]:
            airlines_str = ", ".join(route["airlines"][:3])  # Show up to 3 airlines
            if len(route["airlines"]) > 3:
                airlines_str += f" +{len(route['airlines']) - 3}"

            price_min = route["price_range"]["min"]
            price_max = route["price_range"]["max"]
            if price_min and price_max:
                price_range = f"€{price_min}-€{price_max}"
            else:
                price_range = "—"

            print(f"{route['destination']:<6} {route['destination_city'][:24]:<25} "
                  f"{airlines_str[:29]:<30} {route['flights_found']:<8} {price_range}")

        print()
        print(f"✅ Saved to: {output}")
        print()
        print("=" * 70)
        print("NEXT STEP: Targeted Daily Scans")
        print("=" * 70)
        print("Run detailed daily scans on discovered routes:")
        print()

        for route in discovered_routes["routes"][:5]:  # Show first 5 examples
            dest = route['destination']
            print(f"python main.py --from {origin} --to {dest} --daily-scan --window {window}")

        if len(discovered_routes["routes"]) > 5:
            print(f"... and {len(discovered_routes['routes']) - 5} more routes")

        print()
        print("Or use the automated batch script:")
        print(f"python scan_discovered_routes.py {output}")
    else:
        print("⚠️ No routes with direct flights found")
        print()
        print("This could mean:")
        print(" - No direct flights exist for these routes")
        print(" - API errors prevented detection")
        print(" - Try expanding the date range with --window")

    print()
|
||||
|
||||
|
||||
# Script entry point: invoke the click command only when this file is run
# directly, not when it is imported as a module.
if __name__ == '__main__':
    discover()
|
||||
Reference in New Issue
Block a user