Add flight comparator web app with full scan pipeline
Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass):

Backend (FastAPI + SQLite):
- REST API with rate limiting, Pydantic v2 validation, paginated responses
- Scan pipeline: resolves airports, queries every day in the window, saves individual flights + aggregate route stats to SQLite
- Background async scan processor with real-time progress tracking
- Airport search endpoint backed by OpenFlights dataset
- Daily scan window (all dates, not monthly samples)

Frontend (React 19 + TypeScript + Tailwind CSS v4):
- Dashboard with live scan status and recent scans
- Create scan form: country mode or specific airports (searchable dropdown)
- Scan detail page with expandable route rows showing individual flights (date, airline, departure, arrival, price) loaded on demand
- AirportSearch component with debounced live search and multi-select

Database:
- scans → routes → flights schema with FK cascade and auto-update triggers
- Migrations for schema evolution (relaxed country constraint)

Tests:
- 74 tests: unit + integration, isolated per-test SQLite DB
- Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights, BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026)
- Integration tests parametrized from confirmed routes

Docker:
- Multi-stage builds, Compose orchestration, Nginx reverse proxy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
411
flight-comparator/main.py
Executable file
411
flight-comparator/main.py
Executable file
@@ -0,0 +1,411 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Flight Airport Comparator CLI
|
||||
|
||||
Compares direct flight options from multiple airports in a country to a single destination.
|
||||
Supports both single-date queries and seasonal scanning across multiple months.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
try:
|
||||
import click
|
||||
except ImportError:
|
||||
print("Error: click library not installed. Install with: pip install click")
|
||||
sys.exit(1)
|
||||
|
||||
from date_resolver import resolve_dates, resolve_dates_daily, detect_new_connections, SEARCH_WINDOW_MONTHS
|
||||
from airports import resolve_airport_list, download_and_build_airport_data
|
||||
try:
|
||||
from searcher_v3 import search_multiple_routes
|
||||
print("✓ Using fast-flights v3.0rc1 with SOCS cookie integration")
|
||||
except ImportError:
|
||||
try:
|
||||
from searcher import search_multiple_routes
|
||||
print("⚠️ Using legacy searcher (v2.2) - consider upgrading to v3.0rc1")
|
||||
except ImportError:
|
||||
print("✗ No searcher module found!")
|
||||
sys.exit(1)
|
||||
from formatter import format_table_single_date, format_table_seasonal, format_json, format_csv
|
||||
from progress import SearchProgress
|
||||
|
||||
|
||||
# Sort value used for flights that lack the requested field, so they rank last.
_MISSING_SORT_VALUE = 999999


def _flight_sort_key(field):
    """Return a sort-key function reading *field* from a flight dict.

    Flights where the field is absent or ``None`` are ranked last instead of
    raising ``TypeError`` when compared against numeric values.
    """
    def key(flight):
        value = flight.get(field)
        return _MISSING_SORT_VALUE if value is None else value

    return key


def _top_flights(flights, field, limit):
    """Return the first *limit* flights ordered ascending by *field*."""
    return sorted(flights, key=_flight_sort_key(field))[:limit]


@click.command()
@click.option('--to', 'destination', help='Destination airport IATA code (e.g., JFK)')
@click.option('--to-country', 'destination_country', help='Destination country ISO code for reverse search (e.g., DE, US)')
@click.option('--country', help='Origin country ISO code (e.g., DE, US)')
@click.option('--date', help='Departure date YYYY-MM-DD. Omit for seasonal scan.')
@click.option('--window', default=SEARCH_WINDOW_MONTHS, type=int, help=f'Months to scan in seasonal mode (default: {SEARCH_WINDOW_MONTHS})')
@click.option('--daily-scan', is_flag=True, help='Scan every day (Mon-Sun) instead of just the 15th of each month')
@click.option('--start-date', help='Start date for daily scan (YYYY-MM-DD). Default: tomorrow')
@click.option('--end-date', help='End date for daily scan (YYYY-MM-DD). Default: start + window months')
@click.option('--seat', default='economy', type=click.Choice(['economy', 'premium', 'business', 'first']), help='Cabin class')
@click.option('--adults', default=1, type=int, help='Number of passengers')
@click.option('--sort', default='price', type=click.Choice(['price', 'duration']), help='Sort order')
@click.option('--from', 'from_airports', help='Comma-separated IATA codes (overrides --country)')
@click.option('--top', default=3, type=int, help='Max results per airport')
@click.option('--output', default='table', type=click.Choice(['table', 'json', 'csv']), help='Output format')
@click.option('--workers', default=5, type=int, help='Concurrency level')
@click.option('--cache-threshold', default=24, type=int, help='Cache validity in hours (default: 24)')
@click.option('--no-cache', is_flag=True, help='Disable cache, force fresh API queries')
@click.option('--dry-run', is_flag=True, help='List airports and dates without API calls')
def main(
    destination: Optional[str],
    destination_country: Optional[str],
    country: Optional[str],
    date: Optional[str],
    window: int,
    daily_scan: bool,
    start_date: Optional[str],
    end_date: Optional[str],
    seat: str,
    adults: int,
    sort: str,
    from_airports: Optional[str],
    top: int,
    output: str,
    workers: int,
    cache_threshold: int,
    no_cache: bool,
    dry_run: bool,
):
    """
    Flight Airport Comparator - Find the best departure or arrival airport.

    TWO MODES:
    1. NORMAL: Multiple origins → Single destination
       Compares flights from all airports in a country to one destination

    2. REVERSE: Single origin → Multiple destinations
       Compares flights from one airport to all airports in a country

    Supports seasonal scanning to discover new routes and price trends.
    Uses SQLite caching to reduce API calls and avoid rate limiting.

    SCANNING STRATEGIES:
    - Single date: --date YYYY-MM-DD (one specific day)
    - Seasonal: Omit --date (queries 15th of each month for N months)
    - Daily: --daily-scan (queries EVERY day Mon-Sun for N months)

    Examples:

    # NORMAL MODE: Country to single destination
    python main.py --to JFK --country DE --date 2026-06-15
    python main.py --to JFK --from FRA,MUC,BER --date 2026-06-15

    # REVERSE MODE: Single airport to country
    python main.py --from BDS --to-country DE --date 2026-06-15
    python main.py --from BDS --to-country DE # Seasonal scan

    # Seasonal scan (6 months, one day per month)
    python main.py --to JFK --country DE

    # Daily scan (every day for 3 months)
    python main.py --from BDS --to DUS --daily-scan --window 3

    # Daily scan with custom date range
    python main.py --from BDS --to-country DE --daily-scan --start-date 2026-04-01 --end-date 2026-06-30

    # Force fresh queries (ignore cache)
    python main.py --to JFK --country DE --no-cache

    # Use 48-hour cache threshold
    python main.py --to JFK --country DE --cache-threshold 48

    # Dry run to preview scan scope
    python main.py --to JFK --country DE --dry-run
    """
    # Developer notes (kept out of the docstring, which click renders as --help):
    #   * Every parameter is injected by the click option of the same
    #     destination name declared in the decorators above.
    #   * The function returns nothing; results are emitted via the formatter
    #     helpers, and every failure path prints to stderr and exits with 1.
    #   * Flow: validate -> resolve airports -> resolve dates -> (dry run?) ->
    #     build routes -> search -> group/sort/trim -> format.
    start_time = time.time()

    # --- Validate the destination selector; it also decides the search mode.
    if destination and destination_country:
        click.echo("Error: Cannot use both --to and --to-country. Choose one.", err=True)
        sys.exit(1)

    if not destination and not destination_country:
        click.echo("Error: Either --to (single destination) or --to-country (destination country) must be provided", err=True)
        sys.exit(1)

    # Truthiness (not `is not None`) so an empty --to-country "" given next to
    # a real --to does not switch on reverse mode with a blank country.
    reverse_mode = bool(destination_country)

    if reverse_mode:
        # Reverse mode: single origin → multiple destinations
        if not from_airports:
            click.echo("Error: Reverse mode (--to-country) requires --from with a single airport", err=True)
            sys.exit(1)
        if ',' in from_airports:
            click.echo("Error: Reverse mode requires a single origin airport in --from (no commas)", err=True)
            sys.exit(1)
        if country:
            click.echo("Warning: --country is ignored in reverse mode (using --to-country instead)", err=True)
    elif not country and not from_airports:
        # Normal mode: multiple origins → single destination
        click.echo("Error: Either --country or --from must be provided for origin airports", err=True)
        sys.exit(1)

    # --- Ensure airport data exists before any lookup.
    try:
        download_and_build_airport_data()
    except Exception as e:
        click.echo(f"Error building airport data: {e}", err=True)
        sys.exit(1)

    # --- Resolve the "many" side of the comparison.
    # Reverse mode scans the destination country's airports; normal mode scans
    # the origin airports (explicit --from list or the whole --country).
    try:
        if reverse_mode:
            airports = resolve_airport_list(destination_country, None)
        else:
            airports = resolve_airport_list(country, from_airports)
    except ValueError as e:
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)

    if reverse_mode:
        search_label = f"{from_airports} → {destination_country}"
    else:
        search_label = f"{country or 'Custom'} → {destination}"

    # --- Resolve dates.
    if date:
        # Single date mode - explicit date provided
        dates = [date]
    elif daily_scan:
        # Daily scan mode - query every day in the range
        dates = resolve_dates_daily(start_date, end_date, window)
        if not dates:
            # Guard the dates[0] / dates[-1] access below against an empty range.
            click.echo("Error: Daily scan resolved to an empty date range. Check --start-date, --end-date and --window.", err=True)
            sys.exit(1)
        click.echo(f"Daily scan mode: {len(dates)} days from {dates[0]} to {dates[-1]}")
    else:
        # Seasonal mode - query one day per month (default: 15th)
        dates = resolve_dates(None, window)

    # --- Dry run mode - just show what would be scanned.
    if dry_run:
        request_count = len(airports) * len(dates)
        click.echo()
        click.echo(f"Dry run: {search_label}")
        click.echo(f"Mode: {'REVERSE (one → many)' if reverse_mode else 'NORMAL (many → one)'}")
        click.echo()
        click.echo(f"Airports to scan ({len(airports)}):")
        for airport in airports[:10]:
            click.echo(f" • {airport['iata']} - {airport['name']} ({airport.get('city', '')})")
        if len(airports) > 10:
            click.echo(f" ... and {len(airports) - 10} more")
        click.echo()
        click.echo(f"Dates to scan ({len(dates)}):")
        for d in dates:
            click.echo(f" • {d}")
        click.echo()
        click.echo(f"Total API calls: {len(airports)} airports × {len(dates)} dates = {request_count} requests")
        # Clamp the divisor so `--workers 0 --dry-run` cannot divide by zero.
        click.echo(f"Estimated time: ~{(request_count * 1.0 / max(workers, 1)):.0f}s at {workers} workers")
        click.echo()
        return

    # --- Build route list (airport × date combinations).
    if reverse_mode:
        # Reverse: from single origin to each destination airport
        routes = [
            (from_airports, airport['iata'], query_date)
            for airport in airports
            for query_date in dates
        ]
    else:
        # Normal: from each origin airport to single destination
        routes = [
            (airport['iata'], destination, query_date)
            for airport in airports
            for query_date in dates
        ]

    click.echo()
    click.echo(f"Searching {len(routes)} routes ({len(airports)} airports × {len(dates)} dates)...")
    click.echo()

    # --- Execute searches (with caching and progress display).
    use_cache = not no_cache

    try:
        with SearchProgress(total_routes=len(routes), show_progress=True) as progress:
            def progress_callback(origin, dest, date, status, count, error=None):
                progress.update(origin, dest, date, status, count, error)

            results = asyncio.run(
                search_multiple_routes(
                    routes,
                    seat_class=seat,
                    adults=adults,
                    max_workers=workers,
                    cache_threshold_hours=cache_threshold,
                    use_cache=use_cache,
                    progress_callback=progress_callback,
                )
            )
    except Exception as e:
        click.echo(f"Error during search: {e}", err=True)
        sys.exit(1)

    elapsed_time = time.time() - start_time

    # Header arguments for the table formatters; the value pairs match what
    # each mode has always passed.
    if reverse_mode:
        single_date_header = (from_airports, destination_country)
        seasonal_header = (f"from {from_airports}", destination_country)
    else:
        single_date_header = (destination, country or 'Custom')
        seasonal_header = single_date_header

    # --- Process results. `results` is keyed by (origin, destination, date).
    if len(dates) == 1:
        # Single-date mode
        single_date = dates[0]
        sort_field = 'price' if sort == 'price' else 'duration_minutes'

        # Group by the varying airport: destination in reverse mode, origin otherwise.
        grouped = {}
        for (origin, dest, query_date), flights in results.items():
            if query_date != single_date:
                continue
            airport_code = dest if reverse_mode else origin
            if flights:
                grouped.setdefault(airport_code, []).extend(flights)
            elif not reverse_mode:
                # Normal mode keeps airports without flights as empty entries;
                # reverse mode omits them.
                grouped.setdefault(airport_code, [])

        results_by_airport = {
            airport_code: _top_flights(flights, sort_field, top)
            for airport_code, flights in grouped.items()
        }

        # Format output
        if output == 'json':
            format_json(results_by_airport)
        elif output == 'csv':
            format_csv(results_by_airport)
        else:  # table
            format_table_single_date(
                results_by_airport,
                single_date_header[0],
                single_date_header[1],
                single_date,
                seat,
                sort,
                len(airports),
                elapsed_time,
            )
    else:
        # Seasonal / daily mode: bucket by "YYYY-MM", then by airport.
        results_by_month = {}
        for (origin, dest, query_date), flights in results.items():
            month_bucket = results_by_month.setdefault(query_date[:7], {})
            if flights:
                # Accumulate across every date of the month in BOTH modes, so a
                # daily scan does not keep only the last processed day.
                airport_code = dest if reverse_mode else origin
                month_bucket.setdefault(airport_code, []).extend(flights)

        # Multi-date results are always ranked by price (--sort is not applied here).
        for month_bucket in results_by_month.values():
            for airport_code in month_bucket:
                month_bucket[airport_code] = _top_flights(month_bucket[airport_code], 'price', top)

        # Detect new connections
        # Convert to format expected by detect_new_connections
        monthly_flights_for_detection = {
            month_key: [
                {
                    'origin': flight['origin'],
                    'destination': flight['destination'],
                }
                for flights in airports_dict.values()
                for flight in flights
            ]
            for month_key, airports_dict in results_by_month.items()
        }

        new_connections = detect_new_connections(monthly_flights_for_detection)

        # Format output
        if output == 'json':
            format_json({
                'results_by_month': results_by_month,
                'new_connections': new_connections,
            })
        elif output == 'csv':
            # Flatten seasonal results for CSV
            flattened = {
                f"{airport_code}_{month_key}": flights
                for month_key, airports_dict in results_by_month.items()
                for airport_code, flights in airports_dict.items()
            }
            format_csv(flattened)
        else:  # table
            format_table_seasonal(
                results_by_month,
                new_connections,
                seasonal_header[0],
                seasonal_header[1],
                seat,
                len(airports),
                elapsed_time,
            )
|
||||
|
||||
|
||||
# Script entry point: invoke the click command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user