Files
ciaovolo/flight-comparator/scan_discovered_routes.py
domverse 6421f83ca7 Add flight comparator web app with full scan pipeline
Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass):

Backend (FastAPI + SQLite):
- REST API with rate limiting, Pydantic v2 validation, paginated responses
- Scan pipeline: resolves airports, queries every day in the window, saves
  individual flights + aggregate route stats to SQLite
- Background async scan processor with real-time progress tracking
- Airport search endpoint backed by OpenFlights dataset
- Daily scan window (all dates, not monthly samples)

Frontend (React 19 + TypeScript + Tailwind CSS v4):
- Dashboard with live scan status and recent scans
- Create scan form: country mode or specific airports (searchable dropdown)
- Scan detail page with expandable route rows showing individual flights
  (date, airline, departure, arrival, price) loaded on demand
- AirportSearch component with debounced live search and multi-select

Database:
- scans → routes → flights schema with FK cascade and auto-update triggers
- Migrations for schema evolution (relaxed country constraint)

Tests:
- 74 tests: unit + integration, isolated per-test SQLite DB
- Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights,
  BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026)
- Integration tests parametrized from confirmed routes

Docker:
- Multi-stage builds, Compose orchestration, Nginx reverse proxy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 17:11:51 +01:00

225 lines
7.2 KiB
Python

#!/usr/bin/env python3
"""
Automated Daily Scans for Discovered Routes
Reads discovered_routes.json and runs targeted daily scans.
Much faster than scanning all airports because it only queries known routes.
Usage:
    # First, discover routes
    python discover_routes.py --from BDS --to-country DE --window 3

    # Then, run targeted daily scans
    python scan_discovered_routes.py discovered_routes.json --daily-scan
"""
import json
import subprocess
import sys
import click
from datetime import datetime
def _build_scan_command(origin, dest, daily_scan, start_date, end_date, window, workers):
    """Build the argument list that runs ``main.py`` for a single route.

    The command is returned as a list so it can be executed without a
    shell; values coming from the routes file or the command line are
    therefore never interpreted as shell syntax.

    Args:
        origin: Value passed to ``--from``.
        dest: Value passed to ``--to``.
        daily_scan: Whether to add ``--daily-scan``.
        start_date: Optional value for ``--start-date`` (daily scans only).
        end_date: Optional value for ``--end-date`` (daily scans only).
        window: Value for ``--window``; omitted for daily scans that
            specify a start or end date.
        workers: Value for ``--workers``.

    Returns:
        The interpreter, script and options as a list of strings.
    """
    # Run main.py with the interpreter executing this script; a bare
    # "python" may be missing or belong to a different environment.
    argv = [sys.executable or "python", "main.py", "--from", origin, "--to", dest]

    explicit_dates = False
    if daily_scan:
        argv.append("--daily-scan")
        if start_date:
            argv.extend(["--start-date", start_date])
            explicit_dates = True
        if end_date:
            argv.extend(["--end-date", end_date])
            explicit_dates = True

    # The window applies to seasonal scans and to daily scans that were
    # given no explicit date bound.
    if not explicit_dates:
        argv.extend(["--window", str(window)])

    argv.extend(["--workers", str(workers)])
    argv.extend(["--output", "json"])
    return argv


@click.command()
@click.argument('routes_file', type=click.Path(exists=True))
@click.option('--daily-scan', is_flag=True, help='Run daily scans (vs seasonal)')
@click.option('--start-date', help='Start date for daily scan (YYYY-MM-DD)')
@click.option('--end-date', help='End date for daily scan (YYYY-MM-DD)')
@click.option('--window', type=int, help='Override window months from discovery')
@click.option('--workers', default=5, type=int, help='Concurrency level (default: 5)')
@click.option('--output-dir', default='results', help='Directory to save results (default: results)')
@click.option('--dry-run', is_flag=True, help='Show what would be scanned without executing')
def scan_discovered(routes_file, daily_scan, start_date, end_date, window, workers, output_dir, dry_run):
    """
    Run targeted scans on discovered routes.

    Example:
    python scan_discovered_routes.py discovered_routes.json --daily-scan
    \f
    Everything below the form feed is developer documentation; click
    truncates the ``--help`` text at that marker.

    Reads ``routes_file`` (JSON with ``origin``, ``routes`` and an optional
    ``window_months`` that defaults to 3) and runs ``main.py`` once per
    route, sequentially, with a 10 minute timeout each. The combined
    stdout and stderr of every run is written to
    ``<output_dir>/<origin>_<dest>_<mode>.json`` and an overall
    ``scan_summary.json`` is written to ``output_dir``.

    Each route must provide ``destination``; ``destination_city`` and
    ``airlines`` are used for display and the summary and may be absent.

    With ``dry_run`` the commands are printed and nothing is executed or
    written. Exits with status 1 when the routes file lists no routes.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import re
    import shlex

    # Load discovered routes
    with open(routes_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    origin = data['origin']
    routes = data['routes']
    if window is None:
        window = data.get('window_months', 3)

    mode = 'daily' if daily_scan else 'seasonal'
    flights_pattern = re.compile(r'Flights Found:\s+(\d+)')

    print()
    print("=" * 70)
    print("TARGETED SCAN OF DISCOVERED ROUTES")
    print("=" * 70)
    print(f"Origin: {origin}")
    print(f"Discovered routes: {len(routes)}")
    print(f"Mode: {'Daily scan' if daily_scan else 'Seasonal scan'}")
    if daily_scan and start_date and end_date:
        print(f"Date range: {start_date} to {end_date}")
    else:
        print(f"Window: {window} months")
    print(f"Workers: {workers}")
    print()

    if not routes:
        print("⚠️ No routes to scan!")
        print(f"Discovery file {routes_file} contains no routes with flights.")
        sys.exit(1)

    # Display routes to scan
    print("Routes to scan:")
    for i, route in enumerate(routes, 1):
        dest = route['destination']
        city = route.get('destination_city', 'unknown')
        carriers = route.get('airlines') or []
        # Show at most two airlines and summarise the rest as "+N".
        airlines = ', '.join(carriers[:2])
        if len(carriers) > 2:
            airlines += f" +{len(carriers) - 2}"
        print(f" {i}. {origin}{dest} ({city}) - {airlines}")
    print()

    if dry_run:
        print("=" * 70)
        print("DRY RUN - Commands that would be executed:")
        print("=" * 70)
        print()
    else:
        # Create the output directory up front so the summary can be
        # written even when every route times out or fails.
        os.makedirs(output_dir, exist_ok=True)

    results_summary = {
        "scan_date": datetime.now().isoformat(),
        "origin": origin,
        "routes_scanned": len(routes),
        "mode": mode,
        "results": []
    }

    for i, route in enumerate(routes, 1):
        dest = route['destination']
        city = route.get('destination_city', 'unknown')

        argv = _build_scan_command(
            origin, dest, daily_scan, start_date, end_date, window, workers
        )
        # The file receives the raw stdout + stderr of the run.
        output_file = f"{output_dir}/{origin}_{dest}_{mode}.json"

        print(f"[{i}/{len(routes)}] Scanning {origin}{dest} ({city})")

        if dry_run:
            # Quote for display only; execution never goes through a shell.
            command = " ".join(shlex.quote(part) for part in argv)
            print(f" Command: {command}")
            print()
            continue

        try:
            result = subprocess.run(
                argv,
                capture_output=True,
                text=True,
                timeout=600  # 10 minute timeout per route
            )
            output = result.stdout + result.stderr

            # Parse results (look for flight count)
            flights_match = flights_pattern.search(output)
            flights_found = int(flights_match.group(1)) if flights_match else 0

            # Save output to file
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(output)

            succeeded = result.returncode == 0
            results_summary["results"].append({
                "destination": dest,
                "destination_city": city,
                "flights_found": flights_found,
                "output_file": output_file,
                "success": succeeded
            })
            if succeeded:
                print(f" ✅ Complete - {flights_found} flights found")
            else:
                # A non-zero exit is a failed scan, not a completed one.
                print(f" ❌ Failed (exit code {result.returncode}) - {flights_found} flights found")
            print(f" 📄 Saved to: {output_file}")
        except subprocess.TimeoutExpired:
            print(f" ⏱️ Timeout - scan took too long")
            results_summary["results"].append({
                "destination": dest,
                "destination_city": city,
                "error": "timeout",
                "success": False
            })
        except Exception as e:
            # Best effort per route: record the failure and keep scanning.
            print(f" ❌ Error: {e}")
            results_summary["results"].append({
                "destination": dest,
                "destination_city": city,
                "error": str(e),
                "success": False
            })
        print()

    if not dry_run:
        # Save summary
        summary_file = f"{output_dir}/scan_summary.json"
        with open(summary_file, 'w', encoding='utf-8') as f:
            json.dump(results_summary, f, indent=2)

        # Display summary
        print("=" * 70)
        print("SCAN SUMMARY")
        print("=" * 70)
        scan_results = results_summary["results"]
        total_scanned = len(routes)
        successful = sum(1 for r in scan_results if r.get("success", False))
        total_flights = sum(r.get("flights_found", 0) for r in scan_results)
        print(f"Routes scanned: {total_scanned}")
        print(f"Successful: {successful}/{total_scanned}")
        print(f"Total flights found: {total_flights}")
        print()
        print(f"Results saved to: {output_dir}/")
        print(f"Summary: {summary_file}")
        print()

        # Show top routes by flight count
        sorted_results = sorted(
            scan_results,
            key=lambda x: x.get("flights_found", 0),
            reverse=True
        )
        print("Top routes by flight count:")
        for route in sorted_results[:5]:
            if route.get("flights_found", 0) > 0:
                print(f" {origin}{route['destination']}: {route['flights_found']} flights")
        print()
# Script entry point: hand control to the click command only when this file
# is executed directly, not when it is imported.
if __name__ == "__main__":
    scan_discovered()