#!/usr/bin/env python3
"""
Route Discovery Tool

Phase 1: Quickly discover which routes have direct flights
- Scans one sample date per month across the window
- Identifies which destination airports have ANY flights
- Saves results to discovered_routes.json

Phase 2: Targeted daily scans (use main.py)
- Run detailed daily scans only on discovered routes
- Much faster than scanning all airports

Example workflow:
    # Phase 1: Discover routes (fast)
    python discover_routes.py --from BDS --to-country DE --window 3

    # Phase 2: Daily scan each discovered route (targeted)
    python main.py --from BDS --to DUS --daily-scan --window 3
    python main.py --from BDS --to FMM --daily-scan --window 3
    ...
"""

import asyncio
import json
import sys
from datetime import date
from dateutil.relativedelta import relativedelta
from typing import Optional

try:
    import click
except ImportError:
    print("Error: click library not installed. Install with: pip install click")
    sys.exit(1)

from airports import resolve_airport_list, download_and_build_airport_data

try:
    from searcher_v3 import search_multiple_routes
except ImportError:
    print("Error: searcher_v3 not found")
    sys.exit(1)


def generate_discovery_dates(window_months: int) -> list[str]:
    """
    Generate sample dates for route discovery.

    Uses one date per month (the 15th) for each of the next
    ``window_months`` months, starting with the month after today.

    Args:
        window_months: Number of months to check. Zero or a negative
            value yields an empty list.

    Returns:
        List of date strings (YYYY-MM-DD), in chronological order.
    """
    today = date.today()
    # Adding whole months clamps the day to the end of shorter months, and
    # the 15th exists in every month, so replace(day=15) can never fail.
    return [
        (today + relativedelta(months=offset)).replace(day=15).strftime('%Y-%m-%d')
        for offset in range(1, window_months + 1)
    ]


def _summarize_results(results: dict) -> dict[str, dict]:
    """Aggregate scan results into one summary dict per destination with flights.

    ``results`` maps ``(origin, destination, date)`` tuples to a list of
    flight dicts; entries with an empty/falsy flight list are ignored.
    """
    summaries: dict[str, dict] = {}

    for (_orig, dest, query_date), flights in results.items():
        if not flights:
            continue

        summary = summaries.get(dest)
        if summary is None:
            summary = summaries[dest] = {
                "iata": dest,
                "flights_found": 0,
                "airlines": set(),
                "sample_dates": [],
                "price_range": {"min": None, "max": None},
            }

        summary["flights_found"] += len(flights)
        summary["sample_dates"].append(query_date)

        bounds = summary["price_range"]
        for flight in flights:
            # `or` (not a .get default) so an explicit null airline cannot
            # put None in the set and break the sort below.
            summary["airlines"].add(flight.get("airline") or "Unknown")

            price = flight.get("price")
            if not price:
                continue
            if bounds["min"] is None:
                bounds["min"] = bounds["max"] = price
            else:
                bounds["min"] = min(bounds["min"], price)
                bounds["max"] = max(bounds["max"], price)

    # Sets are not JSON serializable; store airlines as sorted lists.
    for summary in summaries.values():
        summary["airlines"] = sorted(summary["airlines"])

    return summaries


def _build_report(
    origin: str,
    country: str,
    window: int,
    destination_airports: list,
    sample_dates: list[str],
    summaries: dict[str, dict],
) -> dict:
    """Assemble the JSON-serializable discovery report written to the output file."""
    airport_map = {ap['iata']: ap for ap in destination_airports}

    routes = []
    for dest in sorted(summaries):
        details = summaries[dest]
        airport_info = airport_map.get(dest, {})
        routes.append({
            "destination": dest,
            "destination_name": airport_info.get('name', 'Unknown'),
            "destination_city": airport_info.get('city', ''),
            "flights_found": details["flights_found"],
            "airlines": details["airlines"],
            "dates_with_flights": sorted(details["sample_dates"]),
            "price_range": details["price_range"],
        })

    return {
        "scan_date": date.today().strftime('%Y-%m-%d'),
        "origin": origin,
        "country": country,
        "window_months": window,
        "total_airports_scanned": len(destination_airports),
        "destinations_with_flights": len(summaries),
        "sample_dates": sample_dates,
        "routes": routes,
    }


def _print_report(report: dict, origin: str, window: int, output: str) -> None:
    """Print the human-readable discovery summary and suggested next commands."""
    scanned = report["total_airports_scanned"]
    found = report["destinations_with_flights"]
    # Guard against an empty airport list, which would otherwise raise
    # ZeroDivisionError after the report has already been saved.
    success_rate = found / scanned * 100 if scanned else 0.0

    print()
    print("=" * 70)
    print("DISCOVERY RESULTS")
    print("=" * 70)
    print(f"Total airports scanned: {scanned}")
    print(f"Destinations with flights: {found}")
    print(f"Success rate: {success_rate:.1f}%")
    print()

    routes = report["routes"]
    if not routes:
        print("⚠️ No routes with direct flights found")
        print()
        print("This could mean:")
        print(" - No direct flights exist for these routes")
        print(" - API errors prevented detection")
        print(" - Try expanding the date range with --window")
        print()
        return

    print("Routes with direct flights:")
    print()
    print(f"{'IATA':<6} {'City':<25} {'Airlines':<30} {'Flights':<8} {'Price Range'}")
    print("-" * 90)

    for route in routes:
        airlines = route["airlines"]
        airlines_str = ", ".join(airlines[:3])  # Show up to 3 airlines
        if len(airlines) > 3:
            airlines_str += f" +{len(airlines) - 3}"

        price_min = route["price_range"]["min"]
        price_max = route["price_range"]["max"]
        if price_min is not None and price_max is not None:
            price_range = f"€{price_min}-€{price_max}"
        else:
            price_range = "—"

        # The airport record may carry a null city; treat it as empty.
        city = route['destination_city'] or ''
        print(f"{route['destination']:<6} {city[:24]:<25} "
              f"{airlines_str[:29]:<30} {route['flights_found']:<8} {price_range}")

    print()
    print(f"✅ Saved to: {output}")
    print()
    print("=" * 70)
    print("NEXT STEP: Targeted Daily Scans")
    print("=" * 70)
    print("Run detailed daily scans on discovered routes:")
    print()

    for route in routes[:5]:  # Show first 5 examples
        dest = route['destination']
        print(f"python main.py --from {origin} --to {dest} --daily-scan --window {window}")

    if len(routes) > 5:
        print(f"... and {len(routes) - 5} more routes")

    print()
    print("Or use the automated batch script:")
    print(f"python scan_discovered_routes.py {output}")
    print()


@click.command()
@click.option('--from', 'origin', required=True, help='Origin airport IATA code (e.g., BDS)')
@click.option('--to-country', 'country', required=True, help='Destination country ISO code (e.g., DE)')
@click.option('--window', default=3, type=int, help='Months to scan (default: 3)')
@click.option('--output', default='discovered_routes.json', help='Output file (default: discovered_routes.json)')
@click.option('--workers', default=5, type=int, help='Concurrency level (default: 5)')
def discover(origin: str, country: str, window: int, output: str, workers: int):
    """
    Discover which routes have direct flights.

    Quickly scans sample dates to find which destination airports have ANY flights.
    Much faster than daily scanning all airports.

    Example:
        python discover_routes.py --from BDS --to-country DE --window 3
    """
    # NOTE: the docstring above is shown verbatim by `--help`, so developer
    # notes live in comments. Every failure path reports through click.echo
    # on stderr and exits with status 1.
    print()
    print("=" * 70)
    print("ROUTE DISCOVERY SCAN")
    print("=" * 70)
    print(f"Origin: {origin}")
    print(f"Destinations: All airports in {country}")
    print(f"Strategy: Sample one date per month for {window} months")
    print()

    # Ensure airport data exists
    try:
        download_and_build_airport_data()
    except Exception as e:
        click.echo(f"Error building airport data: {e}", err=True)
        sys.exit(1)

    # Get all destination airports
    try:
        destination_airports = resolve_airport_list(country, None)
    except ValueError as e:
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)

    print(f"Found {len(destination_airports)} airports in {country}")

    # Generate sample dates (one per month)
    sample_dates = generate_discovery_dates(window)
    print(f"Sample dates: {', '.join(sample_dates)}")
    print()

    # Build routes to scan: every destination airport on every sample date
    routes = [
        (origin, airport['iata'], sample_date)
        for airport in destination_airports
        for sample_date in sample_dates
    ]

    print(f"Scanning {len(routes)} routes ({len(destination_airports)} airports × {len(sample_dates)} dates)...")
    print()

    # Execute discovery scan
    try:
        results = asyncio.run(
            search_multiple_routes(
                routes,
                seat_class="economy",
                adults=1,
                max_workers=workers,
                cache_threshold_hours=24,
                use_cache=True,
                progress_callback=None,  # Suppress detailed progress
            )
        )
    except Exception as e:
        click.echo(f"Error during scan: {e}", err=True)
        sys.exit(1)

    # Analyze results to find which destinations have flights
    summaries = _summarize_results(results)
    discovered_routes = _build_report(
        origin, country, window, destination_airports, sample_dates, summaries
    )

    # Save to file
    try:
        with open(output, 'w', encoding='utf-8') as f:
            json.dump(discovered_routes, f, indent=2)
    except OSError as e:
        click.echo(f"Error writing {output}: {e}", err=True)
        sys.exit(1)

    # Display results
    _print_report(discovered_routes, origin, window, output)


if __name__ == '__main__':
    discover()