"""
Integration tests for the full scan pipeline: searcher → processor → database.

Confirmed flight data is stored in confirmed_flights.json (generated 2026-02-25
from a live scan of BDS→FMM,DUS across the full Feb 26 – May 27 2026 window).

Key confirmed routes:
  BDS → FMM   39 flights  Mar–May 2026   Ryanair ~5-6x/week, two daily slots
  BDS → DUS   11 flights  Apr–May 2026   Eurowings Saturdays only, two time slots

These tests make real network calls to Google Flights via fast-flights.
Mark: integration, slow
"""

import asyncio
import json
import os
import sqlite3
import sys
import tempfile
from pathlib import Path

import pytest

# Make the parent directory importable so the project modules below resolve
# when the tests are run from inside the tests directory.
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))

from searcher_v3 import search_multiple_routes
from scan_processor import process_scan
from database import initialize_database

# ---------------------------------------------------------------------------
# Load confirmed flight data from JSON fixture
# ---------------------------------------------------------------------------

_FIXTURE_PATH = Path(__file__).parent / "confirmed_flights.json"
# Read explicitly as UTF-8: JSON is UTF-8 by specification, and relying on the
# platform's locale encoding can break the load on non-UTF-8 systems.
with open(_FIXTURE_PATH, encoding="utf-8") as _f:
    CONFIRMED = json.load(_f)

# (origin, destination, date, min_expected_flights, description)
# Built from confirmed_dates_for_testing — each entry is a specific (route, date)
# pair that returned ≥1 real flight from the live API.
KNOWN_ROUTES = [
    (
        e["origin"],
        e["destination"],
        e["date"],
        e["min_flights"],
        f"{e['origin']}→{e['destination']} {e['airline']} on {e['date']} (confirmed €{e['price']:.0f})",
    )
    for e in CONFIRMED["confirmed_dates_for_testing"]["entries"]
]


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------

@pytest.fixture(scope="module")
def tmp_db():
    """Isolated SQLite database for pipeline tests.

    Creates a temporary ``.db`` file, points the ``DATABASE_PATH`` environment
    variable at it, initializes the schema via ``initialize_database`` and
    yields the file path (a ``str``).

    On teardown — including when schema initialization itself fails — the
    previous ``DATABASE_PATH`` value is restored (or the variable removed if it
    was not set before) and the temporary file is deleted.
    """
    fd, path = tempfile.mkstemp(suffix=".db")
    os.close(fd)
    # Remember any pre-existing value so the fixture does not clobber the
    # environment of tests that run after this module.
    previous_db_path = os.environ.get("DATABASE_PATH")
    os.environ["DATABASE_PATH"] = path
    try:
        initialize_database(db_path=Path(path), verbose=False)
        yield path
    finally:
        if previous_db_path is None:
            os.environ.pop("DATABASE_PATH", None)
        else:
            os.environ["DATABASE_PATH"] = previous_db_path
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not fail the run.
            pass


def _insert_scan(db_path, origin, country, start_date, end_date,
                 seat_class="economy", adults=1):
    """Insert a pending scan and return its ID.

    Args:
        db_path: Path of the SQLite database file.
        origin: Value stored in the ``origin`` column.
        country: Value stored in the ``country`` column. The tests below pass
            either a country code or a comma-separated list of airport codes.
        start_date: Value stored in the ``start_date`` column.
        end_date: Value stored in the ``end_date`` column.
        seat_class: Value stored in the ``seat_class`` column.
        adults: Value stored in the ``adults`` column.

    Returns:
        The row ID of the newly inserted ``scans`` row.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA foreign_keys = ON")
        cur = conn.cursor()
        cur.execute(
            """INSERT INTO scans (origin, country, start_date, end_date, status, seat_class, adults)
               VALUES (?, ?, ?, ?, 'pending', ?, ?)""",
            (origin, country, start_date, end_date, seat_class, adults),
        )
        scan_id = cur.lastrowid
        conn.commit()
        return scan_id
    finally:
        # Close even when a statement raises, so the connection is not leaked.
        conn.close()


def _get_scan(db_path, scan_id):
    """Return the ``scans`` row with the given ID as a dict, or None if absent."""
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute("SELECT * FROM scans WHERE id=?", (scan_id,)).fetchone()
    finally:
        conn.close()
    return dict(row) if row is not None else None


def _get_routes(db_path, scan_id):
    """Return every ``routes`` row belonging to ``scan_id`` as a list of dicts."""
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT * FROM routes WHERE scan_id=?", (scan_id,)
        ).fetchall()
    finally:
        conn.close()
    return [dict(r) for r in rows]


def _search_route(origin, dest, date):
    """Run one uncached, single-worker economy search for a single route.

    Returns the mapping produced by ``search_multiple_routes``; the tests look
    results up in it by the ``(origin, dest, date)`` tuple.
    """
    return asyncio.run(
        search_multiple_routes(
            routes=[(origin, dest, date)],
            seat_class="economy",
            adults=1,
            use_cache=False,
            max_workers=1,
        )
    )


# ---------------------------------------------------------------------------
# Searcher tests — verify live data comes back for confirmed routes
# ---------------------------------------------------------------------------

class TestSearcherKnownRoutes:
    """
    Directly test search_multiple_routes() against confirmed real routes.
    Each test uses a date/route pair we know has flights from our earlier scans.
    """

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.parametrize("origin,dest,date,min_flights,desc", KNOWN_ROUTES)
    def test_returns_flights_for_confirmed_route(self, origin, dest, date, min_flights, desc):
        """Searcher returns ≥min_flights for a confirmed live route."""
        results = _search_route(origin, dest, date)
        flights = results.get((origin, dest, date), [])
        assert len(flights) >= min_flights, (
            f"{desc}: expected ≥{min_flights} flight(s) on {origin}→{dest} {date}, "
            f"got {len(flights)}"
        )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_flight_has_required_fields(self):
        """Every returned flight dict has the mandatory fields."""
        origin, dest, date = "BDS", "FMM", "2026-04-05"
        results = _search_route(origin, dest, date)
        flights = results.get((origin, dest, date), [])
        assert flights, f"No flights returned for {origin}→{dest} {date}"

        required = {"origin", "destination", "airline", "departure_time",
                    "arrival_time", "price", "stops"}
        for flight in flights:
            missing = required - flight.keys()
            assert not missing, f"Flight missing fields: {missing}. Got: {flight}"
            assert flight["stops"] == 0, "Expected direct flight only"
            assert flight["price"] > 0, "Price must be positive"

    @pytest.mark.integration
    @pytest.mark.slow
    def test_no_results_for_unknown_route(self):
        """Routes with no service still produce a result entry, not an error."""
        # BDS → JFK: no direct flight exists
        route = ("BDS", "JFK", "2026-04-05")
        results = _search_route(*route)
        # Should complete without raising; result may be empty or have 0 flights
        assert route in results


# ---------------------------------------------------------------------------
# Pipeline tests — scan processor saves flights to the database
# ---------------------------------------------------------------------------

class TestScanProcessorSavesRoutes:
    """
    Test that process_scan() correctly saves discovered flights into the
    routes table. These tests catch the regression where dest_info lookup
    silently discarded all results.
    """

    @pytest.mark.integration
    @pytest.mark.slow
    def test_airports_mode_saves_routes(self, tmp_db):
        """
        Airports mode (comma-separated in country field) must save routes.

        Regression: after removing get_airport_data() call, destinations=[]
        caused dest_info to always be None → all routes silently skipped.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM",  # single airport in destinations-mode format
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", (
            f"Scan failed: {scan.get('error_message')}"
        )

        routes = _get_routes(tmp_db, scan_id)
        assert len(routes) >= 1, (
            "No routes saved for BDS→FMM even though Ryanair flies this route"
        )
        # Use a default so a missing FMM row fails with a readable assertion
        # instead of a bare StopIteration.
        fmm_route = next((r for r in routes if r["destination"] == "FMM"), None)
        assert fmm_route is not None, (
            f"FMM route not saved. Found: {sorted(r['destination'] for r in routes)}"
        )
        assert fmm_route["flight_count"] >= 1
        assert fmm_route["min_price"] > 0

    @pytest.mark.integration
    @pytest.mark.slow
    def test_airports_mode_unknown_airport_uses_iata_fallback(self, tmp_db):
        """
        When an airport code is not in airports_by_country.json, the route is
        still saved with the IATA code as its name (not silently dropped).

        NOTE(review): this test scans FMM, which another test in this class
        describes as part of the DE airport list — so it may not actually
        exercise the "unknown airport" fallback path. Confirm and consider
        using a code that is genuinely absent from airports_by_country.json.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM",
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", scan.get("error_message")

        routes = _get_routes(tmp_db, scan_id)
        # Without this guard the loop below would pass vacuously when every
        # route was dropped — exactly the failure this test is meant to catch.
        assert routes, "No routes saved for BDS→FMM; cannot verify destination_name"
        for route in routes:
            # name must be set (IATA code at minimum, not empty/None)
            assert route["destination_name"], (
                f"destination_name is empty for route to {route['destination']}"
            )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_country_mode_includes_fmm(self, tmp_db):
        """
        Country mode must scan ALL airports, not just the first 20.

        Regression: [:20] alphabetical cut-off excluded FMM (#72 in DE list)
        and STR (#21), which are among the most active BDS routes.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="DE",
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", scan.get("error_message")

        routes = _get_routes(tmp_db, scan_id)
        destinations_found = {r["destination"] for r in routes}
        # FMM must appear — it has confirmed flights on 2026-04-05 (a Sunday).
        # DUS is deliberately not asserted here: it is served on Saturdays
        # only, so this Sunday/Monday window is not expected to contain it.
        assert "FMM" in destinations_found, (
            f"FMM (Ryanair BDS→FMM) missing from results. Found: {destinations_found}"
        )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_multi_airport_mode_saves_all_routes(self, tmp_db):
        """
        Comma-separated destinations: all airports with flights must be saved.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM,DUS",  # two confirmed routes
            start_date="2026-04-04",  # Saturday (DUS) — range extends to Apr 15 (FMM mid-week)
            end_date="2026-04-16",  # captures 2026-04-04 (Sat) AND 2026-04-15 (Wed)
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", scan.get("error_message")

        routes = _get_routes(tmp_db, scan_id)
        destinations_found = {r["destination"] for r in routes}
        assert "FMM" in destinations_found, "FMM route not saved"
        assert "DUS" in destinations_found, "DUS route not saved (Saturday flight)"