Files
ciaovolo/flight-comparator/tests/test_scan_pipeline.py
domverse 6421f83ca7 Add flight comparator web app with full scan pipeline
Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass):

Backend (FastAPI + SQLite):
- REST API with rate limiting, Pydantic v2 validation, paginated responses
- Scan pipeline: resolves airports, queries every day in the window, saves
  individual flights + aggregate route stats to SQLite
- Background async scan processor with real-time progress tracking
- Airport search endpoint backed by OpenFlights dataset
- Daily scan window (all dates, not monthly samples)

Frontend (React 19 + TypeScript + Tailwind CSS v4):
- Dashboard with live scan status and recent scans
- Create scan form: country mode or specific airports (searchable dropdown)
- Scan detail page with expandable route rows showing individual flights
  (date, airline, departure, arrival, price) loaded on demand
- AirportSearch component with debounced live search and multi-select

Database:
- scans → routes → flights schema with FK cascade and auto-update triggers
- Migrations for schema evolution (relaxed country constraint)

Tests:
- 74 tests: unit + integration, isolated per-test SQLite DB
- Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights,
  BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026)
- Integration tests parametrized from confirmed routes

Docker:
- Multi-stage builds, Compose orchestration, Nginx reverse proxy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 17:11:51 +01:00

297 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Integration tests for the full scan pipeline: searcher → processor → database.
Confirmed flight data is stored in confirmed_flights.json (generated 2026-02-25
from a live scan of BDS→FMM,DUS across the full Feb 26 - May 27 2026 window).
Key confirmed routes:
BDS → FMM 39 flights Mar-May 2026 Ryanair ~5-6x/week, two daily slots
BDS → DUS 11 flights Apr-May 2026 Eurowings Saturdays only, two time slots
These tests make real network calls to Google Flights via fast-flights.
Mark: integration, slow
"""
import asyncio
import json
import os
import sqlite3
import sys
import tempfile
from pathlib import Path
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from searcher_v3 import search_multiple_routes
from scan_processor import process_scan
from database import initialize_database
# ---------------------------------------------------------------------------
# Load confirmed flight data from JSON fixture
# ---------------------------------------------------------------------------
_FIXTURE_PATH = Path(__file__).parent / "confirmed_flights.json"
# Pin the encoding: without it open() falls back to the platform's locale
# encoding, which can mis-decode non-ASCII text in the fixture.
with open(_FIXTURE_PATH, encoding="utf-8") as _f:
    CONFIRMED = json.load(_f)
# (origin, destination, date, min_expected_flights, description)
# Built from confirmed_dates_for_testing — each entry is a specific (route, date)
# pair that returned ≥1 real flight from the live API.
KNOWN_ROUTES = [
    (
        e["origin"],
        e["destination"],
        e["date"],
        e["min_flights"],
        # Human-readable label used as the prefix of assertion messages.
        # Origin and destination are joined with "→" (the route notation used
        # throughout this module) so the label does not read as "BDSFMM".
        f"{e['origin']}→{e['destination']} {e['airline']} on {e['date']} (confirmed €{e['price']:.0f})",
    )
    for e in CONFIRMED["confirmed_dates_for_testing"]["entries"]
]
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture(scope="module")
def tmp_db():
    """Isolated SQLite database for pipeline tests.

    Creates a temporary ``.db`` file, points the ``DATABASE_PATH`` environment
    variable at it, initializes it with ``initialize_database`` and yields the
    file path (a ``str``).

    Teardown always runs, including when initialization fails: the previous
    ``DATABASE_PATH`` value is restored (or the variable removed if it was
    unset) and the temporary file is deleted on a best-effort basis.
    """
    fd, path = tempfile.mkstemp(suffix=".db")
    # mkstemp returns an open descriptor; only the path is used from here on.
    os.close(fd)
    previous_db_path = os.environ.get("DATABASE_PATH")
    os.environ["DATABASE_PATH"] = path
    try:
        initialize_database(db_path=Path(path), verbose=False)
        yield path
    finally:
        # Restore rather than blindly pop, so a DATABASE_PATH configured
        # outside this module is not lost for later tests.
        if previous_db_path is None:
            os.environ.pop("DATABASE_PATH", None)
        else:
            os.environ["DATABASE_PATH"] = previous_db_path
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not fail the run.
            pass
def _insert_scan(db_path, origin, country, start_date, end_date,
seat_class="economy", adults=1):
"""Insert a pending scan and return its ID."""
conn = sqlite3.connect(db_path)
conn.execute("PRAGMA foreign_keys = ON")
cur = conn.cursor()
cur.execute(
"""INSERT INTO scans (origin, country, start_date, end_date, status, seat_class, adults)
VALUES (?, ?, ?, ?, 'pending', ?, ?)""",
(origin, country, start_date, end_date, seat_class, adults),
)
scan_id = cur.lastrowid
conn.commit()
conn.close()
return scan_id
def _get_scan(db_path, scan_id):
conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row
row = conn.execute("SELECT * FROM scans WHERE id=?", (scan_id,)).fetchone()
conn.close()
return dict(row) if row else None
def _get_routes(db_path, scan_id):
conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row
rows = conn.execute(
"SELECT * FROM routes WHERE scan_id=?", (scan_id,)
).fetchall()
conn.close()
return [dict(r) for r in rows]
# ---------------------------------------------------------------------------
# Searcher tests — verify live data comes back for confirmed routes
# ---------------------------------------------------------------------------
class TestSearcherKnownRoutes:
    """
    Directly test search_multiple_routes() against confirmed real routes.

    Each test uses a date/route pair we know has flights from our earlier scans.
    Every search bypasses the cache (use_cache=False), so these tests depend
    on the live upstream service.
    """

    @staticmethod
    def _search_one(origin, dest, date):
        """Run one uncached, single-worker economy search for a route/date.

        Returns whatever search_multiple_routes() resolves to; the tests
        below treat it as a mapping keyed by ``(origin, dest, date)``.
        """
        return asyncio.run(
            search_multiple_routes(
                routes=[(origin, dest, date)],
                seat_class="economy",
                adults=1,
                use_cache=False,
                max_workers=1,
            )
        )

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.parametrize("origin,dest,date,min_flights,desc", KNOWN_ROUTES)
    def test_returns_flights_for_confirmed_route(self, origin, dest, date, min_flights, desc):
        """Searcher returns ≥min_flights for a confirmed live route."""
        results = self._search_one(origin, dest, date)
        flights = results.get((origin, dest, date), [])
        assert len(flights) >= min_flights, (
            f"{desc}: expected ≥{min_flights} flight(s) on {origin}→{dest} {date}, "
            f"got {len(flights)}"
        )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_flight_has_required_fields(self):
        """Every returned flight dict has the mandatory fields."""
        origin, dest, date = "BDS", "FMM", "2026-04-05"
        results = self._search_one(origin, dest, date)
        flights = results.get((origin, dest, date), [])
        assert flights, f"No flights returned for {origin}→{dest} {date}"
        required = {"origin", "destination", "airline", "departure_time",
                    "arrival_time", "price", "stops"}
        for flight in flights:
            missing = required - flight.keys()
            assert not missing, f"Flight missing fields: {missing}. Got: {flight}"
            assert flight["stops"] == 0, "Expected direct flight only"
            assert flight["price"] > 0, "Price must be positive"

    @pytest.mark.integration
    @pytest.mark.slow
    def test_no_results_for_unknown_route(self):
        """Routes with no service still produce a result entry, not an error."""
        # BDS → JFK: no direct flight exists
        results = self._search_one("BDS", "JFK", "2026-04-05")
        # Should complete without raising; the entry itself may hold 0 flights
        assert ("BDS", "JFK", "2026-04-05") in results
# ---------------------------------------------------------------------------
# Pipeline tests — scan processor saves flights to the database
# ---------------------------------------------------------------------------
class TestScanProcessorSavesRoutes:
    """
    Test that process_scan() correctly saves discovered flights into the
    routes table. These tests catch the regression where dest_info lookup
    silently discarded all results.

    Each test inserts a pending scan row into the module-scoped temporary
    database, runs process_scan() on it, then inspects the scans/routes rows.
    """

    @pytest.mark.integration
    @pytest.mark.slow
    def test_airports_mode_saves_routes(self, tmp_db):
        """
        Airports mode (comma-separated in country field) must save routes.

        Regression: after removing get_airport_data() call, destinations=[]
        caused dest_info to always be None → all routes silently skipped.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM",  # single airport in destinations-mode format
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))
        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", (
            f"Scan failed: {scan.get('error_message')}"
        )
        routes = _get_routes(tmp_db, scan_id)
        assert len(routes) >= 1, (
            "No routes saved for BDS→FMM even though Ryanair flies this route"
        )
        # Default of None turns a missing FMM row into a readable assertion
        # failure instead of a bare StopIteration.
        fmm_route = next((r for r in routes if r["destination"] == "FMM"), None)
        assert fmm_route is not None, (
            f"FMM route missing. Found: {[r['destination'] for r in routes]}"
        )
        assert fmm_route["flight_count"] >= 1
        assert fmm_route["min_price"] > 0

    @pytest.mark.integration
    @pytest.mark.slow
    def test_airports_mode_unknown_airport_uses_iata_fallback(self, tmp_db):
        """
        When an airport code is not in airports_by_country.json, the route
        is still saved with the IATA code as its name (not silently dropped).
        """
        # NOTE(review): FMM is described elsewhere in this file as part of the
        # DE airport list, so this may not exercise the fallback path — confirm
        # and switch to a code that is absent from the list if so.
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM",
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))
        routes = _get_routes(tmp_db, scan_id)
        # Without this guard the loop below passes vacuously when nothing
        # was saved, which is exactly the "silently dropped" failure mode.
        assert routes, "No routes saved for BDS→FMM; nothing to check"
        for route in routes:
            # name must be set (IATA code at minimum, not empty/None)
            assert route["destination_name"], (
                f"destination_name is empty for route to {route['destination']}"
            )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_country_mode_includes_fmm(self, tmp_db):
        """
        Country mode must scan ALL airports, not just the first 20.

        Regression: [:20] alphabetical cut-off excluded FMM (#72 in DE list)
        and STR (#21), which are among the most active BDS routes.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="DE",
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))
        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", scan.get("error_message")
        routes = _get_routes(tmp_db, scan_id)
        destinations_found = {r["destination"] for r in routes}
        # FMM must appear — it has confirmed flights on 2026-04-05.
        # DUS is deliberately not asserted: it is documented as Saturdays
        # only, and this window (Apr 5-6) does not contain a Saturday.
        assert "FMM" in destinations_found, (
            f"FMM (Ryanair BDS→FMM) missing from results. Found: {destinations_found}"
        )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_multi_airport_mode_saves_all_routes(self, tmp_db):
        """
        Comma-separated destinations: all airports with flights must be saved.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM,DUS",  # two confirmed routes
            start_date="2026-04-04",  # Saturday (DUS) — range extends to Apr 15 (FMM mid-week)
            end_date="2026-04-16",  # captures 2026-04-04 (Sat) AND 2026-04-15 (Wed)
        )
        asyncio.run(process_scan(scan_id))
        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", scan.get("error_message")
        routes = _get_routes(tmp_db, scan_id)
        destinations_found = {r["destination"] for r in routes}
        assert "FMM" in destinations_found, "FMM route not saved"
        assert "DUS" in destinations_found, "DUS route not saved (Saturday flight)"