Add flight comparator web app with full scan pipeline

Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass):

Backend (FastAPI + SQLite):
- REST API with rate limiting, Pydantic v2 validation, paginated responses
- Scan pipeline: resolves airports, queries every day in the window, saves
  individual flights + aggregate route stats to SQLite
- Background async scan processor with real-time progress tracking
- Airport search endpoint backed by OpenFlights dataset
- Daily scan window (all dates, not monthly samples)

Frontend (React 19 + TypeScript + Tailwind CSS v4):
- Dashboard with live scan status and recent scans
- Create scan form: country mode or specific airports (searchable dropdown)
- Scan detail page with expandable route rows showing individual flights
  (date, airline, departure, arrival, price) loaded on demand
- AirportSearch component with debounced live search and multi-select

Database:
- scans → routes → flights schema with FK cascade and auto-update triggers
- Migrations for schema evolution (relaxed country constraint)

Tests:
- 74 tests: unit + integration, isolated per-test SQLite DB
- Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights,
  BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026)
- Integration tests parametrized from confirmed routes

Docker:
- Multi-stage builds, Compose orchestration, Nginx reverse proxy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 17:11:51 +01:00
parent aea7590874
commit 6421f83ca7
67 changed files with 37173 additions and 0 deletions

View File

@@ -0,0 +1,296 @@
"""
Integration tests for the full scan pipeline: searcher → processor → database.
Confirmed flight data is stored in confirmed_flights.json (generated 2026-02-25
from a live scan of BDS→FMM,DUS across the full Feb 26 – May 27 2026 window).
Key confirmed routes:
  BDS → FMM  39 flights  Mar–May 2026  Ryanair    ~5-6x/week, two daily slots
  BDS → DUS  11 flights  Apr–May 2026  Eurowings  Saturdays only, two time slots
These tests make real network calls to Google Flights via fast-flights.
Mark: integration, slow
"""
import asyncio
import json
import os
import sqlite3
import sys
import tempfile
from pathlib import Path
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from searcher_v3 import search_multiple_routes
from scan_processor import process_scan
from database import initialize_database
# ---------------------------------------------------------------------------
# Load confirmed flight data from JSON fixture
# ---------------------------------------------------------------------------
# The fixture lives next to this test module.
_FIXTURE_PATH = Path(__file__).parent / "confirmed_flights.json"

# Pin the encoding: without it open() falls back to the platform's locale
# default, so any non-ASCII content in the fixture could decode differently
# (or fail) depending on where the tests run.
with open(_FIXTURE_PATH, encoding="utf-8") as _f:
    CONFIRMED = json.load(_f)
# Parametrization table for the searcher tests, one tuple per fixture entry:
#   (origin, destination, date, min_expected_flights, description)
# Built from confirmed_dates_for_testing — each entry is a specific (route, date)
# pair that returned ≥1 real flight from the live API.
KNOWN_ROUTES = [
    (
        e["origin"],
        e["destination"],
        e["date"],
        e["min_flights"],
        # "→" separates the airport codes so the label reads "BDS→FMM ..."
        # instead of running the two codes together.
        f"{e['origin']}→{e['destination']} {e['airline']} on {e['date']} "
        f"(confirmed €{e['price']:.0f})",
    )
    for e in CONFIRMED["confirmed_dates_for_testing"]["entries"]
]
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture(scope="module")
def tmp_db():
    """Isolated SQLite database for pipeline tests.

    Creates a temporary ``.db`` file, points the ``DATABASE_PATH`` environment
    variable at it and initialises the schema with ``initialize_database``.
    The fixture is module-scoped, so all tests in this module share one file.

    Yields:
        str: filesystem path of the temporary database.
    """
    fd, path = tempfile.mkstemp(suffix=".db")
    os.close(fd)  # only the path is needed; the helpers reopen it via sqlite3

    # Remember any value that was already set so teardown can put it back
    # instead of unconditionally deleting the variable.
    previous = os.environ.get("DATABASE_PATH")
    os.environ["DATABASE_PATH"] = path
    try:
        initialize_database(db_path=Path(path), verbose=False)
        yield path
    finally:
        # Runs even when schema initialisation fails, so neither the
        # environment override nor the temp file is left behind.
        if previous is None:
            os.environ.pop("DATABASE_PATH", None)
        else:
            os.environ["DATABASE_PATH"] = previous
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not fail the run.
            pass
def _insert_scan(db_path, origin, country, start_date, end_date,
                 seat_class="economy", adults=1):
    """Insert a pending scan and return its ID.

    Args:
        db_path: Path of the SQLite database file.
        origin: Value stored in the ``origin`` column.
        country: Value stored in the ``country`` column (the tests also pass
            comma-separated airport codes here).
        start_date: Value stored in the ``start_date`` column.
        end_date: Value stored in the ``end_date`` column.
        seat_class: Value stored in the ``seat_class`` column.
        adults: Value stored in the ``adults`` column.

    Returns:
        The row ID of the newly inserted scan.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA foreign_keys = ON")
        cur = conn.execute(
            """INSERT INTO scans (origin, country, start_date, end_date, status, seat_class, adults)
               VALUES (?, ?, ?, ?, 'pending', ?, ?)""",
            (origin, country, start_date, end_date, seat_class, adults),
        )
        conn.commit()
        return cur.lastrowid
    finally:
        # Always release the connection, including when the INSERT fails.
        conn.close()
def _get_scan(db_path, scan_id):
    """Fetch one row of the ``scans`` table as a plain dict.

    Args:
        db_path: Path of the SQLite database file.
        scan_id: Value matched against the ``id`` column.

    Returns:
        A dict mapping column name to value, or ``None`` when no row matches.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row  # rows become convertible to dicts
        row = conn.execute("SELECT * FROM scans WHERE id=?", (scan_id,)).fetchone()
        return dict(row) if row else None
    finally:
        # Always release the connection, including when the query fails.
        conn.close()
def _get_routes(db_path, scan_id):
    """Fetch all ``routes`` rows belonging to a scan as plain dicts.

    Args:
        db_path: Path of the SQLite database file.
        scan_id: Value matched against the ``scan_id`` column.

    Returns:
        A list of dicts (column name to value); empty when nothing matches.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row  # rows become convertible to dicts
        rows = conn.execute(
            "SELECT * FROM routes WHERE scan_id=?", (scan_id,)
        ).fetchall()
        return [dict(r) for r in rows]
    finally:
        # Always release the connection, including when the query fails.
        conn.close()
# ---------------------------------------------------------------------------
# Searcher tests — verify live data comes back for confirmed routes
# ---------------------------------------------------------------------------
class TestSearcherKnownRoutes:
    """
    Directly test search_multiple_routes() against confirmed real routes.

    Each test uses a date/route pair we know has flights from our earlier scans.
    """

    @staticmethod
    def _search_one(origin, dest, date):
        """Run one uncached, single-worker economy search for a route/date.

        Returns whatever ``search_multiple_routes`` returns; the tests treat
        it as a mapping keyed by the ``(origin, dest, date)`` tuple.
        """
        return asyncio.run(
            search_multiple_routes(
                routes=[(origin, dest, date)],
                seat_class="economy",
                adults=1,
                use_cache=False,
                max_workers=1,
            )
        )

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.parametrize("origin,dest,date,min_flights,desc", KNOWN_ROUTES)
    def test_returns_flights_for_confirmed_route(self, origin, dest, date, min_flights, desc):
        """Searcher returns ≥min_flights for a confirmed live route."""
        results = self._search_one(origin, dest, date)

        flights = results.get((origin, dest, date), [])
        assert len(flights) >= min_flights, (
            f"{desc}: expected ≥{min_flights} flight(s) on {origin}→{dest} {date}, "
            f"got {len(flights)}"
        )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_flight_has_required_fields(self):
        """Every returned flight dict has the mandatory fields."""
        origin, dest, date = "BDS", "FMM", "2026-04-05"
        results = self._search_one(origin, dest, date)

        flights = results.get((origin, dest, date), [])
        assert flights, f"No flights returned for {origin}→{dest} {date}"

        required = {"origin", "destination", "airline", "departure_time",
                    "arrival_time", "price", "stops"}
        for flight in flights:
            missing = required - flight.keys()
            assert not missing, f"Flight missing fields: {missing}. Got: {flight}"
            assert flight["stops"] == 0, "Expected direct flight only"
            assert flight["price"] > 0, "Price must be positive"

    @pytest.mark.integration
    @pytest.mark.slow
    def test_no_results_for_unknown_route(self):
        """Routes with no service complete without raising and keep their key."""
        # BDS → JFK: no direct flight exists
        results = self._search_one("BDS", "JFK", "2026-04-05")

        # Should complete without raising; result may be empty or have 0 flights.
        # Only the presence of the route key is asserted, not the list contents.
        assert ("BDS", "JFK", "2026-04-05") in results
# ---------------------------------------------------------------------------
# Pipeline tests — scan processor saves flights to the database
# ---------------------------------------------------------------------------
class TestScanProcessorSavesRoutes:
    """
    Test that process_scan() correctly saves discovered flights into the
    routes table. These tests catch the regression where dest_info lookup
    silently discarded all results.
    """

    @pytest.mark.integration
    @pytest.mark.slow
    def test_airports_mode_saves_routes(self, tmp_db):
        """
        Airports mode (comma-separated in country field) must save routes.

        Regression: after removing get_airport_data() call, destinations=[]
        caused dest_info to always be None → all routes silently skipped.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM",  # single airport in destinations-mode format
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        # _get_scan returns None for a missing row; fail with a clear message
        # rather than a TypeError on the subscript below.
        assert scan is not None, f"Scan {scan_id} not found in database"
        assert scan["status"] == "completed", (
            f"Scan failed: {scan.get('error_message')}"
        )

        routes = _get_routes(tmp_db, scan_id)
        assert len(routes) >= 1, (
            "No routes saved for BDS→FMM even though Ryanair flies this route"
        )

        # Use a default so a missing FMM row is reported as an assertion
        # failure instead of a bare StopIteration.
        fmm_route = next((r for r in routes if r["destination"] == "FMM"), None)
        assert fmm_route is not None, (
            f"FMM route missing. Found: {[r['destination'] for r in routes]}"
        )
        assert fmm_route["flight_count"] >= 1
        assert fmm_route["min_price"] > 0

    @pytest.mark.integration
    @pytest.mark.slow
    def test_airports_mode_unknown_airport_uses_iata_fallback(self, tmp_db):
        """
        When an airport code is not in airports_by_country.json, the route
        is still saved with the IATA code as its name (not silently dropped).
        """
        # NOTE(review): this uses FMM, the same airport as the saves-routes
        # test; whether FMM is really absent from airports_by_country.json is
        # not visible from this file — confirm this exercises the fallback.
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM",
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))

        routes = _get_routes(tmp_db, scan_id)
        # Without this guard the loop below passes vacuously when every route
        # was dropped — exactly the failure this test is meant to catch.
        assert routes, "No routes saved for BDS→FMM; nothing to verify"
        for route in routes:
            # name must be set (IATA code at minimum, not empty/None)
            assert route["destination_name"], (
                f"destination_name is empty for route to {route['destination']}"
            )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_country_mode_includes_fmm(self, tmp_db):
        """
        Country mode must scan ALL airports, not just the first 20.

        Regression: [:20] alphabetical cut-off excluded FMM (#72 in DE list)
        and STR (#21), which are among the most active BDS routes.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="DE",
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        assert scan is not None, f"Scan {scan_id} not found in database"
        assert scan["status"] == "completed", scan.get("error_message")

        routes = _get_routes(tmp_db, scan_id)
        destinations_found = {r["destination"] for r in routes}
        # FMM must appear — it has confirmed flights on 2026-04-05.
        # DUS is deliberately not asserted here: it flies Saturdays only and
        # this window starts the day after Saturday 2026-04-04.
        assert "FMM" in destinations_found, (
            f"FMM (Ryanair BDS→FMM) missing from results. Found: {destinations_found}"
        )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_multi_airport_mode_saves_all_routes(self, tmp_db):
        """
        Comma-separated destinations: all airports with flights must be saved.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM,DUS",  # two confirmed routes
            start_date="2026-04-04",  # Saturday (DUS) — range extends to Apr 15 (FMM mid-week)
            end_date="2026-04-16",  # captures 2026-04-04 (Sat) AND 2026-04-15 (Wed)
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        assert scan is not None, f"Scan {scan_id} not found in database"
        assert scan["status"] == "completed", scan.get("error_message")

        routes = _get_routes(tmp_db, scan_id)
        destinations_found = {r["destination"] for r in routes}
        assert "FMM" in destinations_found, "FMM route not saved"
        assert "DUS" in destinations_found, "DUS route not saved (Saturday flight)"