Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass): Backend (FastAPI + SQLite): - REST API with rate limiting, Pydantic v2 validation, paginated responses - Scan pipeline: resolves airports, queries every day in the window, saves individual flights + aggregate route stats to SQLite - Background async scan processor with real-time progress tracking - Airport search endpoint backed by OpenFlights dataset - Daily scan window (all dates, not monthly samples) Frontend (React 19 + TypeScript + Tailwind CSS v4): - Dashboard with live scan status and recent scans - Create scan form: country mode or specific airports (searchable dropdown) - Scan detail page with expandable route rows showing individual flights (date, airline, departure, arrival, price) loaded on demand - AirportSearch component with debounced live search and multi-select Database: - scans → routes → flights schema with FK cascade and auto-update triggers - Migrations for schema evolution (relaxed country constraint) Tests: - 74 tests: unit + integration, isolated per-test SQLite DB - Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights, BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026) - Integration tests parametrized from confirmed routes Docker: - Multi-stage builds, Compose orchestration, Nginx reverse proxy Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
297 lines
10 KiB
Python
297 lines
10 KiB
Python
"""
|
||
Integration tests for the full scan pipeline: searcher → processor → database.
|
||
|
||
Confirmed flight data is stored in confirmed_flights.json (generated 2026-02-25
|
||
from a live scan of BDS→FMM,DUS across the full Feb 26 – May 27 2026 window).
|
||
|
||
Key confirmed routes:
|
||
BDS → FMM 39 flights Mar–May 2026 Ryanair ~5-6x/week, two daily slots
|
||
BDS → DUS 11 flights Apr–May 2026 Eurowings Saturdays only, two time slots
|
||
|
||
These tests make real network calls to Google Flights via fast-flights.
|
||
Mark: integration, slow
|
||
"""
|
||
|
||
import asyncio
|
||
import json
|
||
import os
|
||
import sqlite3
|
||
import sys
|
||
import tempfile
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
|
||
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
||
|
||
from searcher_v3 import search_multiple_routes
|
||
from scan_processor import process_scan
|
||
from database import initialize_database
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Load confirmed flight data from JSON fixture
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Location of the confirmed-flights fixture, resolved relative to this test
# module so loading does not depend on the current working directory.
_FIXTURE_PATH = Path(__file__).parent / "confirmed_flights.json"

# Pass the encoding explicitly: without it open() falls back to the platform
# locale (e.g. cp1252 on Windows), which can mis-decode or reject any
# non-ASCII characters stored in the JSON file.
with open(_FIXTURE_PATH, encoding="utf-8") as _f:
    CONFIRMED = json.load(_f)
|
||
|
||
# Parametrization table for the searcher tests. Each tuple is
#   (origin, destination, date, min_expected_flights, description)
# and is built from one entry of the fixture's "confirmed_dates_for_testing"
# section — a specific (route, date) pair that returned ≥1 real flight from
# the live API.
KNOWN_ROUTES = [
    (
        entry["origin"],
        entry["destination"],
        entry["date"],
        entry["min_flights"],
        (
            f"{entry['origin']}→{entry['destination']} {entry['airline']} "
            f"on {entry['date']} (confirmed €{entry['price']:.0f})"
        ),
    )
    for entry in CONFIRMED["confirmed_dates_for_testing"]["entries"]
]
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Fixtures
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@pytest.fixture(scope="module")
def tmp_db():
    """Provide an isolated SQLite database file for the pipeline tests.

    Creates a temporary ``.db`` file, points the ``DATABASE_PATH`` environment
    variable at it, and initializes the schema via ``initialize_database``.

    Yields:
        The filesystem path (str) of the temporary database.

    On teardown the previous ``DATABASE_PATH`` value (if any) is restored and
    the temporary file is removed. Cleanup also runs when schema
    initialization fails, so a broken setup does not leak the file or leave
    the environment pointing at it.
    """
    fd, path = tempfile.mkstemp(suffix=".db")
    # mkstemp returns an open descriptor; only the path is needed here.
    os.close(fd)

    # Remember any pre-existing value so teardown can restore it instead of
    # unconditionally deleting the variable.
    previous_db_path = os.environ.get("DATABASE_PATH")
    os.environ["DATABASE_PATH"] = path
    try:
        initialize_database(db_path=Path(path), verbose=False)
        yield path
    finally:
        if previous_db_path is None:
            os.environ.pop("DATABASE_PATH", None)
        else:
            os.environ["DATABASE_PATH"] = previous_db_path
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup: the file may already be gone or still be
            # locked; a leftover temp file must not fail the test run.
            pass
|
||
|
||
|
||
def _insert_scan(db_path, origin, country, start_date, end_date,
                 seat_class="economy", adults=1):
    """Insert a pending scan row and return its ID.

    Args:
        db_path: Path of the SQLite database file to write to.
        origin: Value stored in the ``origin`` column.
        country: Value stored in the ``country`` column (the tests in this
            module pass either a country code or comma-separated airport
            codes).
        start_date: Value stored in the ``start_date`` column.
        end_date: Value stored in the ``end_date`` column.
        seat_class: Value stored in the ``seat_class`` column.
        adults: Value stored in the ``adults`` column.

    Returns:
        The ``lastrowid`` reported by the cursor after the INSERT.

    Raises:
        sqlite3.Error: If the INSERT fails. The connection is closed before
            the error propagates.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA foreign_keys = ON")
        cur = conn.cursor()
        cur.execute(
            """INSERT INTO scans (origin, country, start_date, end_date, status, seat_class, adults)
               VALUES (?, ?, ?, ?, 'pending', ?, ?)""",
            (origin, country, start_date, end_date, seat_class, adults),
        )
        scan_id = cur.lastrowid
        conn.commit()
    finally:
        # Close even when the INSERT raises, so a failing test does not keep
        # the database file open.
        conn.close()
    return scan_id
|
||
|
||
|
||
def _get_scan(db_path, scan_id):
    """Fetch one row of the ``scans`` table as a plain dict.

    Args:
        db_path: Path of the SQLite database file to read.
        scan_id: Value matched against the ``id`` column.

    Returns:
        A dict mapping column names to values, or ``None`` when no row has
        that id.

    Raises:
        sqlite3.Error: If the query fails. The connection is closed before
            the error propagates.
    """
    conn = sqlite3.connect(db_path)
    try:
        # sqlite3.Row exposes column names, which makes the dict() conversion
        # below possible.
        conn.row_factory = sqlite3.Row
        row = conn.execute("SELECT * FROM scans WHERE id=?", (scan_id,)).fetchone()
    finally:
        conn.close()
    return dict(row) if row else None
|
||
|
||
|
||
def _get_routes(db_path, scan_id):
    """Fetch all ``routes`` rows belonging to a scan as plain dicts.

    Args:
        db_path: Path of the SQLite database file to read.
        scan_id: Value matched against the ``scan_id`` column.

    Returns:
        A list with one dict (column name → value) per matching row; empty
        when the scan has no routes.

    Raises:
        sqlite3.Error: If the query fails. The connection is closed before
            the error propagates.
    """
    conn = sqlite3.connect(db_path)
    try:
        # sqlite3.Row exposes column names, which makes the dict() conversion
        # below possible.
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT * FROM routes WHERE scan_id=?", (scan_id,)
        ).fetchall()
    finally:
        conn.close()
    return [dict(r) for r in rows]
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Searcher tests — verify live data comes back for confirmed routes
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class TestSearcherKnownRoutes:
    """
    Directly test search_multiple_routes() against confirmed real routes.

    Each test uses a date/route pair we know has flights from our earlier scans.
    """

    @staticmethod
    def _search_single(origin, dest, date):
        """Run one uncached economy search for a single (origin, dest, date).

        Shared by every test in this class so the search parameters stay
        identical across them. Returns the result of search_multiple_routes()
        unchanged; the tests look it up by the (origin, dest, date) tuple.
        """
        return asyncio.run(
            search_multiple_routes(
                routes=[(origin, dest, date)],
                seat_class="economy",
                adults=1,
                use_cache=False,
                max_workers=1,
            )
        )

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.parametrize("origin,dest,date,min_flights,desc", KNOWN_ROUTES)
    def test_returns_flights_for_confirmed_route(self, origin, dest, date, min_flights, desc):
        """Searcher returns ≥min_flights for a confirmed live route."""
        results = self._search_single(origin, dest, date)

        # A missing key is treated the same as "no flights" so the failure
        # message below reports the count instead of raising KeyError.
        flights = results.get((origin, dest, date), [])
        assert len(flights) >= min_flights, (
            f"{desc}: expected ≥{min_flights} flight(s) on {origin}→{dest} {date}, "
            f"got {len(flights)}"
        )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_flight_has_required_fields(self):
        """Every returned flight dict has the mandatory fields."""
        origin, dest, date = "BDS", "FMM", "2026-04-05"
        results = self._search_single(origin, dest, date)
        flights = results.get((origin, dest, date), [])
        assert flights, f"No flights returned for {origin}→{dest} {date}"

        required = {"origin", "destination", "airline", "departure_time",
                    "arrival_time", "price", "stops"}
        for flight in flights:
            missing = required - flight.keys()
            assert not missing, f"Flight missing fields: {missing}. Got: {flight}"
            assert flight["stops"] == 0, "Expected direct flight only"
            assert flight["price"] > 0, "Price must be positive"

    @pytest.mark.integration
    @pytest.mark.slow
    def test_no_results_for_unknown_route(self):
        """A route with no service completes without raising and stays keyed in the results."""
        # BDS → JFK: no direct flight exists
        results = self._search_single("BDS", "JFK", "2026-04-05")

        # Only the presence of the route key is asserted; the flight list
        # stored under it may be empty.
        assert ("BDS", "JFK", "2026-04-05") in results
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Pipeline tests — scan processor saves flights to the database
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class TestScanProcessorSavesRoutes:
    """
    Test that process_scan() correctly saves discovered flights into the
    routes table. These tests catch the regression where dest_info lookup
    silently discarded all results.

    Every test inserts a pending scan row, runs process_scan() on it, and then
    inspects the scans/routes tables of the shared ``tmp_db`` database.
    """

    @pytest.mark.integration
    @pytest.mark.slow
    def test_airports_mode_saves_routes(self, tmp_db):
        """
        Airports mode (comma-separated in country field) must save routes.

        Regression: after removing get_airport_data() call, destinations=[]
        caused dest_info to always be None → all routes silently skipped.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM",          # single airport in destinations-mode format
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", (
            f"Scan failed: {scan.get('error_message')}"
        )

        routes = _get_routes(tmp_db, scan_id)
        assert len(routes) >= 1, (
            "No routes saved for BDS→FMM even though Ryanair flies this route"
        )
        # Use a default so a missing FMM row fails with a readable assertion
        # message instead of a bare StopIteration.
        fmm_route = next((r for r in routes if r["destination"] == "FMM"), None)
        assert fmm_route is not None, (
            f"FMM missing from saved routes. Found: {[r['destination'] for r in routes]}"
        )
        assert fmm_route["flight_count"] >= 1
        assert fmm_route["min_price"] > 0

    @pytest.mark.integration
    @pytest.mark.slow
    def test_airports_mode_unknown_airport_uses_iata_fallback(self, tmp_db):
        """
        When an airport code is not in airports_by_country.json, the route
        is still saved with the IATA code as its name (not silently dropped).
        """
        # NOTE(review): another test in this class describes FMM as part of
        # the DE airport list, so this scan may not actually exercise the
        # fallback path — confirm, and consider a code that is absent from
        # airports_by_country.json.
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM",
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", (
            f"Scan failed: {scan.get('error_message')}"
        )

        routes = _get_routes(tmp_db, scan_id)
        # Without this guard the loop below would pass vacuously when every
        # route was dropped — exactly the failure this test is meant to catch.
        assert routes, "No routes saved for BDS→FMM; route was silently dropped"
        for route in routes:
            # name must be set (IATA code at minimum, not empty/None)
            assert route["destination_name"], (
                f"destination_name is empty for route to {route['destination']}"
            )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_country_mode_includes_fmm(self, tmp_db):
        """
        Country mode must scan ALL airports, not just the first 20.

        Regression: [:20] alphabetical cut-off excluded FMM (#72 in DE list)
        and STR (#21), which are among the most active BDS routes.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="DE",
            start_date="2026-04-05",
            end_date="2026-04-06",
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", scan.get("error_message")

        routes = _get_routes(tmp_db, scan_id)
        destinations_found = {r["destination"] for r in routes}
        # FMM must appear — it has confirmed flights on 2026-04-05.
        # DUS is deliberately not asserted: it is documented as Saturdays
        # only, and this window (Sun 5 – Mon 6 Apr 2026) has no Saturday.
        assert "FMM" in destinations_found, (
            f"FMM (Ryanair BDS→FMM) missing from results. Found: {destinations_found}"
        )

    @pytest.mark.integration
    @pytest.mark.slow
    def test_multi_airport_mode_saves_all_routes(self, tmp_db):
        """
        Comma-separated destinations: all airports with flights must be saved.
        """
        scan_id = _insert_scan(
            tmp_db,
            origin="BDS",
            country="FMM,DUS",          # two confirmed routes
            start_date="2026-04-04",    # Saturday (DUS) — range extends to Apr 15 (FMM mid-week)
            end_date="2026-04-16",      # captures 2026-04-04 (Sat) AND 2026-04-15 (Wed)
        )
        asyncio.run(process_scan(scan_id))

        scan = _get_scan(tmp_db, scan_id)
        assert scan["status"] == "completed", scan.get("error_message")

        routes = _get_routes(tmp_db, scan_id)
        destinations_found = {r["destination"] for r in routes}
        assert "FMM" in destinations_found, "FMM route not saved"
        assert "DUS" in destinations_found, "DUS route not saved (Saturday flight)"
|