Files
domverse 6421f83ca7 Add flight comparator web app with full scan pipeline
Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass):

Backend (FastAPI + SQLite):
- REST API with rate limiting, Pydantic v2 validation, paginated responses
- Scan pipeline: resolves airports, queries every day in the window, saves
  individual flights + aggregate route stats to SQLite
- Background async scan processor with real-time progress tracking
- Airport search endpoint backed by OpenFlights dataset
- Daily scan window (all dates, not monthly samples)

Frontend (React 19 + TypeScript + Tailwind CSS v4):
- Dashboard with live scan status and recent scans
- Create scan form: country mode or specific airports (searchable dropdown)
- Scan detail page with expandable route rows showing individual flights
  (date, airline, departure, arrival, price) loaded on demand
- AirportSearch component with debounced live search and multi-select

Database:
- scans → routes → flights schema with FK cascade and auto-update triggers
- Migrations for schema evolution (relaxed country constraint)

Tests:
- 74 tests: unit + integration, isolated per-test SQLite DB
- Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights,
  BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026)
- Integration tests parametrized from confirmed routes

Docker:
- Multi-stage builds, Compose orchestration, Nginx reverse proxy

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 17:11:51 +01:00

312 lines
8.3 KiB
Python

"""
SQLite caching layer for flight search results.
Stores search results with timestamps to avoid unnecessary API calls
and reduce rate limiting issues.
"""
import hashlib
import json
import sqlite3
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional
# Cache database location
CACHE_DB_PATH = Path(__file__).parent / "data" / "flight_cache.db"
# Default cache threshold in hours
DEFAULT_CACHE_THRESHOLD_HOURS = 24
def init_database():
    """Create the cache database, its tables, and indexes if missing.

    Idempotent: every statement uses IF NOT EXISTS, so it is safe to call
    before every cache operation. Also creates the parent data directory.
    """
    CACHE_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(CACHE_DB_PATH)
    try:
        # One search row per unique query; results reference it via FK.
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS flight_searches (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                query_hash TEXT NOT NULL UNIQUE,
                origin TEXT NOT NULL,
                destination TEXT NOT NULL,
                search_date TEXT NOT NULL,
                seat_class TEXT NOT NULL,
                adults INTEGER NOT NULL,
                query_timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
            );

            CREATE TABLE IF NOT EXISTS flight_results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                search_id INTEGER NOT NULL,
                airline TEXT,
                departure_time TEXT,
                arrival_time TEXT,
                duration_minutes INTEGER,
                price REAL,
                currency TEXT,
                plane_type TEXT,
                FOREIGN KEY (search_id) REFERENCES flight_searches(id) ON DELETE CASCADE
            );

            CREATE INDEX IF NOT EXISTS idx_query_hash
                ON flight_searches(query_hash);
            CREATE INDEX IF NOT EXISTS idx_query_timestamp
                ON flight_searches(query_timestamp);
            CREATE INDEX IF NOT EXISTS idx_search_id
                ON flight_results(search_id);
        """)
        conn.commit()
    finally:
        conn.close()
def get_cache_key(origin: str, destination: str, date: str, seat_class: str, adults: int) -> str:
    """Derive a stable, unique cache key for one flight search query.

    Args:
        origin: Origin airport IATA code
        destination: Destination airport IATA code
        date: Search date (YYYY-MM-DD)
        seat_class: Cabin class
        adults: Number of passengers

    Returns:
        SHA256 hex digest of the pipe-joined query parameters
    """
    # Pipe-join keeps the key format trivially reproducible for debugging.
    parts = (origin, destination, date, seat_class, str(adults))
    return hashlib.sha256("|".join(parts).encode()).hexdigest()
def get_cached_results(
    origin: str,
    destination: str,
    date: str,
    seat_class: str,
    adults: int,
    threshold_hours: int = DEFAULT_CACHE_THRESHOLD_HOURS,
) -> Optional[list[dict]]:
    """
    Retrieve cached flight results if they exist and are recent enough.

    Args:
        origin: Origin airport IATA code
        destination: Destination airport IATA code
        date: Search date (YYYY-MM-DD)
        seat_class: Cabin class
        adults: Number of passengers
        threshold_hours: Maximum age of cached results in hours

    Returns:
        List of flight dicts if cache hit, None if cache miss or expired
    """
    init_database()
    cache_key = get_cache_key(origin, destination, date, seat_class, adults)
    # BUG FIX: query_timestamp rows are written by SQLite's CURRENT_TIMESTAMP,
    # which is UTC in 'YYYY-MM-DD HH:MM:SS' form. The previous code compared
    # against local time via datetime.now().isoformat() ('T' separator), so
    # the string comparison was both timezone-shifted and format-skewed.
    # Compare in UTC using the same textual format SQLite stores.
    threshold_time = datetime.now(timezone.utc) - timedelta(hours=threshold_hours)
    threshold_str = threshold_time.strftime("%Y-%m-%d %H:%M:%S")
    conn = sqlite3.connect(CACHE_DB_PATH)
    try:
        cursor = conn.cursor()
        # Most recent search for this exact query, if still fresh.
        cursor.execute("""
            SELECT id
            FROM flight_searches
            WHERE query_hash = ?
              AND query_timestamp > ?
            ORDER BY query_timestamp DESC
            LIMIT 1
        """, (cache_key, threshold_str))
        search_row = cursor.fetchone()
        if not search_row:
            return None  # cache miss or expired
        search_id = search_row[0]
        cursor.execute("""
            SELECT airline, departure_time, arrival_time, duration_minutes,
                   price, currency, plane_type
            FROM flight_results
            WHERE search_id = ?
        """, (search_id,))
        flight_rows = cursor.fetchall()
    finally:
        conn.close()
    # Rehydrate rows into the dict shape callers expect.
    return [
        {
            "origin": origin,
            "destination": destination,
            "airline": airline,
            "departure_time": departure_time,
            "arrival_time": arrival_time,
            "duration_minutes": duration_minutes,
            "price": price,
            "currency": currency,
            "plane_type": plane_type,
            "stops": 0,  # Only direct flights are cached
        }
        for airline, departure_time, arrival_time, duration_minutes,
            price, currency, plane_type in flight_rows
    ]
def save_results(
    origin: str,
    destination: str,
    date: str,
    seat_class: str,
    adults: int,
    flights: list[dict],
) -> None:
    """
    Save flight search results to cache database.

    Any previously cached search with the same parameters is replaced.
    Failures are logged and swallowed deliberately: caching is best-effort
    and must never break the caller's search flow.

    Args:
        origin: Origin airport IATA code
        destination: Destination airport IATA code
        date: Search date (YYYY-MM-DD)
        seat_class: Cabin class
        adults: Number of passengers
        flights: List of flight dicts to cache
    """
    init_database()
    cache_key = get_cache_key(origin, destination, date, seat_class, adults)
    conn = sqlite3.connect(CACHE_DB_PATH)
    cursor = conn.cursor()
    try:
        # BUG FIX: SQLite disables foreign-key enforcement per connection by
        # default, so the ON DELETE CASCADE on flight_results never fired and
        # the DELETE below left orphaned result rows behind forever.
        cursor.execute("PRAGMA foreign_keys = ON")
        # Delete old search with same cache key (replace with fresh data);
        # the cascade removes its flight_results rows.
        cursor.execute("""
            DELETE FROM flight_searches
            WHERE query_hash = ?
        """, (cache_key,))
        # Insert search query
        cursor.execute("""
            INSERT INTO flight_searches
                (query_hash, origin, destination, search_date, seat_class, adults)
            VALUES (?, ?, ?, ?, ?, ?)
        """, (cache_key, origin, destination, date, seat_class, adults))
        search_id = cursor.lastrowid
        # Insert flight results in one batch.
        cursor.executemany("""
            INSERT INTO flight_results
                (search_id, airline, departure_time, arrival_time, duration_minutes,
                 price, currency, plane_type)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, [
            (
                search_id,
                flight.get("airline"),
                flight.get("departure_time"),
                flight.get("arrival_time"),
                flight.get("duration_minutes"),
                flight.get("price"),
                flight.get("currency"),
                flight.get("plane_type"),
            )
            for flight in flights
        ])
        conn.commit()
    except Exception as e:
        # Best-effort cache: report and move on rather than fail the search.
        conn.rollback()
        print(f"⚠️ Cache save failed: {e}")
    finally:
        conn.close()
def clear_old_cache(days: int = 30) -> int:
    """
    Delete cached results older than specified number of days.

    Args:
        days: Maximum age of cached results to keep

    Returns:
        Number of deleted search records
    """
    init_database()
    # BUG FIX: query_timestamp is written by SQLite's CURRENT_TIMESTAMP (UTC,
    # 'YYYY-MM-DD HH:MM:SS'). The previous naive datetime.now().isoformat()
    # comparison was timezone-shifted and used a 'T' separator, skewing the
    # lexicographic comparison. Compare in UTC with the matching format.
    threshold_time = datetime.now(timezone.utc) - timedelta(days=days)
    threshold_str = threshold_time.strftime("%Y-%m-%d %H:%M:%S")
    conn = sqlite3.connect(CACHE_DB_PATH)
    try:
        cursor = conn.cursor()
        # BUG FIX: enable per-connection FK enforcement so ON DELETE CASCADE
        # actually removes the matching flight_results rows (off by default).
        cursor.execute("PRAGMA foreign_keys = ON")
        cursor.execute("""
            DELETE FROM flight_searches
            WHERE query_timestamp < ?
        """, (threshold_str,))
        deleted_count = cursor.rowcount
        conn.commit()
    finally:
        conn.close()
    return deleted_count
def get_cache_stats() -> dict:
    """Summarize the cache contents.

    Returns:
        Dict with row counts, oldest/newest search timestamps (None when the
        cache is empty), and the database file size in megabytes.
    """
    init_database()
    conn = sqlite3.connect(CACHE_DB_PATH)
    try:
        cursor = conn.cursor()
        counts = {}
        for table in ("flight_searches", "flight_results"):
            cursor.execute(f"SELECT COUNT(*) FROM {table}")
            counts[table] = cursor.fetchone()[0]
        cursor.execute(
            "SELECT MIN(query_timestamp), MAX(query_timestamp) FROM flight_searches"
        )
        oldest, newest = cursor.fetchone()
    finally:
        conn.close()
    # File size is read from disk, not from SQLite, so guard for a missing file.
    db_size_bytes = CACHE_DB_PATH.stat().st_size if CACHE_DB_PATH.exists() else 0
    return {
        "total_searches": counts["flight_searches"],
        "total_results": counts["flight_results"],
        "oldest_entry": oldest,
        "newest_entry": newest,
        "db_size_mb": db_size_bytes / (1024 * 1024),
    }