Add flight comparator web app with full scan pipeline
Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass): Backend (FastAPI + SQLite): - REST API with rate limiting, Pydantic v2 validation, paginated responses - Scan pipeline: resolves airports, queries every day in the window, saves individual flights + aggregate route stats to SQLite - Background async scan processor with real-time progress tracking - Airport search endpoint backed by OpenFlights dataset - Daily scan window (all dates, not monthly samples) Frontend (React 19 + TypeScript + Tailwind CSS v4): - Dashboard with live scan status and recent scans - Create scan form: country mode or specific airports (searchable dropdown) - Scan detail page with expandable route rows showing individual flights (date, airline, departure, arrival, price) loaded on demand - AirportSearch component with debounced live search and multi-select Database: - scans → routes → flights schema with FK cascade and auto-update triggers - Migrations for schema evolution (relaxed country constraint) Tests: - 74 tests: unit + integration, isolated per-test SQLite DB - Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights, BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026) - Integration tests parametrized from confirmed routes Docker: - Multi-stage builds, Compose orchestration, Nginx reverse proxy Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
311
flight-comparator/cache.py
Normal file
311
flight-comparator/cache.py
Normal file
@@ -0,0 +1,311 @@
|
||||
"""
|
||||
SQLite caching layer for flight search results.
|
||||
|
||||
Stores search results with timestamps to avoid unnecessary API calls
|
||||
and reduce rate limiting issues.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Cache database location
|
||||
CACHE_DB_PATH = Path(__file__).parent / "data" / "flight_cache.db"
|
||||
|
||||
# Default cache threshold in hours
|
||||
DEFAULT_CACHE_THRESHOLD_HOURS = 24
|
||||
|
||||
|
||||
def init_database():
|
||||
"""Initialize SQLite database with required tables."""
|
||||
CACHE_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
conn = sqlite3.connect(CACHE_DB_PATH)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Table for search queries
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS flight_searches (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
query_hash TEXT NOT NULL UNIQUE,
|
||||
origin TEXT NOT NULL,
|
||||
destination TEXT NOT NULL,
|
||||
search_date TEXT NOT NULL,
|
||||
seat_class TEXT NOT NULL,
|
||||
adults INTEGER NOT NULL,
|
||||
query_timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
|
||||
# Table for flight results
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS flight_results (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
search_id INTEGER NOT NULL,
|
||||
airline TEXT,
|
||||
departure_time TEXT,
|
||||
arrival_time TEXT,
|
||||
duration_minutes INTEGER,
|
||||
price REAL,
|
||||
currency TEXT,
|
||||
plane_type TEXT,
|
||||
FOREIGN KEY (search_id) REFERENCES flight_searches(id) ON DELETE CASCADE
|
||||
)
|
||||
""")
|
||||
|
||||
# Indexes for performance
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_query_hash
|
||||
ON flight_searches(query_hash)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_query_timestamp
|
||||
ON flight_searches(query_timestamp)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_search_id
|
||||
ON flight_results(search_id)
|
||||
""")
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
def get_cache_key(origin: str, destination: str, date: str, seat_class: str, adults: int) -> str:
|
||||
"""
|
||||
Generate a unique cache key for a flight search query.
|
||||
|
||||
Args:
|
||||
origin: Origin airport IATA code
|
||||
destination: Destination airport IATA code
|
||||
date: Search date (YYYY-MM-DD)
|
||||
seat_class: Cabin class
|
||||
adults: Number of passengers
|
||||
|
||||
Returns:
|
||||
SHA256 hash of the query parameters
|
||||
"""
|
||||
query_string = f"{origin}|{destination}|{date}|{seat_class}|{adults}"
|
||||
return hashlib.sha256(query_string.encode()).hexdigest()
|
||||
|
||||
|
||||
def get_cached_results(
|
||||
origin: str,
|
||||
destination: str,
|
||||
date: str,
|
||||
seat_class: str,
|
||||
adults: int,
|
||||
threshold_hours: int = DEFAULT_CACHE_THRESHOLD_HOURS,
|
||||
) -> Optional[list[dict]]:
|
||||
"""
|
||||
Retrieve cached flight results if they exist and are recent enough.
|
||||
|
||||
Args:
|
||||
origin: Origin airport IATA code
|
||||
destination: Destination airport IATA code
|
||||
date: Search date (YYYY-MM-DD)
|
||||
seat_class: Cabin class
|
||||
adults: Number of passengers
|
||||
threshold_hours: Maximum age of cached results in hours
|
||||
|
||||
Returns:
|
||||
List of flight dicts if cache hit, None if cache miss or expired
|
||||
"""
|
||||
init_database()
|
||||
|
||||
cache_key = get_cache_key(origin, destination, date, seat_class, adults)
|
||||
threshold_time = datetime.now() - timedelta(hours=threshold_hours)
|
||||
|
||||
conn = sqlite3.connect(CACHE_DB_PATH)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Find recent search
|
||||
cursor.execute("""
|
||||
SELECT id, query_timestamp
|
||||
FROM flight_searches
|
||||
WHERE query_hash = ?
|
||||
AND query_timestamp > ?
|
||||
ORDER BY query_timestamp DESC
|
||||
LIMIT 1
|
||||
""", (cache_key, threshold_time.isoformat()))
|
||||
|
||||
search_row = cursor.fetchone()
|
||||
|
||||
if not search_row:
|
||||
conn.close()
|
||||
return None
|
||||
|
||||
search_id, timestamp = search_row
|
||||
|
||||
# Retrieve flight results
|
||||
cursor.execute("""
|
||||
SELECT airline, departure_time, arrival_time, duration_minutes,
|
||||
price, currency, plane_type
|
||||
FROM flight_results
|
||||
WHERE search_id = ?
|
||||
""", (search_id,))
|
||||
|
||||
flight_rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
# Convert to flight dicts
|
||||
flights = []
|
||||
for row in flight_rows:
|
||||
flights.append({
|
||||
"origin": origin,
|
||||
"destination": destination,
|
||||
"airline": row[0],
|
||||
"departure_time": row[1],
|
||||
"arrival_time": row[2],
|
||||
"duration_minutes": row[3],
|
||||
"price": row[4],
|
||||
"currency": row[5],
|
||||
"plane_type": row[6],
|
||||
"stops": 0, # Only direct flights are cached
|
||||
})
|
||||
|
||||
return flights
|
||||
|
||||
|
||||
def save_results(
|
||||
origin: str,
|
||||
destination: str,
|
||||
date: str,
|
||||
seat_class: str,
|
||||
adults: int,
|
||||
flights: list[dict],
|
||||
) -> None:
|
||||
"""
|
||||
Save flight search results to cache database.
|
||||
|
||||
Args:
|
||||
origin: Origin airport IATA code
|
||||
destination: Destination airport IATA code
|
||||
date: Search date (YYYY-MM-DD)
|
||||
seat_class: Cabin class
|
||||
adults: Number of passengers
|
||||
flights: List of flight dicts to cache
|
||||
"""
|
||||
init_database()
|
||||
|
||||
cache_key = get_cache_key(origin, destination, date, seat_class, adults)
|
||||
|
||||
conn = sqlite3.connect(CACHE_DB_PATH)
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
# Delete old search with same cache key (replace with fresh data)
|
||||
cursor.execute("""
|
||||
DELETE FROM flight_searches
|
||||
WHERE query_hash = ?
|
||||
""", (cache_key,))
|
||||
|
||||
# Insert search query
|
||||
cursor.execute("""
|
||||
INSERT INTO flight_searches
|
||||
(query_hash, origin, destination, search_date, seat_class, adults)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""", (cache_key, origin, destination, date, seat_class, adults))
|
||||
|
||||
search_id = cursor.lastrowid
|
||||
|
||||
# Insert flight results
|
||||
for flight in flights:
|
||||
cursor.execute("""
|
||||
INSERT INTO flight_results
|
||||
(search_id, airline, departure_time, arrival_time, duration_minutes,
|
||||
price, currency, plane_type)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
search_id,
|
||||
flight.get("airline"),
|
||||
flight.get("departure_time"),
|
||||
flight.get("arrival_time"),
|
||||
flight.get("duration_minutes"),
|
||||
flight.get("price"),
|
||||
flight.get("currency"),
|
||||
flight.get("plane_type"),
|
||||
))
|
||||
|
||||
conn.commit()
|
||||
except Exception as e:
|
||||
conn.rollback()
|
||||
print(f"⚠️ Cache save failed: {e}")
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def clear_old_cache(days: int = 30) -> int:
|
||||
"""
|
||||
Delete cached results older than specified number of days.
|
||||
|
||||
Args:
|
||||
days: Maximum age of cached results to keep
|
||||
|
||||
Returns:
|
||||
Number of deleted search records
|
||||
"""
|
||||
init_database()
|
||||
|
||||
threshold_time = datetime.now() - timedelta(days=days)
|
||||
|
||||
conn = sqlite3.connect(CACHE_DB_PATH)
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute("""
|
||||
DELETE FROM flight_searches
|
||||
WHERE query_timestamp < ?
|
||||
""", (threshold_time.isoformat(),))
|
||||
|
||||
deleted_count = cursor.rowcount
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
return deleted_count
|
||||
|
||||
|
||||
def get_cache_stats() -> dict:
|
||||
"""
|
||||
Get statistics about cached data.
|
||||
|
||||
Returns:
|
||||
Dict with cache statistics
|
||||
"""
|
||||
init_database()
|
||||
|
||||
conn = sqlite3.connect(CACHE_DB_PATH)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Count total searches
|
||||
cursor.execute("SELECT COUNT(*) FROM flight_searches")
|
||||
total_searches = cursor.fetchone()[0]
|
||||
|
||||
# Count total flight results
|
||||
cursor.execute("SELECT COUNT(*) FROM flight_results")
|
||||
total_results = cursor.fetchone()[0]
|
||||
|
||||
# Get oldest and newest entries
|
||||
cursor.execute("""
|
||||
SELECT MIN(query_timestamp), MAX(query_timestamp)
|
||||
FROM flight_searches
|
||||
""")
|
||||
oldest, newest = cursor.fetchone()
|
||||
|
||||
# Get database file size
|
||||
db_size_bytes = CACHE_DB_PATH.stat().st_size if CACHE_DB_PATH.exists() else 0
|
||||
|
||||
conn.close()
|
||||
|
||||
return {
|
||||
"total_searches": total_searches,
|
||||
"total_results": total_results,
|
||||
"oldest_entry": oldest,
|
||||
"newest_entry": newest,
|
||||
"db_size_mb": db_size_bytes / (1024 * 1024),
|
||||
}
|
||||
Reference in New Issue
Block a user