Full-stack flight price scanner built on fast-flights v3 (SOCS cookie bypass): Backend (FastAPI + SQLite): - REST API with rate limiting, Pydantic v2 validation, paginated responses - Scan pipeline: resolves airports, queries every day in the window, saves individual flights + aggregate route stats to SQLite - Background async scan processor with real-time progress tracking - Airport search endpoint backed by OpenFlights dataset - Daily scan window (all dates, not monthly samples) Frontend (React 19 + TypeScript + Tailwind CSS v4): - Dashboard with live scan status and recent scans - Create scan form: country mode or specific airports (searchable dropdown) - Scan detail page with expandable route rows showing individual flights (date, airline, departure, arrival, price) loaded on demand - AirportSearch component with debounced live search and multi-select Database: - scans → routes → flights schema with FK cascade and auto-update triggers - Migrations for schema evolution (relaxed country constraint) Tests: - 74 tests: unit + integration, isolated per-test SQLite DB - Confirmed flight fixtures in tests/confirmed_flights.json (50 real flights, BDS→FMM Ryanair + BDS→DUS Eurowings, scraped Feb 2026) - Integration tests parametrized from confirmed routes Docker: - Multi-stage builds, Compose orchestration, Nginx reverse proxy Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
312 lines
8.3 KiB
Python
312 lines
8.3 KiB
Python
"""
|
|
SQLite caching layer for flight search results.
|
|
|
|
Stores search results with timestamps to avoid unnecessary API calls
|
|
and reduce rate limiting issues.
|
|
"""
|
|
|
|
import sqlite3
|
|
import hashlib
|
|
import json
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
# Cache database location
|
|
CACHE_DB_PATH = Path(__file__).parent / "data" / "flight_cache.db"
|
|
|
|
# Default cache threshold in hours
|
|
DEFAULT_CACHE_THRESHOLD_HOURS = 24
|
|
|
|
|
|
def init_database():
|
|
"""Initialize SQLite database with required tables."""
|
|
CACHE_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
conn = sqlite3.connect(CACHE_DB_PATH)
|
|
cursor = conn.cursor()
|
|
|
|
# Table for search queries
|
|
cursor.execute("""
|
|
CREATE TABLE IF NOT EXISTS flight_searches (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
query_hash TEXT NOT NULL UNIQUE,
|
|
origin TEXT NOT NULL,
|
|
destination TEXT NOT NULL,
|
|
search_date TEXT NOT NULL,
|
|
seat_class TEXT NOT NULL,
|
|
adults INTEGER NOT NULL,
|
|
query_timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
""")
|
|
|
|
# Table for flight results
|
|
cursor.execute("""
|
|
CREATE TABLE IF NOT EXISTS flight_results (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
search_id INTEGER NOT NULL,
|
|
airline TEXT,
|
|
departure_time TEXT,
|
|
arrival_time TEXT,
|
|
duration_minutes INTEGER,
|
|
price REAL,
|
|
currency TEXT,
|
|
plane_type TEXT,
|
|
FOREIGN KEY (search_id) REFERENCES flight_searches(id) ON DELETE CASCADE
|
|
)
|
|
""")
|
|
|
|
# Indexes for performance
|
|
cursor.execute("""
|
|
CREATE INDEX IF NOT EXISTS idx_query_hash
|
|
ON flight_searches(query_hash)
|
|
""")
|
|
|
|
cursor.execute("""
|
|
CREATE INDEX IF NOT EXISTS idx_query_timestamp
|
|
ON flight_searches(query_timestamp)
|
|
""")
|
|
|
|
cursor.execute("""
|
|
CREATE INDEX IF NOT EXISTS idx_search_id
|
|
ON flight_results(search_id)
|
|
""")
|
|
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
|
|
def get_cache_key(origin: str, destination: str, date: str, seat_class: str, adults: int) -> str:
|
|
"""
|
|
Generate a unique cache key for a flight search query.
|
|
|
|
Args:
|
|
origin: Origin airport IATA code
|
|
destination: Destination airport IATA code
|
|
date: Search date (YYYY-MM-DD)
|
|
seat_class: Cabin class
|
|
adults: Number of passengers
|
|
|
|
Returns:
|
|
SHA256 hash of the query parameters
|
|
"""
|
|
query_string = f"{origin}|{destination}|{date}|{seat_class}|{adults}"
|
|
return hashlib.sha256(query_string.encode()).hexdigest()
|
|
|
|
|
|
def get_cached_results(
|
|
origin: str,
|
|
destination: str,
|
|
date: str,
|
|
seat_class: str,
|
|
adults: int,
|
|
threshold_hours: int = DEFAULT_CACHE_THRESHOLD_HOURS,
|
|
) -> Optional[list[dict]]:
|
|
"""
|
|
Retrieve cached flight results if they exist and are recent enough.
|
|
|
|
Args:
|
|
origin: Origin airport IATA code
|
|
destination: Destination airport IATA code
|
|
date: Search date (YYYY-MM-DD)
|
|
seat_class: Cabin class
|
|
adults: Number of passengers
|
|
threshold_hours: Maximum age of cached results in hours
|
|
|
|
Returns:
|
|
List of flight dicts if cache hit, None if cache miss or expired
|
|
"""
|
|
init_database()
|
|
|
|
cache_key = get_cache_key(origin, destination, date, seat_class, adults)
|
|
threshold_time = datetime.now() - timedelta(hours=threshold_hours)
|
|
|
|
conn = sqlite3.connect(CACHE_DB_PATH)
|
|
cursor = conn.cursor()
|
|
|
|
# Find recent search
|
|
cursor.execute("""
|
|
SELECT id, query_timestamp
|
|
FROM flight_searches
|
|
WHERE query_hash = ?
|
|
AND query_timestamp > ?
|
|
ORDER BY query_timestamp DESC
|
|
LIMIT 1
|
|
""", (cache_key, threshold_time.isoformat()))
|
|
|
|
search_row = cursor.fetchone()
|
|
|
|
if not search_row:
|
|
conn.close()
|
|
return None
|
|
|
|
search_id, timestamp = search_row
|
|
|
|
# Retrieve flight results
|
|
cursor.execute("""
|
|
SELECT airline, departure_time, arrival_time, duration_minutes,
|
|
price, currency, plane_type
|
|
FROM flight_results
|
|
WHERE search_id = ?
|
|
""", (search_id,))
|
|
|
|
flight_rows = cursor.fetchall()
|
|
conn.close()
|
|
|
|
# Convert to flight dicts
|
|
flights = []
|
|
for row in flight_rows:
|
|
flights.append({
|
|
"origin": origin,
|
|
"destination": destination,
|
|
"airline": row[0],
|
|
"departure_time": row[1],
|
|
"arrival_time": row[2],
|
|
"duration_minutes": row[3],
|
|
"price": row[4],
|
|
"currency": row[5],
|
|
"plane_type": row[6],
|
|
"stops": 0, # Only direct flights are cached
|
|
})
|
|
|
|
return flights
|
|
|
|
|
|
def save_results(
|
|
origin: str,
|
|
destination: str,
|
|
date: str,
|
|
seat_class: str,
|
|
adults: int,
|
|
flights: list[dict],
|
|
) -> None:
|
|
"""
|
|
Save flight search results to cache database.
|
|
|
|
Args:
|
|
origin: Origin airport IATA code
|
|
destination: Destination airport IATA code
|
|
date: Search date (YYYY-MM-DD)
|
|
seat_class: Cabin class
|
|
adults: Number of passengers
|
|
flights: List of flight dicts to cache
|
|
"""
|
|
init_database()
|
|
|
|
cache_key = get_cache_key(origin, destination, date, seat_class, adults)
|
|
|
|
conn = sqlite3.connect(CACHE_DB_PATH)
|
|
cursor = conn.cursor()
|
|
|
|
try:
|
|
# Delete old search with same cache key (replace with fresh data)
|
|
cursor.execute("""
|
|
DELETE FROM flight_searches
|
|
WHERE query_hash = ?
|
|
""", (cache_key,))
|
|
|
|
# Insert search query
|
|
cursor.execute("""
|
|
INSERT INTO flight_searches
|
|
(query_hash, origin, destination, search_date, seat_class, adults)
|
|
VALUES (?, ?, ?, ?, ?, ?)
|
|
""", (cache_key, origin, destination, date, seat_class, adults))
|
|
|
|
search_id = cursor.lastrowid
|
|
|
|
# Insert flight results
|
|
for flight in flights:
|
|
cursor.execute("""
|
|
INSERT INTO flight_results
|
|
(search_id, airline, departure_time, arrival_time, duration_minutes,
|
|
price, currency, plane_type)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
""", (
|
|
search_id,
|
|
flight.get("airline"),
|
|
flight.get("departure_time"),
|
|
flight.get("arrival_time"),
|
|
flight.get("duration_minutes"),
|
|
flight.get("price"),
|
|
flight.get("currency"),
|
|
flight.get("plane_type"),
|
|
))
|
|
|
|
conn.commit()
|
|
except Exception as e:
|
|
conn.rollback()
|
|
print(f"⚠️ Cache save failed: {e}")
|
|
finally:
|
|
conn.close()
|
|
|
|
|
|
def clear_old_cache(days: int = 30) -> int:
|
|
"""
|
|
Delete cached results older than specified number of days.
|
|
|
|
Args:
|
|
days: Maximum age of cached results to keep
|
|
|
|
Returns:
|
|
Number of deleted search records
|
|
"""
|
|
init_database()
|
|
|
|
threshold_time = datetime.now() - timedelta(days=days)
|
|
|
|
conn = sqlite3.connect(CACHE_DB_PATH)
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute("""
|
|
DELETE FROM flight_searches
|
|
WHERE query_timestamp < ?
|
|
""", (threshold_time.isoformat(),))
|
|
|
|
deleted_count = cursor.rowcount
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
return deleted_count
|
|
|
|
|
|
def get_cache_stats() -> dict:
|
|
"""
|
|
Get statistics about cached data.
|
|
|
|
Returns:
|
|
Dict with cache statistics
|
|
"""
|
|
init_database()
|
|
|
|
conn = sqlite3.connect(CACHE_DB_PATH)
|
|
cursor = conn.cursor()
|
|
|
|
# Count total searches
|
|
cursor.execute("SELECT COUNT(*) FROM flight_searches")
|
|
total_searches = cursor.fetchone()[0]
|
|
|
|
# Count total flight results
|
|
cursor.execute("SELECT COUNT(*) FROM flight_results")
|
|
total_results = cursor.fetchone()[0]
|
|
|
|
# Get oldest and newest entries
|
|
cursor.execute("""
|
|
SELECT MIN(query_timestamp), MAX(query_timestamp)
|
|
FROM flight_searches
|
|
""")
|
|
oldest, newest = cursor.fetchone()
|
|
|
|
# Get database file size
|
|
db_size_bytes = CACHE_DB_PATH.stat().st_size if CACHE_DB_PATH.exists() else 0
|
|
|
|
conn.close()
|
|
|
|
return {
|
|
"total_searches": total_searches,
|
|
"total_results": total_results,
|
|
"oldest_entry": oldest,
|
|
"newest_entry": newest,
|
|
"db_size_mb": db_size_bytes / (1024 * 1024),
|
|
}
|