feat: write routes live during scan instead of bulk-insert at completion

Routes and individual flights are now written to the database as each
query result arrives, rather than after all queries finish. The frontend
already polls /scans/:id/routes while status=running, so routes appear
progressively with no frontend changes needed.

Changes:
- database/schema.sql: UNIQUE INDEX uq_routes_scan_dest(scan_id, destination)
- database/init_db.py: _migrate_add_routes_unique_index() migration
- scan_processor.py: _write_route_incremental() helper; progress_callback
  now writes routes/flights immediately; Phase 2 bulk-write replaced with
  a lightweight totals query
- searcher_v3.py: pass flights= kwarg to progress_callback on cache_hit
  and api_success paths

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-27 20:53:04 +01:00
parent 4926e89e46
commit ce1cf667d2
5 changed files with 592 additions and 93 deletions

View File

@@ -12,7 +12,6 @@ Runs as async background tasks within the FastAPI application.
import asyncio
import logging
from datetime import datetime, date, timedelta
from typing import Dict, List, Optional
import json
from database import get_connection
@@ -23,6 +22,102 @@ from searcher_v3 import search_multiple_routes
logger = logging.getLogger(__name__)
def _write_route_incremental(scan_id: int, destination: str,
dest_name: str, dest_city: str,
new_flights: list):
"""
Write or update a route row and its individual flight rows immediately.
Called from progress_callback each time a (scan_id, destination, date)
query returns results. Merges into the existing route row if one already
exists, using a running weighted average for avg_price.
Opens its own DB connection — safe to call from the event loop thread.
"""
prices = [f.get('price') for f in new_flights if f.get('price')]
if not prices:
return
new_airlines = list({f.get('airline') for f in new_flights if f.get('airline')})
new_count = len(prices)
new_min = min(prices)
new_max = max(prices)
new_avg = sum(prices) / new_count
try:
conn = get_connection()
cursor = conn.cursor()
cursor.execute("""
SELECT id, flight_count, min_price, max_price, avg_price, airlines
FROM routes
WHERE scan_id = ? AND destination = ?
""", (scan_id, destination))
existing = cursor.fetchone()
if existing is None:
cursor.execute("""
INSERT INTO routes (
scan_id, destination, destination_name, destination_city,
flight_count, airlines, min_price, max_price, avg_price
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
scan_id, destination, dest_name, dest_city,
new_count, json.dumps(new_airlines),
new_min, new_max, new_avg,
))
else:
old_count = existing['flight_count'] or 0
old_min = existing['min_price']
old_max = existing['max_price']
old_avg = existing['avg_price'] or 0.0
old_airlines = json.loads(existing['airlines']) if existing['airlines'] else []
merged_count = old_count + new_count
merged_min = min(old_min, new_min) if old_min is not None else new_min
merged_max = max(old_max, new_max) if old_max is not None else new_max
merged_avg = (old_avg * old_count + new_avg * new_count) / merged_count
merged_airlines = json.dumps(list(set(old_airlines) | set(new_airlines)))
cursor.execute("""
UPDATE routes
SET flight_count = ?,
min_price = ?,
max_price = ?,
avg_price = ?,
airlines = ?
WHERE scan_id = ? AND destination = ?
""", (
merged_count, merged_min, merged_max, merged_avg, merged_airlines,
scan_id, destination,
))
for flight in new_flights:
if not flight.get('price'):
continue
cursor.execute("""
INSERT INTO flights (
scan_id, destination, date, airline,
departure_time, arrival_time, price, stops
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", (
scan_id,
destination,
flight.get('date', ''),
flight.get('airline'),
flight.get('departure_time'),
flight.get('arrival_time'),
flight.get('price'),
flight.get('stops', 0),
))
conn.commit()
conn.close()
except Exception as e:
logger.error(f"[Scan {scan_id}] Failed to write incremental route {destination}: {e}")
async def process_scan(scan_id: int):
"""
Process a pending scan by querying flights and saving routes.
@@ -131,19 +226,28 @@ async def process_scan(scan_id: int):
""", (len(routes_to_scan), scan_id))
conn.commit()
# Progress callback to update database
# Signature: callback(origin, destination, date, status, count, error=None)
# Progress callback updates DB progress counter and writes routes live
# Signature: callback(origin, destination, date, status, count, error=None, flights=None)
routes_scanned_count = 0
def progress_callback(origin: str, destination: str, date: str,
status: str, count: int, error: str = None):
status: str, count: int, error: str = None,
flights: list = None):
nonlocal routes_scanned_count
# Increment counter for each route query (cache hit or API call)
if status in ('cache_hit', 'api_success', 'error'):
routes_scanned_count += 1
# Update progress in database
# Write route + flights to DB immediately if results available
if flights and status in ('cache_hit', 'api_success'):
for f in flights:
f['date'] = date
dest_info = next((d for d in destinations if d['iata'] == destination), None)
dest_name = dest_info.get('name', destination) if dest_info else destination
dest_city = dest_info.get('city', '') if dest_info else ''
_write_route_incremental(scan_id, destination, dest_name, dest_city, flights)
# Update progress counter
try:
progress_conn = get_connection()
progress_cursor = progress_conn.cursor()
@@ -158,7 +262,7 @@ async def process_scan(scan_id: int):
progress_conn.commit()
progress_conn.close()
if routes_scanned_count % 10 == 0: # Log every 10 routes
if routes_scanned_count % 10 == 0:
logger.info(f"[Scan {scan_id}] Progress: {routes_scanned_count}/{len(routes_to_scan)} routes ({status}: {origin}{destination})")
except Exception as e:
@@ -177,89 +281,15 @@ async def process_scan(scan_id: int):
progress_callback=progress_callback
)
logger.info(f"[Scan {scan_id}] Flight queries complete. Processing results...")
logger.info(f"[Scan {scan_id}] Flight queries complete.")
# Group results by destination, preserving date per flight
# Structure: {dest: [(flight_dict, date), ...]}
routes_by_destination: Dict[str, List] = {}
total_flights = 0
for (orig, dest, scan_date), flights in results.items():
if dest not in routes_by_destination:
routes_by_destination[dest] = []
for flight in flights:
routes_by_destination[dest].append((flight, scan_date))
total_flights += len(flights)
logger.info(f"[Scan {scan_id}] Found {total_flights} total flights across {len(routes_by_destination)} destinations")
# Save routes and individual flights to database
routes_saved = 0
for destination, flight_date_pairs in routes_by_destination.items():
if not flight_date_pairs:
continue # Skip destinations with no flights
flights = [f for f, _ in flight_date_pairs]
# Get destination details (fall back to IATA code if not in DB)
dest_info = next((d for d in destinations if d['iata'] == destination), None)
dest_name = dest_info.get('name', destination) if dest_info else destination
dest_city = dest_info.get('city', '') if dest_info else ''
# Calculate statistics
prices = [f.get('price') for f in flights if f.get('price')]
airlines = list(set(f.get('airline') for f in flights if f.get('airline')))
if not prices:
logger.info(f"[Scan {scan_id}] Skipping {destination} - no prices available")
continue
min_price = min(prices)
max_price = max(prices)
avg_price = sum(prices) / len(prices)
# Insert route summary
cursor.execute("""
INSERT INTO routes (
scan_id, destination, destination_name, destination_city,
min_price, max_price, avg_price, flight_count, airlines
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
scan_id,
destination,
dest_name,
dest_city,
min_price,
max_price,
avg_price,
len(flights),
json.dumps(airlines)
))
# Insert individual flights
for flight, flight_date in flight_date_pairs:
if not flight.get('price'):
continue
cursor.execute("""
INSERT INTO flights (
scan_id, destination, date, airline,
departure_time, arrival_time, price, stops
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", (
scan_id,
destination,
flight_date,
flight.get('airline'),
flight.get('departure_time'),
flight.get('arrival_time'),
flight.get('price'),
flight.get('stops', 0),
))
routes_saved += 1
conn.commit()
# Routes and flights were written incrementally by progress_callback.
routes_saved = cursor.execute(
"SELECT COUNT(*) FROM routes WHERE scan_id = ?", (scan_id,)
).fetchone()[0]
total_flights_saved = cursor.execute(
"SELECT COALESCE(SUM(flight_count), 0) FROM routes WHERE scan_id = ?", (scan_id,)
).fetchone()[0]
# Update scan to completed
cursor.execute("""
@@ -268,10 +298,10 @@ async def process_scan(scan_id: int):
total_flights = ?,
updated_at = CURRENT_TIMESTAMP
WHERE id = ?
""", (total_flights, scan_id))
""", (total_flights_saved, scan_id))
conn.commit()
logger.info(f"[Scan {scan_id}] ✅ Scan completed successfully! {routes_saved} routes saved with {total_flights} flights")
logger.info(f"[Scan {scan_id}] ✅ Scan completed successfully! {routes_saved} routes saved with {total_flights_saved} flights")
except Exception as e:
logger.error(f"[Scan {scan_id}] ❌ Scan failed with error: {str(e)}", exc_info=True)