feat: write routes live during scan instead of bulk-insert at completion
Routes and individual flights are now written to the database as each query result arrives, rather than after all queries finish. The frontend already polls /scans/:id/routes while status=running, so routes appear progressively with no frontend changes needed.

Changes:
- database/schema.sql: UNIQUE INDEX uq_routes_scan_dest(scan_id, destination)
- database/init_db.py: _migrate_add_routes_unique_index() migration
- scan_processor.py: _write_route_incremental() helper; progress_callback now writes routes/flights immediately; Phase 2 bulk-write replaced with a lightweight totals query
- searcher_v3.py: pass flights= kwarg to progress_callback on cache_hit and api_success paths

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -12,7 +12,6 @@ Runs as async background tasks within the FastAPI application.
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, date, timedelta
|
||||
from typing import Dict, List, Optional
|
||||
import json
|
||||
|
||||
from database import get_connection
|
||||
@@ -23,6 +22,102 @@ from searcher_v3 import search_multiple_routes
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _write_route_incremental(scan_id: int, destination: str,
|
||||
dest_name: str, dest_city: str,
|
||||
new_flights: list):
|
||||
"""
|
||||
Write or update a route row and its individual flight rows immediately.
|
||||
|
||||
Called from progress_callback each time a (scan_id, destination, date)
|
||||
query returns results. Merges into the existing route row if one already
|
||||
exists, using a running weighted average for avg_price.
|
||||
|
||||
Opens its own DB connection — safe to call from the event loop thread.
|
||||
"""
|
||||
prices = [f.get('price') for f in new_flights if f.get('price')]
|
||||
if not prices:
|
||||
return
|
||||
|
||||
new_airlines = list({f.get('airline') for f in new_flights if f.get('airline')})
|
||||
new_count = len(prices)
|
||||
new_min = min(prices)
|
||||
new_max = max(prices)
|
||||
new_avg = sum(prices) / new_count
|
||||
|
||||
try:
|
||||
conn = get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute("""
|
||||
SELECT id, flight_count, min_price, max_price, avg_price, airlines
|
||||
FROM routes
|
||||
WHERE scan_id = ? AND destination = ?
|
||||
""", (scan_id, destination))
|
||||
existing = cursor.fetchone()
|
||||
|
||||
if existing is None:
|
||||
cursor.execute("""
|
||||
INSERT INTO routes (
|
||||
scan_id, destination, destination_name, destination_city,
|
||||
flight_count, airlines, min_price, max_price, avg_price
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
scan_id, destination, dest_name, dest_city,
|
||||
new_count, json.dumps(new_airlines),
|
||||
new_min, new_max, new_avg,
|
||||
))
|
||||
else:
|
||||
old_count = existing['flight_count'] or 0
|
||||
old_min = existing['min_price']
|
||||
old_max = existing['max_price']
|
||||
old_avg = existing['avg_price'] or 0.0
|
||||
old_airlines = json.loads(existing['airlines']) if existing['airlines'] else []
|
||||
|
||||
merged_count = old_count + new_count
|
||||
merged_min = min(old_min, new_min) if old_min is not None else new_min
|
||||
merged_max = max(old_max, new_max) if old_max is not None else new_max
|
||||
merged_avg = (old_avg * old_count + new_avg * new_count) / merged_count
|
||||
merged_airlines = json.dumps(list(set(old_airlines) | set(new_airlines)))
|
||||
|
||||
cursor.execute("""
|
||||
UPDATE routes
|
||||
SET flight_count = ?,
|
||||
min_price = ?,
|
||||
max_price = ?,
|
||||
avg_price = ?,
|
||||
airlines = ?
|
||||
WHERE scan_id = ? AND destination = ?
|
||||
""", (
|
||||
merged_count, merged_min, merged_max, merged_avg, merged_airlines,
|
||||
scan_id, destination,
|
||||
))
|
||||
|
||||
for flight in new_flights:
|
||||
if not flight.get('price'):
|
||||
continue
|
||||
cursor.execute("""
|
||||
INSERT INTO flights (
|
||||
scan_id, destination, date, airline,
|
||||
departure_time, arrival_time, price, stops
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
scan_id,
|
||||
destination,
|
||||
flight.get('date', ''),
|
||||
flight.get('airline'),
|
||||
flight.get('departure_time'),
|
||||
flight.get('arrival_time'),
|
||||
flight.get('price'),
|
||||
flight.get('stops', 0),
|
||||
))
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scan {scan_id}] Failed to write incremental route {destination}: {e}")
|
||||
|
||||
|
||||
async def process_scan(scan_id: int):
|
||||
"""
|
||||
Process a pending scan by querying flights and saving routes.
|
||||
@@ -131,19 +226,28 @@ async def process_scan(scan_id: int):
|
||||
""", (len(routes_to_scan), scan_id))
|
||||
conn.commit()
|
||||
|
||||
# Progress callback to update database
|
||||
# Signature: callback(origin, destination, date, status, count, error=None)
|
||||
# Progress callback — updates DB progress counter and writes routes live
|
||||
# Signature: callback(origin, destination, date, status, count, error=None, flights=None)
|
||||
routes_scanned_count = 0
|
||||
|
||||
def progress_callback(origin: str, destination: str, date: str,
|
||||
status: str, count: int, error: str = None):
|
||||
status: str, count: int, error: str = None,
|
||||
flights: list = None):
|
||||
nonlocal routes_scanned_count
|
||||
|
||||
# Increment counter for each route query (cache hit or API call)
|
||||
if status in ('cache_hit', 'api_success', 'error'):
|
||||
routes_scanned_count += 1
|
||||
|
||||
# Update progress in database
|
||||
# Write route + flights to DB immediately if results available
|
||||
if flights and status in ('cache_hit', 'api_success'):
|
||||
for f in flights:
|
||||
f['date'] = date
|
||||
dest_info = next((d for d in destinations if d['iata'] == destination), None)
|
||||
dest_name = dest_info.get('name', destination) if dest_info else destination
|
||||
dest_city = dest_info.get('city', '') if dest_info else ''
|
||||
_write_route_incremental(scan_id, destination, dest_name, dest_city, flights)
|
||||
|
||||
# Update progress counter
|
||||
try:
|
||||
progress_conn = get_connection()
|
||||
progress_cursor = progress_conn.cursor()
|
||||
@@ -158,7 +262,7 @@ async def process_scan(scan_id: int):
|
||||
progress_conn.commit()
|
||||
progress_conn.close()
|
||||
|
||||
if routes_scanned_count % 10 == 0: # Log every 10 routes
|
||||
if routes_scanned_count % 10 == 0:
|
||||
logger.info(f"[Scan {scan_id}] Progress: {routes_scanned_count}/{len(routes_to_scan)} routes ({status}: {origin}→{destination})")
|
||||
|
||||
except Exception as e:
|
||||
@@ -177,89 +281,15 @@ async def process_scan(scan_id: int):
|
||||
progress_callback=progress_callback
|
||||
)
|
||||
|
||||
logger.info(f"[Scan {scan_id}] Flight queries complete. Processing results...")
|
||||
logger.info(f"[Scan {scan_id}] Flight queries complete.")
|
||||
|
||||
# Group results by destination, preserving date per flight
|
||||
# Structure: {dest: [(flight_dict, date), ...]}
|
||||
routes_by_destination: Dict[str, List] = {}
|
||||
total_flights = 0
|
||||
|
||||
for (orig, dest, scan_date), flights in results.items():
|
||||
if dest not in routes_by_destination:
|
||||
routes_by_destination[dest] = []
|
||||
|
||||
for flight in flights:
|
||||
routes_by_destination[dest].append((flight, scan_date))
|
||||
total_flights += len(flights)
|
||||
|
||||
logger.info(f"[Scan {scan_id}] Found {total_flights} total flights across {len(routes_by_destination)} destinations")
|
||||
|
||||
# Save routes and individual flights to database
|
||||
routes_saved = 0
|
||||
for destination, flight_date_pairs in routes_by_destination.items():
|
||||
if not flight_date_pairs:
|
||||
continue # Skip destinations with no flights
|
||||
|
||||
flights = [f for f, _ in flight_date_pairs]
|
||||
|
||||
# Get destination details (fall back to IATA code if not in DB)
|
||||
dest_info = next((d for d in destinations if d['iata'] == destination), None)
|
||||
dest_name = dest_info.get('name', destination) if dest_info else destination
|
||||
dest_city = dest_info.get('city', '') if dest_info else ''
|
||||
|
||||
# Calculate statistics
|
||||
prices = [f.get('price') for f in flights if f.get('price')]
|
||||
airlines = list(set(f.get('airline') for f in flights if f.get('airline')))
|
||||
|
||||
if not prices:
|
||||
logger.info(f"[Scan {scan_id}] Skipping {destination} - no prices available")
|
||||
continue
|
||||
|
||||
min_price = min(prices)
|
||||
max_price = max(prices)
|
||||
avg_price = sum(prices) / len(prices)
|
||||
|
||||
# Insert route summary
|
||||
cursor.execute("""
|
||||
INSERT INTO routes (
|
||||
scan_id, destination, destination_name, destination_city,
|
||||
min_price, max_price, avg_price, flight_count, airlines
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
scan_id,
|
||||
destination,
|
||||
dest_name,
|
||||
dest_city,
|
||||
min_price,
|
||||
max_price,
|
||||
avg_price,
|
||||
len(flights),
|
||||
json.dumps(airlines)
|
||||
))
|
||||
|
||||
# Insert individual flights
|
||||
for flight, flight_date in flight_date_pairs:
|
||||
if not flight.get('price'):
|
||||
continue
|
||||
cursor.execute("""
|
||||
INSERT INTO flights (
|
||||
scan_id, destination, date, airline,
|
||||
departure_time, arrival_time, price, stops
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
scan_id,
|
||||
destination,
|
||||
flight_date,
|
||||
flight.get('airline'),
|
||||
flight.get('departure_time'),
|
||||
flight.get('arrival_time'),
|
||||
flight.get('price'),
|
||||
flight.get('stops', 0),
|
||||
))
|
||||
|
||||
routes_saved += 1
|
||||
|
||||
conn.commit()
|
||||
# Routes and flights were written incrementally by progress_callback.
|
||||
routes_saved = cursor.execute(
|
||||
"SELECT COUNT(*) FROM routes WHERE scan_id = ?", (scan_id,)
|
||||
).fetchone()[0]
|
||||
total_flights_saved = cursor.execute(
|
||||
"SELECT COALESCE(SUM(flight_count), 0) FROM routes WHERE scan_id = ?", (scan_id,)
|
||||
).fetchone()[0]
|
||||
|
||||
# Update scan to completed
|
||||
cursor.execute("""
|
||||
@@ -268,10 +298,10 @@ async def process_scan(scan_id: int):
|
||||
total_flights = ?,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
WHERE id = ?
|
||||
""", (total_flights, scan_id))
|
||||
""", (total_flights_saved, scan_id))
|
||||
conn.commit()
|
||||
|
||||
logger.info(f"[Scan {scan_id}] ✅ Scan completed successfully! {routes_saved} routes saved with {total_flights} flights")
|
||||
logger.info(f"[Scan {scan_id}] ✅ Scan completed successfully! {routes_saved} routes saved with {total_flights_saved} flights")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scan {scan_id}] ❌ Scan failed with error: {str(e)}", exc_info=True)
|
||||
|
||||
Reference in New Issue
Block a user