diff --git a/flight-comparator/PRD_SCHEDULED_SCANS.md b/flight-comparator/PRD_SCHEDULED_SCANS.md new file mode 100644 index 0000000..507a107 --- /dev/null +++ b/flight-comparator/PRD_SCHEDULED_SCANS.md @@ -0,0 +1,409 @@ +# PRD: Scheduled Scans + +**Status:** Draft +**Date:** 2026-02-27 +**Verdict:** Fully feasible — no new dependencies required + +--- + +## 1. Problem + +Every scan is triggered manually. If you want to track prices for a route over time (e.g. BDS → Germany every Monday) you have to remember to click "Re-run" yourself. Price trends are only discoverable by comparing scan history manually. + +--- + +## 2. Goal + +Let users define a recurring schedule for any scan configuration. The server runs the scan automatically at the defined cadence, building a historical record of price data over time. + +--- + +## 3. User Stories + +- **As a user**, I want to schedule a weekly scan of BDS → Germany so I can see how prices change without manually re-running it. +- **As a user**, I want to enable/disable a schedule without deleting it. +- **As a user**, I want to see which scans were created by a schedule and navigate to that schedule from a scan. +- **As a user**, I want to trigger a scheduled scan immediately without waiting for the next interval. + +--- + +## 4. Scheduling Options + +Three frequencies are sufficient for flight price tracking: + +| Frequency | Parameters | Example | +|-----------|-----------|---------| +| `daily` | hour, minute | Every day at 06:00 | +| `weekly` | day_of_week (0=Mon–6=Sun), hour, minute | Every Monday at 06:00 | +| `monthly` | day_of_month (1–28), hour, minute | 1st of every month at 06:00 | + +Day of month capped at 28 to avoid Feb 29/30/31 edge cases. All times stored and executed in UTC. + +--- + +## 5. Architecture + +### 5.1 Scheduler Design + +No new dependencies. A simple asyncio background task wakes every 60 seconds, queries the DB for due schedules, and fires a scan for each. + +``` +lifespan startup + └── asyncio.create_task(_scheduler_loop()) + └── while True: + _check_and_run_due_schedules() # queries DB + await asyncio.sleep(60) +``` + +`_check_and_run_due_schedules()`: +1. `SELECT * FROM scheduled_scans WHERE enabled=1 AND next_run_at <= NOW()` +2. For each result, skip if previous scan for this schedule is still `pending` or `running` +3. Create a new scan row (same INSERT as `POST /scans`) +4. Call `start_scan_processor(scan_id)` +5. Update `last_run_at = NOW()` and compute + store `next_run_at` + +### 5.2 `next_run_at` Computation + +Precomputed in Python after every run (and on create/update). Stored as a TIMESTAMP column with an index — scheduler lookup is a single indexed range query. + +```python +def compute_next_run(frequency, hour, minute, + day_of_week=None, day_of_month=None, + after=None) -> datetime: + now = after or datetime.utcnow() + base = now.replace(hour=hour, minute=minute, second=0, microsecond=0) + + if frequency == 'daily': + return base if base > now else base + timedelta(days=1) + + elif frequency == 'weekly': + days_ahead = (day_of_week - now.weekday()) % 7 + if days_ahead == 0 and base <= now: + days_ahead = 7 + return (now + timedelta(days=days_ahead)).replace( + hour=hour, minute=minute, second=0, microsecond=0) + + elif frequency == 'monthly': + candidate = now.replace(day=day_of_month, hour=hour, minute=minute, second=0, microsecond=0) + if candidate <= now: + m, y = (now.month % 12) + 1, now.year + (1 if now.month == 12 else 0) + candidate = candidate.replace(year=y, month=m) + return candidate +``` + +--- + +## 6. Schema Changes + +### 6.1 New table: `scheduled_scans` + +```sql +CREATE TABLE IF NOT EXISTS scheduled_scans ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + + -- Scan parameters + origin TEXT NOT NULL CHECK(length(origin) = 3), + country TEXT NOT NULL CHECK(length(country) >= 2), + window_months INTEGER NOT NULL DEFAULT 1 + CHECK(window_months >= 1 AND window_months <= 12), + seat_class TEXT NOT NULL DEFAULT 'economy', + adults INTEGER NOT NULL DEFAULT 1 + CHECK(adults > 0 AND adults <= 9), + + -- Schedule definition + frequency TEXT NOT NULL + CHECK(frequency IN ('daily', 'weekly', 'monthly')), + hour INTEGER NOT NULL DEFAULT 6 + CHECK(hour >= 0 AND hour <= 23), + minute INTEGER NOT NULL DEFAULT 0 + CHECK(minute >= 0 AND minute <= 59), + day_of_week INTEGER CHECK(day_of_week >= 0 AND day_of_week <= 6), + day_of_month INTEGER CHECK(day_of_month >= 1 AND day_of_month <= 28), + + -- State + enabled INTEGER NOT NULL DEFAULT 1, + label TEXT, + last_run_at TIMESTAMP, + next_run_at TIMESTAMP NOT NULL, + + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + + -- Frequency-specific constraints + CHECK( + (frequency = 'weekly' AND day_of_week IS NOT NULL) OR + (frequency = 'monthly' AND day_of_month IS NOT NULL) OR + (frequency = 'daily') + ) +); + +-- Fast lookup of due schedules +CREATE UNIQUE INDEX IF NOT EXISTS uq_scheduled_scans_id + ON scheduled_scans(id); + +CREATE INDEX IF NOT EXISTS idx_scheduled_scans_next_run + ON scheduled_scans(next_run_at) + WHERE enabled = 1; + +-- Auto-update updated_at +CREATE TRIGGER IF NOT EXISTS update_scheduled_scans_timestamp +AFTER UPDATE ON scheduled_scans +FOR EACH ROW BEGIN + UPDATE scheduled_scans SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; +END; + +-- Insert schema version bump +INSERT OR IGNORE INTO schema_version (version, description) +VALUES (2, 'Add scheduled_scans table'); +``` + +### 6.2 Add FK column to `scans` + +```sql +-- Migration: add scheduled_scan_id to scans +ALTER TABLE scans ADD COLUMN scheduled_scan_id INTEGER + REFERENCES scheduled_scans(id) ON DELETE SET NULL; + +CREATE INDEX IF NOT EXISTS idx_scans_scheduled_scan_id + ON scans(scheduled_scan_id) + WHERE scheduled_scan_id IS NOT NULL; +``` + +--- + +## 7. Migration (`database/init_db.py`) + +Add two migration functions, called before `executescript(schema_sql)`: + +```python +def _migrate_add_scheduled_scans(conn, verbose=True): + """Migration: create scheduled_scans table and add FK to scans.""" + cursor = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='scheduled_scans'" + ) + if cursor.fetchone(): + return # Already exists + + if verbose: + print(" 🔄 Migrating: adding scheduled_scans table...") + + conn.execute(""" + CREATE TABLE scheduled_scans ( + id INTEGER PRIMARY KEY AUTOINCREMENT, ... + ) + """) + + # Add scheduled_scan_id to existing scans table + try: + conn.execute("ALTER TABLE scans ADD COLUMN scheduled_scan_id INTEGER REFERENCES scheduled_scans(id) ON DELETE SET NULL") + except sqlite3.OperationalError: + pass # Column already exists + + conn.execute("CREATE INDEX IF NOT EXISTS idx_scans_scheduled_scan_id ON scans(scheduled_scan_id) WHERE scheduled_scan_id IS NOT NULL") + conn.commit() + + if verbose: + print(" ✅ Migration complete: scheduled_scans table created") +``` + +--- + +## 8. API Endpoints + +All under `/api/v1/schedules`. Rate limit: 30 req/min per IP (same as scans list). + +| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/schedules` | List all schedules (paginated) | +| `POST` | `/schedules` | Create a schedule | +| `GET` | `/schedules/{id}` | Schedule details + last 5 scan IDs | +| `PATCH` | `/schedules/{id}` | Update (enable/disable, change frequency/params) | +| `DELETE` | `/schedules/{id}` | Delete schedule (scans are kept, FK set to NULL) | +| `POST` | `/schedules/{id}/run-now` | Trigger immediately (ignores next_run_at) | + +### Request model: `CreateScheduleRequest` + +```python +class CreateScheduleRequest(BaseModel): + origin: str # 3-char IATA + country: Optional[str] # 2-letter ISO country code + destinations: Optional[List[str]] # Alternative: list of IATA codes + window_months: int = 1 # Weeks of data per scan run + seat_class: str = 'economy' + adults: int = 1 + label: Optional[str] # Human-readable name + frequency: str # 'daily' | 'weekly' | 'monthly' + hour: int = 6 # UTC hour (0–23) + minute: int = 0 # UTC minute (0–59) + day_of_week: Optional[int] # Required when frequency='weekly' (0=Mon) + day_of_month: Optional[int] # Required when frequency='monthly' (1–28) +``` + +### Response model: `Schedule` + +```python +class Schedule(BaseModel): + id: int + origin: str + country: str + window_months: int + seat_class: str + adults: int + label: Optional[str] + frequency: str + hour: int + minute: int + day_of_week: Optional[int] + day_of_month: Optional[int] + enabled: bool + last_run_at: Optional[str] + next_run_at: str + created_at: str + recent_scan_ids: List[int] # Last 5 scans created by this schedule +``` + +--- + +## 9. Scheduler Lifecycle (`api_server.py`) + +### 9.1 Startup + +In the existing `lifespan()` context manager, after existing startup code: + +```python +scheduler_task = asyncio.create_task(_scheduler_loop()) +logger.info("Scheduled scan background task started") +yield +scheduler_task.cancel() +try: + await scheduler_task +except asyncio.CancelledError: + pass +``` + +### 9.2 Missed runs on restart + +When the server starts, `_check_and_run_due_schedules()` fires immediately (before the 60-second sleep), catching any schedules that were due while the server was down. Each overdue schedule runs exactly once — `next_run_at` is then advanced to the next future interval. Multiple missed intervals are not caught up. + +### 9.3 Concurrency guard + +Before firing a scan for a schedule, check: + +```python +running = conn.execute(""" + SELECT id FROM scans + WHERE scheduled_scan_id = ? AND status IN ('pending', 'running') +""", (schedule_id,)).fetchone() + +if running: + logger.info(f"Schedule {schedule_id}: previous scan {running[0]} still active, skipping this run") + # Still advance next_run_at so we try again next interval + continue +``` + +--- + +## 10. Frontend Changes + +### 10.1 New page: `Schedules.tsx` + +**List view:** +- Table of all schedules: label, origin → country, frequency, next run (local time), last run, enabled toggle +- "New Schedule" button opens create form (same airport search component as Scans) +- Inline enable/disable toggle (PATCH request, optimistic update) +- "Run now" button per row + +**Create form fields (below existing scan form fields):** +- Frequency selector: Daily / Weekly / Monthly (segmented button) +- Time of day: hour:minute picker (UTC, with note) +- Day of week (shown only for Weekly): Mon–Sun selector +- Day of month (shown only for Monthly): 1–28 number input +- Optional label field + +### 10.2 Modified: `ScanDetails.tsx` + +When a scan has `scheduled_scan_id`, show a small "Scheduled" chip in the header with a link to `/schedules/{scheduled_scan_id}`. + +### 10.3 Navigation (`Layout.tsx`) + +Add "Schedules" link to sidebar between Scans and Airports. + +### 10.4 API client (`api.ts`) + +```typescript +export interface Schedule { + id: number; + origin: string; + country: string; + window_months: number; + seat_class: string; + adults: number; + label?: string; + frequency: 'daily' | 'weekly' | 'monthly'; + hour: number; + minute: number; + day_of_week?: number; + day_of_month?: number; + enabled: boolean; + last_run_at?: string; + next_run_at: string; + created_at: string; + recent_scan_ids: number[]; +} + +export const scheduleApi = { + list: (page = 1, limit = 20) => + api.get>('/schedules', { params: { page, limit } }), + get: (id: number) => + api.get(`/schedules/${id}`), + create: (data: CreateScheduleRequest) => + api.post('/schedules', data), + update: (id: number, data: Partial & { enabled?: boolean }) => + api.patch(`/schedules/${id}`, data), + delete: (id: number) => + api.delete(`/schedules/${id}`), + runNow: (id: number) => + api.post<{ scan_id: number }>(`/schedules/${id}/run-now`), +}; +``` + +--- + +## 11. Edge Cases + +| Case | Handling | +|------|----------| +| Previous scan still running at next interval | Skip this interval's run, advance `next_run_at`, log warning | +| Server down when schedule is due | On startup, runs any overdue schedule once; does not catch up multiple missed intervals | +| Schedule deleted while scan is running | `ON DELETE SET NULL` on FK — scan continues, `scheduled_scan_id` becomes NULL | +| `window_months` covers past dates | Scan start date is always "tomorrow" at creation time, same as manual scans | +| Monthly with day_of_month=29..31 | Capped at 28 in validation — avoids invalid dates in all months | +| Simultaneous due schedules | Each creates an independent asyncio task; existing `max_workers=3` semaphore in scan_processor limits total API concurrency across all running scans | +| Schedule created at 05:59, fires at 06:00 UTC | `next_run_at` is computed at creation time — if 06:00 today already passed, fires tomorrow | + +--- + +## 12. Files Changed + +| File | Change | +|------|--------| +| `database/schema.sql` | Add `scheduled_scans` table, trigger, indexes, schema_version bump | +| `database/init_db.py` | `_migrate_add_scheduled_scans()` + call in `initialize_database()` | +| `api_server.py` | `compute_next_run()`, `_scheduler_loop()`, `_check_and_run_due_schedules()`, 6 new endpoints, lifespan update, new Pydantic models | +| `frontend/src/api.ts` | `Schedule` type, `CreateScheduleRequest` type, `scheduleApi` object | +| `frontend/src/pages/Schedules.tsx` | New page (list + inline create form) | +| `frontend/src/pages/ScanDetails.tsx` | "Scheduled" badge + link when `scheduled_scan_id` present | +| `frontend/src/components/Layout.tsx` | Schedules nav link | + +Total: 7 files. Estimated ~500 new lines (backend ~250, frontend ~250). + +--- + +## 13. Out of Scope + +- Notifications / alerts when a scheduled scan completes (email, webhook) +- Per-schedule price change detection / diffing between runs +- Timezone-aware scheduling (all times UTC for now) +- Pause/resume of scheduled scans (separate PRD) +- Rate limiting across simultaneous scheduled scans (existing semaphore provides soft protection) +- Dashboard widgets for upcoming scheduled runs diff --git a/flight-comparator/api_server.py b/flight-comparator/api_server.py index 2e49566..545f5ab 100644 --- a/flight-comparator/api_server.py +++ b/flight-comparator/api_server.py @@ -22,6 +22,7 @@ from pydantic import BaseModel, Field, validator, ValidationError from contextlib import asynccontextmanager from functools import lru_cache from datetime import datetime, date, timedelta +import asyncio import json import os import re @@ -224,6 +225,7 @@ RATE_LIMITS = { 'scans': (50, 60), # 50 scan creations per minute 'logs': (100, 60), # 100 log requests per minute 'airports': (500, 60), # 500 airport searches per minute + 'schedules': (30, 60), # 30 schedule requests per minute } @@ -240,10 +242,127 @@ def get_rate_limit_for_path(path: str) -> tuple[str, int, int]: return 'logs', *RATE_LIMITS['logs'] elif '/airports' in path: return 'airports', *RATE_LIMITS['airports'] + elif '/schedules' in path: + return 'schedules', *RATE_LIMITS['schedules'] else: return 'default', *RATE_LIMITS['default'] +# ============================================================================= +# Scheduler +# ============================================================================= + +def compute_next_run(frequency: str, hour: int, minute: int, + day_of_week: int = None, day_of_month: int = None, + after: datetime = None) -> datetime: + """Compute the next UTC run time for a scheduled scan.""" + now = after or datetime.utcnow() + base = now.replace(hour=hour, minute=minute, second=0, microsecond=0) + + if frequency == 'daily': + return base if base > now else base + timedelta(days=1) + + elif frequency == 'weekly': + days_ahead = (day_of_week - now.weekday()) % 7 + if days_ahead == 0 and base <= now: + days_ahead = 7 + return (now + timedelta(days=days_ahead)).replace( + hour=hour, minute=minute, second=0, microsecond=0) + + elif frequency == 'monthly': + candidate = now.replace(day=day_of_month, hour=hour, minute=minute, + second=0, microsecond=0) + if candidate <= now: + m, y = (now.month % 12) + 1, now.year + (1 if now.month == 12 else 0) + candidate = candidate.replace(year=y, month=m) + return candidate + + raise ValueError(f"Unknown frequency: {frequency}") + + +def _check_and_run_due_schedules(): + """Query DB for due schedules and fire a scan for each.""" + try: + conn = get_connection() + cursor = conn.cursor() + + now_str = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + cursor.execute(""" + SELECT id, origin, country, window_months, seat_class, adults, + frequency, hour, minute, day_of_week, day_of_month + FROM scheduled_scans + WHERE enabled = 1 AND next_run_at <= ? + """, (now_str,)) + due = cursor.fetchall() + + for row in due: + (sched_id, origin, country, window_months, seat_class, adults, + frequency, hour, minute, day_of_week, day_of_month) = row + + # Concurrency guard: skip if a scan for this schedule is still active + running = conn.execute(""" + SELECT id FROM scans + WHERE scheduled_scan_id = ? AND status IN ('pending', 'running') + """, (sched_id,)).fetchone() + + if running: + logging.info( + f"Schedule {sched_id}: previous scan {running[0]} still active, skipping" + ) + else: + # Compute date window + start_date = (date.today() + timedelta(days=1)).isoformat() + end_dt = date.today() + timedelta(days=1) + timedelta(days=30 * window_months) + end_date = end_dt.isoformat() + + conn.execute(""" + INSERT INTO scans ( + origin, country, start_date, end_date, + status, seat_class, adults, scheduled_scan_id + ) VALUES (?, ?, ?, ?, 'pending', ?, ?, ?) + """, (origin, country, start_date, end_date, + seat_class, adults, sched_id)) + conn.commit() + scan_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0] + + try: + start_scan_processor(scan_id) + logging.info( + f"Schedule {sched_id}: fired scan {scan_id} " + f"({origin} → {country})" + ) + except Exception as e: + logging.error( + f"Schedule {sched_id}: failed to start scan {scan_id}: {e}" + ) + + # Advance next_run_at regardless of whether we fired + next_run = compute_next_run( + frequency, hour, minute, day_of_week, day_of_month + ) + conn.execute(""" + UPDATE scheduled_scans + SET last_run_at = ?, next_run_at = ? + WHERE id = ? + """, (now_str, next_run.strftime('%Y-%m-%d %H:%M:%S'), sched_id)) + conn.commit() + + conn.close() + + except Exception as e: + logging.error(f"Scheduler error: {e}", exc_info=True) + + +async def _scheduler_loop(): + """Background task: check for due schedules every 60 seconds.""" + logging.info("Scheduler loop started") + # Run immediately on startup to catch any missed schedules + _check_and_run_due_schedules() + while True: + await asyncio.sleep(60) + _check_and_run_due_schedules() + + @asynccontextmanager async def lifespan(app: FastAPI): """Initialize airport data and database on server start.""" @@ -308,7 +427,18 @@ async def lifespan(app: FastAPI): print(f"⚠️ Scan cleanup warning: {e}") logging.info("Flight Radar API v2.0 startup complete") + + # Start scheduled scan background task + scheduler_task = asyncio.create_task(_scheduler_loop()) + logging.info("Scheduled scan background task started") + yield + + scheduler_task.cancel() + try: + await scheduler_task + except asyncio.CancelledError: + pass logging.info("Flight Radar API v2.0 shutting down") @@ -799,6 +929,7 @@ class Scan(BaseModel): error_message: Optional[str] = Field(None, description="Error message if scan failed") seat_class: str = Field(..., description="Seat class") adults: int = Field(..., ge=1, le=9, description="Number of adults") + scheduled_scan_id: Optional[int] = Field(None, description="ID of the schedule that created this scan") class ScanCreateResponse(BaseModel): @@ -1123,7 +1254,7 @@ async def create_scan(request: ScanRequest): SELECT id, origin, country, start_date, end_date, created_at, updated_at, status, total_routes, routes_scanned, total_flights, error_message, - seat_class, adults + seat_class, adults, scheduled_scan_id FROM scans WHERE id = ? """, (scan_id,)) @@ -1148,7 +1279,8 @@ async def create_scan(request: ScanRequest): total_flights=row[10], error_message=row[11], seat_class=row[12], - adults=row[13] + adults=row[13], + scheduled_scan_id=row[14] if len(row) > 14 else None ) logging.info(f"Scan created: ID={scan_id}, origin={scan.origin}, country={scan.country}, dates={scan.start_date} to {scan.end_date}") @@ -1227,7 +1359,7 @@ async def list_scans( SELECT id, origin, country, start_date, end_date, created_at, updated_at, status, total_routes, routes_scanned, total_flights, error_message, - seat_class, adults + seat_class, adults, scheduled_scan_id FROM scans {where_clause} ORDER BY created_at DESC @@ -1254,7 +1386,8 @@ async def list_scans( total_flights=row[10], error_message=row[11], seat_class=row[12], - adults=row[13] + adults=row[13], + scheduled_scan_id=row[14] if len(row) > 14 else None )) # Build pagination metadata @@ -1295,7 +1428,7 @@ async def get_scan_status(scan_id: int): SELECT id, origin, country, start_date, end_date, created_at, updated_at, status, total_routes, routes_scanned, total_flights, error_message, - seat_class, adults + seat_class, adults, scheduled_scan_id FROM scans WHERE id = ? """, (scan_id,)) @@ -1323,7 +1456,8 @@ async def get_scan_status(scan_id: int): total_flights=row[10], error_message=row[11], seat_class=row[12], - adults=row[13] + adults=row[13], + scheduled_scan_id=row[14] if len(row) > 14 else None ) except HTTPException: @@ -1649,7 +1783,7 @@ async def get_logs( @router_v1.get("/flights/{route_id}") -async def get_flights(route_id: str): +async def get_flights_stub(route_id: str): """ Get all flights for a specific route. @@ -1659,6 +1793,348 @@ async def get_flights(route_id: str): raise HTTPException(status_code=501, detail="Flights endpoint not yet implemented") +# ============================================================================= +# Schedules +# ============================================================================= + +class CreateScheduleRequest(BaseModel): + """Request body for creating or updating a scheduled scan.""" + origin: str = Field(..., description="Origin airport IATA code (3 letters)") + country: str = Field(..., description="Destination country ISO code (2 letters) or comma-separated IATA codes") + window_months: int = Field(1, ge=1, le=12, description="Months of data per scan run") + seat_class: str = Field('economy', description="Seat class") + adults: int = Field(1, ge=1, le=9, description="Number of adults") + label: Optional[str] = Field(None, description="Human-readable name for this schedule") + frequency: str = Field(..., description="Recurrence: daily | weekly | monthly") + hour: int = Field(6, ge=0, le=23, description="UTC hour (0–23)") + minute: int = Field(0, ge=0, le=59, description="UTC minute (0–59)") + day_of_week: Optional[int] = Field(None, ge=0, le=6, description="Required for weekly (0=Mon)") + day_of_month: Optional[int] = Field(None, ge=1, le=28, description="Required for monthly (1–28)") + + @validator('origin', pre=True) + def uppercase_origin(cls, v): + return v.strip().upper() if v else v + + @validator('country', pre=True) + def uppercase_country(cls, v): + return v.strip().upper() if v else v + + @validator('frequency') + def validate_frequency(cls, v): + if v not in ('daily', 'weekly', 'monthly'): + raise ValueError("frequency must be daily, weekly, or monthly") + return v + + @validator('day_of_week', always=True) + def validate_day_of_week(cls, v, values): + if values.get('frequency') == 'weekly' and v is None: + raise ValueError("day_of_week is required when frequency is weekly") + return v + + @validator('day_of_month', always=True) + def validate_day_of_month(cls, v, values): + if values.get('frequency') == 'monthly' and v is None: + raise ValueError("day_of_month is required when frequency is monthly") + return v + + +class UpdateScheduleRequest(BaseModel): + """Request body for PATCH /schedules/{id}.""" + enabled: Optional[bool] = None + label: Optional[str] = None + frequency: Optional[str] = None + hour: Optional[int] = Field(None, ge=0, le=23) + minute: Optional[int] = Field(None, ge=0, le=59) + day_of_week: Optional[int] = Field(None, ge=0, le=6) + day_of_month: Optional[int] = Field(None, ge=1, le=28) + window_months: Optional[int] = Field(None, ge=1, le=12) + seat_class: Optional[str] = None + adults: Optional[int] = Field(None, ge=1, le=9) + + @validator('frequency') + def validate_frequency(cls, v): + if v is not None and v not in ('daily', 'weekly', 'monthly'): + raise ValueError("frequency must be daily, weekly, or monthly") + return v + + +class Schedule(BaseModel): + """A recurring scheduled scan.""" + id: int + origin: str + country: str + window_months: int + seat_class: str + adults: int + label: Optional[str] + frequency: str + hour: int + minute: int + day_of_week: Optional[int] + day_of_month: Optional[int] + enabled: bool + last_run_at: Optional[str] + next_run_at: str + created_at: str + recent_scan_ids: List[int] + + +def _row_to_schedule(row, recent_scan_ids: list) -> Schedule: + """Convert a DB row (sqlite3.Row or tuple) to a Schedule model.""" + return Schedule( + id=row['id'], + origin=row['origin'], + country=row['country'], + window_months=row['window_months'], + seat_class=row['seat_class'], + adults=row['adults'], + label=row['label'], + frequency=row['frequency'], + hour=row['hour'], + minute=row['minute'], + day_of_week=row['day_of_week'], + day_of_month=row['day_of_month'], + enabled=bool(row['enabled']), + last_run_at=row['last_run_at'], + next_run_at=row['next_run_at'], + created_at=row['created_at'], + recent_scan_ids=recent_scan_ids, + ) + + +def _get_recent_scan_ids(conn, schedule_id: int, limit: int = 5) -> list: + rows = conn.execute(""" + SELECT id FROM scans + WHERE scheduled_scan_id = ? + ORDER BY created_at DESC + LIMIT ? + """, (schedule_id, limit)).fetchall() + return [r[0] for r in rows] + + +@router_v1.get("/schedules", response_model=PaginatedResponse[Schedule]) +async def list_schedules( + page: int = Query(1, ge=1), + limit: int = Query(20, ge=1, le=100), +): + """List all scheduled scans with pagination.""" + try: + conn = get_connection() + + total = conn.execute("SELECT COUNT(*) FROM scheduled_scans").fetchone()[0] + total_pages = math.ceil(total / limit) if total > 0 else 0 + offset = (page - 1) * limit + + rows = conn.execute(""" + SELECT * FROM scheduled_scans + ORDER BY created_at DESC + LIMIT ? OFFSET ? + """, (limit, offset)).fetchall() + + items = [ + _row_to_schedule(r, _get_recent_scan_ids(conn, r['id'])) + for r in rows + ] + conn.close() + + pagination = PaginationMetadata( + page=page, limit=limit, total=total, pages=total_pages, + has_next=page < total_pages, has_prev=page > 1, + ) + return PaginatedResponse(data=items, pagination=pagination) + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to list schedules: {e}") + + +@router_v1.post("/schedules", response_model=Schedule, status_code=201) +async def create_schedule(request: CreateScheduleRequest): + """Create a new scheduled scan.""" + try: + next_run = compute_next_run( + request.frequency, request.hour, request.minute, + request.day_of_week, request.day_of_month, + ) + next_run_str = next_run.strftime('%Y-%m-%d %H:%M:%S') + + conn = get_connection() + conn.execute(""" + INSERT INTO scheduled_scans ( + origin, country, window_months, seat_class, adults, + label, frequency, hour, minute, day_of_week, day_of_month, + enabled, next_run_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ?) + """, ( + request.origin, request.country, request.window_months, + request.seat_class, request.adults, request.label, + request.frequency, request.hour, request.minute, + request.day_of_week, request.day_of_month, next_run_str, + )) + conn.commit() + sched_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0] + + row = conn.execute( + "SELECT * FROM scheduled_scans WHERE id = ?", (sched_id,) + ).fetchone() + result = _row_to_schedule(row, []) + conn.close() + return result + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to create schedule: {e}") + + +@router_v1.get("/schedules/{schedule_id}", response_model=Schedule) +async def get_schedule(schedule_id: int): + """Get a single schedule by ID, including its last 5 scan IDs.""" + try: + conn = get_connection() + row = conn.execute( + "SELECT * FROM scheduled_scans WHERE id = ?", (schedule_id,) + ).fetchone() + + if not row: + conn.close() + raise HTTPException(status_code=404, detail=f"Schedule not found: {schedule_id}") + + recent = _get_recent_scan_ids(conn, schedule_id) + result = _row_to_schedule(row, recent) + conn.close() + return result + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to get schedule: {e}") + + +@router_v1.patch("/schedules/{schedule_id}", response_model=Schedule) +async def update_schedule(schedule_id: int, request: UpdateScheduleRequest): + """Update schedule fields. Recomputes next_run_at if schedule params change.""" + try: + conn = get_connection() + row = conn.execute( + "SELECT * FROM scheduled_scans WHERE id = ?", (schedule_id,) + ).fetchone() + + if not row: + conn.close() + raise HTTPException(status_code=404, detail=f"Schedule not found: {schedule_id}") + + # Merge updates on top of existing values + frequency = request.frequency if request.frequency is not None else row['frequency'] + hour = request.hour if request.hour is not None else row['hour'] + minute = request.minute if request.minute is not None else row['minute'] + day_of_week = request.day_of_week if request.day_of_week is not None else row['day_of_week'] + day_of_month = request.day_of_month if request.day_of_month is not None else row['day_of_month'] + + next_run = compute_next_run(frequency, hour, minute, day_of_week, day_of_month) + next_run_str = next_run.strftime('%Y-%m-%d %H:%M:%S') + + enabled_val = int(request.enabled) if request.enabled is not None else row['enabled'] + label_val = request.label if request.label is not None else row['label'] + wm_val = request.window_months if request.window_months is not None else row['window_months'] + sc_val = request.seat_class if request.seat_class is not None else row['seat_class'] + adults_val = request.adults if request.adults is not None else row['adults'] + + conn.execute(""" + UPDATE scheduled_scans + SET enabled = ?, label = ?, frequency = ?, hour = ?, minute = ?, + day_of_week = ?, day_of_month = ?, window_months = ?, + seat_class = ?, adults = ?, next_run_at = ? + WHERE id = ? + """, ( + enabled_val, label_val, frequency, hour, minute, + day_of_week, day_of_month, wm_val, sc_val, adults_val, + next_run_str, schedule_id, + )) + conn.commit() + + updated_row = conn.execute( + "SELECT * FROM scheduled_scans WHERE id = ?", (schedule_id,) + ).fetchone() + recent = _get_recent_scan_ids(conn, schedule_id) + result = _row_to_schedule(updated_row, recent) + conn.close() + return result + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to update schedule: {e}") + + +@router_v1.delete("/schedules/{schedule_id}", status_code=204) +async def delete_schedule(schedule_id: int): + """Delete a schedule. Associated scans are kept with scheduled_scan_id set to NULL.""" + try: + conn = get_connection() + row = conn.execute( + "SELECT id FROM scheduled_scans WHERE id = ?", (schedule_id,) + ).fetchone() + + if not row: + conn.close() + raise HTTPException(status_code=404, detail=f"Schedule not found: {schedule_id}") + + # Nullify FK in scans before deleting (SQLite FK cascade may not be set) + conn.execute( + "UPDATE scans SET scheduled_scan_id = NULL WHERE scheduled_scan_id = ?", + (schedule_id,) + ) + conn.execute("DELETE FROM scheduled_scans WHERE id = ?", (schedule_id,)) + conn.commit() + conn.close() + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to delete schedule: {e}") + + +@router_v1.post("/schedules/{schedule_id}/run-now") +async def run_schedule_now(schedule_id: int): + """Trigger a scheduled scan immediately, ignoring next_run_at.""" + try: + conn = get_connection() + row = conn.execute( + "SELECT * FROM scheduled_scans WHERE id = ?", (schedule_id,) + ).fetchone() + + if not row: + conn.close() + raise HTTPException(status_code=404, detail=f"Schedule not found: {schedule_id}") + + start_date = (date.today() + timedelta(days=1)).isoformat() + end_dt = date.today() + timedelta(days=1) + timedelta(days=30 * row['window_months']) + end_date = end_dt.isoformat() + + conn.execute(""" + INSERT INTO scans ( + origin, country, start_date, end_date, + status, seat_class, adults, scheduled_scan_id + ) VALUES (?, ?, ?, ?, 'pending', ?, ?, ?) + """, ( + row['origin'], row['country'], start_date, end_date, + row['seat_class'], row['adults'], schedule_id, + )) + conn.commit() + scan_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0] + conn.close() + + start_scan_processor(scan_id) + logging.info(f"Schedule {schedule_id}: manual run-now fired scan {scan_id}") + + return {"scan_id": scan_id} + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to run schedule: {e}") + + # ============================================================================= # Include Router (IMPORTANT!) # ============================================================================= diff --git a/flight-comparator/database/init_db.py b/flight-comparator/database/init_db.py index 77a621c..d024102 100644 --- a/flight-comparator/database/init_db.py +++ b/flight-comparator/database/init_db.py @@ -167,6 +167,29 @@ def _migrate_add_routes_unique_index(conn, verbose=True): print(" ✅ Migration complete: uq_routes_scan_dest index created") +def _migrate_add_scheduled_scan_id_to_scans(conn, verbose=True): + """ + Migration: add scheduled_scan_id column to scans table. + + Existing rows get NULL (manual scans). New column has no inline FK + declaration because SQLite's ALTER TABLE ADD COLUMN doesn't support it; + the relationship is enforced at the application level. + """ + cursor = conn.execute("PRAGMA table_info(scans)") + columns = [row[1] for row in cursor.fetchall()] + if 'scheduled_scan_id' in columns: + return # Already migrated + + if verbose: + print(" 🔄 Migrating scans table: adding scheduled_scan_id column...") + + conn.execute("ALTER TABLE scans ADD COLUMN scheduled_scan_id INTEGER") + conn.commit() + + if verbose: + print(" ✅ Migration complete: scheduled_scan_id column added to scans") + + def initialize_database(db_path=None, verbose=True): """ Initialize or migrate the database. @@ -212,6 +235,7 @@ def initialize_database(db_path=None, verbose=True): # Apply migrations before running schema _migrate_relax_country_constraint(conn, verbose) _migrate_add_routes_unique_index(conn, verbose) + _migrate_add_scheduled_scan_id_to_scans(conn, verbose) # Load and execute schema schema_sql = load_schema() diff --git a/flight-comparator/database/schema.sql b/flight-comparator/database/schema.sql index a2d9433..c39a2a3 100644 --- a/flight-comparator/database/schema.sql +++ b/flight-comparator/database/schema.sql @@ -45,6 +45,9 @@ CREATE TABLE IF NOT EXISTS scans ( seat_class TEXT DEFAULT 'economy', adults INTEGER DEFAULT 1 CHECK(adults > 0 AND adults <= 9), + -- FK to scheduled_scans (NULL for manual scans) + scheduled_scan_id INTEGER, + -- Constraints across columns CHECK(end_date >= start_date), CHECK(routes_scanned <= total_routes OR total_routes = 0) @@ -61,6 +64,10 @@ CREATE INDEX IF NOT EXISTS idx_scans_status CREATE INDEX IF NOT EXISTS idx_scans_created_at ON scans(created_at DESC); -- For recent scans query +CREATE INDEX IF NOT EXISTS idx_scans_scheduled_scan_id + ON scans(scheduled_scan_id) + WHERE scheduled_scan_id IS NOT NULL; + -- ============================================================================ -- Table: routes -- Purpose: Store discovered routes with flight statistics @@ -244,7 +251,9 @@ ORDER BY created_at ASC; -- Initial Data: None (tables start empty) -- ============================================================================ +-- ============================================================================ -- Schema version tracking (for future migrations) +-- ============================================================================ CREATE TABLE IF NOT EXISTS schema_version ( version INTEGER PRIMARY KEY, applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, @@ -254,6 +263,64 @@ CREATE TABLE IF NOT EXISTS schema_version ( INSERT OR IGNORE INTO schema_version (version, description) VALUES (1, 'Initial web app schema with scans and routes tables'); +-- ============================================================================ +-- Table: scheduled_scans +-- Purpose: Define recurring scan schedules (daily / weekly / monthly) +-- ============================================================================ +CREATE TABLE IF NOT EXISTS scheduled_scans ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + + -- Scan parameters (same as scans table) + origin TEXT NOT NULL CHECK(length(origin) = 3), + country TEXT NOT NULL CHECK(length(country) >= 2), + window_months INTEGER NOT NULL DEFAULT 1 + CHECK(window_months >= 1 AND window_months <= 12), + seat_class TEXT NOT NULL DEFAULT 'economy', + adults INTEGER NOT NULL DEFAULT 1 + CHECK(adults > 0 AND adults <= 9), + + -- Schedule definition + frequency TEXT NOT NULL + CHECK(frequency IN ('daily', 'weekly', 'monthly')), + hour INTEGER NOT NULL DEFAULT 6 + CHECK(hour >= 0 AND hour <= 23), + minute INTEGER NOT NULL DEFAULT 0 + CHECK(minute >= 0 AND minute <= 59), + day_of_week INTEGER CHECK(day_of_week >= 0 AND day_of_week <= 6), + day_of_month INTEGER CHECK(day_of_month >= 1 AND day_of_month <= 28), + + -- State + enabled INTEGER NOT NULL DEFAULT 1, + label TEXT, + last_run_at TIMESTAMP, + next_run_at TIMESTAMP NOT NULL, + + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + + -- Frequency-specific field requirements + CHECK( + (frequency = 'weekly' AND day_of_week IS NOT NULL) OR + (frequency = 'monthly' AND day_of_month IS NOT NULL) OR + (frequency = 'daily') + ) +); + +-- Fast lookup of due schedules (partial index on enabled rows only) +CREATE INDEX IF NOT EXISTS idx_scheduled_scans_next_run + ON scheduled_scans(next_run_at) + WHERE enabled = 1; + +-- Auto-update updated_at on every PATCH +CREATE TRIGGER IF NOT EXISTS update_scheduled_scans_timestamp +AFTER UPDATE ON scheduled_scans +FOR EACH ROW BEGIN + UPDATE scheduled_scans SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; +END; + +INSERT OR IGNORE INTO schema_version (version, description) +VALUES (2, 'Add scheduled_scans table'); + -- ============================================================================ -- Verification Queries (for testing) -- ============================================================================ diff --git a/flight-comparator/frontend/src/App.tsx b/flight-comparator/frontend/src/App.tsx index efcf02d..77ea4f2 100644 --- a/flight-comparator/frontend/src/App.tsx +++ b/flight-comparator/frontend/src/App.tsx @@ -3,6 +3,7 @@ import Layout from './components/Layout'; import Dashboard from './pages/Dashboard'; import Scans from './pages/Scans'; import ScanDetails from './pages/ScanDetails'; +import Schedules from './pages/Schedules'; import Airports from './pages/Airports'; import Logs from './pages/Logs'; import ErrorBoundary from './components/ErrorBoundary'; @@ -16,6 +17,7 @@ function App() { } /> } /> } /> + } /> } /> } /> diff --git a/flight-comparator/frontend/src/api.ts b/flight-comparator/frontend/src/api.ts index 2e736bc..cbb8c5f 100644 --- a/flight-comparator/frontend/src/api.ts +++ b/flight-comparator/frontend/src/api.ts @@ -23,6 +23,41 @@ export interface Scan { error_message?: string; seat_class: string; adults: number; + scheduled_scan_id?: number; +} + +export interface Schedule { + id: number; + origin: string; + country: string; + window_months: number; + seat_class: string; + adults: number; + label?: string; + frequency: 'daily' | 'weekly' | 'monthly'; + hour: number; + minute: number; + day_of_week?: number; + day_of_month?: number; + enabled: boolean; + last_run_at?: string; + next_run_at: string; + created_at: string; + recent_scan_ids: number[]; +} + +export interface CreateScheduleRequest { + origin: string; + country: string; + window_months?: number; + seat_class?: string; + adults?: number; + label?: string; + frequency: 'daily' | 'weekly' | 'monthly'; + hour?: number; + minute?: number; + day_of_week?: number; + day_of_month?: number; } export interface Route { @@ -135,6 +170,26 @@ export const airportApi = { }, }; +export const scheduleApi = { + list: (page = 1, limit = 20) => + api.get>('/schedules', { params: { page, limit } }), + + get: (id: number) => + api.get(`/schedules/${id}`), + + create: (data: CreateScheduleRequest) => + api.post('/schedules', data), + + update: (id: number, data: Partial & { enabled?: boolean }) => + api.patch(`/schedules/${id}`, data), + + delete: (id: number) => + api.delete(`/schedules/${id}`), + + runNow: (id: number) => + api.post<{ scan_id: number }>(`/schedules/${id}/run-now`), +}; + export const logApi = { list: (page = 1, limit = 50, level?: string, search?: string) => { const params: any = { page, limit }; diff --git a/flight-comparator/frontend/src/components/Layout.tsx b/flight-comparator/frontend/src/components/Layout.tsx index d58cb3b..cb3e24d 100644 --- a/flight-comparator/frontend/src/components/Layout.tsx +++ b/flight-comparator/frontend/src/components/Layout.tsx @@ -7,6 +7,7 @@ import { ScrollText, PlaneTakeoff, Plus, + CalendarClock, } from 'lucide-react'; import { cn } from '../lib/utils'; @@ -18,8 +19,9 @@ type NavItem = { const PRIMARY_NAV: NavItem[] = [ { icon: LayoutDashboard, label: 'Dashboard', path: '/' }, - { icon: ScanSearch, label: 'Scans', path: '/scans' }, - { icon: MapPin, label: 'Airports', path: '/airports' }, + { icon: ScanSearch, label: 'Scans', path: '/scans' }, + { icon: CalendarClock, label: 'Schedules', path: '/schedules' }, + { icon: MapPin, label: 'Airports', path: '/airports' }, ]; const SECONDARY_NAV: NavItem[] = [ @@ -32,6 +34,7 @@ function getPageTitle(pathname: string): string { if (pathname === '/') return 'Dashboard'; if (pathname.startsWith('/scans/')) return 'Scan Details'; if (pathname === '/scans') return 'New Scan'; + if (pathname === '/schedules') return 'Schedules'; if (pathname === '/airports') return 'Airports'; if (pathname === '/logs') return 'Logs'; return 'Flight Radar'; diff --git a/flight-comparator/frontend/src/pages/ScanDetails.tsx b/flight-comparator/frontend/src/pages/ScanDetails.tsx index 4fec1a9..c2f26b0 100644 --- a/flight-comparator/frontend/src/pages/ScanDetails.tsx +++ b/flight-comparator/frontend/src/pages/ScanDetails.tsx @@ -1,9 +1,10 @@ import { Fragment, useEffect, useState } from 'react'; -import { useParams, useNavigate } from 'react-router-dom'; +import { useParams, useNavigate, Link } from 'react-router-dom'; import { ArrowLeft, PlaneTakeoff, Calendar, + CalendarClock, Users, Armchair, Clock, @@ -221,6 +222,16 @@ export default function ScanDetails() {

{scan.origin} → {scan.country}

+ {scan.scheduled_scan_id != null && ( + +