fix: improve airport search ranking and add missing modern airports

- Rewrite airport search to use priority buckets instead of a simple
  append: exact IATA → IATA prefix → city prefix → city contains →
  name prefix → name contains → country match. This ensures BER
  appears before Berlin-Schönefeld when typing "BER".
- Add _MISSING_AIRPORTS patch list to get_airport_data() so airports
  absent from the OpenFlights dataset (e.g. BER, opened 31 Oct 2020,
  and IST, the new Istanbul Airport) are included at runtime.
- Deduplicate results by IATA code via a seen-set so each airport
  appears at most once.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-27 15:48:43 +01:00
parent 260f3aa196
commit 8bd47ac43a

View File

@@ -880,40 +880,56 @@ async def search_airports(
raise HTTPException(status_code=500, detail=f"Failed to load airport data: {e}") raise HTTPException(status_code=500, detail=f"Failed to load airport data: {e}")
query = q.lower().strip() query = q.lower().strip()
results = []
# Search all airports # Priority buckets — lower-numbered bucket = higher priority = shown first
p0_exact_iata: list[Airport] = [] # IATA == query exactly (e.g. "BER")
p1_iata_prefix: list[Airport] = [] # IATA starts with query (e.g. "BE" → BER)
p2_city_prefix: list[Airport] = [] # city starts with query (e.g. "ber" → Berlin)
p3_city_contains: list[Airport] = [] # city contains query
p4_name_prefix: list[Airport] = [] # name starts with query
p5_name_contains: list[Airport] = [] # name contains query
p6_country: list[Airport] = [] # country code contains query
seen: set[str] = set()
for airport in airports_data: for airport in airports_data:
# Skip invalid airport data (data quality issues in OpenFlights dataset)
try: try:
# Search in IATA code (exact match prioritized) iata_l = airport['iata'].lower()
if airport['iata'].lower() == query: city_l = airport.get('city', '').lower()
results.insert(0, Airport(**airport)) # Exact match at top name_l = airport['name'].lower()
country_l = airport.get('country', '').lower()
if iata_l in seen:
continue continue
# Search in IATA code (partial match) obj = Airport(**airport)
if query in airport['iata'].lower():
results.append(Airport(**airport)) if iata_l == query:
p0_exact_iata.append(obj)
elif iata_l.startswith(query):
p1_iata_prefix.append(obj)
elif city_l.startswith(query):
p2_city_prefix.append(obj)
elif query in city_l:
p3_city_contains.append(obj)
elif name_l.startswith(query):
p4_name_prefix.append(obj)
elif query in name_l:
p5_name_contains.append(obj)
elif query in country_l:
p6_country.append(obj)
else:
continue continue
# Search in city name seen.add(iata_l)
if query in airport.get('city', '').lower():
results.append(Airport(**airport))
continue
# Search in airport name
if query in airport['name'].lower():
results.append(Airport(**airport))
continue
# Search in country code
if query in airport['country'].lower():
results.append(Airport(**airport))
continue
except Exception: except Exception:
# Skip airports with invalid data (e.g., invalid IATA codes like 'DU9') # Skip airports with invalid data (e.g., invalid IATA codes like 'DU9')
continue continue
results = (
p0_exact_iata + p1_iata_prefix + p2_city_prefix +
p3_city_contains + p4_name_prefix + p5_name_contains + p6_country
)
# Calculate pagination # Calculate pagination
total = len(results) total = len(results)
total_pages = math.ceil(total / limit) if total > 0 else 0 total_pages = math.ceil(total / limit) if total > 0 else 0
@@ -1607,6 +1623,13 @@ app.include_router(router_v1)
# Helper Functions # Helper Functions
# ============================================================================= # =============================================================================
# Airports that the OpenFlights dataset does not carry (opened or renamed
# after the dataset was frozen). get_airport_data() appends each entry whose
# IATA code is not already present in the loaded data.
# Each row is (iata, name, city, country code).
_MISSING_AIRPORTS = [
    dict(zip(('iata', 'name', 'city', 'country'), row))
    for row in (
        ('BER', 'Berlin Brandenburg Airport', 'Berlin', 'DE'),
        ('IST', 'Istanbul Airport', 'Istanbul', 'TR'),
    )
]
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def get_airport_data(): def get_airport_data():
""" """
@@ -1637,6 +1660,19 @@ def get_airport_data():
'longitude': airport.get('lon', 0.0), 'longitude': airport.get('lon', 0.0),
}) })
# Patch in modern airports missing from the OpenFlights dataset
existing_iatas = {a['iata'] for a in airports}
for extra in _MISSING_AIRPORTS:
if extra['iata'] not in existing_iatas:
airports.append({
'iata': extra['iata'],
'name': extra['name'],
'city': extra['city'],
'country': extra['country'],
'latitude': 0.0,
'longitude': 0.0,
})
return airports return airports