from fastapi import FastAPI, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
import asyncio
import os
from random import sample

import httpx
import requests
import geopy.distance
from geopy.geocoders import Nominatim
from cachetools import TTLCache
from dotenv import load_dotenv

from backend.utils import generate_circle_centers, fetch_url

load_dotenv()

app = FastAPI()
loc = Nominatim(user_agent="GetLoc")
class Geodistance(BaseModel):
    """Two coordinate pairs plus the unit ("km" or "mi") for the returned distance."""
    lat1: float = Field(..., ge=-90, le=90)
    lon1: float = Field(..., ge=-180, le=180)
    lat2: float = Field(..., ge=-90, le=90)
    lon2: float = Field(..., ge=-180, le=180)
    unit: str = "km"
class NearbyWikiPage(BaseModel):
    """Reference point, search radius, and result limit for the geosearch."""
    lat: float = Field(default=54.163337, ge=-90, le=90)
    lon: float = Field(default=37.561109, ge=-180, le=180)
    radius: int = Field(default=1000, ge=10, le=100_000, description="Distance in meters from the reference point")
    limit: int = Field(10, ge=1, description="Number of pages to return")
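# A quick illustration of how the model above validates input; the values are
# hypothetical and chosen only to show a passing and a failing case:
#
#     payload = NearbyWikiPage(lat=40.7128, lon=-74.0060, radius=5000, limit=5)
#     payload.radius             # -> 5000
#     NearbyWikiPage(lat=200)    # raises pydantic.ValidationError (lat must be <= 90)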
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Replace with your frontend domain in prod
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
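# For the production lockdown suggested above, one minimal sketch is to read the
# allowed origins from an environment variable; FRONTEND_ORIGINS is an assumed
# name, not something this project defines:
#
#     allow_origins=os.getenv("FRONTEND_ORIGINS", "*").split(",")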
BACKEND_WIKI_CACHE_TTL = int(os.getenv("BACKEND_WIKI_CACHE_TTL", 300))
summary_cache = TTLCache(maxsize=100, ttl=BACKEND_WIKI_CACHE_TTL)  # entries expire ttl seconds after insertion
full_page_cache = TTLCache(maxsize=100, ttl=BACKEND_WIKI_CACHE_TTL)
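# TTLCache behaves like a dict whose entries disappear after `ttl` seconds; the
# key below is hypothetical and only illustrates the eviction behavior:
#
#     summary_cache["Paris"] = {"title": "Paris"}
#     "Paris" in summary_cache     # True right away
#     # ...BACKEND_WIKI_CACHE_TTL seconds later...
#     "Paris" in summary_cache     # False: the entry has expired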
def health_check():
    return {"status": "ok"}
async def get_wiki_summary(summary_page_name: str, background_tasks: BackgroundTasks):
    """
    Fetch the summary of a Wikipedia page along with its geographical coordinates.
    The result is cached in an ephemeral in-memory cache via a background task.
    Input: summary_page_name: str - Name of the Wikipedia page to fetch the summary for.
    Output: {"title": "Page Title", "content": "Summary content here", "latitude": float, "longitude": float}
    """
    if summary_page_name in summary_cache:
        return JSONResponse(content=summary_cache[summary_page_name], status_code=200)
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{summary_page_name}",
                timeout=10,
            )
        if response.status_code != 200:
            return JSONResponse(
                content={"error": "Page not found"},
                status_code=404
            )
        try:
            # Nominatim's geocode is blocking; run it in a worker thread so it
            # does not stall the event loop.
            coords = await asyncio.to_thread(loc.geocode, summary_page_name, timeout=5)
        except Exception:
            coords = None
        result = {
            "title": summary_page_name,
            "content": response.json().get("extract", "No content available"),
            "latitude": coords.latitude if coords else None,
            "longitude": coords.longitude if coords else None
        }
        background_tasks.add_task(summary_cache.__setitem__, summary_page_name, result)
        return JSONResponse(
            content=result,
            status_code=200
        )
    except Exception as e:
        # `response` may be unbound if the request itself failed, so only the
        # exception is reported here.
        return JSONResponse(
            content={"error": str(e)},
            status_code=500
        )
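# Example call, assuming the summary route sketched earlier; the coordinates in
# the response are illustrative, and the client is expected to URL-encode the
# page title:
#
#     GET /wiki/summary/Eiffel_Tower
#     -> {"title": "Eiffel_Tower", "content": "...", "latitude": 48.85, "longitude": 2.29}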
async def search_wiki_full_page(full_page: str, background_tasks: BackgroundTasks):
    """
    Fetch the full content of a Wikipedia page along with its geographical coordinates.
    The result is cached in an ephemeral in-memory cache via a background task.
    Input: full_page: str - Name of the Wikipedia page to fetch the full content for.
    Output: {"title": "Page Title", "content": "Full content here", "latitude": float, "longitude": float}
    """
    if full_page in full_page_cache:
        return JSONResponse(content=full_page_cache[full_page], status_code=200)
    try:
        # The request lives inside the try block so a timeout or connection
        # error is reported as a 500 instead of propagating unhandled.
        async with httpx.AsyncClient() as client:
            response = await client.get(f"https://en.wikipedia.org/wiki/{full_page}", timeout=10)
        if response.status_code != 200:
            return JSONResponse(
                content={"error": "Page not found"},
                status_code=404
            )
        try:
            coords = await asyncio.to_thread(loc.geocode, full_page, timeout=5)
        except Exception:
            coords = None
        result = {
            "title": full_page,
            "content": response.text,
            "latitude": coords.latitude if coords else None,
            "longitude": coords.longitude if coords else None
        }
        background_tasks.add_task(full_page_cache.__setitem__, full_page, result)
        return JSONResponse(
            content=result,
            status_code=200
        )
    except Exception as e:
        return JSONResponse(
            content={"error": str(e)},
            status_code=500
        )
def get_geodistance(payload: Geodistance):
    """
    Input: "lat1", "lon1", "lat2", "lon2", "unit" ("km" or "mi")
    Output: {"distance": float, "unit": str, "lat1": float, "lon1": float, "lat2": float, "lon2": float}
    """
    lat1, lon1 = payload.lat1, payload.lon1
    lat2, lon2 = payload.lat2, payload.lon2
    unit = payload.unit
    try:
        distance_km = geopy.distance.distance((lat1, lon1), (lat2, lon2)).km
        if unit == "km":
            distance = distance_km
        elif unit == "mi":
            distance = distance_km * 0.621371
        else:
            return JSONResponse(
                content={"error": "Invalid unit"},
                status_code=400
            )
    except Exception as e:
        return JSONResponse(
            content={"error": str(e)},
            status_code=500
        )
    return JSONResponse(
        content={
            "distance": distance,
            "unit": unit,
            "lat1": lat1,
            "lon1": lon1,
            "lat2": lat2,
            "lon2": lon2
        },
        status_code=200
    )
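# geopy.distance.distance computes the geodesic (WGS-84) distance between two
# (lat, lon) pairs; a quick illustration with two well-known city centers:
#
#     geopy.distance.distance((40.7128, -74.0060), (51.5074, -0.1278)).km
#     # -> roughly 5570 km New York to London (about 3461 mi via the 0.621371 factor above)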
async def get_nearby_wiki_pages(payload: NearbyWikiPage):
    """
    Returns a list of Wikipedia pages whose geographical coordinates lie within a specified radius of a given location.
    Input:
    - lat: Latitude of the reference point
    - lon: Longitude of the reference point
    - radius: Radius in meters within which to search for pages
    - limit: Maximum number of pages to return
    Output:
    {
        "pages": [
            {
                "pageid": 123456,
                "title": "Page Title",
                "lat": 54.163337,
                "lon": 37.561109,
                "dist": 123.45  # Distance in meters from the reference point
                ...
            },
            ...
        ],
        "count": 10  # Total number of such pages
    }
    Example raw response from the Wikipedia API: https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord=40.7128%7C-74.0060&gsradius=10000&gslimit=1&format=json
    """
    lat_center, lon_center = payload.lat, payload.lon
    radius = payload.radius
    limit = payload.limit
    wiki_geosearch_radius_limit_meters = 10000  # Wikipedia API limit for the geosearch radius, in meters
    if radius <= wiki_geosearch_radius_limit_meters:
        url = ("https://en.wikipedia.org/w/api.php"
               "?action=query"
               "&list=geosearch"
               f"&gscoord={lat_center}|{lon_center}"
               f"&gsradius={radius}"
               f"&gslimit={limit}"
               "&format=json")
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(url, timeout=10)
            if response.status_code != 200:
                return JSONResponse(
                    content={"error": "Failed to fetch nearby pages"},
                    status_code=500
                )
            data = response.json()
            pages = data.get("query", {}).get("geosearch", [])
            if len(pages) > limit:
                pages = sample(pages, limit)
            return JSONResponse(
                content={
                    "pages": pages,
                    "count": len(pages)
                },
                status_code=200
            )
        except Exception as e:
            return JSONResponse(
                content={"error": str(e)},
                status_code=500
            )
    else:
        # The requested radius exceeds the API limit, so tile the large circle
        # with 10 km circles and query each one concurrently.
        all_pages = []
        small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
        base_url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord={lat}|{lon}&gsradius={radius_m}&gslimit={page_limit}&format=json"
        urls = [base_url.format(lat=center[0], lon=center[1], radius_m=wiki_geosearch_radius_limit_meters, page_limit=100) for center in small_circle_centers]
        try:
            async with httpx.AsyncClient() as client:
                tasks = [fetch_url(client, url) for url in urls]
                results = await asyncio.gather(*tasks)
            seen_pageids = set()
            for result in results:
                for unit in result.get("data", {}).get("query", {}).get("geosearch", []):
                    pageid = unit.get("pageid")
                    if pageid in seen_pageids:
                        continue  # the covering circles overlap, so a page can appear in several responses
                    seen_pageids.add(pageid)
                    lat, lon = unit.get("lat"), unit.get("lon")
                    if lat is not None and lon is not None:
                        dist = int(geopy.distance.distance(
                            (lat_center, lon_center), (lat, lon)
                        ).m)
                    else:
                        dist = None
                    # Drop pages without coordinates and pages outside the
                    # requested radius (the tiling circles spill past its edge).
                    if dist is None or dist > radius:
                        continue
                    all_pages.append({**unit, "dist": dist})
            if len(all_pages) > limit:
                all_pages = sample(all_pages, limit)
            return JSONResponse(
                content={
                    "pages": all_pages,
                    "count": len(all_pages)
                },
                status_code=200
            )
        except Exception as e:
            return JSONResponse(
                content={"error": str(e)},
                status_code=500
            )
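# generate_circle_centers is imported from backend.utils and not shown in this
# file. A minimal sketch of what such a covering-grid helper could look like
# (an assumption about its behavior, not the project's actual implementation):
#
#     import math
#     def generate_circle_centers(lat, lon, big_radius_km, small_radius_km=10):
#         """Return (lat, lon) centers of small circles that cover the big circle."""
#         step = small_radius_km * math.sqrt(2)  # a radius-r circle covers a square of side r*sqrt(2)
#         n = math.ceil(big_radius_km / step)
#         km_per_deg_lat = 111.0                 # rough conversion, fine at this scale
#         km_per_deg_lon = 111.0 * math.cos(math.radians(lat))
#         return [(lat + i * step / km_per_deg_lat, lon + j * step / km_per_deg_lon)
#                 for i in range(-n, n + 1) for j in range(-n, n + 1)
#                 if math.hypot(i * step, j * step) <= big_radius_km + small_radius_km]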
def random():
    """Return up to 10 geotagged pages near a fixed demo location (the model's default coordinates)."""
    url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord=54.163337|37.561109&gsradius=10000&gslimit=10&format=json"
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        return JSONResponse(
            content={"error": "Failed to fetch random page"},
            status_code=500
        )
    data = response.json()
    pages = data.get("query", {}).get("geosearch", [])
    if not pages:
        return JSONResponse(
            content={"error": "No pages found"},
            status_code=404
        )
    return JSONResponse(
        content={
            "pages": pages,
            "count": len(pages)
        },
        status_code=200
    )
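# To serve the app locally, one minimal sketch (host and port are assumptions;
# adjust the module path to where this file actually lives):
#
#     if __name__ == "__main__":
#         import uvicorn
#         uvicorn.run(app, host="0.0.0.0", port=8000)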