from fastapi import FastAPI, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
import requests
import httpx
import asyncio
from geopy.geocoders import Nominatim
import geopy.distance
from cachetools import TTLCache
import os
from dotenv import load_dotenv
from random import sample

from backend.utils import generate_circle_centers, fetch_url

load_dotenv()

app = FastAPI()
loc = Nominatim(user_agent="GetLoc")


class Geodistance(BaseModel):
    lat1: float = Field(..., ge=-90, le=90)
    lon1: float = Field(..., ge=-180, le=180)
    lat2: float = Field(..., ge=-90, le=90)
    lon2: float = Field(..., ge=-180, le=180)
    unit: str = "km"


class NearbyWikiPage(BaseModel):
    lat: float = Field(default=54.163337, ge=-90, le=90)
    lon: float = Field(default=37.561109, ge=-180, le=180)
    radius: int = Field(default=1000, ge=10, le=100_000, description="Distance in meters from the reference point")
    limit: int = Field(10, ge=1, description="Number of pages to return")


app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Replace with your frontend domain in prod
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

BACKEND_WIKI_CACHE_TTL = int(os.getenv("BACKEND_WIKI_CACHE_TTL", 300))
# TTL is in seconds; cache entries expire after that interval.
summary_cache = TTLCache(maxsize=100, ttl=BACKEND_WIKI_CACHE_TTL)
full_page_cache = TTLCache(maxsize=100, ttl=BACKEND_WIKI_CACHE_TTL)


@app.get("/")
def health_check():
    return {"status": "ok"}


@app.get("/wiki/search/summary/{summary_page_name}")
async def get_wiki_summary(summary_page_name: str, background_tasks: BackgroundTasks):
    """
    Fetches the summary of a Wikipedia page along with its geographical
    coordinates, and caches the result in an ephemeral in-memory cache in
    the background.

    Input:
        summary_page_name: str - Name of the Wikipedia page to fetch the summary for.
    Output:
        {"title": "Page Title", "content": "Summary content here", "latitude": float, "longitude": float}
    """
    if summary_page_name in summary_cache:
        return JSONResponse(content=summary_cache[summary_page_name], status_code=200)
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{summary_page_name}",
                timeout=10,
            )
        if response.status_code != 200:
            return JSONResponse(content={"error": "Page not found"}, status_code=404)
        try:
            # Nominatim's geocode() is blocking; run it off the event loop.
            coords = await asyncio.to_thread(loc.geocode, summary_page_name, timeout=5)
        except Exception:
            coords = None
        result = {
            "title": summary_page_name,
            "content": response.json().get("extract", "No content available"),
            "latitude": coords.latitude if coords else None,
            "longitude": coords.longitude if coords else None,
        }
        background_tasks.add_task(summary_cache.__setitem__, summary_page_name, result)
        return JSONResponse(content=result, status_code=200)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)
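
# Sketch only: `fetch_url` is imported from backend.utils, which is not shown in
# this file. Based on how the /wiki/nearby endpoint below consumes its result
# (a dict with a "data" key holding parsed JSON), a compatible implementation
# might look like this; the name suffix marks it as a hypothetical illustration.
async def _fetch_url_sketch(client: httpx.AsyncClient, url: str) -> dict:
    try:
        response = await client.get(url, timeout=10)
        return {"url": url, "data": response.json()}
    except Exception:
        # Swallow per-request failures so one bad tile does not sink the batch.
        return {"url": url, "data": {}}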
Output: {"title": "Page Title", "content": "Full content here", "latitude": float, "longitude": float} """ if full_page in full_page_cache: # print("Cache hit for full_page:", full_page) #Working return JSONResponse(content=full_page_cache[full_page], status_code=200) async with httpx.AsyncClient() as client: response = await client.get(f"https://en.wikipedia.org/wiki/{full_page}", timeout=10) try: if response.status_code != 200: return JSONResponse( content={"error": "Page not found"}, status_code=404 ) try: coords = loc.geocode(full_page, timeout=5) except Exception as e: coords = None result = { "title": full_page, "content": str(response.text), "latitude": coords.latitude if coords else None, "longitude": coords.longitude if coords else None } background_tasks.add_task(lambda: full_page_cache.__setitem__(full_page, result)) return JSONResponse( content= result, status_code=200 ) except Exception as e: return JSONResponse( content={"error": str(e), 'response': str(response)}, status_code=500 ) @app.post("/geodistance") def get_geodistance(payload: Geodistance): """ Input: "lat1", "lon1", "lat2", "lon2", "unit (km/mi)" Output: {"distance": float, "unit": str, "lat1": float, "lon1": float, "lat2": float, "lon2": float} """ lat1, lon1 = payload.lat1, payload.lon1 lat2, lon2 = payload.lat2, payload.lon2 unit = payload.unit try: distance_km = geopy.distance.distance((lat1, lon1), (lat2, lon2)).km if unit == "km": distance = distance_km elif unit == "mi": distance = distance_km * 0.621371 else: return JSONResponse( content={"error": "Invalid unit"}, status_code=400 ) except Exception as e: return JSONResponse( content={"error": str(e)}, status_code=500 ) return JSONResponse( content={ "distance": distance, "unit": unit, "lat1": lat1, "lon1": lon1, "lat2": lat2, "lon2": lon2 }, status_code=200 ) @app.post("/wiki/nearby") async def get_nearby_wiki_pages(payload: NearbyWikiPage): """ Returns a list of wikipedia pages whose geographical coordinates are within a specified radius from a given location. Input: - lat: Latitude of the reference point - lon: Longitude of the reference point - radius: Radius in meters within which to search for pages - limit: Maximum number of pages to return Output: { "pages": [ { "pageid": 123456, "title": "Page Title", "lat": 54.163337, "lon": 37.561109, "dist": 123.45 # Dist. in meters from the reference point ... }, ... ], "count": 10 #Total no. 
@app.post("/wiki/nearby")
async def get_nearby_wiki_pages(payload: NearbyWikiPage):
    """
    Returns a list of Wikipedia pages whose geographical coordinates are within
    a specified radius of a given location.

    Input:
        - lat: Latitude of the reference point
        - lon: Longitude of the reference point
        - radius: Radius in meters within which to search for pages
        - limit: Maximum number of pages to return
    Output:
        {
            "pages": [
                {
                    "pageid": 123456,
                    "title": "Page Title",
                    "lat": 54.163337,
                    "lon": 37.561109,
                    "dist": 123.45,  # Distance in meters from the reference point
                    ...
                },
                ...
            ],
            "count": 10  # Total number of such pages
        }
    Example raw response from the Wikipedia API:
        https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord=40.7128%7C-74.0060&gsradius=10000&gslimit=1&format=json
    """
    lat_center, lon_center = payload.lat, payload.lon
    radius = payload.radius
    limit = payload.limit
    wiki_geosearch_radius_limit_meters = 10000  # Wikipedia caps the geosearch radius at 10 km

    if radius <= wiki_geosearch_radius_limit_meters:
        # A single geosearch request covers the whole radius.
        url = ("https://en.wikipedia.org/w/api.php"
               "?action=query"
               "&list=geosearch"
               f"&gscoord={lat_center}|{lon_center}"
               f"&gsradius={radius}"
               f"&gslimit={limit}"
               "&format=json")
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(url, timeout=10)
            if response.status_code != 200:
                return JSONResponse(content={"error": "Failed to fetch nearby pages"}, status_code=500)
            data = response.json()
            pages = data.get("query", {}).get("geosearch", [])
            if len(pages) > limit:
                pages = sample(pages, limit)
            return JSONResponse(content={"pages": pages, "count": len(pages)}, status_code=200)
        except Exception as e:
            return JSONResponse(content={"error": str(e)}, status_code=500)
    else:
        # Radius exceeds the API cap: tile the large circle with 10 km circles
        # and query every tile concurrently.
        all_pages = []
        small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
        base_url = ("https://en.wikipedia.org/w/api.php?action=query&list=geosearch"
                    "&gscoord={lat}|{lon}&gsradius={radius_m}&gslimit={page_limit}&format=json")
        urls = [
            base_url.format(lat=center[0], lon=center[1],
                            radius_m=wiki_geosearch_radius_limit_meters, page_limit=100)
            for center in small_circle_centers
        ]
        print("URL Counts:", len(urls))  # debug
        try:
            async with httpx.AsyncClient() as client:
                tasks = [fetch_url(client, url) for url in urls]
                results = await asyncio.gather(*tasks)
            for result in results:
                for page in result.get("data", {}).get("query", {}).get("geosearch", []):
                    lat, lon = page.get("lat"), page.get("lon")
                    if lat is None or lon is None:
                        continue
                    dist = int(geopy.distance.distance((lat_center, lon_center), (lat, lon)).m)
                    # Tiles extend past the requested radius, so filter by true distance.
                    if dist > radius:
                        continue
                    all_pages.append({**page, "dist": dist})
            if len(all_pages) > limit:
                all_pages = sample(all_pages, limit)
            return JSONResponse(content={"pages": all_pages, "count": len(all_pages)}, status_code=200)
        except Exception as e:
            return JSONResponse(content={"error": str(e)}, status_code=500)


@app.get("/random")
def random_nearby():
    """Returns up to 10 geosearch results around a fixed reference point."""
    url = ("https://en.wikipedia.org/w/api.php?action=query&list=geosearch"
           "&gscoord=54.163337|37.561109&gsradius=10000&gslimit=10&format=json")
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        return JSONResponse(content={"error": "Failed to fetch random page"}, status_code=500)
    data = response.json()
    pages = data.get("query", {}).get("geosearch", [])
    if not pages:
        return JSONResponse(content={"error": "No pages found"}, status_code=404)
    return JSONResponse(content={"pages": pages, "count": len(pages)}, status_code=200)
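
# Hypothetical local entry point; the project may instead be launched from the
# CLI, e.g. `uvicorn backend.main:app --reload` (module path assumed from the
# `backend.utils` import above).
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)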