from fastapi import FastAPI, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
import httpx
import asyncio
from geopy.geocoders import Nominatim
import geopy.distance
from cachetools import TTLCache
import os
from dotenv import load_dotenv
from random import sample
from backend.utils import generate_circle_centers, fetch_url

load_dotenv()

app = FastAPI()

loc = Nominatim(user_agent="GetLoc")

class Geodistance(BaseModel):
    lat1: float = Field(..., ge=-90, le=90)
    lon1: float = Field(..., ge=-180, le=180)
    lat2: float = Field(..., ge=-90, le=90)
    lon2: float = Field(..., ge=-180, le=180)
    unit: str = "km"

class NearbyWikiPage(BaseModel):
    lat: float = Field(default=54.163337, ge=-90, le=90)
    lon: float = Field(default=37.561109, ge=-180, le=180)
    radius: int = Field(default=1000, ge=10, le=100_000, description="Distance in meters from the reference point")
    limit: int = Field(10, ge=1, description="Number of pages to return")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Replace with your frontend domain in prod
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

BACKEND_WIKI_CACHE_TTL = int(os.getenv("BACKEND_WIKI_CACHE_TTL", 300))
summary_cache = TTLCache(maxsize=100, ttl=BACKEND_WIKI_CACHE_TTL)  # entries expire TTL seconds after insertion
full_page_cache = TTLCache(maxsize=100, ttl=BACKEND_WIKI_CACHE_TTL)
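
# A minimal sketch of the TTLCache semantics used above (values are illustrative):
#
#   cache = TTLCache(maxsize=2, ttl=1)
#   cache["k"] = 1          # stored
#   time.sleep(1.1)         # wait past the 1-second TTL
#   "k" in cache            # False: the entry has expired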

@app.get("/")
def health_check():
    return {"status": "ok"}

@app.get("/wiki/search/summary/{summary_page_name}")
async def get_wiki_summary(summary_page_name: str, background_tasks: BackgroundTasks):
    """

        This function fetches the summary of a Wikipedia page along with its geographical coordinates.

        It also caches the result in ephemeral in-memory cache in the background.

        Input: summary_page_name: str - Name of the Wikipedia page to fetch summary for.

        Output: {"title": "Page Title", "content": "Summary content here", "latitude": float, "longitude": float9}

    """
    if summary_page_name in summary_cache:
        return JSONResponse(content=summary_cache[summary_page_name], status_code=200)

    response = None
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(f"https://en.wikipedia.org/api/rest_v1/page/summary/{summary_page_name}", timeout=10)

        if response.status_code != 200:
            return JSONResponse(
                content={"error": "Page not found"},
                status_code=404
            )
        try:
            # Nominatim's geocode() blocks; run it in a thread so the event loop stays responsive.
            coords = await asyncio.to_thread(loc.geocode, summary_page_name, timeout=5)
        except Exception:
            coords = None

        result = {
            "title": summary_page_name,
            "content": response.json().get("extract", "No content available"),
            "latitude": coords.latitude if coords else None,
            "longitude": coords.longitude if coords else None
        }

        # Bind the arguments explicitly rather than capturing them in a lambda.
        background_tasks.add_task(summary_cache.__setitem__, summary_page_name, result)

        return JSONResponse(
            content=result,
            status_code=200
        )
    except Exception as e:
        return JSONResponse(
            content={"error": str(e), "response": str(response)},
            status_code=500
        )
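
# Hedged usage sketch for the summary endpoint (the host and port are assumptions
# for a local dev server, not defined in this file):
#
#   import httpx
#   r = httpx.get("http://localhost:8000/wiki/search/summary/Eiffel_Tower", timeout=10)
#   data = r.json()
#   print(data["title"], data["latitude"], data["longitude"])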

@app.get("/wiki/search/full/{full_page}")
async def search_wiki_full_page(full_page: str, background_tasks: BackgroundTasks):
    """

        This function fetches the full content of a Wikipedia page along with its geographical coordinates. 

        It also caches the result in ephemeral in-memory cache in the background.

        Input: full_page: str - Name of the Wikipedia page to fetch full content for.

        Output: {"title": "Page Title", "content": "Full content here", "latitude": float, "longitude": float}

    """
    if full_page in full_page_cache:
        return JSONResponse(content=full_page_cache[full_page], status_code=200)

    response = None
    try:
        # Keep the request inside the try block so network errors reach the handler below.
        async with httpx.AsyncClient() as client:
            response = await client.get(f"https://en.wikipedia.org/wiki/{full_page}", timeout=10)

        if response.status_code != 200:
            return JSONResponse(
                content={"error": "Page not found"},
                status_code=404
            )
        try:
            # Nominatim's geocode() blocks; run it in a thread so the event loop stays responsive.
            coords = await asyncio.to_thread(loc.geocode, full_page, timeout=5)
        except Exception:
            coords = None

        result = {
            "title": full_page,
            "content": response.text,
            "latitude": coords.latitude if coords else None,
            "longitude": coords.longitude if coords else None
        }

        background_tasks.add_task(full_page_cache.__setitem__, full_page, result)

        return JSONResponse(
            content=result,
            status_code=200
        )

    except Exception as e:
        return JSONResponse(
            content={"error": str(e), "response": str(response)},
            status_code=500
        )
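
# Note that "content" here is the page's raw HTML rather than plain text. A hedged
# client-side sketch for extracting text (BeautifulSoup is an assumption, not a
# dependency of this service):
#
#   from bs4 import BeautifulSoup
#   html = httpx.get("http://localhost:8000/wiki/search/full/Eiffel_Tower", timeout=10).json()["content"]
#   text = BeautifulSoup(html, "html.parser").get_text(" ", strip=True)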


@app.post("/geodistance")
def get_geodistance(payload: Geodistance):
    """

        Input: "lat1", "lon1", "lat2", "lon2", "unit (km/mi)"

        Output: {"distance": float, "unit": str, "lat1": float, "lon1": float, "lat2": float, "lon2": float}

    """
    lat1, lon1 = payload.lat1, payload.lon1
    lat2, lon2 = payload.lat2, payload.lon2
    unit = payload.unit

    try:
        distance_km = geopy.distance.distance((lat1, lon1), (lat2, lon2)).km
        if unit == "km":
            distance = distance_km
        elif unit == "mi":
            distance = distance_km * 0.621371
        else:
            return JSONResponse(
                content={"error": "Invalid unit"},
                status_code=400
            )
        
    except Exception as e:
        return JSONResponse(
            content={"error": str(e)},
            status_code=500
        )
    return JSONResponse(
        content={
            "distance": distance,
            "unit": unit,
            "lat1": lat1,
            "lon1": lon1,
            "lat2": lat2,
            "lon2": lon2
        },
        status_code=200
    )
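
# Worked example (a sketch; localhost:8000 is an assumption): Paris to London is
# roughly 340 km geodesic, so unit="mi" would return about 340 * 0.621371 ≈ 211 mi.
#
#   payload = {"lat1": 48.8566, "lon1": 2.3522, "lat2": 51.5074, "lon2": -0.1278, "unit": "km"}
#   r = httpx.post("http://localhost:8000/geodistance", json=payload, timeout=10)
#   print(r.json()["distance"])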



    
@app.post("/wiki/nearby")
async def get_nearby_wiki_pages(payload: NearbyWikiPage):
    """

    Returns a list of wikipedia pages whose geographical coordinates are within a specified radius from a given location.

    Input:

    - lat: Latitude of the reference point

    - lon: Longitude of the reference point

    - radius: Radius in meters within which to search for pages

    - limit: Maximum number of pages to return



    Output:

        {

            "pages": [

                {

                    "pageid": 123456,

                    "title": "Page Title",

                    "lat": 54.163337,

                    "lon": 37.561109,

                    "dist": 123.45  # Dist. in meters from the reference point

                    ...

                },

                ...

            ],

            "count": 10 #Total no. of such pages

        }

    Example raw respone from Wikipedia API: https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord=40.7128%7C-74.0060&gsradius=10000&gslimit=1&format=json

    """
    lat_center, lon_center = payload.lat, payload.lon
    radius = payload.radius
    limit = payload.limit

    wiki_geosearch_radius_limit_meters = 10000 # Wikipedia API limit for geosearch radius in meters

    if radius <= wiki_geosearch_radius_limit_meters:
        url = ("https://en.wikipedia.org/w/api.php"+"?action=query"
                "&list=geosearch"
                f"&gscoord={lat_center}|{lon_center}"
                f"&gsradius={radius}"
                f"&gslimit={limit}"
                "&format=json")

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(url, timeout=10)
            if response.status_code != 200:
                return JSONResponse(
                    content={"error": "Failed to fetch nearby pages"},
                    status_code=500
                )
            data = response.json()

            pages = data.get("query", {}).get("geosearch", [])

            if len(pages) > limit:
                pages = sample(pages, limit)

            return JSONResponse(
                content={
                    "pages": pages,
                    "count": len(pages)
                },
                status_code=200
            )
        except Exception as e:
            return JSONResponse(
                content={"error": str(e)},
                status_code=500
            )
        
    else:
        # The requested radius exceeds the API cap: tile the large circle with
        # smaller circles of the maximum allowed radius and query each center
        # concurrently.
        all_pages = []

        small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
        # gsradius takes meters; the placeholder below is filled with the 10 km cap in meters.
        base_url = ("https://en.wikipedia.org/w/api.php?action=query&list=geosearch"
                    "&gscoord={lat}|{lon}&gsradius={gsradius_m}&gslimit={page_limit}&format=json")
        urls = [base_url.format(lat=center[0], lon=center[1], gsradius_m=wiki_geosearch_radius_limit_meters, page_limit=100) for center in small_circle_centers]

        try:
            async with httpx.AsyncClient() as client:
                tasks = [fetch_url(client, url) for url in urls]
                results = await asyncio.gather(*tasks)

            seen_pageids = set()  # overlapping circles can return the same page twice
            for result in results:
                for unit in result.get("data", {}).get("query", {}).get("geosearch", []):
                    pageid = unit.get("pageid")
                    if pageid in seen_pageids:
                        continue

                    lat, lon = unit.get("lat"), unit.get("lon")
                    if lat is None or lon is None:
                        continue
                    dist = int(geopy.distance.distance(
                        (lat_center, lon_center), (lat, lon)
                    ).m)
                    # Keep pages within the requested radius; dist == 0 (the exact
                    # reference point) is a valid hit.
                    if dist > radius:
                        continue

                    seen_pageids.add(pageid)
                    all_pages.append({**unit, "dist": dist})

            if len(all_pages) > limit:
                all_pages = sample(all_pages, limit)

            return JSONResponse(
                content={
                    "pages": all_pages,
                    "count": len(all_pages)
                },
                status_code=200
            )

        except Exception as e:
            return JSONResponse(
                content={"error": str(e)},
                status_code=500
            )
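
# Hedged usage sketch: a 50 km radius exceeds the 10 km API cap, so this request
# exercises the tiling branch above (localhost:8000 is an assumption):
#
#   payload = {"lat": 40.7128, "lon": -74.0060, "radius": 50_000, "limit": 5}
#   r = httpx.post("http://localhost:8000/wiki/nearby", json=payload, timeout=30)
#   for page in r.json()["pages"]:
#       print(page["title"], page["dist"])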



@app.get("/random")
def random():
    url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord=54.163337|37.561109&gsradius=10000&gslimit=10&format=json"
    response = requests.get(url, timeout=10)

    if response.status_code != 200:
        return JSONResponse(
            content={"error": "Failed to fetch random page"},
            status_code=500
        )
    data = response.json()
    pages = data.get("query", {}).get("geosearch", [])
    if not pages:
        return JSONResponse(
            content={"error": "No pages found"},
            status_code=404
        )
     
    return JSONResponse(
        content={
            "pages": pages,
            "count": len(pages)
        },
        status_code=200
    )
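

# Local dev entry point (a minimal sketch; the import string "backend.main:app" is
# an assumption about this project's layout -- adjust it to match yours):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run("backend.main:app", host="0.0.0.0", port=8000, reload=True)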