Spaces:
Runtime error
Runtime error
Commit
·
3b0d00b
1
Parent(s):
8d1a80c
Randomize both <10km and >10km nearby searches
Browse files
- Since we make requests to all the URLs anyway, there is no point in being stingy about processing them. Process all of them and then randomize the results (if the number of required results < the number of available results).
- Added randomization to the <10km nearby search as well.
main.py
CHANGED
@@ -8,7 +8,7 @@ import geopy.distance
|
|
8 |
from cachetools import TTLCache
|
9 |
import os
|
10 |
from dotenv import load_dotenv
|
11 |
-
from random import
|
12 |
from backend.utils import generate_circle_centers, fetch_url
|
13 |
|
14 |
load_dotenv()
|
@@ -230,6 +230,9 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
230 |
|
231 |
pages = data.get("query", {}).get("geosearch", [])
|
232 |
|
|
|
|
|
|
|
233 |
return JSONResponse(
|
234 |
content={
|
235 |
"pages": pages,
|
@@ -244,12 +247,11 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
244 |
)
|
245 |
|
246 |
elif radius > wiki_geosearch_radius_limit_meters:
|
247 |
-
print(radius)
|
248 |
-
small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
|
249 |
all_pages = []
|
|
|
|
|
250 |
base_url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord={lat}|{lon}&gsradius={small_radius_km}&gslimit={page_limit}&format=json"
|
251 |
urls = [base_url.format(lat=center[0], lon=center[1], small_radius_km=wiki_geosearch_radius_limit_meters, page_limit=100) for center in small_circle_centers]
|
252 |
-
shuffle(urls) # If # available pages > # requested pages by user, randomize the results to avoid clustering around a single direction.
|
253 |
|
254 |
print("URL Counts:", len(urls))
|
255 |
try:
|
@@ -259,15 +261,14 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
259 |
|
260 |
# print(results)
|
261 |
for result in results:
|
|
|
262 |
for unit in result.get("data", {}).get("query", {}).get("geosearch", []):
|
263 |
-
|
264 |
-
break
|
265 |
lat, lon = unit.get("lat"), unit.get("lon")
|
266 |
if lat is not None and lon is not None:
|
267 |
dist = int(geopy.distance.distance(
|
268 |
(lat_center, lon_center), (lat, lon)
|
269 |
).m)
|
270 |
-
# print(dist)
|
271 |
else:
|
272 |
dist = None
|
273 |
|
@@ -277,7 +278,8 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
277 |
unit_with_dist = {**unit, "dist": dist}
|
278 |
all_pages.append(unit_with_dist)
|
279 |
|
280 |
-
|
|
|
281 |
|
282 |
return JSONResponse(
|
283 |
content={
|
|
|
8 |
from cachetools import TTLCache
|
9 |
import os
|
10 |
from dotenv import load_dotenv
|
11 |
+
from random import sample
|
12 |
from backend.utils import generate_circle_centers, fetch_url
|
13 |
|
14 |
load_dotenv()
|
|
|
230 |
|
231 |
pages = data.get("query", {}).get("geosearch", [])
|
232 |
|
233 |
+
if len(pages) > limit:
|
234 |
+
pages = sample(pages, limit)
|
235 |
+
|
236 |
return JSONResponse(
|
237 |
content={
|
238 |
"pages": pages,
|
|
|
247 |
)
|
248 |
|
249 |
elif radius > wiki_geosearch_radius_limit_meters:
|
|
|
|
|
250 |
all_pages = []
|
251 |
+
|
252 |
+
small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
|
253 |
base_url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord={lat}|{lon}&gsradius={small_radius_km}&gslimit={page_limit}&format=json"
|
254 |
urls = [base_url.format(lat=center[0], lon=center[1], small_radius_km=wiki_geosearch_radius_limit_meters, page_limit=100) for center in small_circle_centers]
|
|
|
255 |
|
256 |
print("URL Counts:", len(urls))
|
257 |
try:
|
|
|
261 |
|
262 |
# print(results)
|
263 |
for result in results:
|
264 |
+
|
265 |
for unit in result.get("data", {}).get("query", {}).get("geosearch", []):
|
266 |
+
|
|
|
267 |
lat, lon = unit.get("lat"), unit.get("lon")
|
268 |
if lat is not None and lon is not None:
|
269 |
dist = int(geopy.distance.distance(
|
270 |
(lat_center, lon_center), (lat, lon)
|
271 |
).m)
|
|
|
272 |
else:
|
273 |
dist = None
|
274 |
|
|
|
278 |
unit_with_dist = {**unit, "dist": dist}
|
279 |
all_pages.append(unit_with_dist)
|
280 |
|
281 |
+
if len(all_pages) > limit:
|
282 |
+
all_pages = sample(all_pages, limit)
|
283 |
|
284 |
return JSONResponse(
|
285 |
content={
|