Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,6 @@ import requests
|
|
4 |
import os
|
5 |
import pandas as pd
|
6 |
import folium
|
7 |
-
from folium import plugins
|
8 |
from geopy.geocoders import Nominatim
|
9 |
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
|
10 |
import time
|
@@ -12,6 +11,10 @@ import random
|
|
12 |
from typing import List, Tuple, Optional
|
13 |
import io
|
14 |
|
|
|
|
|
|
|
|
|
15 |
# Geocoding Service
|
16 |
class GeocodingService:
|
17 |
def __init__(self, user_agent: str = None, timeout: int = 10, rate_limit: float = 1.1):
|
@@ -89,86 +92,55 @@ class GeocodingService:
|
|
89 |
# Mapping Functions
|
90 |
def create_location_map(df: pd.DataFrame,
|
91 |
coordinates_col: str = 'coordinates',
|
92 |
-
places_col: str = '
|
93 |
title_col: Optional[str] = None) -> folium.Map:
|
94 |
-
"""
|
95 |
-
Create an interactive map with individual markers for all locations.
|
96 |
-
|
97 |
-
Args:
|
98 |
-
df: DataFrame containing coordinates and location names
|
99 |
-
coordinates_col: Name of column containing coordinates
|
100 |
-
places_col: Name of column containing location names
|
101 |
-
title_col: Optional column name for additional marker information
|
102 |
-
|
103 |
-
Returns:
|
104 |
-
folium.Map object with all locations marked individually
|
105 |
-
"""
|
106 |
# Initialize the map
|
107 |
m = folium.Map(location=[0, 0], zoom_start=2)
|
108 |
-
|
109 |
-
# Add fullscreen option
|
110 |
-
plugins.Fullscreen().add_to(m)
|
111 |
-
|
112 |
-
# Add search functionality
|
113 |
-
plugins.Search(
|
114 |
-
layer=None,
|
115 |
-
geom_type="Point",
|
116 |
-
placeholder="Search for a place...",
|
117 |
-
collapsed=True,
|
118 |
-
search_label="name"
|
119 |
-
).add_to(m)
|
120 |
-
|
121 |
-
# Keep track of all valid coordinates for setting bounds
|
122 |
all_coords = []
|
123 |
|
124 |
# Process each row in the DataFrame
|
125 |
for idx, row in df.iterrows():
|
126 |
coordinates = row[coordinates_col]
|
127 |
-
|
128 |
-
# Handle places column - try different methods to parse places
|
129 |
-
try:
|
130 |
-
if pd.notna(row[places_col]):
|
131 |
-
if isinstance(row[places_col], str) and ',' in row[places_col]:
|
132 |
-
places = [p.strip() for p in row[places_col].split(',')]
|
133 |
-
else:
|
134 |
-
places = [str(row[places_col])]
|
135 |
-
else:
|
136 |
-
places = []
|
137 |
-
except Exception as e:
|
138 |
-
print(f"Error processing places for row {idx}: {e}")
|
139 |
-
places = []
|
140 |
-
|
141 |
-
# Get optional title information
|
142 |
title = row[title_col] if title_col and pd.notna(row[title_col]) else None
|
143 |
|
144 |
# Skip if no coordinates
|
145 |
if not coordinates:
|
146 |
continue
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
# Ensure places and coordinates have compatible lengths
|
|
|
149 |
while len(places) < len(coordinates):
|
150 |
places.append(f"Location {len(places) + 1}")
|
151 |
-
|
152 |
-
# Add
|
153 |
for i, coord in enumerate(coordinates):
|
154 |
if coord is not None: # Skip None coordinates
|
155 |
lat, lon = coord
|
156 |
|
157 |
# Get place name safely
|
158 |
-
|
|
|
|
|
|
|
159 |
|
160 |
# Create popup content
|
161 |
popup_content = f"<b>{place_name}</b>"
|
162 |
if title:
|
163 |
popup_content += f"<br>{title}"
|
164 |
|
165 |
-
# Add marker
|
166 |
folium.Marker(
|
167 |
location=[lat, lon],
|
168 |
popup=folium.Popup(popup_content, max_width=300),
|
169 |
tooltip=place_name,
|
170 |
-
# Uncomment for different icons
|
171 |
-
# icon=folium.Icon(color='red', icon='info-sign')
|
172 |
).add_to(m)
|
173 |
|
174 |
all_coords.append([lat, lon])
|
@@ -176,10 +148,6 @@ def create_location_map(df: pd.DataFrame,
|
|
176 |
# If we have coordinates, fit the map bounds to include all points
|
177 |
if all_coords:
|
178 |
m.fit_bounds(all_coords)
|
179 |
-
|
180 |
-
# Add layer control and measure tool
|
181 |
-
folium.LayerControl().add_to(m)
|
182 |
-
plugins.MeasureControl(position='topright', primary_length_unit='kilometers').add_to(m)
|
183 |
|
184 |
return m
|
185 |
|
@@ -209,7 +177,7 @@ def process_excel(file, places_column):
|
|
209 |
return None, f"Column '{places_column}' not found in the Excel file. Available columns: {', '.join(df.columns)}", None
|
210 |
|
211 |
# Initialize the geocoding service
|
212 |
-
geocoder = GeocodingService(user_agent="
|
213 |
|
214 |
# Process locations and add coordinates
|
215 |
print(f"Processing locations from column: {places_column}")
|
@@ -245,84 +213,140 @@ def process_excel(file, places_column):
|
|
245 |
print(f"Error processing file: {e}\n{trace}")
|
246 |
return None, f"Error processing file: {str(e)}", None
|
247 |
|
248 |
-
#
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
with gr.Column():
|
254 |
-
excel_file = gr.File(label="Upload Excel File")
|
255 |
-
places_column = gr.Textbox(label="Places Column Name", value="place_of_distribution")
|
256 |
-
process_btn = gr.Button("Process and Map", variant="primary")
|
257 |
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
267 |
try:
|
268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
|
|
|
|
275 |
else:
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
|
|
|
|
288 |
|
289 |
-
#
|
290 |
-
|
291 |
-
""
|
292 |
-
Function to create a map directly from an Excel file without using Gradio.
|
293 |
-
Useful for Jupyter notebooks or standalone scripts.
|
294 |
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
|
|
|
|
|
|
319 |
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
|
326 |
-
# For use in Hugging Face Spaces
|
327 |
if __name__ == "__main__":
|
328 |
demo.launch()
|
|
|
4 |
import os
|
5 |
import pandas as pd
|
6 |
import folium
|
|
|
7 |
from geopy.geocoders import Nominatim
|
8 |
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
|
9 |
import time
|
|
|
11 |
from typing import List, Tuple, Optional
|
12 |
import io
|
13 |
|
14 |
+
# NuExtract API configuration
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"

# Only attach an Authorization header when a token is actually configured.
# With HF_TOKEN unset the previous code sent "Bearer " with an empty
# credential; omitting the header is the correct way to say "no credential".
# The token is stripped so a trailing newline in the secret cannot leak into
# the header value.
_HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
headers = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}
|
17 |
+
|
18 |
# Geocoding Service
|
19 |
class GeocodingService:
|
20 |
def __init__(self, user_agent: str = None, timeout: int = 10, rate_limit: float = 1.1):
|
|
|
92 |
# Mapping Functions
|
93 |
def _split_places(value) -> List[str]:
    """Turn one cell of the places column into a list of non-empty names."""
    if isinstance(value, str):
        return [name.strip() for name in value.split(',') if name.strip()]
    if isinstance(value, (list, tuple)):
        # Checked before pd.isna(): on a sequence pd.isna() returns an array,
        # whose truth value is ambiguous.
        return [str(name).strip() for name in value
                if name is not None and str(name).strip()]
    if value is None or pd.isna(value):
        return []
    # Any other scalar (e.g. a number read from Excel) is a single place name.
    return [str(value)]


def create_location_map(df: pd.DataFrame,
                        coordinates_col: str = 'coordinates',
                        places_col: str = 'places',
                        title_col: Optional[str] = None) -> folium.Map:
    """
    Create an interactive map with one marker per location.

    Args:
        df: DataFrame with one row per record.
        coordinates_col: Name of the column whose cells hold a list/tuple of
            coordinates; each item is either None or a (lat, lon) pair.
        places_col: Name of the column holding the place names, normally as a
            comma-separated string. Names are matched to coordinates by
            position; missing names become "Location N".
        title_col: Optional column whose value is appended to the popup of
            every marker created from that row.

    Returns:
        folium.Map with a marker for every non-None coordinate, fitted to the
        bounds of all markers (left at the world view when there are none).
    """
    import html  # local import: only needed to escape popup/tooltip text

    # Initialize the map
    m = folium.Map(location=[0, 0], zoom_start=2)
    all_coords = []

    # Process each row in the DataFrame
    for _, row in df.iterrows():
        coordinates = row[coordinates_col]

        # Skip rows without usable coordinates. A missing cell is NaN, which
        # is truthy, so a plain "if not coordinates" lets it through and the
        # len() call below would raise.
        if not isinstance(coordinates, (list, tuple)) or not coordinates:
            continue

        title = row[title_col] if title_col and pd.notna(row[title_col]) else None
        places = _split_places(row[places_col])

        # Pad with placeholder names so every coordinate has a label.
        places.extend(
            f"Location {n}"
            for n in range(len(places) + 1, len(coordinates) + 1)
        )

        # Add markers for each coordinate
        for place_name, coord in zip(places, coordinates):
            if coord is None:  # Skip None coordinates
                continue
            lat, lon = coord

            # The names and titles come from an uploaded spreadsheet and are
            # rendered as HTML by the map, so escape them first.
            safe_name = html.escape(place_name)
            popup_content = f"<b>{safe_name}</b>"
            if title:
                popup_content += f"<br>{html.escape(str(title))}"

            # Add marker to the map
            folium.Marker(
                location=[lat, lon],
                popup=folium.Popup(popup_content, max_width=300),
                tooltip=safe_name,
            ).add_to(m)

            all_coords.append([lat, lon])

    # If we have coordinates, fit the map bounds to include all points
    if all_coords:
        m.fit_bounds(all_coords)

    return m
|
153 |
|
|
|
177 |
return None, f"Column '{places_column}' not found in the Excel file. Available columns: {', '.join(df.columns)}", None
|
178 |
|
179 |
# Initialize the geocoding service
|
180 |
+
geocoder = GeocodingService(user_agent="gradio_map_visualization_app")
|
181 |
|
182 |
# Process locations and add coordinates
|
183 |
print(f"Processing locations from column: {places_column}")
|
|
|
213 |
print(f"Error processing file: {e}\n{trace}")
|
214 |
return None, f"Error processing file: {str(e)}", None
|
215 |
|
216 |
+
# NuExtract Functions
|
217 |
+
def _format_eta(estimated_time) -> str:
    """Render the API's estimated loading time as whole seconds, or 'unknown'."""
    try:
        return str(int(float(estimated_time)))
    except (TypeError, ValueError, OverflowError):
        # Missing, non-numeric, NaN or infinite estimate.
        return "unknown"


def extract_info(template, text):
    """
    Extract structured data from ``text`` with NuExtract-1.5.

    Args:
        template: JSON template (as text) naming the fields to extract.
        text: The text to extract the fields from.

    Returns:
        A ``(status, output)`` tuple of strings. On success ``status`` is
        "✅ Success" and ``output`` is the extracted JSON, pretty-printed.
        On failure ``status`` describes the problem and ``output`` carries
        the raw response text, the unparsable model output, a retry hint, or
        "{}".

    This function is used directly as a Gradio click handler, so it reports
    every failure through the status string instead of raising.
    """
    try:
        # Format prompt according to NuExtract-1.5 requirements
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

        # Call API
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 1000,
                "do_sample": False
            }
        }

        # Without a timeout a stalled connection would block the handler
        # forever; a timeout surfaces as an error status via the outer except.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)

        # If the model is loading, inform the user
        if response.status_code == 503:
            try:
                response_json = response.json()
            except ValueError:
                # Non-JSON 503 body: fall through to the generic API error.
                response_json = None
            if isinstance(response_json, dict) and "loading" in str(response_json.get("error", "")):
                eta = _format_eta(response_json.get("estimated_time"))
                return f"⏳ Model is loading (ETA: {eta} seconds)", "Please try again in a few minutes"

        if response.status_code != 200:
            return f"❌ API Error: {response.status_code}", response.text

        # Process result
        result = response.json()

        # Handle different response formats
        try:
            if isinstance(result, list):
                if not result:
                    return "❌ Empty result list", "{}"
                result_text = result[0].get("generated_text", "")
            elif isinstance(result, dict) and "generated_text" in result:
                result_text = result["generated_text"]
            else:
                result_text = str(result)

            # The generated text echoes the prompt; the answer is what follows
            # the output marker.
            if "<|output|>" in result_text:
                json_text = result_text.split("<|output|>")[1].strip()
            else:
                json_text = result_text

            # Try to parse as JSON
            try:
                extracted = json.loads(json_text)
            except json.JSONDecodeError:
                return "❌ JSON parsing error", json_text

            # ensure_ascii=False keeps umlauts and other non-ASCII characters
            # readable instead of turning them into \uXXXX escapes.
            return "✅ Success", json.dumps(extracted, indent=2, ensure_ascii=False)
        except Exception as inner_e:
            return f"❌ Error processing result: {str(inner_e)}", "{}"
    except Exception as e:
        return f"❌ Error: {str(e)}", "{}"
|
278 |
|
279 |
+
# Create the Gradio interface
|
280 |
+
with gr.Blocks() as demo:
    gr.Markdown("# Historical Data Analysis Tools")

    with gr.Tabs():
        # Tab 1: structured extraction from free text via extract_info
        with gr.TabItem("Text Extraction"):
            gr.Markdown("## NuExtract-1.5 Structured Data Extraction")

            with gr.Row():
                with gr.Column():
                    template = gr.Textbox(
                        label="JSON Template",
                        value='{"earthquake location": "", "dateline location": ""}',
                        lines=5
                    )
                    text = gr.Textbox(
                        label="Text to Extract From",
                        value="Neues Erdbeben in Japan. Aus Tokio wird berichtet, daß in Yokohama bei einem Erdbeben sechs Personen getötet und 22 verwundet, in Tokio vier getötet und 22 verwundet wurden. In Yokohama seien 6VV Häuser zerstört worden. Die telephonische und telegraphische Verbindung zwischen Tokio und Osaka ist unterbrochen worden. Der Trambahnverkehr in Tokio liegt still. Auch der Eisenbahnverkehr zwischen Tokio und Yokohama ist unterbrochen. In Sngamo, einer Vorstadt von Tokio sind Brände ausgebrochen. Ein Eisenbahnzug stürzte in den Vajugawafluß zwischen Gotemba und Tokio. Sechs Züge wurden umgeworfen. Mit dem letzten japanischen Erdbeben sind seit eineinhalb Jahrtausenden bis heute in Japan 229 größere Erdbeben zu verzeichnen gewesen.",
                        lines=8
                    )
                    extract_btn = gr.Button("Extract Information", variant="primary")

                with gr.Column():
                    status = gr.Textbox(label="Status")
                    output = gr.Textbox(label="Output", lines=10)

            extract_btn.click(
                fn=extract_info,
                inputs=[template, text],
                outputs=[status, output]
            )

        # Tab 2: geocode the places in an uploaded Excel file and map them
        with gr.TabItem("Geocoding & Mapping"):
            gr.Markdown("## Location Mapping Tool")

            with gr.Row():
                with gr.Column():
                    excel_file = gr.File(label="Upload Excel File")
                    places_column = gr.Textbox(label="Places Column Name", value="places")
                    process_btn = gr.Button("Process and Map", variant="primary")

                with gr.Column():
                    map_output = gr.HTML(label="Map Visualization")
                    stats_output = gr.Textbox(label="Statistics", lines=3)
                    processed_file = gr.File(label="Processed Data", visible=True, interactive=False)

            def process_and_map(file, column):
                """
                Click handler: run process_excel and return the values for
                (map_output, stats_output, processed_file).

                Returns the map HTML, the statistics/message text and the
                processed file path; the map and file are None on failure.
                """
                if file is None:
                    return None, "Please upload an Excel file", None

                try:
                    map_path, stats, processed_path = process_excel(file, column)

                    if not (map_path and processed_path):
                        # process_excel reports its own problem through stats.
                        return None, stats, None

                    # Read the HTML explicitly as UTF-8 so place names with
                    # non-ASCII characters do not depend on the locale's
                    # default encoding.
                    # NOTE(review): assumes the map file is written as UTF-8
                    # (folium's save() does this) - confirm in process_excel.
                    with open(map_path, "r", encoding="utf-8") as f:
                        map_html = f.read()

                    return map_html, stats, processed_path
                except Exception as e:
                    import traceback
                    trace = traceback.format_exc()
                    print(f"Error in process_and_map: {e}\n{trace}")
                    return None, f"Error: {str(e)}", None

            process_btn.click(
                fn=process_and_map,
                inputs=[excel_file, places_column],
                outputs=[map_output, stats_output, processed_file]
            )

if __name__ == "__main__":
    demo.launch()
|