oberbics committed on
Commit
063d83e
·
verified ·
1 Parent(s): bd09e8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -124
app.py CHANGED
@@ -4,7 +4,6 @@ import requests
4
  import os
5
  import pandas as pd
6
  import folium
7
- from folium import plugins
8
  from geopy.geocoders import Nominatim
9
  from geopy.exc import GeocoderTimedOut, GeocoderServiceError
10
  import time
@@ -12,6 +11,10 @@ import random
12
  from typing import List, Tuple, Optional
13
  import io
14
 
 
 
 
 
15
  # Geocoding Service
16
  class GeocodingService:
17
  def __init__(self, user_agent: str = None, timeout: int = 10, rate_limit: float = 1.1):
@@ -89,86 +92,55 @@ class GeocodingService:
89
  # Mapping Functions
90
  def create_location_map(df: pd.DataFrame,
91
  coordinates_col: str = 'coordinates',
92
- places_col: str = 'place_of_distribution', # Updated to match your column name
93
  title_col: Optional[str] = None) -> folium.Map:
94
- """
95
- Create an interactive map with individual markers for all locations.
96
-
97
- Args:
98
- df: DataFrame containing coordinates and location names
99
- coordinates_col: Name of column containing coordinates
100
- places_col: Name of column containing location names
101
- title_col: Optional column name for additional marker information
102
-
103
- Returns:
104
- folium.Map object with all locations marked individually
105
- """
106
  # Initialize the map
107
  m = folium.Map(location=[0, 0], zoom_start=2)
108
-
109
- # Add fullscreen option
110
- plugins.Fullscreen().add_to(m)
111
-
112
- # Add search functionality
113
- plugins.Search(
114
- layer=None,
115
- geom_type="Point",
116
- placeholder="Search for a place...",
117
- collapsed=True,
118
- search_label="name"
119
- ).add_to(m)
120
-
121
- # Keep track of all valid coordinates for setting bounds
122
  all_coords = []
123
 
124
  # Process each row in the DataFrame
125
  for idx, row in df.iterrows():
126
  coordinates = row[coordinates_col]
127
-
128
- # Handle places column - try different methods to parse places
129
- try:
130
- if pd.notna(row[places_col]):
131
- if isinstance(row[places_col], str) and ',' in row[places_col]:
132
- places = [p.strip() for p in row[places_col].split(',')]
133
- else:
134
- places = [str(row[places_col])]
135
- else:
136
- places = []
137
- except Exception as e:
138
- print(f"Error processing places for row {idx}: {e}")
139
- places = []
140
-
141
- # Get optional title information
142
  title = row[title_col] if title_col and pd.notna(row[title_col]) else None
143
 
144
  # Skip if no coordinates
145
  if not coordinates:
146
  continue
147
 
 
 
 
 
 
 
 
148
  # Ensure places and coordinates have compatible lengths
 
149
  while len(places) < len(coordinates):
150
  places.append(f"Location {len(places) + 1}")
151
-
152
- # Add individual markers for each location
153
  for i, coord in enumerate(coordinates):
154
  if coord is not None: # Skip None coordinates
155
  lat, lon = coord
156
 
157
  # Get place name safely
158
- place_name = places[i] if i < len(places) else f"Location {i + 1}"
 
 
 
159
 
160
  # Create popup content
161
  popup_content = f"<b>{place_name}</b>"
162
  if title:
163
  popup_content += f"<br>{title}"
164
 
165
- # Add marker directly to the map (not in a cluster)
166
  folium.Marker(
167
  location=[lat, lon],
168
  popup=folium.Popup(popup_content, max_width=300),
169
  tooltip=place_name,
170
- # Uncomment for different icons
171
- # icon=folium.Icon(color='red', icon='info-sign')
172
  ).add_to(m)
173
 
174
  all_coords.append([lat, lon])
@@ -176,10 +148,6 @@ def create_location_map(df: pd.DataFrame,
176
  # If we have coordinates, fit the map bounds to include all points
177
  if all_coords:
178
  m.fit_bounds(all_coords)
179
-
180
- # Add layer control and measure tool
181
- folium.LayerControl().add_to(m)
182
- plugins.MeasureControl(position='topright', primary_length_unit='kilometers').add_to(m)
183
 
184
  return m
185
 
@@ -209,7 +177,7 @@ def process_excel(file, places_column):
209
  return None, f"Column '{places_column}' not found in the Excel file. Available columns: {', '.join(df.columns)}", None
210
 
211
  # Initialize the geocoding service
212
- geocoder = GeocodingService(user_agent="map_visualization_app")
213
 
214
  # Process locations and add coordinates
215
  print(f"Processing locations from column: {places_column}")
@@ -245,84 +213,140 @@ def process_excel(file, places_column):
245
  print(f"Error processing file: {e}\n{trace}")
246
  return None, f"Error processing file: {str(e)}", None
247
 
248
- # Create the Gradio interface
249
- with gr.Blocks() as demo:
250
- gr.Markdown("# Location Mapping Tool")
251
-
252
- with gr.Row():
253
- with gr.Column():
254
- excel_file = gr.File(label="Upload Excel File")
255
- places_column = gr.Textbox(label="Places Column Name", value="place_of_distribution")
256
- process_btn = gr.Button("Process and Map", variant="primary")
257
 
258
- with gr.Column():
259
- map_output = gr.HTML(label="Map Visualization")
260
- stats_output = gr.Textbox(label="Statistics", lines=3)
261
- processed_file = gr.File(label="Processed Data", visible=True, interactive=False)
262
-
263
- def process_and_map(file, column):
264
- if file is None:
265
- return None, "Please upload an Excel file", None
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  try:
268
- map_path, stats, processed_path = process_excel(file, column)
 
 
 
 
 
 
269
 
270
- if map_path and processed_path:
271
- with open(map_path, "r") as f:
272
- map_html = f.read()
273
-
274
- return map_html, stats, processed_path
 
 
275
  else:
276
- return None, stats, None
277
- except Exception as e:
278
- import traceback
279
- trace = traceback.format_exc()
280
- print(f"Error in process_and_map: {e}\n{trace}")
281
- return None, f"Error: {str(e)}", None
282
-
283
- process_btn.click(
284
- fn=process_and_map,
285
- inputs=[excel_file, places_column],
286
- outputs=[map_output, stats_output, processed_file]
287
- )
 
 
288
 
289
- # For direct use in Jupyter or standalone Python scripts
290
- def create_map_from_excel(file_path, places_column='place_of_distribution'):
291
- """
292
- Function to create a map directly from an Excel file without using Gradio.
293
- Useful for Jupyter notebooks or standalone scripts.
294
 
295
- Args:
296
- file_path: Path to the Excel file
297
- places_column: Name of the column containing location names
298
-
299
- Returns:
300
- folium.Map object
301
- """
302
- try:
303
- # Read Excel file
304
- df = pd.read_excel(file_path)
305
-
306
- # Check if column exists
307
- if places_column not in df.columns:
308
- print(f"Column '{places_column}' not found. Available columns: {', '.join(df.columns)}")
309
- return None
310
-
311
- # Initialize geocoder
312
- geocoder = GeocodingService(user_agent="jupyter_map_app")
313
-
314
- # Process locations
315
- df['coordinates'] = df[places_column].apply(geocoder.process_locations)
316
-
317
- # Create map
318
- map_obj = create_location_map(df, coordinates_col='coordinates', places_col=places_column)
 
 
 
319
 
320
- return map_obj
321
-
322
- except Exception as e:
323
- print(f"Error creating map: {e}")
324
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
 
326
- # For use in Hugging Face Spaces
327
  if __name__ == "__main__":
328
  demo.launch()
 
4
  import os
5
  import pandas as pd
6
  import folium
 
7
  from geopy.geocoders import Nominatim
8
  from geopy.exc import GeocoderTimedOut, GeocoderServiceError
9
  import time
 
11
  from typing import List, Tuple, Optional
12
  import io
13
 
14
# NuExtract API configuration
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"

# Only attach an Authorization header when a token is actually configured.
# An unset HF_TOKEN previously produced "Bearer " with an empty credential,
# which is a malformed bearer token rather than an anonymous request.
_HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
headers = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}
17
+
18
  # Geocoding Service
19
  class GeocodingService:
20
  def __init__(self, user_agent: str = None, timeout: int = 10, rate_limit: float = 1.1):
 
92
  # Mapping Functions
93
def create_location_map(df: pd.DataFrame,
                        coordinates_col: str = 'coordinates',
                        places_col: str = 'places',
                        title_col: Optional[str] = None) -> folium.Map:
    """Build a folium map with one marker per geocoded coordinate.

    Args:
        df: DataFrame with one row per record.
        coordinates_col: Column holding, per row, a sequence of
            ``(lat, lon)`` pairs; individual entries may be ``None``.
        places_col: Column holding the place names for the row, either as a
            comma-separated string or as a list/tuple of names.
        title_col: Optional column whose value is appended to every popup
            of the row.

    Returns:
        A ``folium.Map`` fitted to the bounds of all markers, or the default
        world view when no marker was added.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # Initialize the map
    m = folium.Map(location=[0, 0], zoom_start=2)
    all_coords = []

    # Process each row in the DataFrame
    for _, row in df.iterrows():
        coordinates = row[coordinates_col]
        raw_places = row[places_col]
        title = row[title_col] if title_col and pd.notna(row[title_col]) else None

        # Skip rows without a usable coordinate sequence. A NaN cell is a
        # truthy float, so a plain `not coordinates` check would let it
        # through and fail later when it is iterated.
        if not isinstance(coordinates, (list, tuple)) or not coordinates:
            continue

        # Parse places into a list of names
        if isinstance(raw_places, str):
            places = [p.strip() for p in raw_places.split(',') if p.strip()]
        elif isinstance(raw_places, (list, tuple)):
            places = [str(p) for p in raw_places]
        elif pd.notna(raw_places):
            # Non-string scalar: treat it as a single place name.
            places = [str(raw_places)]
        else:
            places = []

        # Add markers for each coordinate
        for i, coord in enumerate(coordinates):
            if coord is None:  # Skip None coordinates
                continue
            lat, lon = coord

            # Fall back to a positional placeholder when there are fewer
            # names than coordinates.
            place_name = places[i] if i < len(places) else f"Location {i + 1}"

            # Names and titles come from the uploaded spreadsheet and are
            # rendered as HTML by the popup/tooltip, so escape them first.
            safe_name = escape(place_name)

            # Create popup content
            popup_content = f"<b>{safe_name}</b>"
            if title:
                popup_content += f"<br>{escape(str(title))}"

            # Add marker to the map
            folium.Marker(
                location=[lat, lon],
                popup=folium.Popup(popup_content, max_width=300),
                tooltip=safe_name,
            ).add_to(m)

            all_coords.append([lat, lon])

    # If we have coordinates, fit the map bounds to include all points
    if all_coords:
        m.fit_bounds(all_coords)

    return m
153
 
 
177
  return None, f"Column '{places_column}' not found in the Excel file. Available columns: {', '.join(df.columns)}", None
178
 
179
  # Initialize the geocoding service
180
+ geocoder = GeocodingService(user_agent="gradio_map_visualization_app")
181
 
182
  # Process locations and add coordinates
183
  print(f"Processing locations from column: {places_column}")
 
213
  print(f"Error processing file: {e}\n{trace}")
214
  return None, f"Error processing file: {str(e)}", None
215
 
216
+ # NuExtract Functions
217
# Upper bound (seconds) for one inference request, so a stalled connection
# cannot block the UI callback indefinitely.
_REQUEST_TIMEOUT_SECONDS = 60


def _format_loading_status(estimated_time):
    """Return the status line shown while the hosted model is still loading."""
    try:
        eta = int(float(estimated_time))
    except (TypeError, ValueError, OverflowError):
        # Missing or non-numeric ETA (including the "unknown" default).
        eta = "unknown"
    return f"⏳ Model is loading (ETA: {eta} seconds)"


def extract_info(template, text):
    """Run NuExtract-1.5 on *text* using the JSON *template*.

    Args:
        template: JSON template describing the fields to extract.
        text: Source text to extract the fields from.

    Returns:
        A ``(status, output)`` tuple of strings. On success ``output`` is
        the extracted JSON, pretty-printed; on failure it carries the raw
        response text or ``"{}"``. This function never raises: every
        failure is reported through ``status``.
    """
    try:
        # Format prompt according to NuExtract-1.5 requirements
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

        # Call API
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 1000,
                "do_sample": False
            }
        }
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )

        # If the model is loading, inform the user
        if response.status_code == 503:
            try:
                error_body = response.json()
            except ValueError:
                # Non-JSON 503 body: report it as a plain API error below.
                error_body = {}
            if isinstance(error_body, dict) and "loading" in str(error_body.get("error", "")):
                status = _format_loading_status(error_body.get("estimated_time", "unknown"))
                return status, "Please try again in a few minutes"

        if response.status_code != 200:
            return f"❌ API Error: {response.status_code}", response.text

        # Process result
        result = response.json()
    except Exception as e:
        # UI boundary: surface any request/decoding failure as a status.
        return f"❌ Error: {str(e)}", "{}"

    try:
        # Handle different response formats
        if isinstance(result, list):
            if not result:
                return "❌ Empty result list", "{}"
            result_text = result[0].get("generated_text", "")
        else:
            result_text = str(result)

        # Keep the text that follows the output marker, if present
        if "<|output|>" in result_text:
            json_text = result_text.split("<|output|>")[1].strip()
        else:
            json_text = result_text

        # Try to parse as JSON
        try:
            extracted = json.loads(json_text)
        except json.JSONDecodeError:
            return "❌ JSON parsing error", json_text

        # ensure_ascii=False keeps umlauts and other non-ASCII characters
        # readable instead of emitting \uXXXX escapes.
        formatted = json.dumps(extracted, indent=2, ensure_ascii=False)
        return "✅ Success", formatted
    except Exception as inner_e:
        return f"❌ Error processing result: {str(inner_e)}", "{}"
278
 
279
+ # Create the Gradio interface
280
with gr.Blocks() as demo:
    gr.Markdown("# Historical Data Analysis Tools")

    with gr.Tabs():
        # Tab 1: structured extraction through the NuExtract API.
        with gr.TabItem("Text Extraction"):
            gr.Markdown("## NuExtract-1.5 Structured Data Extraction")

            with gr.Row():
                with gr.Column():
                    template = gr.Textbox(
                        label="JSON Template",
                        value='{"earthquake location": "", "dateline location": ""}',
                        lines=5
                    )
                    text = gr.Textbox(
                        label="Text to Extract From",
                        value="Neues Erdbeben in Japan. Aus Tokio wird berichtet, daß in Yokohama bei einem Erdbeben sechs Personen getötet und 22 verwundet, in Tokio vier getötet und 22 verwundet wurden. In Yokohama seien 6VV Häuser zerstört worden. Die telephonische und telegraphische Verbindung zwischen Tokio und Osaka ist unterbrochen worden. Der Trambahnverkehr in Tokio liegt still. Auch der Eisenbahnverkehr zwischen Tokio und Yokohama ist unterbrochen. In Sngamo, einer Vorstadt von Tokio sind Brände ausgebrochen. Ein Eisenbahnzug stürzte in den Vajugawafluß zwischen Gotemba und Tokio. Sechs Züge wurden umgeworfen. Mit dem letzten japanischen Erdbeben sind seit eineinhalb Jahrtausenden bis heute in Japan 229 größere Erdbeben zu verzeichnen gewesen.",
                        lines=8
                    )
                    extract_btn = gr.Button("Extract Information", variant="primary")

                with gr.Column():
                    status = gr.Textbox(label="Status")
                    output = gr.Textbox(label="Output", lines=10)

            extract_btn.click(
                fn=extract_info,
                inputs=[template, text],
                outputs=[status, output]
            )

        # Tab 2: geocode a column of an uploaded Excel file and map it.
        with gr.TabItem("Geocoding & Mapping"):
            gr.Markdown("## Location Mapping Tool")

            with gr.Row():
                with gr.Column():
                    excel_file = gr.File(label="Upload Excel File")
                    places_column = gr.Textbox(label="Places Column Name", value="places")
                    process_btn = gr.Button("Process and Map", variant="primary")

                with gr.Column():
                    map_output = gr.HTML(label="Map Visualization")
                    stats_output = gr.Textbox(label="Statistics", lines=3)
                    processed_file = gr.File(label="Processed Data", visible=True, interactive=False)

            def process_and_map(file, column):
                """Run ``process_excel`` and adapt its result for the UI.

                Returns a ``(map_html, stats, processed_path)`` tuple. The
                first and last items are ``None`` when no file was given or
                processing failed; ``stats`` then holds the message to show.
                """
                if file is None:
                    return None, "Please upload an Excel file", None

                try:
                    map_path, stats, processed_path = process_excel(file, column)

                    if map_path and processed_path:
                        # Read explicitly as UTF-8 so place names outside
                        # the locale's default encoding do not fail here.
                        # NOTE(review): assumes the map file is written by
                        # folium's save(), i.e. as UTF-8 - confirm in
                        # process_excel.
                        with open(map_path, "r", encoding="utf-8") as f:
                            map_html = f.read()

                        return map_html, stats, processed_path
                    else:
                        return None, stats, None
                except Exception as e:
                    # UI boundary: log the traceback, show a short message.
                    import traceback
                    trace = traceback.format_exc()
                    print(f"Error in process_and_map: {e}\n{trace}")
                    return None, f"Error: {str(e)}", None

            process_btn.click(
                fn=process_and_map,
                inputs=[excel_file, places_column],
                outputs=[map_output, stats_output, processed_file]
            )


if __name__ == "__main__":
    demo.launch()