oberbics committed on
Commit
3de9a41
·
verified ·
1 Parent(s): 992277b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +503 -142
app.py CHANGED
@@ -1,107 +1,139 @@
1
  import gradio as gr
 
 
 
2
  import pandas as pd
3
  import folium
4
- from folium.plugins import MeasureControl, Fullscreen, Search, MarkerCluster
5
  from geopy.geocoders import Nominatim
6
- import tempfile
7
- import warnings
8
- import os
9
  import time
10
  import random
11
- from datetime import datetime
 
 
 
12
 
13
  warnings.filterwarnings("ignore")
14
 
15
- # Updated Historical Tile Providers with reliable sources
16
- HISTORICAL_TILES = {
17
- "Historical 1700s-1800s": {
18
  "url": "https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}",
19
  "attr": "Esri",
20
- "fallback": "https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}",
21
- "years": (1700, 1900)
22
  },
23
- "Early 1900s": {
24
  "url": "https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}",
25
  "attr": "Esri",
26
- "fallback": "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png",
27
- "years": (1901, 1920)
28
  },
29
- "Modern Era": {
30
  "url": "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png",
31
  "attr": "OpenStreetMap",
32
- "fallback": None,
33
- "years": (1921, 2023)
34
  },
35
- # Additional reliable tile sources
36
  "Terrain": {
37
  "url": "https://server.arcgisonline.com/ArcGIS/rest/services/World_Terrain_Base/MapServer/tile/{z}/{y}/{x}",
38
  "attr": "Esri",
39
- "fallback": None,
40
- "years": (1700, 2023)
41
  },
42
  "Toner": {
43
- "url": "https://tiles.stadiamaps.com/tiles/stamen_toner/{z}/{x}/{y}.png", # Updated Stamen source
44
  "attr": "Stadia Maps",
45
- "fallback": None,
46
- "years": (1700, 2023)
47
  }
48
  }
49
 
50
- class SafeGeocoder:
51
- def __init__(self):
52
- user_agent = f"historical_mapper_v7_{random.randint(1000, 9999)}"
53
- self.geolocator = Nominatim(user_agent=user_agent, timeout=10)
54
- self.cache = {} # Simple cache to avoid repeated requests
 
 
 
 
 
 
 
 
 
 
55
  self.last_request = 0
56
-
57
- def _respect_rate_limit(self):
58
- # Ensure at least 1 second between requests
59
  current_time = time.time()
60
- elapsed = current_time - self.last_request
61
- if elapsed < 1.0:
62
- time.sleep(1.0 - elapsed)
63
  self.last_request = time.time()
64
-
65
- def get_coords(self, location: str):
66
- if not location or pd.isna(location):
67
- return None
68
-
69
- # Convert to string if needed
70
- location = str(location).strip()
71
-
72
  # Check cache first
73
  if location in self.cache:
74
  return self.cache[location]
75
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  try:
77
- self._respect_rate_limit()
78
- result = self.geolocator.geocode(location)
79
- if result:
80
- coords = (result.latitude, result.longitude)
81
- self.cache[location] = coords
82
- return coords
83
- self.cache[location] = None
84
- return None
 
 
 
 
 
 
85
  except Exception as e:
86
- print(f"Geocoding error for '{location}': {e}")
87
- self.cache[location] = None
88
- return None
 
89
 
90
- def create_reliable_map(df, location_col, year):
91
  """Create a map with multiple layer options and better error handling"""
92
 
93
- # Select appropriate default tile configuration based on year
94
- default_tile_name = next(
95
- (name for name, t in HISTORICAL_TILES.items()
96
- if t["years"][0] <= year <= t["years"][1] and name in ["Historical 1700s-1800s", "Early 1900s", "Modern Era"]),
97
- "Modern Era"
98
- )
99
 
100
  # Initialize map
101
  m = folium.Map(location=[20, 0], zoom_start=2, control_scale=True)
102
 
103
  # Add all tile layers with the appropriate one active
104
- for name, config in HISTORICAL_TILES.items():
105
  folium.TileLayer(
106
  tiles=config["url"],
107
  attr=f"{config['attr']} ({name})",
@@ -136,28 +168,39 @@ def create_reliable_map(df, location_col, year):
136
  if col != location_col and not pd.isna(row[col]):
137
  additional_info += f"<br><b>{col}:</b> {row[col]}"
138
 
139
- # Geocode location
140
- point = geocoder.get_coords(location)
141
- if point:
142
- # Create popup content
143
- popup_content = f"""
144
- <div style="min-width: 200px; max-width: 300px">
145
- <h4>{location}</h4>
146
- <p><i>Historical View ({year})</i></p>
147
- {additional_info}
148
- </div>
149
- """
150
-
151
- # Add marker
152
- folium.Marker(
153
- location=point,
154
- popup=folium.Popup(popup_content, max_width=300),
155
- tooltip=location,
156
- icon=folium.Icon(color="blue", icon="info-sign")
157
- ).add_to(marker_cluster)
158
 
159
- coords.append(point)
160
- processed_count += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  # Layer control
163
  folium.LayerControl(collapsed=False).add_to(m)
@@ -198,95 +241,413 @@ def create_reliable_map(df, location_col, year):
198
  }, 3000); // Wait 3 seconds for tiles to load
199
  });
200
  </script>
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  """))
202
 
 
 
 
 
 
 
 
 
 
 
 
203
  return m._repr_html_(), processed_count
204
 
205
- def process_data(file_obj, location_col, year):
206
- try:
207
- # Handle file reading
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  try:
209
- df = pd.read_excel(file_obj.name)
 
 
 
 
 
 
 
210
  except Exception as e:
211
- return None, f"Error reading Excel file: {str(e)}", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
- # Validate columns
214
- if location_col not in df.columns:
215
- return None, f"Column '{location_col}' not found. Available columns: {', '.join(df.columns)}", None
 
 
216
 
217
  # Create map
218
- map_html, processed_count = create_reliable_map(df, location_col, year)
219
 
220
  # Save processed data
221
  with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
222
- df.to_excel(tmp.name, index=False)
223
  processed_path = tmp.name
 
224
 
225
  # Generate stats
226
- total_locations = df[location_col].count()
227
  success_rate = (processed_count / total_locations * 100) if total_locations > 0 else 0
228
 
229
- stats = f"Found {processed_count} of {total_locations} locations ({success_rate:.1f}%) from year {year}"
230
 
231
- return (
232
- f"<div style='width:100%; height:70vh; border:1px solid #ddd'>{map_html}</div>",
233
- stats,
234
- processed_path
235
- )
 
 
 
 
 
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  except Exception as e:
238
  import traceback
239
- error_details = traceback.format_exc()
240
- print(f"Error in processing: {error_details}")
241
  return None, f"Error: {str(e)}", None
242
 
243
- # Gradio Interface
244
- with gr.Blocks(title="Historical Maps", theme=gr.themes.Soft()) as app:
245
- gr.Markdown("# Historical Map Viewer")
246
- gr.Markdown("Upload an Excel file with location data to visualize on historical maps.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
- with gr.Row():
249
- with gr.Column():
250
- file_input = gr.File(
251
- label="1. Upload Excel File",
252
- file_types=[".xlsx", ".xls"],
253
- type="filepath"
254
- )
255
- location_col = gr.Textbox(
256
- label="2. Location Column Name",
257
- value="location",
258
- placeholder="e.g., 'city', 'address', or 'place'"
259
- )
260
- year = gr.Slider(
261
- label="3. Historical Period (Year)",
262
- minimum=1700,
263
- maximum=2023,
264
- value=1865,
265
- step=1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  )
267
- btn = gr.Button("Generate Map", variant="primary")
268
 
269
- gr.Markdown("""
270
- ### Tips:
271
- - For best results, make sure location names are clear (e.g., "Paris, France" instead of just "Paris")
272
- - If the map appears gray, try switching the tile layer using the layer control in the top-right
273
- - You can measure distances and view the map in fullscreen using the controls
 
274
  """)
275
 
276
- with gr.Column():
277
- map_display = gr.HTML(
278
- label="Historical Map",
279
- value="<div style='text-align:center;padding:2em;color:gray;border:1px solid #ddd;height:70vh'>"
280
- "Map will appear here after generation</div>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  )
282
- stats = gr.Textbox(label="Map Information")
283
- download = gr.File(label="Download Processed Data")
284
 
285
- btn.click(
286
- process_data,
287
- inputs=[file_input, location_col, year],
288
- outputs=[map_display, stats, download]
289
- )
290
 
291
  if __name__ == "__main__":
292
- app.launch()
 
1
  import gradio as gr
2
+ import json
3
+ import requests
4
+ import os
5
  import pandas as pd
6
  import folium
7
+ from folium.plugins import MeasureControl, Fullscreen, MarkerCluster, Search
8
  from geopy.geocoders import Nominatim
9
+ from geopy.exc import GeocoderTimedOut, GeocoderServiceError
 
 
10
  import time
11
  import random
12
+ from typing import List, Tuple, Optional
13
+ import io
14
+ import tempfile
15
+ import warnings
16
 
17
  warnings.filterwarnings("ignore")
18
 
19
+ # Map Tile Providers with reliable sources
20
# Tile URL templates, named once because several entries below reuse them
# (a layer's primary URL is sometimes another layer's fallback).
_ESRI_IMAGERY_URL = "https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}"
_ESRI_TOPO_URL = "https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}"
_ESRI_TERRAIN_URL = "https://server.arcgisonline.com/ArcGIS/rest/services/World_Terrain_Base/MapServer/tile/{z}/{y}/{x}"
_OSM_URL = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png"
_STADIA_TONER_URL = "https://tiles.stadiamaps.com/tiles/stamen_toner/{z}/{x}/{y}.png"

# Layer display name -> tile configuration.
#   url:      tile URL template for the layer
#   attr:     attribution text for the layer
#   fallback: alternative tile URL template, or None when there is none
MAP_TILES = {
    "Satellite": dict(url=_ESRI_IMAGERY_URL, attr="Esri", fallback=_ESRI_TOPO_URL),
    "Topographic": dict(url=_ESRI_TOPO_URL, attr="Esri", fallback=_OSM_URL),
    "OpenStreetMap": dict(url=_OSM_URL, attr="OpenStreetMap", fallback=None),
    "Terrain": dict(url=_ESRI_TERRAIN_URL, attr="Esri", fallback=None),
    "Toner": dict(url=_STADIA_TONER_URL, attr="Stadia Maps", fallback=None),
}
47
 
48
+ # NuExtract API configuration
49
# Endpoint of the hosted NuExtract-1.5 model that extract_info posts to.
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"

# Only attach an Authorization header when a token is actually configured:
# an empty HF_TOKEN would otherwise produce the malformed header "Bearer ".
_HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
headers = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}
51
+
52
+ # Geocoding Service
53
class GeocodingService:
    """Rate-limited, caching wrapper around geopy's Nominatim geocoder.

    Results (including "not found" results) are memoised in ``self.cache`` so
    that a location string is looked up at most once per instance.
    """

    def __init__(self, user_agent: Optional[str] = None, timeout: int = 10, rate_limit: float = 1.1):
        """Create the geocoder.

        Args:
            user_agent: User-Agent string passed to Nominatim; a randomised
                default is generated when omitted.
            timeout: Per-request timeout passed to Nominatim.
            rate_limit: Minimum number of seconds between two requests.
        """
        if user_agent is None:
            user_agent = f"python_geocoding_script_{random.randint(1000, 9999)}"

        self.geolocator = Nominatim(
            user_agent=user_agent,
            timeout=timeout
        )
        self.rate_limit = rate_limit
        self.last_request = 0
        self.cache = {}  # location string -> (lat, lon) tuple, or None when lookup failed

    def _rate_limit_wait(self):
        """Sleep as long as needed to keep ``rate_limit`` seconds between requests."""
        time_since_last = time.time() - self.last_request
        if time_since_last < self.rate_limit:
            time.sleep(self.rate_limit - time_since_last)
        self.last_request = time.time()

    def geocode_location(self, location: str, max_retries: int = 3) -> Optional[Tuple[float, float]]:
        """Return ``(latitude, longitude)`` for *location*, or ``None``.

        Timeouts and service errors are retried up to *max_retries* times with
        exponential backoff; any other exception gives up immediately. Both
        successful and failed lookups are stored in the cache.
        """
        # Check cache first
        if location in self.cache:
            return self.cache[location]

        for attempt in range(max_retries):
            try:
                self._rate_limit_wait()
                location_data = self.geolocator.geocode(location)
                if location_data:
                    self.cache[location] = (location_data.latitude, location_data.longitude)
                    return self.cache[location]
                # Cache "not found" too so the same string is not queried again.
                self.cache[location] = None
                return None
            except (GeocoderTimedOut, GeocoderServiceError) as e:
                if attempt == max_retries - 1:
                    print(f"Failed to geocode '{location}' after {max_retries} attempts: {e}")
                    self.cache[location] = None
                    return None
                time.sleep(2 ** attempt)  # Exponential backoff
            except Exception as e:
                print(f"Error geocoding '{location}': {e}")
                self.cache[location] = None
                return None
        return None

    def process_locations(self, locations: str) -> List[Optional[Tuple[float, float]]]:
        """Split a cell value into individual locations and geocode each one.

        A comma followed by a single alphabetic word is kept with the preceding
        name (so ``"Paris, France"`` stays one location); other commas separate
        locations. Missing or empty input yields an empty list.

        Args:
            locations: Cell content. Non-string values (e.g. numbers read from
                a spreadsheet) are converted with ``str`` before parsing.

        Returns:
            One geocoding result per parsed location, in order; entries are
            ``None`` where a location could not be geocoded.
        """
        if pd.isna(locations) or not locations:
            return []

        # Local import: the module's import block does not provide ``re``.
        import re

        # Coerce first so numeric cells do not blow up the regex or ``split``.
        text = str(locations)

        pattern = r"([^,]+(?:,\s*[A-Za-z]+)?)"
        location_list = [match.strip() for match in re.findall(pattern, text) if match.strip()]

        # If the regex finds nothing, fall back to simple comma splitting.
        if not location_list:
            location_list = [loc.strip() for loc in text.split(',') if loc.strip()]

        # For debugging
        print(f"Parsed '{locations}' into: {location_list}")

        return [self.geocode_location(loc) for loc in location_list]
125
 
126
+ def create_reliable_map(df, location_col):
127
  """Create a map with multiple layer options and better error handling"""
128
 
129
+ # Set default tile
130
+ default_tile_name = "Satellite"
 
 
 
 
131
 
132
  # Initialize map
133
  m = folium.Map(location=[20, 0], zoom_start=2, control_scale=True)
134
 
135
  # Add all tile layers with the appropriate one active
136
+ for name, config in MAP_TILES.items():
137
  folium.TileLayer(
138
  tiles=config["url"],
139
  attr=f"{config['attr']} ({name})",
 
168
  if col != location_col and not pd.isna(row[col]):
169
  additional_info += f"<br><b>{col}:</b> {row[col]}"
170
 
171
+ # Parse multiple locations if comma-separated
172
+ try:
173
+ locations = [loc.strip() for loc in location.split(',') if loc.strip()]
174
+ if not locations:
175
+ locations = [location]
176
+ except:
177
+ locations = [location]
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
+ # Process each location
180
+ for loc in locations:
181
+ # Geocode location
182
+ point = geocoder.get_coords(loc)
183
+ if point:
184
+ # Create popup content
185
+ popup_content = f"""
186
+ <div style="min-width: 200px; max-width: 300px">
187
+ <h4 style="font-family: 'Source Sans Pro', sans-serif; margin-bottom: 5px;">{loc}</h4>
188
+ <div style="font-family: 'Source Sans Pro', sans-serif; font-size: 14px;">
189
+ {additional_info}
190
+ </div>
191
+ </div>
192
+ """
193
+
194
+ # Add marker
195
+ folium.Marker(
196
+ location=point,
197
+ popup=folium.Popup(popup_content, max_width=300),
198
+ tooltip=loc,
199
+ icon=folium.Icon(color="blue", icon="info-sign")
200
+ ).add_to(marker_cluster)
201
+
202
+ coords.append(point)
203
+ processed_count += 1
204
 
205
  # Layer control
206
  folium.LayerControl(collapsed=False).add_to(m)
 
241
  }, 3000); // Wait 3 seconds for tiles to load
242
  });
243
  </script>
244
+
245
+ <style>
246
+ .leaflet-popup-content {
247
+ font-family: 'Source Sans Pro', sans-serif;
248
+ }
249
+ .leaflet-popup-content h4 {
250
+ font-weight: 600;
251
+ margin-bottom: 8px;
252
+ }
253
+ .leaflet-control-layers {
254
+ font-family: 'Source Sans Pro', sans-serif;
255
+ }
256
+ </style>
257
  """))
258
 
259
+ # Add custom CSS for better fonts
260
+ custom_css = """
261
+ <style>
262
+ @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600&display=swap');
263
+ .leaflet-container {
264
+ font-family: 'Source Sans Pro', sans-serif;
265
+ }
266
+ </style>
267
+ """
268
+ m.get_root().header.add_child(folium.Element(custom_css))
269
+
270
  return m._repr_html_(), processed_count
271
 
272
+ # SafeGeocoder with better error handling
273
class SafeGeocoder:
    """Best-effort Nominatim geocoder with a simple cache and 1-second rate limit.

    ``get_coords`` never raises: every failure is reported as ``None``.
    """

    def __init__(self):
        user_agent = f"location_mapper_v1_{random.randint(1000, 9999)}"
        self.geolocator = Nominatim(user_agent=user_agent, timeout=10)
        self.cache = {}  # Simple cache to avoid repeated requests
        self.last_request = 0

    def _respect_rate_limit(self):
        """Ensure at least 1 second between requests."""
        elapsed = time.time() - self.last_request
        if elapsed < 1.0:
            time.sleep(1.0 - elapsed)
        self.last_request = time.time()

    def get_coords(self, location: str):
        """Return ``(latitude, longitude)`` for *location*, or ``None``.

        ``None`` is returned for missing/NaN/empty/whitespace-only input, for
        locations the geocoder cannot resolve, and when the lookup raises.
        Outcomes of actual lookups (including failures) are cached per
        stripped location string.
        """
        if not location or pd.isna(location):
            return None

        # Convert to string if needed
        location = str(location).strip()

        # A whitespace-only value is empty after stripping: do not spend a
        # request on it or pollute the cache with an "" key.
        if not location:
            return None

        # Check cache first
        if location in self.cache:
            return self.cache[location]

        try:
            self._respect_rate_limit()
            result = self.geolocator.geocode(location)
            if result:
                coords = (result.latitude, result.longitude)
                self.cache[location] = coords
                return coords
            self.cache[location] = None
            return None
        except Exception as e:
            # Deliberately broad: mapping should continue past any single bad lookup.
            print(f"Geocoding error for '{location}': {e}")
            self.cache[location] = None
            return None
312
+
313
def process_excel(file, places_column):
    """Read an Excel file, map the locations of one column, and export the data.

    Args:
        file: ``None``, an object whose ``name`` attribute is a file path,
            raw ``bytes`` of a workbook, or anything ``pd.read_excel`` accepts
            directly (e.g. a path string).
        places_column: Name of the column holding the location text.

    Returns:
        A ``(map_html, stats, processed_path)`` tuple. On any failure the
        first and last items are ``None`` and ``stats`` carries the message.
    """
    # Check if file is None
    if file is None:
        return None, "No file uploaded", None

    try:
        # Handle various file object types that Gradio might provide
        if hasattr(file, 'name'):
            # Gradio file object
            df = pd.read_excel(file.name)
        elif isinstance(file, bytes):
            # Raw bytes
            df = pd.read_excel(io.BytesIO(file))
        else:
            # Assume it's a filepath string
            df = pd.read_excel(file)

        # Print column names for debugging
        print(f"Columns in Excel file: {list(df.columns)}")

        if places_column not in df.columns:
            # Column labels are not guaranteed to be strings (numeric headers
            # are common), so stringify them before joining.
            available = ', '.join(str(col) for col in df.columns)
            return None, f"Column '{places_column}' not found in the Excel file. Available columns: {available}", None

        # Create map
        map_html, processed_count = create_reliable_map(df, places_column)

        # Save processed data. Only reserve the path inside the ``with`` and
        # write after the handle is closed, so pandas can reopen the file.
        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
            processed_path = tmp.name
        df.to_excel(processed_path, index=False)

        # Generate stats
        # NOTE(review): total_locations counts non-empty cells, while
        # processed_count is whatever create_reliable_map reports — confirm
        # both use the same unit before reading the rate as a true percentage.
        total_locations = df[places_column].count()
        success_rate = (processed_count / total_locations * 100) if total_locations > 0 else 0

        stats = f"Found {processed_count} of {total_locations} locations ({success_rate:.1f}%)"

        return map_html, stats, processed_path
    except Exception as e:
        import traceback
        trace = traceback.format_exc()
        print(f"Error processing file: {e}\n{trace}")
        return None, f"Error processing file: {str(e)}", None
356
+
357
def process_and_map(file, column):
    """Gradio callback: build the map for *file* and wrap it for display.

    Returns a ``(html, stats, processed_path)`` tuple. When no file is given,
    when ``process_excel`` yields no map or no output path, or when anything
    raises, the first and last items are ``None`` and the middle one is the
    message to show.
    """
    if file is None:
        return None, "Please upload an Excel file", None

    try:
        rendered_map, summary, output_path = process_excel(file, column)

        # Nothing to display: pass the message from process_excel through.
        if not (rendered_map and output_path):
            return None, summary, None

        # Create responsive container for the map
        framed_map = f"""
            <div style="width:100%; height:70vh; margin:0; padding:0; border:1px solid #e0e0e0; border-radius:8px; overflow:hidden;">
                {rendered_map}
            </div>
            """
        return framed_map, summary, output_path
    except Exception as e:
        import traceback
        trace = traceback.format_exc()
        print(f"Error in process_and_map: {e}\n{trace}")
        return None, f"Error: {str(e)}", None
379
 
380
+ # NuExtract Functions
381
# Seconds to wait for the inference API before giving up on a request.
_EXTRACT_TIMEOUT_SECONDS = 60


def _format_eta(estimated_time):
    """Render the API's ``estimated_time`` as whole seconds, or ``'unknown'``."""
    try:
        return str(int(float(estimated_time)))
    except (TypeError, ValueError):
        return "unknown"


def _json_message(key, value):
    """Wrap a plain string in a one-key JSON document so a JSON viewer can show it."""
    return json.dumps({key: value}, ensure_ascii=False, indent=2)


def _generated_text(result):
    """Pull the generated text out of an API response body.

    Returns ``None`` for an empty list. Dict responses are re-serialised as
    JSON (``str(dict)`` would yield a Python repr that never parses).
    """
    if isinstance(result, list):
        if not result:
            return None
        return result[0].get("generated_text", "")
    if isinstance(result, dict):
        if "generated_text" in result:
            return result["generated_text"]
        return json.dumps(result)
    return str(result)


def extract_info(template, text):
    """Ask the NuExtract model to fill *template* from *text*.

    Args:
        template: JSON template describing the fields to extract.
        text: Source text to extract from.

    Returns:
        A ``(status, payload)`` pair of strings. ``status`` is a short
        human-readable message; ``payload`` is always a valid JSON document
        (the extraction on success, otherwise ``"{}"`` or a one-key object
        carrying the raw message), because it is rendered by a JSON component.
        This function does not raise; failures are reported via ``status``.
    """
    # Do not spend an API call on empty input.
    if not text or not str(text).strip():
        return "❌ Error: No text provided", "{}"

    try:
        # Format prompt according to NuExtract-1.5 requirements
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

        # Call API
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 1000,
                "do_sample": False
            }
        }

        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=_EXTRACT_TIMEOUT_SECONDS
        )

        # If the model is loading, inform the user
        if response.status_code == 503:
            response_json = response.json()
            if "error" in response_json and "loading" in response_json["error"]:
                eta = _format_eta(response_json.get("estimated_time"))
                return (
                    f"⏳ Model is loading (ETA: {eta} seconds)",
                    _json_message("message", "Please try again in a few minutes"),
                )

        if response.status_code != 200:
            return f"❌ API Error: {response.status_code}", _json_message("response", response.text)

        # Process result
        result = response.json()

        # Handle different response formats
        try:
            result_text = _generated_text(result)
            if result_text is None:
                return "❌ Empty result list", "{}"

            # The API may echo the prompt; keep what follows the output marker.
            json_text = result_text
            if "<|output|>" in result_text:
                json_text = result_text.split("<|output|>")[1].strip()

            # Try to parse as JSON
            try:
                extracted = json.loads(json_text)
            except json.JSONDecodeError:
                return "❌ JSON parsing error", _json_message("raw_output", json_text)

            return "✅ Success", json.dumps(extracted, indent=2)
        except Exception as inner_e:
            return f"❌ Error processing result: {str(inner_e)}", "{}"
    except Exception as e:
        return f"❌ Error: {str(e)}", "{}"
442
+
443
+ # Custom CSS for improved styling
444
# Raw CSS handed to gr.Blocks(css=...). It must NOT be wrapped in <style>
# tags: the value is treated as stylesheet text, and a leading "<style>"
# makes the first rules (including the :root variables) invalid.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@300;400;600;700&display=swap');

:root {
    --primary-color: #2c6bb3;
    --secondary-color: #4e8fd1;
    --background-color: #f7f9fc;
    --text-color: #333333;
    --border-color: #e0e0e0;
}

body, .gradio-container {
    font-family: 'Source Sans Pro', sans-serif !important;
    background-color: var(--background-color);
    color: var(--text-color);
}

h1 {
    font-weight: 700 !important;
    color: var(--primary-color) !important;
    font-size: 2.5rem !important;
    margin-bottom: 1rem !important;
}

h2 {
    font-weight: 600 !important;
    color: var(--secondary-color) !important;
    font-size: 1.5rem !important;
    margin-top: 1rem !important;
    margin-bottom: 0.75rem !important;
}

.gradio-button.primary {
    background-color: var(--primary-color) !important;
}

.gradio-button.primary:hover {
    background-color: var(--secondary-color) !important;
}

.gradio-tab-nav button {
    font-family: 'Source Sans Pro', sans-serif !important;
    font-weight: 600 !important;
}

.gradio-tab-nav button.selected {
    color: var(--primary-color) !important;
    border-color: var(--primary-color) !important;
}

.info-box {
    background-color: #e8f4fd;
    border-left: 4px solid var(--primary-color);
    padding: 15px;
    margin: 15px 0;
    border-radius: 4px;
}

.stats-box {
    background-color: white;
    border: 1px solid var(--border-color);
    border-radius: 8px;
    padding: 15px;
    font-size: 1rem;
    line-height: 1.5;
}

.subtle-text {
    font-size: 0.9rem;
    color: #666;
    font-style: italic;
}

.file-upload-box {
    border: 2px dashed var(--border-color);
    border-radius: 8px;
    padding: 20px;
    text-align: center;
    transition: all 0.3s ease;
}

.file-upload-box:hover {
    border-color: var(--primary-color);
}
"""
532
+
533
+ # Create the Gradio interface
534
# Static HTML fragments used by the interface below.
_HEADER_HTML = """
    <div style="text-align: center; margin-bottom: 1rem">
        <h1>Historical Data Analysis Tools</h1>
        <p style="font-size: 1.1rem; margin-top: -10px;">Extract, visualize, and analyze historical data with ease</p>
    </div>
    """

_MAPPING_INFO_HTML = """
    <div class="info-box">
        <h3 style="margin-top: 0;">Map Your Historical Locations</h3>
        <p>Upload an Excel file containing location data to create an interactive map visualization. The tool will geocode your locations and display them on a customizable map.</p>
    </div>
    """

_MAPPING_TIPS_HTML = """
    <div class="subtle-text">
        <p><strong>Tips:</strong></p>
        <ul>
            <li>For best results, use specific location names (e.g., "Paris, France" instead of just "Paris")</li>
            <li>You can switch between different map styles using the controls in the top-right</li>
            <li>Click markers to see location details</li>
        </ul>
    </div>
    """

_MAP_PLACEHOLDER_HTML = """
    <div style="text-align:center; height:70vh; display:flex; align-items:center; justify-content:center;
    background-color:#f5f5f5; border:1px solid #e0e0e0; border-radius:8px;">
        <div>
            <img src="https://cdn-icons-png.flaticon.com/512/854/854878.png" width="100">
            <p style="margin-top:20px; color:#666;">Your map will appear here after processing</p>
        </div>
    </div>
    """

_EXTRACTION_INFO_HTML = """
    <div class="info-box">
        <h3 style="margin-top: 0;">Extract Structured Data from Text</h3>
        <p>Use NuExtract-1.5 to automatically extract structured information from historical texts. Define the JSON template for the data you want to extract.</p>
    </div>
    """

_FOOTER_HTML = """
    <div style="text-align: center; margin-top: 2rem; padding-top: 1rem; border-top: 1px solid #eee; font-size: 0.9rem; color: #666;">
        <p>Made with <span style="color: #e25555;">❤</span> for historical data research</p>
    </div>
    """

# Pre-filled example values for the extraction tab.
_DEFAULT_TEMPLATE = '{"earthquake location": "", "dateline location": ""}'
_SAMPLE_TEXT = "Neues Erdbeben in Japan. Aus Tokio wird berichtet, daß in Yokohama bei einem Erdbeben sechs Personen getötet und 22 verwundet, in Tokio vier getötet und 22 verwundet wurden. In Yokohama seien 6VV Häuser zerstört worden. Die telephonische und telegraphische Verbindung zwischen Tokio und Osaka ist unterbrochen worden. Der Trambahnverkehr in Tokio liegt still. Auch der Eisenbahnverkehr zwischen Tokio und Yokohama ist unterbrochen. In Sngamo, einer Vorstadt von Tokio sind Brände ausgebrochen. Ein Eisenbahnzug stürzte in den Vajugawafluß zwischen Gotemba und Tokio. Sechs Züge wurden umgeworfen. Mit dem letzten japanischen Erdbeben sind seit eineinhalb Jahrtausenden bis heute in Japan 229 größere Erdbeben zu verzeichnen gewesen."

# Create the Gradio interface: a two-tab app (location mapping and text
# extraction), each tab laid out as an input column next to an output column.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(_HEADER_HTML)

    with gr.Tabs():
        with gr.TabItem("📍 Location Mapping"):
            gr.HTML(_MAPPING_INFO_HTML)

            with gr.Row():
                # Inputs: workbook upload, column name, trigger button.
                with gr.Column():
                    excel_file = gr.File(
                        label="Upload Excel File",
                        file_types=[".xlsx", ".xls"],
                        elem_classes="file-upload-box",
                    )
                    places_column = gr.Textbox(
                        label="Location Column Name",
                        value="dateline_locations",
                        placeholder="e.g., 'location', 'city', or 'address'",
                    )
                    process_btn = gr.Button("Generate Map", variant="primary", size="lg")

                    gr.HTML(_MAPPING_TIPS_HTML)

                # Outputs: rendered map, summary line, downloadable workbook.
                with gr.Column():
                    map_output = gr.HTML(
                        label="Interactive Map",
                        value=_MAP_PLACEHOLDER_HTML,
                    )
                    stats_output = gr.Textbox(
                        label="Location Statistics",
                        lines=2,
                        elem_classes="stats-box",
                    )
                    processed_file = gr.File(
                        label="Download Processed Data",
                        visible=True,
                        interactive=False,
                    )

            process_btn.click(
                fn=process_and_map,
                inputs=[excel_file, places_column],
                outputs=[map_output, stats_output, processed_file],
            )

        with gr.TabItem("🔍 Text Extraction"):
            gr.HTML(_EXTRACTION_INFO_HTML)

            with gr.Row():
                # Inputs: JSON template and the text to mine.
                with gr.Column():
                    template = gr.Textbox(
                        label="JSON Template",
                        value=_DEFAULT_TEMPLATE,
                        lines=5,
                        placeholder="Define the fields you want to extract as a JSON template",
                    )
                    text = gr.Textbox(
                        label="Text to Extract From",
                        value=_SAMPLE_TEXT,
                        lines=8,
                        placeholder="Enter the text you want to extract information from",
                    )
                    extract_btn = gr.Button("Extract Information", variant="primary", size="lg")

                # Outputs: status line and the extracted JSON.
                with gr.Column():
                    status = gr.Textbox(
                        label="Status",
                        elem_classes="stats-box",
                    )
                    output = gr.JSON(
                        label="Extracted Data",
                        elem_classes="stats-box",
                    )

            extract_btn.click(
                fn=extract_info,
                inputs=[template, text],
                outputs=[status, output],
            )

    gr.HTML(_FOOTER_HTML)
651
 
652
  if __name__ == "__main__":
653
+ demo.launch()