Spaces:
Running
on
Zero
Running
on
Zero
import gradio as gr | |
import json | |
import requests | |
import os | |
import pandas as pd | |
import folium | |
from folium.plugins import MeasureControl, Fullscreen, MarkerCluster | |
from geopy.geocoders import Nominatim | |
from geopy.exc import GeocoderTimedOut, GeocoderServiceError | |
import time | |
import random | |
from typing import List, Tuple, Optional | |
import io | |
import tempfile | |
import warnings | |
warnings.filterwarnings("ignore") | |
# Tile layers available to the map, keyed by display name. Each entry holds
# the tile URL template and the attribution string passed to folium.
_TONER_LAYER = dict(
    url="https://tiles.stadiamaps.com/tiles/stamen_toner/{z}/{x}/{y}.png",
    attr="Stadia Maps",
)
MAP_TILES = dict(Toner=_TONER_LAYER)

# NuExtract API configuration: model endpoint plus the bearer-token header.
# The token is read from the HF_TOKEN environment variable ("" when unset).
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
_hf_token = os.environ.get("HF_TOKEN", "")
headers = {"Authorization": "Bearer " + _hf_token}
class SafeGeocoder:
    """Nominatim geocoder wrapper with per-instance caching and request pacing.

    Successful lookups and definitive "no match" answers are cached under the
    stripped query string. Lookups that fail with an exception are *not*
    cached, so a transient error (timeout, service hiccup) does not mark a
    place as unresolvable for the rest of the instance's lifetime.
    """

    # Minimum number of seconds enforced between two geocoding requests.
    MIN_REQUEST_INTERVAL = 1.0

    def __init__(self):
        # A random numeric suffix gives each instance its own user agent.
        user_agent = f"location_mapper_v1_{random.randint(1000, 9999)}"
        self.geolocator = Nominatim(user_agent=user_agent, timeout=10)
        self.cache = {}
        self.last_request = 0

    def _respect_rate_limit(self):
        """Sleep as needed so consecutive requests are spaced apart."""
        elapsed = time.time() - self.last_request
        if elapsed < self.MIN_REQUEST_INTERVAL:
            time.sleep(self.MIN_REQUEST_INTERVAL - elapsed)
        self.last_request = time.time()

    def get_coords(self, location: str):
        """Return ``(latitude, longitude)`` for *location*, or ``None``.

        ``None`` is returned for empty/NaN/whitespace-only input, when the
        geocoder finds no match, and when the lookup raises. Errors are
        printed and swallowed so one bad row cannot abort a whole batch.
        """
        if not location or pd.isna(location):
            return None
        query = str(location).strip()
        if not query:
            # Whitespace-only input: nothing to look up, skip the request.
            return None
        if query in self.cache:
            return self.cache[query]
        try:
            self._respect_rate_limit()
            match = self.geolocator.geocode(query)
            coords = (match.latitude, match.longitude) if match else None
        except Exception as e:
            # Deliberately not cached: the failure may be transient.
            print(f"Geocoding error for '{query}': {e}")
            return None
        self.cache[query] = coords
        return coords
# NuExtract Functions | |
def extract_info(template, text):
    """Extract structured data from *text* with the NuExtract-1.5 model.

    Args:
        template: JSON template (as a string) naming the fields to extract.
        text: Source text to extract from.

    Returns:
        A ``(status, output)`` tuple of strings. On success ``output`` is the
        extracted JSON re-serialized with two-space indentation; otherwise it
        carries the raw response text or a hint for the user. Never raises:
        any unexpected exception is reported through the status string.
    """
    try:
        # Format prompt according to NuExtract-1.5 requirements
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 1000,
                "do_sample": False
            }
        }
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)

        # If the model is loading, inform the user
        if response.status_code == 503:
            try:
                response_json = response.json()
            except ValueError:
                # Non-JSON 503 body: fall through to the generic API error.
                response_json = None
            if (
                isinstance(response_json, dict)
                and "error" in response_json
                and "loading" in response_json["error"]
            ):
                # estimated_time may be absent or non-numeric; show "unknown"
                # rather than failing while trying to convert it.
                try:
                    eta = int(float(response_json.get("estimated_time")))
                except (TypeError, ValueError, OverflowError):
                    eta = "unknown"
                return f"⏳ Model is loading (ETA: {eta} seconds)", "Please try again in a few minutes"

        if response.status_code != 200:
            return f"❌ API Error: {response.status_code}", response.text

        # Handle different response formats
        result = response.json()
        if isinstance(result, list) and len(result) > 0:
            result_text = result[0].get("generated_text", "")
        else:
            result_text = str(result)

        # Split at output marker if present
        if "<|output|>" in result_text:
            json_text = result_text.split("<|output|>")[1].strip()
        else:
            json_text = result_text

        # Try to parse as JSON
        try:
            extracted = json.loads(json_text)
        except json.JSONDecodeError:
            return "❌ JSON parsing error", json_text
        return "✅ Success", json.dumps(extracted, indent=2)
    except Exception as e:
        return f"❌ Error: {str(e)}", "{}"
def create_map(df, location_col):
    """Build a folium map with one marker per geocoded location in *df*.

    Each non-null cell of ``location_col`` is split on commas and every
    non-empty part is geocoded separately. The other non-null columns of the
    row are listed in the marker popup.

    Args:
        df: DataFrame holding the location column and any extra columns.
        location_col: Name of the column containing location strings.

    Returns:
        A ``(html, count)`` tuple: the map's ``_repr_html_()`` output and the
        number of markers that were placed.
    """
    # Local import so the block brings its own dependency into scope.
    from html import escape

    # Initialize map with Toner style
    m = folium.Map(location=[20, 0], zoom_start=2, control_scale=True)
    # Add the single tile layer without controls
    folium.TileLayer(
        tiles=MAP_TILES["Toner"]["url"],
        attr=MAP_TILES["Toner"]["attr"],
        name="Toner",
        overlay=False,
        control=False
    ).add_to(m)
    # Add plugins
    Fullscreen().add_to(m)
    MeasureControl(position='topright', primary_length_unit='kilometers').add_to(m)

    # Process markers
    geocoder = SafeGeocoder()
    coords = []
    marker_cluster = MarkerCluster(name="Locations").add_to(m)
    processed_count = 0
    for _, row in df.iterrows():
        if pd.isna(row[location_col]):
            continue
        location = str(row[location_col]).strip()

        # Popup and tooltip text is rendered as raw HTML, so spreadsheet
        # content is escaped to keep stray markup from breaking the map or
        # injecting script.
        additional_info = "".join(
            f"<br><b>{escape(str(col))}:</b> {escape(str(row[col]))}"
            for col in df.columns
            if col != location_col and not pd.isna(row[col])
        )

        # A cell may list several places separated by commas; fall back to
        # the whole cell when splitting leaves nothing.
        locations = [part.strip() for part in location.split(',') if part.strip()]
        if not locations:
            locations = [location]

        for loc in locations:
            point = geocoder.get_coords(loc)
            if not point:
                continue
            safe_loc = escape(loc)
            popup_content = f"""
            <div style="min-width: 200px; max-width: 300px">
            <h4 style="font-family: 'Source Sans Pro', sans-serif; margin-bottom: 5px;">{safe_loc}</h4>
            <div style="font-family: 'Source Sans Pro', sans-serif; font-size: 14px;">
            {additional_info}
            </div>
            </div>
            """
            folium.Marker(
                location=point,
                popup=folium.Popup(popup_content, max_width=300),
                tooltip=safe_loc,
                icon=folium.Icon(color="blue", icon="info-sign")
            ).add_to(marker_cluster)
            coords.append(point)
            processed_count += 1

    # Set bounds
    if coords:
        m.fit_bounds(coords)

    # Add custom font CSS
    custom_css = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600&display=swap');
    .leaflet-container {
    font-family: 'Source Sans Pro', sans-serif;
    }
    </style>
    """
    m.get_root().header.add_child(folium.Element(custom_css))
    return m._repr_html_(), processed_count
def process_excel(file, places_column):
    """Read an uploaded Excel file, map its locations and report statistics.

    Args:
        file: Uploaded file: an object with a ``name`` path attribute, raw
            ``bytes``, or anything else ``pandas.read_excel`` accepts.
        places_column: Name of the column holding location strings.

    Returns:
        A ``(map_html, message, path)`` tuple. On success ``message`` holds
        the geocoding statistics and ``path`` points to a temporary ``.xlsx``
        copy of the data. On any failure ``map_html`` and ``path`` are
        ``None`` and ``message`` describes the problem. Never raises.
    """
    if file is None:
        return None, "No file uploaded", None
    try:
        # Handle file
        if hasattr(file, 'name'):
            df = pd.read_excel(file.name)
        elif isinstance(file, bytes):
            df = pd.read_excel(io.BytesIO(file))
        else:
            df = pd.read_excel(file)
        print(f"Columns in Excel file: {list(df.columns)}")

        if places_column not in df.columns:
            # Column labels are not always strings (e.g. numeric headers), so
            # convert them before joining to avoid a TypeError.
            available = ', '.join(str(col) for col in df.columns)
            return None, f"Column '{places_column}' not found in the Excel file. Available columns: {available}", None

        # Create map
        map_html, processed_count = create_map(df, places_column)

        # Save processed data. Only the name is reserved inside the context
        # manager; the handle is closed again before pandas writes to the path.
        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
            processed_path = tmp.name
        df.to_excel(processed_path, index=False)

        # Stats
        total_locations = df[places_column].count()
        success_rate = (processed_count / total_locations * 100) if total_locations > 0 else 0
        stats = f"Found {processed_count} of {total_locations} locations ({success_rate:.1f}%)"
        return map_html, stats, processed_path
    except Exception as e:
        import traceback
        trace = traceback.format_exc()
        print(f"Error processing file: {e}\n{trace}")
        return None, f"Error processing file: {str(e)}", None
# Create separate interfaces for each tab to avoid conflicts | |
# CSS for improved styling | |
# Stylesheet handed to ``gr.Blocks(css=...)``. That parameter takes raw CSS,
# so the rules must not be wrapped in <style> tags: with the wrapper, the
# leading @import and the first rule become part of an invalid rule and are
# dropped by the browser.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@300;400;600;700&display=swap');
body, .gradio-container {
    font-family: 'Source Sans Pro', sans-serif !important;
    color: #333333;
}
h1 {
    font-weight: 700 !important;
    color: #2c6bb3 !important;
    font-size: 2.5rem !important;
    margin-bottom: 1rem !important;
}
h2 {
    font-weight: 600 !important;
    color: #4e8fd1 !important;
    font-size: 1.5rem !important;
    margin-top: 1rem !important;
    margin-bottom: 0.75rem !important;
}
.gradio-button.primary {
    background-color: #ff7518 !important;
}
.info-box {
    background-color: #e8f4fd;
    border-left: 4px solid #2c6bb3;
    padding: 15px;
    margin: 15px 0;
    border-radius: 4px;
}
.file-upload-box {
    border: 2px dashed #e0e0e0;
    border-radius: 8px;
    padding: 20px;
    text-align: center;
    transition: all 0.3s ease;
}
"""
# Text Extraction tab as a separate Blocks interface | |
# Default field values shown when the extraction tab first loads.
_DEFAULT_TEMPLATE = '{"earthquake location": "", "dateline location": ""}'
_SAMPLE_TEXT = "Neues Erdbeben in Japan. Aus Tokio wird berichtet, daß in Yokohama bei einem Erdbeben sechs Personen getötet und 22 verwundet, in Tokio vier getötet und 22 verwundet wurden. In Yokohama seien 6VV Häuser zerstört worden. Die telephonische und telegraphische Verbindung zwischen Tokio und Osaka ist unterbrochen worden. Der Trambahnverkehr in Tokio liegt still. Auch der Eisenbahnverkehr zwischen Tokio und Yokohama ist unterbrochen. In Sngamo, einer Vorstadt von Tokio sind Brände ausgebrochen. Ein Eisenbahnzug stürzte in den Vajugawafluß zwischen Gotemba und Tokio. Sechs Züge wurden umgeworfen. Mit dem letzten japanischen Erdbeben sind seit eineinhalb Jahrtausenden bis heute in Japan 229 größere Erdbeben zu verzeichnen gewesen."

# Text-extraction UI: template and text inputs on the left, status and
# extracted output on the right, wired to extract_info by the button.
with gr.Blocks(css=custom_css) as extraction_interface:
    gr.HTML("""
    <div class="info-box">
    <h3 style="margin-top: 0;">Extract Structured Data from Text</h3>
    <p>Use NuExtract-1.5 to automatically extract structured information from historical texts. Define the JSON template for the data you want to extract.</p>
    </div>
    """)
    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            template = gr.Textbox(label="JSON Template", value=_DEFAULT_TEMPLATE, lines=5)
            text = gr.Textbox(label="Text to Extract From", value=_SAMPLE_TEXT, lines=8)
            extract_btn = gr.Button("Extract Information", variant="primary")
        # Right column: the two values returned by extract_info.
        with gr.Column():
            status = gr.Textbox(label="Status")
            output = gr.Textbox(label="Output", lines=10)
    extract_btn.click(extract_info, [template, text], [status, output])
# Mapping tab as a separate Blocks interface | |
# Mapping UI: Excel upload and column name on the left; map, statistics and
# the downloadable copy on the right, wired to process_and_map by the button.
with gr.Blocks(css=custom_css) as mapping_interface:
    gr.HTML("""
    <div class="info-box">
    <h3 style="margin-top: 0;">Map Your Historical Locations</h3>
    <p>Upload an Excel file containing location data to create an interactive map visualization. The tool will geocode your locations and display them on a map.</p>
    </div>
    """)

    def process_and_map(file, column):
        """Run process_excel and wrap the resulting map in a sized container.

        Returns ``(map_html, stats, path)`` for the three output components.
        When no file is given or processing fails, the map and path slots are
        ``None`` and the stats slot carries the message.
        """
        if file is None:
            return None, "Please upload an Excel file", None
        try:
            map_html, stats, processed_path = process_excel(file, column)
        except Exception as e:
            import traceback
            trace = traceback.format_exc()
            print(f"Error in process_and_map: {e}\n{trace}")
            return None, f"Error: {str(e)}", None
        if not (map_html and processed_path):
            return None, stats, None
        # Create responsive container for the map
        responsive_html = f"""
        <div style="width:100%; height:70vh; margin:0; padding:0; border:1px solid #e0e0e0; border-radius:8px; overflow:hidden;">
        {map_html}
        </div>
        """
        return responsive_html, stats, processed_path

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            excel_file = gr.File(
                label="Upload Excel File",
                file_types=[".xlsx", ".xls"],
                elem_classes="file-upload-box",
            )
            places_column = gr.Textbox(
                label="Location Column Name",
                value="dateline_locations",
                placeholder="e.g., 'dateline_locations', 'earthquake_locations', or 'place_of_distribution'",
            )
            process_btn = gr.Button("Generate Map", variant="primary")
        # Right column: placeholder/map, statistics and download link.
        with gr.Column():
            map_output = gr.HTML(
                label="Interactive Map",
                value="""
                <div style="text-align:center; height:70vh; display:flex; align-items:center; justify-content:center;
                background-color:#f5f5f5; border:1px solid #e0e0e0; border-radius:8px;">
                <div>
                <img src="https://cdn-icons-png.flaticon.com/512/854/854878.png" width="100">
                <p style="margin-top:20px; color:#666;">Your map will appear here after processing</p>
                </div>
                </div>
                """,
            )
            stats_output = gr.Textbox(label="Location Statistics", lines=2)
            processed_file = gr.File(
                label="Download Processed Data",
                visible=True,
                interactive=False,
            )

    process_btn.click(
        process_and_map,
        [excel_file, places_column],
        [map_output, stats_output, processed_file],
    )
# Main app with proper tab separation | |
# Static banner and footer markup for the combined app.
_HEADER_HTML = """
<div style="text-align: center; margin-bottom: 1rem">
<h1>Historical Data Analysis Tools</h1>
<p style="font-size: 1.1rem; margin-top: -10px;">Extract, visualize, and analyze historical data with ease</p>
</div>
"""
_FOOTER_HTML = """
<div style="text-align: center; margin-top: 2rem; padding-top: 1rem; border-top: 1px solid #eee; font-size: 0.9rem; color: #666;">
<p>Made with <span style="color: #e25555;">❤</span> for historical data research</p>
</div>
"""

# Top-level app: banner, one tab per pre-built interface, then the footer.
with gr.Blocks(css=custom_css, title="Historical Data Analysis") as demo:
    gr.HTML(_HEADER_HTML)
    with gr.Tabs() as tabs:
        # Each tab renders an interface defined earlier instead of
        # duplicating its components here.
        with gr.TabItem("🔍 Text Extraction"):
            extraction_interface.render()
        with gr.TabItem("📍 Location Mapping"):
            mapping_interface.render()
    gr.HTML(_FOOTER_HTML)

if __name__ == "__main__":
    demo.launch()