import gradio as gr
import json
import requests
import os
import pandas as pd
import folium
from folium.plugins import MeasureControl, Fullscreen, MarkerCluster
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
import random
from typing import List, Tuple, Optional
import io
import tempfile
import warnings

warnings.filterwarnings("ignore")

# Map tile providers with reliable sources
MAP_TILES = {
    "GreenMap": {
        "url": "https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}",
        "attr": "Esri"
    }
}

# NuExtract API configuration
API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}


class SafeGeocoder:
    """Nominatim geocoder with a result cache and a one-request-per-second rate limit."""

    def __init__(self):
        user_agent = f"location_mapper_v1_{random.randint(1000, 9999)}"
        self.geolocator = Nominatim(user_agent=user_agent, timeout=10)
        self.cache = {}
        self.last_request = 0

    def _respect_rate_limit(self):
        current_time = time.time()
        elapsed = current_time - self.last_request
        if elapsed < 1.0:
            time.sleep(1.0 - elapsed)
        self.last_request = time.time()

    def get_coords(self, location: str):
        if not location or pd.isna(location):
            return None

        location = str(location).strip()
        if location in self.cache:
            return self.cache[location]

        try:
            self._respect_rate_limit()
            result = self.geolocator.geocode(location)
            if result:
                coords = (result.latitude, result.longitude)
                self.cache[location] = coords
                return coords
            self.cache[location] = None
            return None
        except Exception as e:
            print(f"Geocoding error for '{location}': {e}")
            self.cache[location] = None
            return None


def extract_info(template, text):
    """Send a NuExtract prompt to the Hugging Face Inference API and return (status, JSON text)."""
    try:
        prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 1000,
                "do_sample": False
            }
        }
        response = requests.post(API_URL, headers=headers, json=payload)

        if response.status_code == 503:
            response_json = response.json()
            if "error" in response_json and "loading" in response_json["error"]:
                estimated_time = response_json.get("estimated_time")
                try:
                    eta = f"{int(float(estimated_time))}"
                except (TypeError, ValueError):
                    eta = "unknown"
                return f"⏳ Model is loading (ETA: {eta} seconds)", "Please try again in a few minutes"

        if response.status_code != 200:
            return f"❌ API Error: {response.status_code}", response.text

        result = response.json()
        if isinstance(result, list) and len(result) > 0:
            result_text = result[0].get("generated_text", "")
        else:
            result_text = str(result)

        # The model echoes the prompt; keep only the part after the output marker.
        if "<|output|>" in result_text:
            json_text = result_text.split("<|output|>")[1].strip()
        else:
            json_text = result_text

        try:
            extracted = json.loads(json_text)
            formatted = json.dumps(extracted, indent=2)
        except json.JSONDecodeError:
            return "❌ JSON parsing error", json_text

        return "✅ Success", formatted
    except Exception as e:
        return f"❌ Error: {str(e)}", "{}"


def create_map(df, location_col):
    """Geocode every location in the given column and return (map HTML, number of markers placed)."""
    m = folium.Map(
        location=[20, 0],
        zoom_start=2,
        control_scale=True
    )
    folium.TileLayer(
        tiles=MAP_TILES["GreenMap"]["url"],
        attr=MAP_TILES["GreenMap"]["attr"],
        name="GreenMap",
        overlay=False,
        control=False
    ).add_to(m)
    Fullscreen().add_to(m)
    MeasureControl(position='topright', primary_length_unit='kilometers').add_to(m)

    geocoder = SafeGeocoder()
    coords = []
    marker_cluster = MarkerCluster(name="Locations").add_to(m)
    processed_count = 0

    for idx, row in df.iterrows():
        if pd.isna(row[location_col]):
            continue

        location = str(row[location_col]).strip()

        # Collect the remaining columns of the row for the marker popup.
        additional_info = ""
        for col in df.columns:
            if col != location_col and not pd.isna(row[col]):
                additional_info += f"<br>{col}: {row[col]}"

        # A cell may contain several comma-separated places; geocode each one.
        try:
            locations = [loc.strip() for loc in location.split(',') if loc.strip()]
            if not locations:
                locations = [location]
        except Exception:
            locations = [location]

        for loc in locations:
            point = geocoder.get_coords(loc)
            if point:
                popup_content = f"""
                <div>
                    <b>{loc}</b>
                    {additional_info}
                </div>
                """
                folium.Marker(
                    location=point,
                    popup=folium.Popup(popup_content, max_width=300),
                    tooltip=loc,
                    icon=folium.Icon(color="blue", icon="info-sign")
                ).add_to(marker_cluster)
                coords.append(point)
                processed_count += 1

    if coords:
        m.fit_bounds(coords)

    # Placeholder for custom CSS injected into the map header (left empty).
    custom_css = """
    """
    m.get_root().header.add_child(folium.Element(custom_css))

    return m._repr_html_(), processed_count


def process_excel(file, places_column):
    """Read an uploaded Excel file, build the map, and return (map HTML, stats text, processed file path)."""
    if file is None:
        return None, "No file uploaded", None

    try:
        if hasattr(file, 'name'):
            df = pd.read_excel(file.name)
        elif isinstance(file, bytes):
            df = pd.read_excel(io.BytesIO(file))
        else:
            df = pd.read_excel(file)

        print(f"Columns in Excel file: {list(df.columns)}")

        if places_column not in df.columns:
            return None, f"Column '{places_column}' not found in the Excel file. Available columns: {', '.join(df.columns)}", None

        map_html, processed_count = create_map(df, places_column)

        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
            processed_path = tmp.name
        df.to_excel(processed_path, index=False)

        total_locations = df[places_column].count()
        success_rate = (processed_count / total_locations * 100) if total_locations > 0 else 0
        stats = f"Found {processed_count} of {total_locations} locations ({success_rate:.1f}%)"

        return map_html, stats, processed_path
    except Exception as e:
        import traceback
        trace = traceback.format_exc()
        print(f"Error processing file: {e}\n{trace}")
        return None, f"Error processing file: {str(e)}", None


# Placeholder for custom CSS applied to the Gradio interface (left empty).
custom_css = """
"""

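# --- Optional headless sketch -------------------------------------------------
# A minimal, hedged example of exercising the helpers above without the Gradio UI.
# It assumes network access and, for extract_info, a valid HF_TOKEN. The
# RUN_HEADLESS_DEMO environment variable and the spreadsheet name
# "sample_locations.xlsx" are illustrative assumptions, not part of the app itself.
if os.environ.get("RUN_HEADLESS_DEMO"):
    demo_template = '{"earthquake location": "", "dateline location": ""}'
    demo_text = "Neues Erdbeben in Japan. Aus Tokio wird berichtet..."
    demo_status, demo_json = extract_info(demo_template, demo_text)
    print(demo_status)
    print(demo_json)

    demo_geocoder = SafeGeocoder()
    print("Tokyo ->", demo_geocoder.get_coords("Tokyo"))

    # process_excel also accepts a plain file path (handled by the final branch
    # of its file-type checks); the file name here is hypothetical.
    _, demo_stats, _ = process_excel("sample_locations.xlsx", "dateline_locations")
    print(demo_stats)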
with gr.Blocks(css=custom_css, title="Historical Data Analysis") as demo:
    gr.HTML("""
    <div>
        <h1>Historical Data Analysis Tools</h1>
        <p>Extract, visualize, and analyze historical data with ease</p>
    </div>
    """)

    with gr.Tabs() as tabs:
        with gr.TabItem("🔍 Text Extraction"):

            gr.HTML("""
            <div>
                <h2>Extract Structured Data from Text</h2>
                <p>Use NuExtract-1.5 to automatically extract structured information from historical texts.</p>
            </div>
            """)

            with gr.Row():
                with gr.Column():
                    template = gr.Textbox(
                        label="JSON Template",
                        value='{"earthquake location": "", "dateline location": ""}',
                        lines=5
                    )
                    text = gr.Textbox(
                        label="Text to Extract From",
                        value="Neues Erdbeben in Japan. Aus Tokio wird berichtet...",
                        lines=8
                    )
                    extract_btn = gr.Button("Extract Information", variant="primary")
                with gr.Column():
                    status = gr.Textbox(label="Status")
                    output = gr.Textbox(label="Output", lines=10)

            extract_btn.click(
                fn=extract_info,
                inputs=[template, text],
                outputs=[status, output]
            )

        with gr.TabItem("📍 Location Mapping"):

            gr.HTML("""
            <div>
                <h2>Map Your Historical Locations</h2>
                <p>Upload an Excel file containing location data to create an interactive map visualization.</p>
            </div>
            """)

            with gr.Row():
                with gr.Column():
                    excel_file = gr.File(
                        label="Upload Excel File",
                        file_types=[".xlsx", ".xls"],
                        elem_classes="file-upload-box"
                    )
                    places_column = gr.Textbox(
                        label="Location Column Name",
                        value="dateline_locations",
                        placeholder="Enter the column containing locations"
                    )
                    process_btn = gr.Button("Generate Map", variant="primary")
                with gr.Column():

                    map_output = gr.HTML(
                        label="Interactive Map",
                        value="""
                        <div>
                            <p>Your map will appear here after processing</p>
                        </div>
                        """,
                        elem_id="map-container"
                    )
                    stats_output = gr.Textbox(
                        label="Location Statistics",
                        lines=2,
                        elem_classes="stats-box"
                    )
                    processed_file = gr.File(
                        label="Download Processed Data",
                        visible=True,
                        interactive=False
                    )

            def process_and_map(file, column):
                if file is None:
                    return None, "Please upload an Excel file", None
                try:
                    map_html, stats, processed_path = process_excel(file, column)
                    if map_html and processed_path:
                        # Wrap the raw map HTML so it scales to the output container.
                        responsive_html = f"""
                        <div style="width: 100%;">
                            {map_html}
                        </div>
                        """
                        return responsive_html, stats, processed_path
                    else:
                        return None, stats, None
                except Exception as e:
                    import traceback
                    trace = traceback.format_exc()
                    print(f"Error in process_and_map: {e}\n{trace}")
                    return None, f"Error: {str(e)}", None

            process_btn.click(
                fn=process_and_map,
                inputs=[excel_file, places_column],
                outputs=[map_output, stats_output, processed_file]
            )

    gr.HTML("""
    <div>
        <p>Made with ❤️ for historical research</p>
    </div>
    """)

if __name__ == "__main__":
    demo.launch()