oberbics committed on
Commit
6e9dc4d
·
verified ·
1 Parent(s): dd3a96b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +204 -43
app.py CHANGED
@@ -2,22 +2,153 @@ import gradio as gr
2
  import json
3
  import requests
4
  import os
 
 
 
 
 
 
 
 
 
5
 
6
- # Use the Hugging Face Inference API instead of loading the model
7
  API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
8
  headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}
9
 
10
- def test_function(template, text):
11
- print(f"Test function called with template: {template[:30]} and text: {text[:30]}")
12
- return "Button clicked successfully", "Function was called"
 
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def extract_info(template, text):
15
  try:
16
  # Format prompt according to NuExtract-1.5 requirements
17
  prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
18
- print(f"Processing with prompt: {prompt[:100]}...")
19
 
20
- # Call API instead of using local model
21
  payload = {
22
  "inputs": prompt,
23
  "parameters": {
@@ -26,11 +157,9 @@ def extract_info(template, text):
26
  }
27
  }
28
 
29
- print("Calling API...")
30
  response = requests.post(API_URL, headers=headers, json=payload)
31
 
32
  if response.status_code != 200:
33
- print(f"API error: {response.status_code}, {response.text}")
34
  return f"❌ API Error: {response.status_code}", response.text
35
 
36
  # Process result
@@ -49,56 +178,88 @@ def extract_info(template, text):
49
  json_text = result_text
50
 
51
  # Try to parse as JSON
52
- print("Parsing JSON...")
53
  try:
54
  extracted = json.loads(json_text)
55
  formatted = json.dumps(extracted, indent=2)
56
  except json.JSONDecodeError:
57
- print(f"JSON parsing failed. Raw output: {json_text[:100]}...")
58
  return "❌ JSON parsing error", json_text
59
 
60
  return "βœ… Success", formatted
61
  except Exception as e:
62
- print(f"Error in extraction: {str(e)}")
63
  return f"❌ Error: {str(e)}", "{}"
64
 
65
- # Create a simple interface
66
  with gr.Blocks() as demo:
67
- gr.Markdown("# NuExtract-1.5 Extraction Tool")
68
 
69
- with gr.Row():
70
- with gr.Column():
71
- template = gr.Textbox(
72
- label="JSON Template",
73
- value='{"name": "", "email": ""}',
74
- lines=5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  )
76
- text = gr.Textbox(
77
- label="Text to Extract From",
78
- value="Contact: John Smith ([email protected])",
79
- lines=8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  )
81
 
82
- # Two buttons for testing
83
- test_btn = gr.Button("Test Click")
84
- extract_btn = gr.Button("Extract Information", variant="primary")
85
-
86
- with gr.Column():
87
- status = gr.Textbox(label="Status")
88
- output = gr.Textbox(label="Output", lines=10)
89
-
90
- # Connect both buttons
91
- test_btn.click(
92
- fn=test_function,
93
- inputs=[template, text],
94
- outputs=[status, output]
95
- )
96
-
97
- extract_btn.click(
98
- fn=extract_info,
99
- inputs=[template, text],
100
- outputs=[status, output]
101
- )
102
 
103
  if __name__ == "__main__":
104
  demo.launch()
 
2
  import json
3
  import requests
4
  import os
5
+ import pandas as pd
6
+ import folium
7
+ from geopy.geocoders import Nominatim
8
+ from geopy.exc import GeocoderTimedOut, GeocoderServiceError
9
+ import time
10
+ import random
11
+ from typing import List, Tuple, Optional
12
+ import tempfile
13
+ import io
14
 
15
+ # NuExtract API configuration
16
  API_URL = "https://api-inference.huggingface.co/models/numind/NuExtract-1.5"
17
  headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}
18
 
19
+ # Geocoding Service
20
class GeocodingService:
    """Rate-limited wrapper around geopy's Nominatim geocoder.

    Every lookup is spaced at least ``rate_limit`` seconds after the previous
    one, and timeouts / service errors are retried with exponential backoff.
    """

    def __init__(self, user_agent: Optional[str] = None, timeout: int = 10, rate_limit: float = 1.1):
        """Create the underlying Nominatim client.

        Args:
            user_agent: User-agent string passed to Nominatim. When omitted, a
                name with a random four-digit suffix is generated.
            timeout: Per-request timeout passed to Nominatim.
            rate_limit: Minimum number of seconds between two requests.
        """
        if user_agent is None:
            user_agent = f"python_geocoding_script_{random.randint(1000, 9999)}"

        self.geolocator = Nominatim(
            user_agent=user_agent,
            timeout=timeout,
        )
        self.rate_limit = rate_limit
        # Monotonic timestamp of the most recent request; None until the
        # first request so that the first call never waits.
        self.last_request: Optional[float] = None

    def _rate_limit_wait(self) -> None:
        """Sleep as needed so consecutive requests are ``rate_limit`` apart."""
        # A monotonic clock is used so that wall-clock adjustments can never
        # produce a negative elapsed time (and therefore an oversized sleep).
        if self.last_request is not None:
            elapsed = time.monotonic() - self.last_request
            if elapsed < self.rate_limit:
                time.sleep(self.rate_limit - elapsed)
        self.last_request = time.monotonic()

    def geocode_location(self, location: str, max_retries: int = 3) -> Optional[Tuple[float, float]]:
        """Resolve one place name to ``(latitude, longitude)``.

        Args:
            location: Free-text place name to look up.
            max_retries: Maximum number of attempts when the geocoder times
                out or reports a service error.

        Returns:
            The coordinate pair, or ``None`` when the place is not found, all
            attempts fail, or an unexpected error occurs.
        """
        for attempt in range(max_retries):
            try:
                self._rate_limit_wait()
                location_data = self.geolocator.geocode(location)
                if location_data:
                    return (location_data.latitude, location_data.longitude)
                return None
            except (GeocoderTimedOut, GeocoderServiceError) as e:
                if attempt == max_retries - 1:
                    print(f"Failed to geocode '{location}' after {max_retries} attempts: {e}")
                    return None
                time.sleep(2 ** attempt)  # Exponential backoff
            except Exception as e:
                # Best-effort: any other failure is reported and treated as
                # "not found" so one bad entry does not abort a whole batch.
                print(f"Error geocoding '{location}': {e}")
                return None
        return None

    def process_locations(self, locations: str) -> List[Optional[Tuple[float, float]]]:
        """Geocode a comma-separated list of place names.

        Args:
            locations: Comma-separated place names. Missing values (``None``,
                NaN) and empty values yield an empty list. Non-string values
                are converted with ``str()`` before splitting.

        Returns:
            One entry per comma-separated item, in order: a coordinate pair,
            or ``None`` when the item is blank or could not be geocoded.
            Blank items keep their position so the result stays aligned with
            ``locations.split(',')``.
        """
        if pd.isna(locations) or not locations:
            return []

        names = [part.strip() for part in str(locations).split(',')]
        # Blank items (e.g. from "a,,b" or a trailing comma) are not worth a
        # network request; report them as unresolved directly.
        return [self.geocode_location(name) if name else None for name in names]
63
+
64
+ # Mapping Functions
65
def create_location_map(df: pd.DataFrame,
                        coordinates_col: str = 'coordinates',
                        places_col: str = 'places',
                        title_col: Optional[str] = None) -> folium.Map:
    """Build a folium map with one marker per geocoded place.

    Args:
        df: Table with one row per record.
        coordinates_col: Column holding, per row, a list of ``(lat, lon)``
            pairs or ``None`` entries.
        places_col: Column holding the comma-separated place names; the i-th
            name is paired with the i-th coordinate entry.
        title_col: Optional column whose value is appended to each popup.

    Returns:
        The map, zoomed to fit all markers when at least one was added.
    """
    # Local import keeps this block self-contained.
    from html import escape

    # Initialize the map
    m = folium.Map(location=[0, 0], zoom_start=2)
    all_coords = []

    # Process each row in the DataFrame
    for _, row in df.iterrows():
        coordinates = row[coordinates_col]

        # Skip rows without a usable coordinate list (empty, NaN, None).
        if not isinstance(coordinates, (list, tuple)) or not coordinates:
            continue

        raw_places = row[places_col]
        # str() guards against non-text cells (e.g. numeric values).
        places = str(raw_places).split(',') if pd.notna(raw_places) else []
        title = row[title_col] if title_col and pd.notna(row[title_col]) else None

        # Add individual markers for each location
        for coord, place in zip(coordinates, places):
            if coord is None:  # Skip places that could not be geocoded
                continue

            lat, lon = coord
            # Spreadsheet text is untrusted and ends up as raw HTML inside
            # the popup/tooltip, so it is escaped before interpolation.
            place_name = escape(place.strip())

            # Create popup content
            popup_content = f"<b>{place_name}</b>"
            if title:
                popup_content += f"<br>{escape(str(title))}"

            # Add marker to the map
            folium.Marker(
                location=[lat, lon],
                popup=folium.Popup(popup_content, max_width=300),
                tooltip=place_name,
            ).add_to(m)

            all_coords.append([lat, lon])

    # If we have coordinates, fit the map bounds to include all points
    if all_coords:
        m.fit_bounds(all_coords)

    return m
108
+
109
+ # Processing Functions
110
def process_excel(file, places_column):
    """Geocode the places in an uploaded Excel file and render them on a map.

    Args:
        file: The uploaded workbook. Accepted as raw bytes, a filesystem
            path, or an object exposing the path as ``.name``.
        places_column: Name of the column holding comma-separated place names.

    Returns:
        A 3-tuple ``(map_html_path, stats_text, processed_xlsx_path)``. When
        ``places_column`` is not in the workbook the result is
        ``(None, error_message, None)``.
    """
    # Normalise the upload into something pandas can read.
    if isinstance(file, (bytes, bytearray)):
        source = io.BytesIO(file)
    elif isinstance(file, (str, os.PathLike)):
        source = file
    else:
        # Temp-file wrappers expose the on-disk path as .name; anything else
        # is handed to pandas unchanged (e.g. an open binary stream).
        source = getattr(file, "name", file)

    # Read the Excel file
    df = pd.read_excel(source)

    if places_column not in df.columns:
        # Column labels are not guaranteed to be strings.
        available = ', '.join(str(col) for col in df.columns)
        # Always return three values: the caller unpacks exactly three.
        return None, f"Column '{places_column}' not found in the Excel file. Available columns: {available}", None

    # Initialize the geocoding service
    geocoder = GeocodingService(user_agent="gradio_map_visualization_app")

    # Process locations and add coordinates
    df['coordinates'] = df[places_column].apply(geocoder.process_locations)

    # Create the map
    map_obj = create_location_map(df, coordinates_col='coordinates', places_col=places_column)

    # Save the map to its own temporary HTML file so that concurrent
    # requests cannot overwrite each other's output.
    with tempfile.NamedTemporaryFile(prefix="temp_map_", suffix=".html", delete=False) as handle:
        temp_map_path = handle.name
    map_obj.save(temp_map_path)

    # Save the processed DataFrame to its own temporary Excel file
    with tempfile.NamedTemporaryFile(prefix="processed_data_", suffix=".xlsx", delete=False) as handle:
        processed_file_path = handle.name
    df.to_excel(processed_file_path, index=False)

    # Statistics
    # NOTE: "Total locations" is the number of rows, not of individual places.
    total_locations = len(df)
    successful_geocodes = sum(
        1 for coords in df['coordinates'] for c in coords if c is not None
    )
    failed_geocodes = sum(
        1 for coords in df['coordinates'] for c in coords if c is None
    )

    stats = f"Total locations: {total_locations}\n"
    stats += f"Successfully geocoded: {successful_geocodes}\n"
    stats += f"Failed to geocode: {failed_geocodes}"

    return temp_map_path, stats, processed_file_path
144
+
145
+ # NuExtract Functions
146
  def extract_info(template, text):
147
  try:
148
  # Format prompt according to NuExtract-1.5 requirements
149
  prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
 
150
 
151
+ # Call API
152
  payload = {
153
  "inputs": prompt,
154
  "parameters": {
 
157
  }
158
  }
159
 
 
160
  response = requests.post(API_URL, headers=headers, json=payload)
161
 
162
  if response.status_code != 200:
 
163
  return f"❌ API Error: {response.status_code}", response.text
164
 
165
  # Process result
 
178
  json_text = result_text
179
 
180
  # Try to parse as JSON
 
181
  try:
182
  extracted = json.loads(json_text)
183
  formatted = json.dumps(extracted, indent=2)
184
  except json.JSONDecodeError:
 
185
  return "❌ JSON parsing error", json_text
186
 
187
  return "βœ… Success", formatted
188
  except Exception as e:
 
189
  return f"❌ Error: {str(e)}", "{}"
190
 
191
+ # Create the Gradio interface
192
with gr.Blocks() as demo:
    gr.Markdown("# Historical Data Analysis Tools")

    with gr.Tabs():
        # Tab 1: structured extraction through the NuExtract API
        with gr.TabItem("Text Extraction"):
            gr.Markdown("## NuExtract-1.5 Structured Data Extraction")

            with gr.Row():
                with gr.Column():
                    template = gr.Textbox(
                        label="JSON Template",
                        value='{"name": "", "email": ""}',
                        lines=5
                    )
                    # NOTE(review): the sample address in the default text
                    # looks redacted ("[email protected]") — confirm the
                    # intended example value.
                    text = gr.Textbox(
                        label="Text to Extract From",
                        value="Contact: John Smith ([email protected])",
                        lines=8
                    )
                    extract_btn = gr.Button("Extract Information", variant="primary")

                with gr.Column():
                    status = gr.Textbox(label="Status")
                    output = gr.Textbox(label="Output", lines=10)

            extract_btn.click(
                fn=extract_info,
                inputs=[template, text],
                outputs=[status, output]
            )

        # Tab 2: geocode an uploaded spreadsheet and show the result on a map
        with gr.TabItem("Geocoding & Mapping"):
            gr.Markdown("## Location Mapping Tool")

            with gr.Row():
                with gr.Column():
                    excel_file = gr.File(label="Upload Excel File")
                    places_column = gr.Textbox(label="Places Column Name", value="places")
                    process_btn = gr.Button("Process and Map", variant="primary")

                with gr.Column():
                    map_output = gr.HTML(label="Map Visualization")
                    stats_output = gr.Textbox(label="Statistics", lines=3)
                    download_btn = gr.Button("Download Processed Data")
                    processed_file = gr.File(label="Processed Data", visible=False)

            def process_and_map(file, column):
                """Run the geocoding pipeline for the UI.

                Returns ``(map_html, stats_text, processed_file_path)``; the
                first and last items are ``None`` when nothing was produced.
                """
                if file is None:
                    return None, "Please upload an Excel file", None

                try:
                    result = process_excel(file, column)
                except Exception as e:
                    # Surface the failure in the statistics box, in the same
                    # spirit as extract_info's error reporting.
                    return None, f"Error processing file: {e}", None

                # process_excel may return only (None, message) on its
                # column-not-found path; pad so unpacking cannot fail.
                map_path, stats, processed_path = (tuple(result) + (None, None))[:3]

                if not map_path:
                    return None, stats, None

                # The map file is written by folium; read it back as UTF-8
                # instead of relying on the platform default encoding.
                with open(map_path, "r", encoding="utf-8") as f:
                    map_html = f.read()

                return map_html, stats, processed_path

            process_btn.click(
                fn=process_and_map,
                inputs=[excel_file, places_column],
                outputs=[map_output, stats_output, processed_file]
            )

            # NOTE(review): this handler echoes the hidden file component back
            # to itself, so it does not appear to expose a download to the
            # user — confirm the intended behaviour.
            download_btn.click(
                fn=lambda x: x,
                inputs=[processed_file],
                outputs=[processed_file]
            )

if __name__ == "__main__":
    demo.launch()