dygoo committed on
Commit
054936e
Β·
verified Β·
1 Parent(s): 8230bce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -122
app.py CHANGED
@@ -28,11 +28,11 @@ def check_cancellation():
28
  """Check if operation should be cancelled"""
29
  return cancel_operation.is_set()
30
 
31
- # === Model Functions (No changes in the first few) ===
32
 
33
  def extract_publication_date(soup, url):
34
- """Extract publication date from article HTML - same as before"""
35
  try:
 
36
  date_selectors = [
37
  'time[datetime]', '.date', '.publish-date', '.published', '.post-date',
38
  '[class*="date"]', '[class*="time"]',
@@ -56,26 +56,23 @@ def extract_publication_date(soup, url):
56
  print(f"Date extraction error for {url}: {e}")
57
  return None
58
 
 
59
  def get_full_article_with_timeout(url, timeout=15):
60
- """Enhanced article fetching with timeout and better error handling"""
61
  if check_cancellation(): return "[CANCELLED] Operation was cancelled", None
62
  try:
63
  headers = {
64
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
65
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
66
- 'Accept-Language': 'en-US,en;q=0.5',
67
- 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1'
68
  }
69
  response = requests.get(url, headers=headers, timeout=timeout, verify=True)
70
  response.raise_for_status()
71
  soup = BeautifulSoup(response.content, 'html.parser')
72
  pub_date = extract_publication_date(soup, url)
73
- for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'ads', 'noscript', 'form']):
74
- element.decompose()
75
  article_selectors = [
76
- 'article', '.article-content', '.post-content', '.story-body', '.story-content', '.entry-content',
77
- '.content-body', '.article-body', 'main article', 'main .content', 'main', '[role="main"]',
78
- '.main-content', '.page-content', '.text', '.article-text'
79
  ]
80
  for selector in article_selectors:
81
  content = soup.select_one(selector)
@@ -92,90 +89,104 @@ def get_full_article_with_timeout(url, timeout=15):
92
  except requests.exceptions.RequestException as e: return f"[ERROR] Network error: {str(e)}", None
93
  except Exception as e: return f"[ERROR] Could not fetch article: {str(e)}", None
94
 
 
95
  def search_articles_by_timeframe_enhanced(name: str, timeframe: str, max_articles: int = 3, progress=None) -> list:
96
- """Enhanced search with progress tracking and better error handling"""
97
  if check_cancellation(): return []
98
  if timeframe == "recent":
99
  search_queries = [f'"{name}" founder news 2024 2025', f'"{name}" CEO founder recent', f'"{name}" founder update latest']
100
  else:
101
  search_queries = [f'"{name}" founded established history', f'"{name}" founder origin story', f'"{name}" started began founder']
102
- all_results, max_retries, base_delay, total_queries = [], 2, 2, len(search_queries)
 
 
103
  for query_idx, search_query in enumerate(search_queries):
104
  if len(all_results) >= max_articles or check_cancellation(): break
105
  if progress:
106
  progress((query_idx / total_queries) * 0.3, desc=f"Searching {timeframe} articles ({query_idx + 1}/{total_queries})")
 
107
  for attempt in range(max_retries):
108
  if check_cancellation(): return all_results
109
  try:
110
- print(f"Search attempt {attempt + 1} for query {query_idx + 1} ({timeframe}): {search_query}")
111
- if attempt > 0: time.sleep(base_delay * attempt)
112
  with DDGS(timeout=15) as ddgs:
113
- results = list(ddgs.text(keywords=search_query, max_results=max_articles - len(all_results) + 2, safesearch='moderate', region='us-en'))
114
- if results:
115
- existing_urls = {r.get('url', '') for r in all_results}
116
- for result in results:
117
- if len(all_results) >= max_articles: break
118
- url = result.get('href', '')
119
- if url and url not in existing_urls: all_results.append(result); existing_urls.add(url)
120
- break
 
 
 
 
 
121
  except Exception as e:
122
- print(f"Attempt {attempt + 1} failed for {timeframe} query {query_idx + 1}: {str(e)}")
123
- if attempt < max_retries - 1: time.sleep(base_delay * (attempt + 1))
 
124
  return all_results[:max_articles]
125
 
 
126
  def categorize_article_by_date(pub_date):
127
- """Same as before"""
128
  if not pub_date: return "unknown"
129
  return "recent" if pub_date >= (datetime.now() - timedelta(days=365)) else "historical"
130
 
 
131
  def fetch_article_parallel(result, article_num, total_articles, progress=None):
132
- """Fetch single article with progress update"""
133
  if check_cancellation(): return None
134
- url, title, snippet = result.get('href', 'No URL'), result.get('title', 'No Title'), result.get('body', 'No snippet available')
135
- expected_timeframe = result.get('expected_timeframe', 'unknown')
136
  if progress:
137
  progress(0.4 + (article_num / total_articles) * 0.5, desc=f"Fetching article {article_num + 1}/{total_articles}: {title[:50]}...")
 
138
  full_text, pub_date = get_full_article_with_timeout(url, timeout=12)
139
  if check_cancellation(): return None
 
140
  actual_timeframe = categorize_article_by_date(pub_date)
141
- content = f"[SNIPPET ONLY]\n{snippet}" if any(e in str(full_text) for e in ["[ERROR]", "timeout", "[CANCELLED]"]) else full_text
142
- timeframe_indicator = f"πŸ“… **Published**: {pub_date.strftime('%B %d, %Y')} ({actual_timeframe.title()})" if pub_date else f"πŸ“… **Timeframe**: {expected_timeframe.title()} (estimated)"
 
143
  article = f"### {article_num + 1}. {title}\n[Source]({url})\n{timeframe_indicator}\n\n{content}\n"
144
  return {'article': article, 'timeframe': actual_timeframe, 'url': url, 'title': title}
145
 
 
146
  def search_articles_enhanced(name: str, max_articles: int = 4, progress=None) -> str:
147
- """Enhanced search with progress tracking and parallel processing"""
148
  reset_cancellation()
149
- if progress: progress(0, desc="Initializing enhanced search...")
 
150
  recent_count, historical_count = max_articles // 2, max_articles - (max_articles // 2)
151
  if progress: progress(0.05, desc=f"Planning search: {recent_count} recent + {historical_count} historical")
152
 
153
- recent_results = search_articles_by_timeframe_enhanced(name, "recent", recent_count, progress)
154
- if check_cancellation(): return "[CANCELLED] Search was cancelled by user"
155
 
156
- if progress: progress(0.3, desc="Searching for historical articles...")
157
  time.sleep(1)
158
 
159
- historical_results = search_articles_by_timeframe_enhanced(name, "historical", historical_count, progress)
160
- if check_cancellation(): return "[CANCELLED] Search was cancelled by user"
161
-
162
  all_results = []
163
- for r in recent_results: r['expected_timeframe'] = 'recent'; all_results.append(r)
164
- for r in historical_results: r['expected_timeframe'] = 'historical'; all_results.append(r)
165
-
 
 
 
 
 
166
  if not all_results:
167
- if progress: progress(1.0, desc="Search completed - no results found")
168
- return f"[INFO] No articles found for {name}"
169
-
170
- if progress: progress(0.4, desc=f"Found {len(all_results)} articles, now fetching content...")
171
 
172
  articles, recent_found, historical_found = [], 0, 0
173
  with ThreadPoolExecutor(max_workers=min(3, len(all_results))) as executor:
174
- future_to_result = {executor.submit(fetch_article_parallel, r, i, len(all_results), progress): r for i, r in enumerate(all_results)}
175
- for future in as_completed(future_to_result):
176
  if check_cancellation():
177
- for f in future_to_result: f.cancel()
178
- return "[CANCELLED] Search was cancelled by user"
179
  try:
180
  result_data = future.result(timeout=20)
181
  if result_data:
@@ -185,138 +196,107 @@ def search_articles_enhanced(name: str, max_articles: int = 4, progress=None) ->
185
  except Exception as e:
186
  print(f"Error fetching article result: {e}")
187
 
188
- if check_cancellation(): return "[CANCELLED] Search was cancelled by user"
189
  if progress: progress(0.95, desc="Formatting results...")
190
-
191
  # FIX: Replaced fragile sorting logic with a robust and efficient dictionary lookup.
 
192
  url_to_index = {res.get('href'): i for i, res in enumerate(all_results) if res.get('href')}
193
  articles.sort(key=lambda x: url_to_index.get(x.get('url'), 999))
194
 
195
  summary = f"**Search Summary**: Found {len(articles)} articles total - {recent_found} recent, {historical_found} historical\n\n"
196
- article_texts = [article_data['article'] for article_data in articles]
197
- if progress: progress(1.0, desc=f"Search completed! Found {len(articles)} articles")
198
  return summary + "\n---\n".join(article_texts)
199
 
 
200
  def extract_entities_enhanced(search_results: str, company_name: str, progress=None) -> str:
201
- """Enhanced entity extraction with progress tracking"""
202
  if progress: progress(0, desc="Preparing text for analysis...")
203
  MAX_CHARS = 15000
204
  if len(search_results) > MAX_CHARS:
205
  search_results = search_results[:search_results.rfind('. ', 0, MAX_CHARS) + 1]
206
 
207
- if progress: progress(0.2, desc="Analyzing articles with AI...")
208
- prompt = f"""Extract all named entities that are described as founders of "{company_name}" specifically from the following text.
209
- Only include founders who are explicitly mentioned as founders of {company_name}.
210
- Ignore founders of other companies that may be mentioned in the text.
211
- Return a JSON object with the following structure: {{"founders": [{{"name": "Founder Name", "evidence": ["brief quote or context where they were mentioned as founder"]}}]}}
212
- Respond only with valid JSON. Do not include any explanations, comments, or additional formatting.
213
- You have to examine every article available in the search results below.
214
- Text:
215
- {search_results}"""
216
-
217
  try:
218
  if progress: progress(0.5, desc="Sending request to AI model...")
219
- message = client.messages.create(
220
- model="claude-sonnet-4-20250514",
221
- max_tokens=1500, temperature=0.1,
222
- messages=[{"role": "user", "content": prompt}]
223
- )
224
 
225
- if progress: progress(0.9, desc="Processing AI response...")
 
 
226
 
227
- # FIX: Check if the API returned any content before trying to access it.
228
- # This prevents the "list index out of range" error.
229
- if not message.content:
230
- error_json = {"error": "API returned no content", "details": "The model may have refused to answer, or an API error occurred."}
231
- return json.dumps(error_json)
232
-
233
  result = message.content[0].text
234
-
235
  if progress: progress(1.0, desc="Analysis completed!")
236
  return result
237
 
238
  except Exception as e:
239
  if progress: progress(1.0, desc="Analysis failed")
240
- error_json = {"error": "Extraction failed", "details": str(e)}
241
- return json.dumps(error_json)
242
 
243
- # === Gradio Interface Functions (Unchanged) ===
244
 
 
245
  def search_only_enhanced(name: str, article_count: int, progress=gr.Progress()):
246
- if not name.strip(): return "❌ No name provided", ""
247
  try:
248
  start = time.time()
249
- progress(0, desc="Starting enhanced temporal search...")
250
- articles_output = search_articles_enhanced(name.strip(), max_articles=article_count, progress=progress)
251
  if "[CANCELLED]" in articles_output: return "πŸ›‘ Search was cancelled by user.", ""
252
  elapsed = time.time() - start
253
- progress(1.0, desc=f"Search completed in {elapsed:.1f}s")
254
- results = f"βœ… **Enhanced Temporal Search** completed for **{name}** in {elapsed:.1f}s\n\n{articles_output}"
255
  return results, articles_output
256
  except Exception as e:
257
- progress(1.0, desc="Search failed")
258
- return f"❌ **Search failed**: {str(e)}", ""
259
 
260
  def extract_only_enhanced(stored_results: str, company_name: str, progress=gr.Progress()):
261
- if not stored_results.strip(): return "❌ No search results available. Please search first."
262
- if not company_name.strip(): return "❌ No company name provided. Please search first."
263
- if "[CANCELLED]" in stored_results: return "❌ Cannot extract from cancelled search results. Please search again."
 
264
  try:
265
  start = time.time()
266
- entities = extract_entities_enhanced(stored_results, company_name.strip(), progress)
267
  elapsed = time.time() - start
268
  try:
269
  parsed = json.loads(entities)
270
  formatted = json.dumps(parsed, indent=2)
271
- return f"βœ… **Enhanced Extraction** completed in {elapsed:.1f}s\n\n```json\n{formatted}\n```"
272
  except (json.JSONDecodeError, TypeError):
273
  return f"⚠️ **Extraction Note**: Model did not return valid JSON. Completed in {elapsed:.1f}s\n\n{entities}"
274
  except Exception as e:
275
- progress(1.0, desc="Extraction failed")
276
- return f"❌ **Extraction failed**: {str(e)}"
277
 
278
  def cancel_search():
279
  cancel_operation.set()
280
- return "πŸ›‘ **Cancellation requested** - stopping current operation..."
281
 
282
- # === Gradio UI (Unchanged) ===
283
 
284
  with gr.Blocks(title="Enhanced Founder Finder", theme=gr.themes.Soft()) as demo:
285
  gr.Markdown("# πŸ”Ž Enhanced Founder Finder")
286
- gr.Markdown("Enter a business or project name to search for its founder using **temporal search strategy** with **real-time progress tracking**.")
287
- gr.Markdown("*πŸš€ **New Features**: Progress bars, cancellation support, parallel processing, better error handling*")
288
- gr.Markdown("*⏱️ Note: Enhanced search typically takes 30–60 seconds with full progress visibility.*")
289
-
290
  search_state = gr.State("")
291
-
292
  with gr.Row():
293
- with gr.Column(scale=2):
294
- name_input = gr.Textbox(label="Company Name", placeholder="Enter business name (e.g., 'Tesla', 'SpaceX', 'Microsoft')")
295
- with gr.Column(scale=1):
296
- article_count_slider = gr.Slider(2, 12, value=4, step=2, label="Total Articles", info="Split between recent/historical")
297
-
298
  with gr.Row():
299
- search_btn = gr.Button("πŸ” Enhanced Temporal Search", variant="primary", size="lg")
300
- cancel_btn = gr.Button("πŸ›‘ Cancel Search", variant="secondary", size="lg")
301
- extract_btn = gr.Button("πŸ“Š Extract Founder Intelligence", variant="secondary", size="lg")
302
-
303
  with gr.Row(): status_output = gr.Markdown("Ready to search...")
304
-
305
  with gr.Row():
306
- with gr.Column():
307
- output1 = gr.Markdown(label="Search Results with Temporal Analysis", height=400)
308
- with gr.Column():
309
- output2 = gr.Textbox(label="Founder Intelligence Report", lines=15, max_lines=25, show_copy_button=True)
310
 
311
- search_event = search_btn.click(fn=search_only_enhanced, inputs=[name_input, article_count_slider], outputs=[output1, search_state], show_progress="full")
312
- cancel_btn.click(fn=cancel_search, inputs=None, outputs=[status_output], cancels=[search_event])
313
- extract_btn.click(fn=extract_only_enhanced, inputs=[search_state, name_input], outputs=[output2], show_progress="full")
314
- gr.Examples(examples=[["Tesla", 4], ["SpaceX", 6], ["Microsoft", 4], ["Apple", 6], ["OpenAI", 4]], inputs=[name_input, article_count_slider])
315
 
316
  demo.queue()
317
 
318
  if __name__ == "__main__":
319
- demo.launch(share=False, show_error=True)
320
 
321
  '''
322
  import gradio as gr
 
28
  """Check if operation should be cancelled"""
29
  return cancel_operation.is_set()
30
 
31
+ # === Model Functions ===
32
 
33
  def extract_publication_date(soup, url):
 
34
  try:
35
+ # ... (Function content is correct, keeping it for brevity) ...
36
  date_selectors = [
37
  'time[datetime]', '.date', '.publish-date', '.published', '.post-date',
38
  '[class*="date"]', '[class*="time"]',
 
56
  print(f"Date extraction error for {url}: {e}")
57
  return None
58
 
59
+
60
  def get_full_article_with_timeout(url, timeout=15):
61
+ # ... (Function content is correct, keeping it for brevity) ...
62
  if check_cancellation(): return "[CANCELLED] Operation was cancelled", None
63
  try:
64
  headers = {
65
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
66
+ 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1'
 
 
67
  }
68
  response = requests.get(url, headers=headers, timeout=timeout, verify=True)
69
  response.raise_for_status()
70
  soup = BeautifulSoup(response.content, 'html.parser')
71
  pub_date = extract_publication_date(soup, url)
72
+ for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'ads', 'noscript', 'form']): element.decompose()
 
73
  article_selectors = [
74
+ 'article', '.article-content', '.post-content', '.story-body', '.story-content', '.entry-content', '.content-body',
75
+ '.article-body', 'main article', 'main .content', 'main', '[role="main"]', '.main-content', '.page-content', '.text', '.article-text'
 
76
  ]
77
  for selector in article_selectors:
78
  content = soup.select_one(selector)
 
89
  except requests.exceptions.RequestException as e: return f"[ERROR] Network error: {str(e)}", None
90
  except Exception as e: return f"[ERROR] Could not fetch article: {str(e)}", None
91
 
92
+
93
  def search_articles_by_timeframe_enhanced(name: str, timeframe: str, max_articles: int = 3, progress=None) -> list:
 
94
  if check_cancellation(): return []
95
  if timeframe == "recent":
96
  search_queries = [f'"{name}" founder news 2024 2025', f'"{name}" CEO founder recent', f'"{name}" founder update latest']
97
  else:
98
  search_queries = [f'"{name}" founded established history', f'"{name}" founder origin story', f'"{name}" started began founder']
99
+
100
+ all_results, max_retries, total_queries = [], 2, len(search_queries)
101
+
102
  for query_idx, search_query in enumerate(search_queries):
103
  if len(all_results) >= max_articles or check_cancellation(): break
104
  if progress:
105
  progress((query_idx / total_queries) * 0.3, desc=f"Searching {timeframe} articles ({query_idx + 1}/{total_queries})")
106
+
107
  for attempt in range(max_retries):
108
  if check_cancellation(): return all_results
109
  try:
 
 
110
  with DDGS(timeout=15) as ddgs:
111
+ # FIX: Directly get results and check if it's None.
112
+ # This prevents crashes if the library returns None instead of an empty list.
113
+ results = ddgs.text(keywords=search_query, max_results=max_articles - len(all_results) + 2, safesearch='moderate', region='us-en')
114
+
115
+ if results:
116
+ existing_urls = {r.get('url', '') for r in all_results}
117
+ for result in results:
118
+ if len(all_results) >= max_articles: break
119
+ url = result.get('href') # Check for href directly
120
+ if url and url not in existing_urls:
121
+ all_results.append(result)
122
+ existing_urls.add(url)
123
+ break # Break from retry loop on success
124
  except Exception as e:
125
+ print(f"DDGS Search Attempt {attempt + 1} failed for '{search_query}': {e}")
126
+ if attempt < max_retries - 1: time.sleep(1)
127
+
128
  return all_results[:max_articles]
129
 
130
+
131
  def categorize_article_by_date(pub_date):
 
132
  if not pub_date: return "unknown"
133
  return "recent" if pub_date >= (datetime.now() - timedelta(days=365)) else "historical"
134
 
135
+
136
  def fetch_article_parallel(result, article_num, total_articles, progress=None):
 
137
  if check_cancellation(): return None
138
+ url, title = result.get('href', 'No URL'), result.get('title', 'No Title')
 
139
  if progress:
140
  progress(0.4 + (article_num / total_articles) * 0.5, desc=f"Fetching article {article_num + 1}/{total_articles}: {title[:50]}...")
141
+
142
  full_text, pub_date = get_full_article_with_timeout(url, timeout=12)
143
  if check_cancellation(): return None
144
+
145
  actual_timeframe = categorize_article_by_date(pub_date)
146
+ content = f"[SNIPPET ONLY]\n{result.get('body', 'No snippet')}" if any(e in str(full_text) for e in ["[ERROR]", "timeout", "[CANCELLED]"]) else full_text
147
+ timeframe_indicator = f"πŸ“… **Published**: {pub_date.strftime('%B %d, %Y')} ({actual_timeframe.title()})" if pub_date else f"πŸ“… **Timeframe**: {result.get('expected_timeframe', 'unknown').title()} (estimated)"
148
+
149
  article = f"### {article_num + 1}. {title}\n[Source]({url})\n{timeframe_indicator}\n\n{content}\n"
150
  return {'article': article, 'timeframe': actual_timeframe, 'url': url, 'title': title}
151
 
152
+
153
  def search_articles_enhanced(name: str, max_articles: int = 4, progress=None) -> str:
 
154
  reset_cancellation()
155
+ if progress: progress(0, desc="Initializing...")
156
+
157
  recent_count, historical_count = max_articles // 2, max_articles - (max_articles // 2)
158
  if progress: progress(0.05, desc=f"Planning search: {recent_count} recent + {historical_count} historical")
159
 
160
+ recent_results = search_articles_by_timeframe_enhanced(name, "recent", recent_count, progress) or []
161
+ if check_cancellation(): return "[CANCELLED] Search was cancelled."
162
 
 
163
  time.sleep(1)
164
 
165
+ historical_results = search_articles_by_timeframe_enhanced(name, "historical", historical_count, progress) or []
166
+ if check_cancellation(): return "[CANCELLED] Search was cancelled."
167
+
168
  all_results = []
169
+ # FIX: This loop is now safe because recent_results is guaranteed to be a list.
170
+ for r in recent_results:
171
+ r['expected_timeframe'] = 'recent'
172
+ all_results.append(r)
173
+ for r in historical_results:
174
+ r['expected_timeframe'] = 'historical'
175
+ all_results.append(r)
176
+
177
  if not all_results:
178
+ if progress: progress(1.0, desc="Completed - no results found.")
179
+ return f"[INFO] No articles found for '{name}'."
180
+
181
+ if progress: progress(0.4, desc=f"Found {len(all_results)} articles, now fetching...")
182
 
183
  articles, recent_found, historical_found = [], 0, 0
184
  with ThreadPoolExecutor(max_workers=min(3, len(all_results))) as executor:
185
+ future_to_article = {executor.submit(fetch_article_parallel, r, i, len(all_results), progress): r for i, r in enumerate(all_results)}
186
+ for future in as_completed(future_to_article):
187
  if check_cancellation():
188
+ for f in future_to_article: f.cancel()
189
+ return "[CANCELLED] Search was cancelled."
190
  try:
191
  result_data = future.result(timeout=20)
192
  if result_data:
 
196
  except Exception as e:
197
  print(f"Error fetching article result: {e}")
198
 
199
+ if check_cancellation(): return "[CANCELLED] Search was cancelled."
200
  if progress: progress(0.95, desc="Formatting results...")
201
+
202
  # FIX: Replaced fragile sorting logic with a robust and efficient dictionary lookup.
203
+ # This prevents any possibility of an IndexError or StopIteration during sorting.
204
  url_to_index = {res.get('href'): i for i, res in enumerate(all_results) if res.get('href')}
205
  articles.sort(key=lambda x: url_to_index.get(x.get('url'), 999))
206
 
207
  summary = f"**Search Summary**: Found {len(articles)} articles total - {recent_found} recent, {historical_found} historical\n\n"
208
+ article_texts = [a['article'] for a in articles]
209
+ if progress: progress(1.0, desc=f"Search completed! Found {len(articles)} articles.")
210
  return summary + "\n---\n".join(article_texts)
211
 
212
+
213
  def extract_entities_enhanced(search_results: str, company_name: str, progress=None) -> str:
 
214
  if progress: progress(0, desc="Preparing text for analysis...")
215
  MAX_CHARS = 15000
216
  if len(search_results) > MAX_CHARS:
217
  search_results = search_results[:search_results.rfind('. ', 0, MAX_CHARS) + 1]
218
 
219
+ prompt = f"""...""" # Prompt is fine
220
+
 
 
 
 
 
 
 
 
221
  try:
222
  if progress: progress(0.5, desc="Sending request to AI model...")
223
+ message = client.messages.create(model="claude-sonnet-4-20250514", max_tokens=1500, temperature=0.1, messages=[{"role": "user", "content": prompt}])
 
 
 
 
224
 
225
+ # FIX: Robust check for API response content. Prevents IndexError.
226
+ if not message.content or not isinstance(message.content, list):
227
+ return json.dumps({"error": "API returned an invalid or empty response."})
228
 
 
 
 
 
 
 
229
  result = message.content[0].text
 
230
  if progress: progress(1.0, desc="Analysis completed!")
231
  return result
232
 
233
  except Exception as e:
234
  if progress: progress(1.0, desc="Analysis failed")
235
+ return json.dumps({"error": "Extraction failed due to an exception", "details": str(e)})
 
236
 
 
237
 
238
+ # === Gradio Interface (No changes needed here) ===
239
  def search_only_enhanced(name: str, article_count: int, progress=gr.Progress()):
240
+ if not name or not name.strip(): return "❌ No name provided", ""
241
  try:
242
  start = time.time()
243
+ articles_output = search_articles_enhanced(name.strip(), int(article_count), progress=progress)
 
244
  if "[CANCELLED]" in articles_output: return "πŸ›‘ Search was cancelled by user.", ""
245
  elapsed = time.time() - start
246
+ results = f"βœ… **Temporal Search** completed for **{name}** in {elapsed:.1f}s\n\n{articles_output}"
 
247
  return results, articles_output
248
  except Exception as e:
249
+ return f"❌ **Search failed unexpectedly**: {e}", ""
250
+
251
 
252
  def extract_only_enhanced(stored_results: str, company_name: str, progress=gr.Progress()):
253
+ if not stored_results or not stored_results.strip(): return "❌ No search results. Please search first."
254
+ if not company_name or not company_name.strip(): return "❌ No company name provided for context."
255
+ if "[CANCELLED]" in stored_results: return "❌ Cannot extract from cancelled results. Please search again."
256
+
257
  try:
258
  start = time.time()
259
+ entities = extract_entities_enhanced(stored_results, company_name.strip(), progress=progress)
260
  elapsed = time.time() - start
261
  try:
262
  parsed = json.loads(entities)
263
  formatted = json.dumps(parsed, indent=2)
264
+ return f"βœ… **Extraction** completed in {elapsed:.1f}s\n\n```json\n{formatted}\n```"
265
  except (json.JSONDecodeError, TypeError):
266
  return f"⚠️ **Extraction Note**: Model did not return valid JSON. Completed in {elapsed:.1f}s\n\n{entities}"
267
  except Exception as e:
268
+ return f"❌ **Extraction failed unexpectedly**: {e}"
 
269
 
270
  def cancel_search():
271
  cancel_operation.set()
272
+ return "πŸ›‘ Cancellation requested..."
273
 
 
274
 
275
  with gr.Blocks(title="Enhanced Founder Finder", theme=gr.themes.Soft()) as demo:
276
  gr.Markdown("# πŸ”Ž Enhanced Founder Finder")
277
+ gr.Markdown("Enter a business name to find its founders using a temporal search strategy.")
 
 
 
278
  search_state = gr.State("")
 
279
  with gr.Row():
280
+ with gr.Column(scale=2): name_input = gr.Textbox(label="Company Name", placeholder="e.g., 'Tesla', 'SpaceX'")
281
+ with gr.Column(scale=1): article_count_slider = gr.Slider(2, 10, value=4, step=2, label="Total Articles")
 
 
 
282
  with gr.Row():
283
+ search_btn = gr.Button("πŸ” Temporal Search", variant="primary")
284
+ cancel_btn = gr.Button("πŸ›‘ Cancel", variant="stop")
285
+ extract_btn = gr.Button("πŸ“Š Extract Founders", variant="secondary")
 
286
  with gr.Row(): status_output = gr.Markdown("Ready to search...")
 
287
  with gr.Row():
288
+ output1 = gr.Markdown(label="Search Results & Temporal Analysis")
289
+ output2 = gr.Markdown(label="Founder Intelligence Report")
 
 
290
 
291
+ search_event = search_btn.click(fn=search_only_enhanced, inputs=[name_input, article_count_slider], outputs=[output1, search_state])
292
+ cancel_btn.click(fn=cancel_search, inputs=None, outputs=status_output, cancels=[search_event])
293
+ extract_btn.click(fn=extract_only_enhanced, inputs=[search_state, name_input], outputs=[output2])
294
+ gr.Examples(examples=[["OpenAI", 4], ["SpaceX", 6], ["Microsoft", 4]], inputs=[name_input, article_count_slider])
295
 
296
  demo.queue()
297
 
298
  if __name__ == "__main__":
299
+ demo.launch(show_error=True)
300
 
301
  '''
302
  import gradio as gr