Update app.py
app.py
CHANGED
@@ -21,18 +21,16 @@ client = anthropic.Anthropic(
-    """Reset the cancellation flag"""
-    """Check if operation should be cancelled"""
-# === Model Functions ===
-        #

@@ -56,20 +54,21 @@ def extract_publication_date(soup, url):
-    #
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)
-        for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'ads', 'noscript', 'form']):

@@ -77,221 +76,237 @@ def get_full_article_with_timeout(url, timeout=15):
-                text_parts = [p.get_text(strip=True) for p in paragraphs if len(p.get_text(strip=True)) > 30]
-        body_text = re.sub(r'\n{3,}', '\n\n', body_text)
-    except requests.exceptions.
-    except Exception as e:
-    all_results, max_retries, total_queries = [], 2, len(search_queries)
-            progress((query_idx / total_queries) * 0.3, desc=f"Searching {timeframe}
-                break  # Break from retry loop on success
-            except Exception as e:
-                print(f"DDGS Search Attempt {attempt + 1} failed for '{search_query}': {e}")
-                if attempt < max_retries - 1: time.sleep(1)
-    return all_results[:max_articles]
-    if check_cancellation(): return None
-        progress(0.4 + (article_num / total_articles) * 0.5, desc=f"Fetching
-    full_text, pub_date = get_full_article_with_timeout(url
-    content = f"[SNIPPET ONLY]\n{result.get('body', 'No snippet')}" if
-    article = f"### {article_num
-    return {'article': article, 'timeframe': actual_timeframe, 'url': url
-def search_articles_enhanced(name: str, max_articles: int = 4, progress=None) -> str:
-    if progress: progress(0.05, desc=f"Planning search: {recent_count} recent + {historical_count} historical")
-    recent_results = search_articles_by_timeframe_enhanced(name, "recent", recent_count, progress)
-    if check_cancellation(): return "[CANCELLED]
-    time.sleep(1)
-    historical_results = search_articles_by_timeframe_enhanced(name, "historical", historical_count, progress)
-    if check_cancellation(): return "[CANCELLED]
-        r['expected_timeframe'] = 'recent'
-        all_results.append(r)
-    for r in historical_results:
-        r['expected_timeframe'] = 'historical'
-        all_results.append(r)
-        if progress: progress(1.0, desc="Completed - no results found.")
-    if progress: progress(0.4, desc=f"Found {len(
-    with ThreadPoolExecutor(max_workers=min(3, len(
-        future_to_article = {executor.submit(fetch_article_parallel, r, i, len(
-            if check_cancellation():
-                for f in future_to_article: f.cancel()
-                return "[CANCELLED] Search was cancelled."
-    if check_cancellation(): return "[CANCELLED] Search was cancelled."
-    if progress: progress(0.95, desc="Formatting results...")
-    summary = f"**Search Summary**: Found {len(articles)} articles
-    if progress: progress(1.0, desc=f"Search completed! Found {len(articles)} articles.")
-        search_results = search_results[:search_results.rfind('.
-    prompt = f"""
-        message = client.messages.create(
-        if progress: progress(1.0, desc="Analysis failed")
-# === Gradio Interface (No changes needed here) ===
-def search_only_enhanced(name: str, article_count: int, progress=gr.Progress()):
-    if not name or not name.strip(): return "❌ No name provided", ""
-    try:
-        start = time.time()
-        articles_output = search_articles_enhanced(name.strip(), int(article_count), progress=progress)
-        if "[CANCELLED]" in articles_output: return "🛑 Search was cancelled by user.", ""
-        elapsed = time.time() - start
-        results = f"✅ **Temporal Search** completed for **{name}** in {elapsed:.1f}s\n\n{articles_output}"
-        return results, articles_output
-    except Exception as e:
-        return f"❌ **Search failed unexpectedly**: {e}", ""
-    gr.Markdown("Enter a business name to find its founders using
-        with gr.Column(scale=1): article_count_slider = gr.Slider(2, 10, value=4, step=2, label="
-        output1 = gr.Markdown(label="Search Results & Temporal Analysis")
-        output2 = gr.Markdown(label="Founder Intelligence Report")

@@ -21,18 +21,16 @@ client = anthropic.Anthropic(
cancel_operation = threading.Event()

def reset_cancellation():
    cancel_operation.clear()

def check_cancellation():
    return cancel_operation.is_set()

# === Model Functions (Hardened for Stability) ===

def extract_publication_date(soup, url):
    try:
        # This function is generally safe, no changes needed.
        date_selectors = [
            'time[datetime]', '.date', '.publish-date', '.published', '.post-date',
            '[class*="date"]', '[class*="time"]',

@@ -56,20 +54,21 @@ def extract_publication_date(soup, url):
        print(f"Date extraction error for {url}: {e}")
        return None

def get_full_article_with_timeout(url, timeout=15):
    # This function is generally safe, no changes needed.
    if check_cancellation(): return "[CANCELLED] Operation was cancelled", None
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1'
        }
        response = requests.get(url, headers=headers, timeout=timeout, verify=True)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        pub_date = extract_publication_date(soup, url)
        for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'ads', 'noscript', 'form']):
            element.decompose()
        article_selectors = [
            'article', '.article-content', '.post-content', '.story-body', '.story-content', '.entry-content', '.content-body',
            '.article-body', 'main article', 'main .content', 'main', '[role="main"]', '.main-content', '.page-content', '.text', '.article-text'

@@ -77,221 +76,237 @@ def get_full_article_with_timeout(url, timeout=15):
        for selector in article_selectors:
            content = soup.select_one(selector)
            if content:
                text_parts = [p.get_text(strip=True) for p in content.find_all(['p', 'div'], string=True) if len(p.get_text(strip=True)) > 30]
                if text_parts:
                    full_text = '\n\n'.join(text_parts)
                    if len(full_text) > 300: return full_text[:10000], pub_date
        body_text = soup.get_text(separator='\n\n', strip=True)
        return (body_text[:10000] if len(body_text) > 300 else "[INFO] Could not extract substantial content"), pub_date
    except requests.exceptions.RequestException as e:
        return f"[ERROR] Network error for {url}: {e}", None
    except Exception as e:
        return f"[ERROR] Could not fetch article {url}: {e}", None

def search_articles_by_timeframe_enhanced(name: str, timeframe: str, max_articles: int = 3, progress=None) -> list:
    if check_cancellation(): return []
    queries = {
        "recent": [f'"{name}" founder news 2024 2025', f'"{name}" CEO founder recent'],
        "historical": [f'"{name}" founded established history', f'"{name}" founder origin story']
    }
    search_queries = queries.get(timeframe, [])

    all_results, total_queries = [], len(search_queries)
    for query_idx, search_query in enumerate(search_queries):
        if len(all_results) >= max_articles or check_cancellation(): break
        if progress:
            progress((query_idx / total_queries) * 0.3, desc=f"Searching {timeframe} ({query_idx+1}/{total_queries})")

        try:
            with DDGS(timeout=15) as ddgs:
                # Use safe '.text()' which is designed to handle errors gracefully
                results = ddgs.text(keywords=search_query, max_results=max_articles - len(all_results) + 2, safesearch='moderate', region='us-en')

                # Defensively check if results is a list-like object
                if results:
                    existing_urls = {r.get('href', '') for r in all_results}
                    for result in results:
                        # Ensure result is a dictionary and has the 'href' key
                        if isinstance(result, dict) and result.get('href') and result.get('href') not in existing_urls:
                            all_results.append(result)
                            if len(all_results) >= max_articles: break
        except Exception as e:
            print(f"DDGS Search failed for '{search_query}': {e}")

    return all_results

def categorize_article_by_date(pub_date):
    if not pub_date: return "unknown"
    return "recent" if pub_date >= (datetime.now() - timedelta(days=365)) else "historical"

def fetch_article_parallel(result, article_num, total_articles, progress=None):
    if check_cancellation() or not isinstance(result, dict): return None

    url = result.get('href', 'No URL')
    title = result.get('title', 'No Title')
    if progress:
        progress(0.4 + (article_num / total_articles) * 0.5, desc=f"Fetching {article_num+1}/{total_articles}: {title[:50]}...")

    full_text, pub_date = get_full_article_with_timeout(url)
    if check_cancellation(): return None

    actual_timeframe = categorize_article_by_date(pub_date)
    content = f"[SNIPPET ONLY]\n{result.get('body', 'No snippet')}" if "[ERROR]" in str(full_text) or "[WARNING]" in str(full_text) else full_text
    timeframe_indicator = f"📅 **Published**: {pub_date.strftime('%B %d, %Y')} ({actual_timeframe.title()})" if pub_date else f"📅 **Timeframe**: {result.get('expected_timeframe', 'unknown').title()} (estimated)"

    article = f"### {article_num+1}. {title}\n[Source]({url})\n{timeframe_indicator}\n\n{content}\n"
    return {'article': article, 'timeframe': actual_timeframe, 'url': url}

def search_articles_enhanced(name: str, max_articles: int, progress=None) -> str:
    reset_cancellation()
    if progress: progress(0, desc="Initializing...")

    recent_count, historical_count = max_articles // 2, max_articles - (max_articles // 2)

    recent_results = search_articles_by_timeframe_enhanced(name, "recent", recent_count, progress)
    if check_cancellation(): return "[CANCELLED]"

    time.sleep(1)  # Brief pause

    historical_results = search_articles_by_timeframe_enhanced(name, "historical", historical_count, progress)
    if check_cancellation(): return "[CANCELLED]"

    all_source_results = []
    for r in recent_results: r['expected_timeframe'] = 'recent'; all_source_results.append(r)
    for r in historical_results: r['expected_timeframe'] = 'historical'; all_source_results.append(r)

    if not all_source_results:
        return f"[INFO] No articles found for '{name}'."

    if progress: progress(0.4, desc=f"Found {len(all_source_results)} articles, fetching content...")

    articles, recent_found, historical_found = [], 0, 0
    with ThreadPoolExecutor(max_workers=min(3, len(all_source_results))) as executor:
        future_to_article = {executor.submit(fetch_article_parallel, r, i, len(all_source_results), progress): r for i, r in enumerate(all_source_results)}
        for future in as_completed(future_to_article):
            if check_cancellation(): return "[CANCELLED]"
            try:
                result_data = future.result(timeout=20)
                if result_data:
                    articles.append(result_data)
                    if result_data.get('timeframe') == "recent": recent_found += 1
                    elif result_data.get('timeframe') == "historical": historical_found += 1
            except Exception as e:
                print(f"Error processing article future: {e}")

    if not articles:
        return f"[ERROR] Could not fetch content for any of the {len(all_source_results)} articles found."

    if progress: progress(0.95, desc="Formatting results...")

    url_to_index = {res.get('href'): i for i, res in enumerate(all_source_results)}
    articles.sort(key=lambda x: url_to_index.get(x.get('url'), 999))

    summary = f"**Search Summary**: Found content for {len(articles)} articles ({recent_found} recent, {historical_found} historical)\n\n"
    article_texts = [a.get('article', '[Content Missing]') for a in articles]
    return summary + "\n---\n".join(article_texts)

def extract_entities_enhanced(search_results: str, company_name: str, progress=None) -> str:
    """BULLETPROOF VERSION: This function is now hardened against the IndexError."""
    if progress: progress(0, desc="Preparing text...")
    MAX_CHARS = 15000
    if len(search_results) > MAX_CHARS:
        search_results = search_results[:search_results.rfind('.', 0, MAX_CHARS) + 1]

    prompt = f"""Extract all named entities that are described as founders of "{company_name}" specifically from the following text.
Return a JSON object with the following structure: {{"founders": [{{"name": "Founder Name", "evidence": ["brief quote"]}}]}}
Respond only with valid JSON.
Text:
{search_results}"""

    try:
        if progress: progress(0.5, desc="Analyzing with AI...")
        message = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1500, temperature=0.1,
            messages=[{"role": "user", "content": prompt}]
        )

        # FIX: THE DEFINITIVE FIX FOR THE 'list index out of range' ERROR
        # Check if the response is valid and has content before accessing it.
        if message and isinstance(message.content, list) and len(message.content) > 0:
            # Safely access the first text block
            first_block = message.content[0]
            if hasattr(first_block, 'text'):
                result = first_block.text
                if progress: progress(1.0, desc="Analysis completed!")
                return result

        # If the checks above fail, we land here.
        if progress: progress(1.0, desc="Analysis failed.")
        print(f"API Error: Received invalid response from Anthropic. Response: {message}")
        return json.dumps({"error": "API returned an invalid or empty response.", "details": "The model may have refused to answer due to content filters or an internal error."})

    except Exception as e:
        if progress: progress(1.0, desc="Analysis failed.")
        print(f"Extraction Exception: {e}")
        return json.dumps({"error": "An exception occurred during AI extraction.", "details": str(e)})

# === Gradio Interface Functions and UI ===
# This section has been simplified to use the more robust functions above.

def search_and_extract_flow(name: str, article_count: int, progress=gr.Progress()):
    """A single, robust function for the entire search and extract workflow."""
    if not name or not name.strip():
        return "❌ Please provide a company name.", "", "Ready."

    try:
        # Step 1: Search for articles
        start_time = time.time()
        articles_output = search_articles_enhanced(name.strip(), int(article_count), progress=progress)

        if "[CANCELLED]" in articles_output:
            return "🛑 Search cancelled.", "", "Cancelled."
        if "[ERROR]" in articles_output or "[INFO]" in articles_output:
            return articles_output, "", "Search completed with info/error."

        search_results_md = f"✅ **Search** completed in {time.time() - start_time:.1f}s\n\n{articles_output}"

        # Step 2: Extract entities from the results
        progress(0, desc="Starting extraction...")
        extraction_start_time = time.time()
        entities_json = extract_entities_enhanced(articles_output, name.strip(), progress)

        # Step 3: Format the JSON for display
        try:
            parsed = json.loads(entities_json)
            formatted = json.dumps(parsed, indent=2)
            extraction_results_md = f"✅ **Extraction** completed in {time.time() - extraction_start_time:.1f}s\n\n```json\n{formatted}\n```"
        except (json.JSONDecodeError, TypeError):
            extraction_results_md = f"⚠️ **Extraction Warning**: Model did not return valid JSON.\n\n{entities_json}"

        status = f"Completed in {time.time() - start_time:.1f}s"
        return search_results_md, extraction_results_md, status

    except Exception as e:
        # This is a final catch-all for any truly unexpected errors.
        error_message = f"❌ **An unexpected error occurred**: {e}"
        print(error_message)  # Log to console for debugging
        return error_message, "", "Failed."

def cancel_flow():
    cancel_operation.set()
    return "🛑 Cancellation requested..."

with gr.Blocks(title="Enhanced Founder Finder", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 Enhanced Founder Finder")
    gr.Markdown("Enter a business name to find its founders. The process involves searching for articles and then using AI to extract founder names.")

    with gr.Row():
        with gr.Column(scale=2): name_input = gr.Textbox(label="Company Name", placeholder="e.g., 'Tesla', 'SpaceX'")
        with gr.Column(scale=1): article_count_slider = gr.Slider(2, 10, value=4, step=2, label="Articles to Search")

    with gr.Row():
        run_btn = gr.Button("🔍 Find Founders", variant="primary")
        cancel_btn = gr.Button("🛑 Cancel", variant="stop")

    status_output = gr.Markdown("Ready...")

    with gr.Tab("Founder Intelligence Report"):
        output_extract = gr.Markdown(label="Extracted Founder Information")
    with gr.Tab("Raw Search Results"):
        output_search = gr.Markdown(label="Article Search & Temporal Analysis")

    run_event = run_btn.click(
        fn=search_and_extract_flow,
        inputs=[name_input, article_count_slider],
        outputs=[output_search, output_extract, status_output]
    )
    cancel_btn.click(fn=cancel_flow, inputs=None, outputs=status_output, cancels=[run_event])

    gr.Examples(
        examples=[["OpenAI", 4], ["SpaceX", 6], ["Microsoft", 4], ["Anthropic", 4]],
        inputs=[name_input, article_count_slider],
    )

demo.queue()