Founder_Name_Extraction_v3

Sleeping

App Files Community

dygoo committed on Jun 6

Commit

b8a3699

verified ·

1 Parent(s): 63b8e63

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -40

app.py CHANGED Viewed

@@ -3,26 +3,34 @@ import requests
 import time
 from duckduckgo_search import DDGS
-# --- Model Functions ---
 def search_articles(name: str) -> str:
     keywords = ['founders', 'partners', 'funders', 'owners']
     search_query = f'"{name}" ({" OR ".join(keywords)}) site:news'
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(search_query, max_results=3))
         if not results:
             return f"No articles found for {name}"
         articles = []
         for i, result in enumerate(results, 1):
-            article = f"**{i}. {result['title']}**\n"
-            article += f"Source: {result['href']}\n"
-            article += f"{result['body']}\n"
             articles.append(article)
         return "\n\n".join(articles)
     except Exception as e:
-        return f"Search failed: {str(e)}"
 def extract_entities(search_results: str) -> str:
     modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
     prompt = f"""Extract all person names and organization names from the following text.
 Format as:
@@ -32,17 +40,18 @@ Text: {search_results}"""
     try:
         response = requests.post(
             modal_endpoint,
-            json={"prompt": prompt, "max_tokens": 500, "temperature": 0.1},
-            timeout=180  # Increased timeout
         )
         if response.status_code == 200:
             return response.json().get("response", "No entities extracted")
         else:
-            return f"API Error: {response.status_code}"
     except Exception as e:
-        return f"Extraction failed: {str(e)}"
 def find_full_names(search_results: str, entities: str) -> str:
     modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
     prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
 Entities: {entities}
@@ -51,64 +60,130 @@ Provide full names with their roles/titles where mentioned."""
     try:
         response = requests.post(
             modal_endpoint,
-            json={"prompt": prompt, "max_tokens": 300, "temperature": 0.1},
-            timeout=180  # Increased timeout
         )
         if response.status_code == 200:
             return response.json().get("response", "No full names found")
         else:
-            return f"API Error: {response.status_code}"
     except Exception as e:
-        return f"Full name extraction failed: {str(e)}"
-# --- Pipeline Function with Progress & Status ---
-def process_name_with_progress(name: str, progress=gr.Progress(track_tqdm=True)):
     if not name.strip():
-        return "", "", "", "Please enter a name."
     try:
-        progress(0.1)
-        yield "", "", "", "🔍 Searching for articles..."
-        search_results = search_articles(name.strip())
-        progress(0.4)
-        yield search_results, "", "", "📄 Extracting entities from articles..."
-        entities = extract_entities(search_results)
-        progress(0.7)
-        yield search_results, entities, "", "🧠 Finding full names and roles..."
-        full_names = find_full_names(search_results, entities)
-        progress(1.0)
-        yield search_results, entities, full_names, "✅ Complete!"
     except Exception as e:
-        err = f"❌ Error: {str(e)}"
-        yield search_results if 'search_results' in locals() else "", \
-              entities if 'entities' in locals() else "", \
-              full_names if 'full_names' in locals() else "", \
-              err
-# --- Gradio Interface ---
 with gr.Blocks(title="Name Research Tool") as demo:
-    gr.Markdown("# Name Research Tool")
     gr.Markdown("Enter a business or project name to search for related articles and extract key entities.")
     with gr.Row():
         name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
-        search_btn = gr.Button("Search", variant="primary")
     with gr.Column():
-        output1 = gr.Textbox(label="Search Results", lines=10, max_lines=20)
         output2 = gr.Textbox(label="Extracted Entities", lines=5, max_lines=10)
         output3 = gr.Textbox(label="Full Names", lines=5, max_lines=10)
-        status_output = gr.Textbox(label="Status / Progress", lines=1, interactive=False)
-    # Search with progress
     search_btn.click(
         fn=process_name_with_progress,
         inputs=[name_input],
-        outputs=[output1, output2, output3, status_output]
     )
 if __name__ == "__main__":

 import time
 from duckduckgo_search import DDGS
+# === Model functions ===
 def search_articles(name: str) -> str:
+    """Search for 3 newspaper articles containing the name and keywords using DuckDuckGo"""
     keywords = ['founders', 'partners', 'funders', 'owners']
     search_query = f'"{name}" ({" OR ".join(keywords)}) site:news'
     try:
+        print(f"[DEBUG] Search query: {search_query}")
         with DDGS() as ddgs:
             results = list(ddgs.text(search_query, max_results=3))
+            print(f"[DEBUG] Raw results: {results}")
         if not results:
             return f"No articles found for {name}"
         articles = []
         for i, result in enumerate(results, 1):
+            article = f"**{i}. {result.get('title', 'No Title')}**\n"
+            article += f"Source: {result.get('href', 'No URL')}\n"
+            article += f"{result.get('body', 'No Body')}\n"
             articles.append(article)
         return "\n\n".join(articles)
     except Exception as e:
+        return f"[ERROR] Search failed: {str(e)}"
 def extract_entities(search_results: str) -> str:
+    """Extract entities using Mistral 7B endpoint"""
     modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
     prompt = f"""Extract all person names and organization names from the following text.
 Format as:
     try:
         response = requests.post(
             modal_endpoint,
+            json={"prompt": prompt, "max_tokens": 500, "temperature": 0.1}
         )
         if response.status_code == 200:
             return response.json().get("response", "No entities extracted")
         else:
+            return f"[ERROR] API Error: {response.status_code}"
     except Exception as e:
+        return f"[ERROR] Extraction failed: {str(e)}"
 def find_full_names(search_results: str, entities: str) -> str:
+    """Find full names using Mistral 7B endpoint"""
     modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
     prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
 Entities: {entities}
     try:
         response = requests.post(
             modal_endpoint,
+            json={"prompt": prompt, "max_tokens": 300, "temperature": 0.1}
         )
         if response.status_code == 200:
             return response.json().get("response", "No full names found")
         else:
+            return f"[ERROR] API Error: {response.status_code}"
     except Exception as e:
+        return f"[ERROR] Full name extraction failed: {str(e)}"
+# === Gradio interface ===
+def process_name_with_progress(name: str, progress=gr.Progress()):
+    """Process name with streamed debug updates to help diagnose issues"""
     if not name.strip():
+        yield "No name provided", "", ""
+        return
+    search_results = ""
+    entities = ""
+    full_names = ""
     try:
+        # Step 1: Search
+        progress(0.1, desc="Searching for articles...")
+        search_results += f"[DEBUG] Starting search for: {name}\n"
+        yield search_results, "", ""
+        search_start = time.time()
+        articles_output = search_articles(name.strip())
+        search_time = time.time() - search_start
+        search_results += f"[DEBUG] Search completed in {search_time:.2f}s\n"
+        search_results += f"{articles_output}\n"
+        yield search_results, "", ""
+        # Step 2: Extract entities
+        progress(0.5, desc="Extracting entities...")
+        search_results += "[DEBUG] Starting entity extraction...\n"
+        yield search_results, "[DEBUG] Extracting entities...", ""
+        extract_start = time.time()
+        entities = extract_entities(articles_output)
+        extract_time = time.time() - extract_start
+        search_results += f"[DEBUG] Entity extraction completed in {extract_time:.2f}s\n"
+        yield search_results, entities, ""
+        # Step 3: Full names
+        progress(0.8, desc="Finding full names...")
+        search_results += "[DEBUG] Starting full name resolution...\n"
+        yield search_results, entities, "[DEBUG] Resolving full names..."
+        names_start = time.time()
+        full_names = find_full_names(articles_output, entities)
+        names_time = time.time() - names_start
+        search_results += f"[DEBUG] Full name extraction completed in {names_time:.2f}s\n"
+        progress(1.0, desc="Complete!")
+        yield search_results, entities, full_names
     except Exception as e:
+        error_msg = f"[ERROR] {str(e)}"
+        yield search_results + error_msg, entities or error_msg, full_names or error_msg
+def process_name_simple(name: str):
+    """Basic version without progress bar - for isolated testing"""
+    if not name.strip():
+        return "", "", ""
+    print(f"Starting process for: {name}")
+    total_start = time.time()
+    print("Step 1: Searching articles...")
+    search_start = time.time()
+    search_results = search_articles(name.strip())
+    search_time = time.time() - search_start
+    print(f"Search completed in: {search_time:.2f}s")
+    print("Step 2: Extracting entities...")
+    extract_start = time.time()
+    entities = extract_entities(search_results)
+    extract_time = time.time() - extract_start
+    print(f"Entity extraction in: {extract_time:.2f}s")
+    print("Step 3: Finding full names...")
+    names_start = time.time()
+    full_names = find_full_names(search_results, entities)
+    names_time = time.time() - names_start
+    print(f"Full name resolution in: {names_time:.2f}s")
+    total_time = time.time() - total_start
+    print(f"Total time: {total_time:.2f}s")
+    return search_results, entities, full_names
+# === Gradio UI ===
 with gr.Blocks(title="Name Research Tool") as demo:
+    gr.Markdown("# 🔎 Name Research Tool")
     gr.Markdown("Enter a business or project name to search for related articles and extract key entities.")
     with gr.Row():
         name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
+        with gr.Column():
+            search_btn = gr.Button("Search (Real-time)", variant="primary")
+            debug_btn = gr.Button("Search (Debug Mode)", variant="secondary")
     with gr.Column():
+        output1 = gr.Textbox(label="Search Results (with debug)", lines=10, max_lines=30)
         output2 = gr.Textbox(label="Extracted Entities", lines=5, max_lines=10)
         output3 = gr.Textbox(label="Full Names", lines=5, max_lines=10)
     search_btn.click(
         fn=process_name_with_progress,
         inputs=[name_input],
+        outputs=[output1, output2, output3]
+    )
+    debug_btn.click(
+        fn=process_name_simple,
+        inputs=[name_input],
+        outputs=[output1, output2, output3]
     )
 if __name__ == "__main__":