Spaces:
Sleeping
Sleeping
File size: 6,940 Bytes
065339b b76c831 065339b b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 b76c831 b8a3699 065339b b8a3699 2ab9f5f b76c831 b8a3699 2ab9f5f b8a3699 2ab9f5f b8a3699 b76c831 3c6afdf b8a3699 2ab9f5f 065339b b8a3699 d0bd726 2ab9f5f 065339b b8a3699 2ab9f5f d0bd726 b8a3699 d0bd726 2ab9f5f 065339b b76c831 065339b b8a3699 b76c831 065339b 2ab9f5f |
|
import gradio as gr
import requests
import time
from duckduckgo_search import DDGS
# === Model functions ===
def search_articles(name: str) -> str:
    """Look up news coverage of ``name`` on DuckDuckGo and format the hits.

    The query is the quoted name, an OR-group of ownership-related keywords
    and a ``site:news`` filter; at most three results are requested.

    Returns:
        The formatted hits (numbered title, source URL, snippet) separated by
        blank lines; ``"No articles found for <name>"`` when the search comes
        back empty; or an ``"[ERROR] Search failed: ..."`` message when
        anything inside the search/formatting step raises.
    """
    role_terms = " OR ".join(('founders', 'partners', 'funders', 'owners'))
    search_query = f'"{name}" ({role_terms}) site:news'
    try:
        print(f"[DEBUG] Search query: {search_query}")
        with DDGS() as client:
            hits = list(client.text(search_query, max_results=3))
        print(f"[DEBUG] Raw results: {hits}")
        if not hits:
            return f"No articles found for {name}"
        # Each hit becomes a three-line entry; missing fields fall back to
        # placeholder text instead of raising.
        return "\n\n".join(
            f"**{rank}. {hit.get('title', 'No Title')}**\n"
            f"Source: {hit.get('href', 'No URL')}\n"
            f"{hit.get('body', 'No Body')}\n"
            for rank, hit in enumerate(hits, 1)
        )
    except Exception as exc:
        # Failures are reported as text so the UI can display them.
        return f"[ERROR] Search failed: {exc}"
def extract_entities(search_results: str, timeout: float = 120.0) -> str:
    """Ask the Mistral 7B endpoint to list the people and organizations in a text.

    Args:
        search_results: Text to scan (normally the formatted article list).
        timeout: Seconds to wait for the HTTP call before giving up. Without
            a timeout ``requests`` waits indefinitely, which would hang the
            UI handler if the endpoint stalls. The default is generous
            because the endpoint may need time to start up.

    Returns:
        The ``"response"`` field of the endpoint's JSON reply (or
        ``"No entities extracted"`` if that field is absent), an
        ``"[ERROR] API Error: <status>"`` message for non-200 replies, or an
        ``"[ERROR] Extraction failed: ..."`` message if the request or JSON
        decoding raises (including on timeout).
    """
    modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
    prompt = f"""Extract all person names and organization names from the following text.
Format as:
PERSON: [name]
ORG: [organization name]
Text: {search_results}"""
    try:
        response = requests.post(
            modal_endpoint,
            json={"prompt": prompt, "max_tokens": 500, "temperature": 0.1},
            timeout=timeout,
        )
        if response.status_code == 200:
            return response.json().get("response", "No entities extracted")
        return f"[ERROR] API Error: {response.status_code}"
    except Exception as e:
        # Failures are reported as text so the UI can display them.
        return f"[ERROR] Extraction failed: {e}"
def find_full_names(search_results: str, entities: str, timeout: float = 120.0) -> str:
    """Ask the Mistral 7B endpoint for full names and roles of known entities.

    Args:
        search_results: Article text to use as context.
        entities: Previously extracted entity list to resolve.
        timeout: Seconds to wait for the HTTP call before giving up. Without
            a timeout ``requests`` waits indefinitely, which would hang the
            UI handler if the endpoint stalls. The default is generous
            because the endpoint may need time to start up.

    Returns:
        The ``"response"`` field of the endpoint's JSON reply (or
        ``"No full names found"`` if that field is absent), an
        ``"[ERROR] API Error: <status>"`` message for non-200 replies, or an
        ``"[ERROR] Full name extraction failed: ..."`` message if the request
        or JSON decoding raises (including on timeout).
    """
    modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
    prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
Entities: {entities}
Search Results: {search_results}
Provide full names with their roles/titles where mentioned."""
    try:
        response = requests.post(
            modal_endpoint,
            json={"prompt": prompt, "max_tokens": 300, "temperature": 0.1},
            timeout=timeout,
        )
        if response.status_code == 200:
            return response.json().get("response", "No full names found")
        return f"[ERROR] API Error: {response.status_code}"
    except Exception as e:
        # Failures are reported as text so the UI can display them.
        return f"[ERROR] Full name extraction failed: {e}"
# === Gradio interface ===
def process_name_with_progress(name: str, progress=gr.Progress()):
    """Run search -> entity extraction -> full-name lookup, streaming updates.

    This is a generator: every ``yield`` is a
    ``(search_log, entities, full_names)`` tuple, so the three output boxes
    can be refreshed after each stage. ``search_log`` accumulates
    ``[DEBUG]`` timing lines together with the formatted articles.

    A stage's status message is never passed to the next stage as input:
    if the search reports an error or no articles, or entity extraction
    reports an error, the remaining LLM calls are skipped and the generator
    finishes with what it has.

    Args:
        name: Business or project name typed by the user. ``None`` and
            whitespace-only values are treated as missing.
        progress: Gradio progress tracker, called with a completion fraction
            and a description at each stage.
    """
    query = (name or "").strip()
    if not query:
        yield "No name provided", "", ""
        return
    search_results = ""
    entities = ""
    full_names = ""
    try:
        # Step 1: Search
        progress(0.1, desc="Searching for articles...")
        search_results += f"[DEBUG] Starting search for: {name}\n"
        yield search_results, "", ""
        search_start = time.perf_counter()
        articles_output = search_articles(query)
        search_time = time.perf_counter() - search_start
        search_results += f"[DEBUG] Search completed in {search_time:.2f}s\n"
        search_results += f"{articles_output}\n"
        yield search_results, "", ""
        # search_articles reports failure / no hits as plain text; feeding
        # that text to the LLM would only yield made-up entities.
        if str(articles_output).startswith(("[ERROR]", "No articles found")):
            search_results += "[DEBUG] No usable articles; skipping entity extraction.\n"
            progress(1.0, desc="Complete!")
            yield search_results, "", ""
            return
        # Step 2: Extract entities
        progress(0.5, desc="Extracting entities...")
        search_results += "[DEBUG] Starting entity extraction...\n"
        yield search_results, "[DEBUG] Extracting entities...", ""
        extract_start = time.perf_counter()
        entities = extract_entities(articles_output)
        extract_time = time.perf_counter() - extract_start
        search_results += f"[DEBUG] Entity extraction completed in {extract_time:.2f}s\n"
        yield search_results, entities, ""
        # Same reasoning: do not ask for full names of an error message.
        if str(entities).startswith("[ERROR]"):
            search_results += "[DEBUG] Entity extraction failed; skipping full name resolution.\n"
            progress(1.0, desc="Complete!")
            yield search_results, entities, ""
            return
        # Step 3: Full names
        progress(0.8, desc="Finding full names...")
        search_results += "[DEBUG] Starting full name resolution...\n"
        yield search_results, entities, "[DEBUG] Resolving full names..."
        names_start = time.perf_counter()
        full_names = find_full_names(articles_output, entities)
        names_time = time.perf_counter() - names_start
        search_results += f"[DEBUG] Full name extraction completed in {names_time:.2f}s\n"
        progress(1.0, desc="Complete!")
        yield search_results, entities, full_names
    except Exception as e:
        # Last-resort boundary: show the error in every box that has no
        # result yet instead of letting the handler crash.
        error_msg = f"[ERROR] {e}"
        yield search_results + error_msg, entities or error_msg, full_names or error_msg
def process_name_simple(name: str):
    """Run the same three-stage pipeline in one shot, logging timings to stdout.

    Non-streaming counterpart of the progress-based handler, intended for
    isolated testing. A stage's status message is never passed to the next
    stage as input: if the search reports an error or no articles, or entity
    extraction reports an error, the remaining LLM calls are skipped.

    Args:
        name: Business or project name. ``None`` and whitespace-only values
            are treated as missing.

    Returns:
        A ``(search_results, entities, full_names)`` tuple of strings; all
        three are empty when no name was given, and later elements are empty
        when an earlier stage produced nothing usable.
    """
    query = (name or "").strip()
    if not query:
        return "", "", ""
    print(f"Starting process for: {name}")
    total_start = time.perf_counter()

    print("Step 1: Searching articles...")
    search_start = time.perf_counter()
    search_results = search_articles(query)
    search_time = time.perf_counter() - search_start
    print(f"Search completed in: {search_time:.2f}s")
    # search_articles reports failure / no hits as plain text; feeding that
    # text to the LLM would only yield made-up entities.
    if str(search_results).startswith(("[ERROR]", "No articles found")):
        print("No usable articles; skipping entity extraction.")
        print(f"Total time: {time.perf_counter() - total_start:.2f}s")
        return search_results, "", ""

    print("Step 2: Extracting entities...")
    extract_start = time.perf_counter()
    entities = extract_entities(search_results)
    extract_time = time.perf_counter() - extract_start
    print(f"Entity extraction in: {extract_time:.2f}s")
    # Same reasoning: do not ask for full names of an error message.
    if str(entities).startswith("[ERROR]"):
        print("Entity extraction failed; skipping full name resolution.")
        print(f"Total time: {time.perf_counter() - total_start:.2f}s")
        return search_results, entities, ""

    print("Step 3: Finding full names...")
    names_start = time.perf_counter()
    full_names = find_full_names(search_results, entities)
    names_time = time.perf_counter() - names_start
    print(f"Full name resolution in: {names_time:.2f}s")

    total_time = time.perf_counter() - total_start
    print(f"Total time: {total_time:.2f}s")
    return search_results, entities, full_names
# === Gradio UI ===
# Build the Gradio app: a name input, two trigger buttons and three output
# boxes, all created under a Blocks context bound to `demo`.
# NOTE(review): indentation is not visible in this copy of the file, so the
# exact nesting of the Row/Column containers (and of the .click() calls
# relative to the Blocks context) cannot be confirmed here; verify against
# the original layout before re-indenting.
with gr.Blocks(title="Name Research Tool") as demo:
# Page heading and a one-line usage hint.
gr.Markdown("# 🔎 Name Research Tool")
gr.Markdown("Enter a business or project name to search for related articles and extract key entities.")
with gr.Row():
# Single free-text input shared by both buttons.
name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
with gr.Column():
# Two ways to run the same pipeline: streamed vs. one-shot.
search_btn = gr.Button("Search (Real-time)", variant="primary")
debug_btn = gr.Button("Search (Debug Mode)", variant="secondary")
with gr.Column():
# One output box per element of the handlers' 3-tuples.
output1 = gr.Textbox(label="Search Results (with debug)", lines=10, max_lines=30)
output2 = gr.Textbox(label="Extracted Entities", lines=5, max_lines=10)
output3 = gr.Textbox(label="Full Names", lines=5, max_lines=10)
# "Real-time" button -> generator handler that yields intermediate updates.
search_btn.click(
fn=process_name_with_progress,
inputs=[name_input],
outputs=[output1, output2, output3]
)
# "Debug Mode" button -> plain handler that returns once and prints timings.
debug_btn.click(
fn=process_name_simple,
inputs=[name_input],
outputs=[output1, output2, output3]
)
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
demo.launch()
|