# app.py — commit 94df9d2 ("Update app.py", verified), author: dygoo
import gradio as gr
import requests
import re
from duckduckgo_search import DDGS
import anthropic
import os
import json
# Initialize clients
# Module-level Anthropic client built from the ANTHROPIC_API_KEY environment
# variable (os.getenv yields None when the variable is unset).
# NOTE(review): nothing in the visible code references this client —
# extraction_workflow builds its own client from the user-supplied key.
# Confirm no other module uses it before removing.
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
# === 1. Simplified Search Workflow ===
def _format_search_results(results, label: str):
    """Turn DuckDuckGo text hits into (display markdown blocks, AI text blocks)."""
    markdown_blocks = []
    ai_blocks = []
    for res in results:
        title = res.get('title', 'No Title')
        url = res.get('href', '#')
        body = res.get('body', 'No snippet available.')
        # Format for display
        markdown_blocks.append(
            f"### ({label}) {title}\n**Source**: [{url}]({url})\n\n{body}\n"
        )
        # Format for AI
        ai_blocks.append(f"Article ({label}):\nTitle: {title}\nContent: {body}\n\n")
    return markdown_blocks, ai_blocks


def search_workflow(name: str, api_key: str, progress=gr.Progress()):
    """
    Search DuckDuckGo for up to 8 article snippets about a company's founders.

    Two queries are issued: one restricted to the past year ("Recent", up to 4
    hits) and one unrestricted ("Historical", up to 4 hits).

    Args:
        name: Company name typed by the user.
        api_key: Anthropic API key. It is not used by the search itself; it is
            only checked for presence so the user is prompted before searching.
        progress: Gradio progress tracker.

    Returns:
        A ``(markdown, raw_text)`` tuple. ``markdown`` is the text shown in the
        UI (results or an error/info message); ``raw_text`` is the plain-text
        digest handed to the extraction step, or ``""`` when there is nothing
        to analyze.
    """
    # Embedded double quotes would terminate the exact-phrase query early, and
    # stray whitespace would end up inside the quoted phrase, so normalize first.
    company = (name or "").replace('"', '').strip()
    if not company:
        return "❌ Please enter a company name.", ""
    if not api_key or not api_key.strip():
        return "❌ Please enter your Anthropic API key.", ""

    progress(0, desc="Starting search...")

    # (label, query, extra ddgs.text kwargs, progress fraction, progress text)
    searches = (
        (
            "Recent",
            f'"{company}" founder news',
            {"timelimit": "y"},  # 'y' limits results to the past year
            0.1,
            "Searching for recent articles...",
        ),
        (
            "Historical",
            f'"{company}" founder history origin',
            {},
            0.5,
            "Searching for historical articles...",
        ),
    )

    all_articles_markdown = []
    ai_text_parts = []
    try:
        with DDGS(timeout=20) as ddgs:
            for label, keywords, extra, fraction, desc in searches:
                progress(fraction, desc=desc)
                results = ddgs.text(keywords=keywords, max_results=4, **extra) or []
                markdown_blocks, ai_blocks = _format_search_results(results, label)
                all_articles_markdown.extend(markdown_blocks)
                ai_text_parts.extend(ai_blocks)
    except Exception as e:
        # UI boundary: report any search failure to the user instead of crashing.
        return f"❌ An error occurred during search: {e}", ""

    if not all_articles_markdown:
        return "[INFO] No articles found for that company.", ""

    progress(1.0, desc="Search complete!")
    final_markdown = (
        f"## Found {len(all_articles_markdown)} Articles\n\n"
        + "\n---\n".join(all_articles_markdown)
    )
    return final_markdown, "".join(ai_text_parts)
# === 2. Simplified Extraction Workflow ===
def _first_text_block(message):
    """Return the text of the first content block that carries text, or None."""
    content = getattr(message, "content", None)
    if not isinstance(content, list):
        return None
    for block in content:
        text = getattr(block, "text", None)
        if isinstance(text, str):
            return text
    return None


def extraction_workflow(raw_text: str, company_name: str, api_key: str, progress=gr.Progress()):
    """
    Ask the Anthropic model to extract founder names from article snippets.

    Args:
        raw_text: Plain-text article digest produced by the search step. Only
            the first 20000 characters are sent to the model.
        company_name: Company the snippets are about; quoted in the prompt.
        api_key: User-supplied Anthropic API key.
        progress: Gradio progress tracker.

    Returns:
        A markdown string: a fenced, pretty-printed JSON object on success,
        otherwise a warning or error message. Exceptions are never propagated;
        they are rendered into the returned message.
    """
    if not raw_text or not raw_text.strip():
        return "❌ Please run a search first to get text to analyze."
    if not api_key or not api_key.strip():
        return "❌ Please enter your Anthropic API key."

    progress(0, desc="Preparing prompt for AI...")
    prompt = f"""From the provided article snippets about "{company_name}", extract the names of individuals explicitly identified as a founder.
Return a single, valid JSON object with the structure: {{"founders": [{{"name": "Founder's Name", "evidence": "A brief quote or context."}}]}}
If no founders are mentioned, return an empty list: {{"founders": []}}.
Do not add any text outside the JSON object.
ARTICLES:
---
{raw_text[:20000]}
---
"""
    try:
        progress(0.5, desc="Sending request to AI model...")
        # Create client with the user's API key. Strip it: a pasted key often
        # carries a trailing newline/space that passes the check above but
        # fails authentication.
        client = anthropic.Anthropic(api_key=api_key.strip())
        message = client.messages.create(
            model="claude-sonnet-4-20250514",  # As requested
            max_tokens=1024,
            temperature=0.0,
            messages=[{"role": "user", "content": prompt}]
        )

        # Guard against empty/non-text responses instead of indexing blindly.
        json_text = _first_text_block(message)
        if json_text is None:
            return "❌ **API Error**: The AI model returned an empty or invalid response."

        # Clean the response to find the JSON object (outermost braces).
        match = re.search(r'\{.*\}', json_text, re.DOTALL)
        if not match:
            return f"⚠️ **AI Warning**: The model did not return a JSON object.\n\n{json_text}"

        clean_json = match.group(0)
        try:
            parsed_json = json.loads(clean_json)
        except json.JSONDecodeError:
            return f"⚠️ **AI Warning**: The model returned malformed JSON.\n\n{clean_json}"

        # ensure_ascii=False keeps non-ASCII names readable (e.g. "José")
        # instead of rendering them as \uXXXX escapes in the UI.
        formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
        progress(1.0, desc="Extraction complete!")
        return f"```json\n{formatted_json}\n```"
    except Exception as e:
        # UI boundary: surface API/network failures as a message, not a crash.
        return f"❌ **An unexpected error occurred during extraction**: {e}"
# === 3. Simplified Gradio UI ===
# Build the two-step UI: a search button fills the results panel and a hidden
# state; an extract button feeds that state to the AI extraction step.
with gr.Blocks(title="Founder Name Extraction Tool", theme=gr.themes.Soft()) as demo:
    # The emoji in the labels below were stored as mojibake (UTF-8 bytes decoded
    # with a single-byte codepage); they are restored to the intended characters.
    gr.Markdown("# 🔎 Founder Name Extraction")
    gr.Markdown("A tool to find the names of company founders. **Step 1:** Enter your API key and company name. **Step 2:** Search for articles. **Step 3:** Extract founders' names from the results.")

    # Hidden state to pass text from search to extraction
    search_results_for_ai = gr.State("")

    with gr.Row():
        api_key_input = gr.Textbox(
            label="Anthropic API Key",
            placeholder="sk-ant-...",
            type="password",
            scale=2
        )
        name_input = gr.Textbox(
            label="Company Name",
            placeholder="e.g., 'OpenAI', 'SpaceX'",
            scale=2
        )
        search_btn = gr.Button("2. 🔍 Search for Articles (Uses DuckDuckGo)", variant="primary", scale=1)

    with gr.Row():
        extract_btn = gr.Button("3. 📊 Extract Founders from Search Results", variant="secondary")

    # Display both sections without tabs
    gr.Markdown("### Search Results")
    output_search = gr.Markdown()
    gr.Markdown("### Founder Intelligence Report")
    output_extract = gr.Markdown(value="*Waiting for extraction...*")

    # --- Event Wiring ---
    # Search button populates the search results and the hidden state
    search_btn.click(
        fn=search_workflow,
        inputs=[name_input, api_key_input],  # Added api_key_input
        outputs=[output_search, search_results_for_ai],
        show_progress="full"
    )
    # Extract button uses the hidden state to populate the extraction
    extract_btn.click(
        fn=extraction_workflow,
        inputs=[search_results_for_ai, name_input, api_key_input],  # Added api_key_input
        outputs=[output_extract],
        show_progress="full"
    )

# Enable request queuing for the app's event handlers.
demo.queue()

if __name__ == "__main__":
    demo.launch(show_error=True)