# Page-capture residue (not part of the program): "Spaces: Sleeping"
import gradio as gr | |
import requests | |
import re | |
from duckduckgo_search import DDGS | |
import anthropic | |
import os | |
import json | |
# Module-level Anthropic client configured from the ANTHROPIC_API_KEY
# environment variable. Note: extraction_workflow does not use this object;
# it creates its own client from the key the user supplies.
anthropic_client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
# === 1. Simplified Search Workflow === | |
def _collect_articles(ddgs, query, label, **search_kwargs):
    """Run one DuckDuckGo text search and format every hit.

    Args:
        ddgs: An open ``DDGS`` session.
        query: The search query string.
        label: Tag shown in headings and AI text (e.g. ``"Recent"``).
        **search_kwargs: Extra keyword arguments forwarded to ``ddgs.text``
            (e.g. ``timelimit='y'``).

    Returns:
        A list of ``(display_markdown, ai_text)`` pairs, one per result.
    """
    hits = ddgs.text(keywords=query, max_results=4, **search_kwargs) or []
    formatted = []
    for hit in hits:
        title = hit.get('title', 'No Title')
        url = hit.get('href', '#')
        body = hit.get('body', 'No snippet available.')
        formatted.append((
            f"### ({label}) {title}\n**Source**: [{url}]({url})\n\n{body}\n",
            f"Article ({label}):\nTitle: {title}\nContent: {body}\n\n",
        ))
    return formatted


def search_workflow(name: str, api_key: str, progress=gr.Progress()):
    """Search DuckDuckGo for founder-related results about a company.

    Two text searches are run: one limited to the past year ("Recent") and
    one without a time limit ("Historical"). Each requests at most 4 results,
    so between 0 and 8 results are returned in total.

    Args:
        name: Company name; surrounding whitespace is ignored.
        api_key: Anthropic API key. It is not used by the search itself, but
            it is required here so the user is prompted before extraction.
        progress: Gradio progress tracker.

    Returns:
        A ``(markdown, raw_text)`` tuple. ``markdown`` is the text to display
        (results, or a status/error message). ``raw_text`` is the plain-text
        version for the AI prompt, or ``""`` when there is nothing to analyze.
    """
    if not name or not name.strip():
        return "β Please enter a company name.", ""
    if not api_key or not api_key.strip():
        return "β Please enter your Anthropic API key.", ""

    # Use the stripped name so padding does not end up inside the quoted phrase.
    company = name.strip()

    progress(0, desc="Starting search...")
    recent_keywords = f'"{company}" founder news'
    historical_keywords = f'"{company}" founder history origin'

    try:
        with DDGS(timeout=20) as ddgs:
            # timelimit='y' restricts results to the past year.
            progress(0.1, desc="Searching for recent articles...")
            articles = _collect_articles(ddgs, recent_keywords, "Recent", timelimit='y')

            progress(0.5, desc="Searching for historical articles...")
            articles += _collect_articles(ddgs, historical_keywords, "Historical")
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"β An error occurred during search: {e}", ""

    if not articles:
        return "[INFO] No articles found for that company.", ""

    progress(1.0, desc="Search complete!")
    display_blocks = [markdown for markdown, _ in articles]
    final_markdown = f"## Found {len(display_blocks)} Articles\n\n" + "\n---\n".join(display_blocks)
    raw_text_for_ai = "".join(ai_text for _, ai_text in articles)
    return final_markdown, raw_text_for_ai
# === 2. Simplified Extraction Workflow === | |
def _render_founders_reply(reply_text):
    """Turn the model's reply text into the markdown shown to the user.

    Returns a ``(markdown, ok)`` pair; ``ok`` is True only when a JSON object
    was found in the reply and parsed successfully.
    """
    # The model may wrap the JSON in prose or code fences; grab the outermost
    # {...} span.
    match = re.search(r'\{.*\}', reply_text, re.DOTALL)
    if not match:
        return f"β οΈ **AI Warning**: The model did not return a JSON object.\n\n{reply_text}", False

    clean_json = match.group(0)
    try:
        parsed_json = json.loads(clean_json)
    except json.JSONDecodeError:
        return f"β οΈ **AI Warning**: The model returned malformed JSON.\n\n{clean_json}", False

    # ensure_ascii=False keeps non-ASCII names readable instead of \uXXXX escapes.
    formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
    return f"```json\n{formatted_json}\n```", True


def extraction_workflow(raw_text: str, company_name: str, api_key: str, progress=gr.Progress()):
    """Ask the Anthropic model to extract founder names from search snippets.

    Args:
        raw_text: Plain-text article snippets; only the first 20000
            characters are sent to the model.
        company_name: Company the snippets are about (interpolated into the
            prompt).
        api_key: User-supplied Anthropic API key; surrounding whitespace is
            ignored.
        progress: Gradio progress tracker.

    Returns:
        A markdown string: the pretty-printed JSON result in a code fence on
        success, otherwise a warning or error message. Exceptions are caught
        and reported in the returned string rather than raised.
    """
    if not raw_text or not raw_text.strip():
        return "β Please run a search first to get text to analyze."
    if not api_key or not api_key.strip():
        return "β Please enter your Anthropic API key."

    progress(0, desc="Preparing prompt for AI...")
    prompt = f"""From the provided article snippets about "{company_name}", extract the names of individuals explicitly identified as a founder.
Return a single, valid JSON object with the structure: {{"founders": [{{"name": "Founder's Name", "evidence": "A brief quote or context."}}]}}
If no founders are mentioned, return an empty list: {{"founders": []}}.
Do not add any text outside the JSON object.
ARTICLES:
---
{raw_text[:20000]}
---
"""
    try:
        progress(0.5, desc="Sending request to AI model...")
        # Use the stripped key: a key pasted with stray whitespace or a
        # newline would otherwise be sent verbatim.
        client = anthropic.Anthropic(api_key=api_key.strip())
        message = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            temperature=0.0,
            messages=[{"role": "user", "content": prompt}],
        )

        # Take the first content block that carries text; guards against an
        # empty or unexpected response shape.
        content = getattr(message, "content", None)
        reply_text = None
        if isinstance(content, list):
            reply_text = next(
                (block.text for block in content if hasattr(block, "text")),
                None,
            )
        if reply_text is None:
            return "β **API Error**: The AI model returned an empty or invalid response."

        rendered, ok = _render_founders_reply(reply_text)
        if ok:
            progress(1.0, desc="Extraction complete!")
        return rendered
    except Exception as e:
        # UI boundary: surface any API/network failure as a message.
        return f"β **An unexpected error occurred during extraction**: {e}"
# === 3. Simplified Gradio UI ===
with gr.Blocks(title="Founder Name Extraction Tool", theme=gr.themes.Soft()) as demo:
    # Header and usage instructions.
    gr.Markdown("# π Founder Name Extraction")
    gr.Markdown(
        "A tool to find the names of company founders. "
        "**Step 1:** Enter your API key and company name. "
        "**Step 2:** Search for articles. "
        "**Step 3:** Extract founders' names from the results."
    )

    # Invisible state that carries the raw search text from the search step
    # to the extraction step.
    search_results_for_ai = gr.State("")

    # Input row: API key, company name, and the search trigger.
    with gr.Row():
        api_key_input = gr.Textbox(
            type="password",
            label="Anthropic API Key",
            placeholder="sk-ant-...",
            scale=2,
        )
        name_input = gr.Textbox(
            label="Company Name",
            placeholder="e.g., 'OpenAI', 'SpaceX'",
            scale=2,
        )
        search_btn = gr.Button(
            "2. π Search for Articles (Uses DuckDuckGo)",
            variant="primary",
            scale=1,
        )

    with gr.Row():
        extract_btn = gr.Button(
            "3. π Extract Founders from Search Results",
            variant="secondary",
        )

    # Both output sections are shown one after the other (no tabs).
    gr.Markdown("### Search Results")
    output_search = gr.Markdown()
    gr.Markdown("### Founder Intelligence Report")
    output_extract = gr.Markdown(value="*Waiting for extraction...*")

    # --- Event Wiring ---
    # Searching fills the visible results and the hidden state.
    search_btn.click(
        search_workflow,
        [name_input, api_key_input],
        [output_search, search_results_for_ai],
        show_progress="full",
    )
    # Extraction reads the hidden state and fills the report.
    extract_btn.click(
        extraction_workflow,
        [search_results_for_ai, name_input, api_key_input],
        [output_extract],
        show_progress="full",
    )

demo.queue()

if __name__ == "__main__":
    demo.launch(show_error=True)