"""Gradio app that searches DuckDuckGo for articles about a company and asks
an Anthropic model to extract the founders named in the returned snippets."""

import json
import os
import re

import anthropic
import gradio as gr
import requests
from duckduckgo_search import DDGS

# Initialize clients
# NOTE(review): the workflows below build their own client from the key typed
# into the UI and never reference this module-level one; it is kept so any
# external code that imports the name keeps working.
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Upper bound on results requested from each DuckDuckGo query.
_RESULTS_PER_SEARCH = 4

# Greedy match from the first "{" to the last "}" so that any text the model
# adds around the JSON object is discarded.
_JSON_OBJECT_RE = re.compile(r'\{.*\}', re.DOTALL)


# === 1. Simplified Search Workflow ===
def _format_hits(hits, label):
    """Format one batch of search hits.

    Args:
        hits: Iterable of result dicts; the 'title', 'href' and 'body' keys
            are read, each with a placeholder when the key is missing.
        label: Tag shown next to every article ("Recent" or "Historical").

    Returns:
        A ``(markdown_sections, ai_sections)`` pair of string lists: the
        first is for display, the second is the plain text fed to the model.
    """
    markdown_sections = []
    ai_sections = []
    for hit in hits:
        title = hit.get('title', 'No Title')
        url = hit.get('href', '#')
        body = hit.get('body', 'No snippet available.')
        markdown_sections.append(
            f"### ({label}) {title}\n**Source**: [{url}]({url})\n\n{body}\n"
        )
        ai_sections.append(
            f"Article ({label}):\nTitle: {title}\nContent: {body}\n\n"
        )
    return markdown_sections, ai_sections


def search_workflow(name: str, api_key: str, progress=gr.Progress()):
    """Search for articles about a company's founders.

    Runs two DuckDuckGo text searches, one limited to the past year and one
    without a time limit, each requesting at most four results (so up to
    eight articles in total; fewer when the search returns fewer).

    Args:
        name: Company name typed by the user.
        api_key: Anthropic API key. It is only checked for presence here;
            the search itself does not use it.
        progress: Gradio progress tracker.

    Returns:
        A ``(markdown, raw_text)`` tuple. ``markdown`` is the text to display
        (results, or an error/info message) and ``raw_text`` is the plain
        text passed on to the extraction step (empty on error or no results).
    """
    if not name or not name.strip():
        return "❌ Please enter a company name.", ""
    if not api_key or not api_key.strip():
        return "❌ Please enter your Anthropic API key.", ""

    progress(0, desc="Starting search...")

    # (label, query, timelimit, progress fraction, progress message)
    searches = (
        # timelimit='y' restricts results to the past year.
        ("Recent", f'"{name}" founder news', 'y',
         0.1, "Searching for recent articles..."),
        ("Historical", f'"{name}" founder history origin', None,
         0.5, "Searching for historical articles..."),
    )

    all_articles_markdown = []
    ai_sections = []

    try:
        with DDGS(timeout=20) as ddgs:
            for label, keywords, timelimit, fraction, message in searches:
                progress(fraction, desc=message)
                search_kwargs = {
                    "keywords": keywords,
                    "max_results": _RESULTS_PER_SEARCH,
                }
                # Only pass timelimit when one is wanted, so the untimed
                # search is issued exactly as a call without the argument.
                if timelimit:
                    search_kwargs["timelimit"] = timelimit
                hits = ddgs.text(**search_kwargs) or []
                markdown_part, ai_part = _format_hits(hits, label)
                all_articles_markdown.extend(markdown_part)
                ai_sections.extend(ai_part)
    except Exception as e:
        # UI boundary: report any search failure to the user instead of
        # letting it propagate.
        return f"❌ An error occurred during search: {e}", ""

    if not all_articles_markdown:
        return "[INFO] No articles found for that company.", ""

    progress(1.0, desc="Search complete!")

    final_markdown = (
        f"## Found {len(all_articles_markdown)} Articles\n\n"
        + "\n---\n".join(all_articles_markdown)
    )
    return final_markdown, "".join(ai_sections)


# === 2. Simplified Extraction Workflow ===
def _first_text(message):
    """Return the text of the first content block that carries a string
    ``text`` attribute, or None when the response has no such block."""
    content = getattr(message, "content", None)
    if not isinstance(content, list):
        return None
    for block in content:
        text = getattr(block, "text", None)
        if isinstance(text, str):
            return text
    return None


def extraction_workflow(raw_text: str, company_name: str, api_key: str, progress=gr.Progress()):
    """Extract founder names from search snippets using the AI model.

    Args:
        raw_text: Article text produced by ``search_workflow``; only the
            first 20000 characters are sent to the model.
        company_name: Company the snippets are about (inserted in the prompt).
        api_key: Anthropic API key used to create the client for this call.
        progress: Gradio progress tracker.

    Returns:
        A Markdown string: the pretty-printed JSON in a fenced block on
        success, otherwise a warning or error message.
    """
    if not raw_text or not raw_text.strip():
        return "❌ Please run a search first to get text to analyze."
    if not api_key or not api_key.strip():
        return "❌ Please enter your Anthropic API key."

    progress(0, desc="Preparing prompt for AI...")

    prompt = f"""From the provided article snippets about "{company_name}", extract the names of individuals explicitly identified as a founder.
Return a single, valid JSON object with the structure:
{{"founders": [{{"name": "Founder's Name", "evidence": "A brief quote or context."}}]}}
If no founders are mentioned, return an empty list: {{"founders": []}}.
Do not add any text outside the JSON object.

ARTICLES:
---
{raw_text[:20000]}
---
"""

    try:
        progress(0.5, desc="Sending request to AI model...")

        # Create client with user's API key
        client = anthropic.Anthropic(api_key=api_key)
        message = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            temperature=0.0,
            messages=[{"role": "user", "content": prompt}],
        )

        # Scan for the first text block rather than assuming index 0, so an
        # empty content list or a leading non-text block is handled.
        json_text = _first_text(message)
        if json_text is None:
            return "❌ **API Error**: The AI model returned an empty or invalid response."

        # Clean the response to find the JSON object
        match = _JSON_OBJECT_RE.search(json_text)
        if not match:
            return f"⚠️ **AI Warning**: The model did not return a JSON object.\n\n{json_text}"

        clean_json = match.group(0)
        try:
            parsed_json = json.loads(clean_json)
        except json.JSONDecodeError:
            return f"⚠️ **AI Warning**: The model returned malformed JSON.\n\n{clean_json}"

        # ensure_ascii=False keeps non-ASCII names readable in the report
        # instead of rendering them as \uXXXX escapes.
        formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
        progress(1.0, desc="Extraction complete!")
        return f"```json\n{formatted_json}\n```"
    except Exception as e:
        # UI boundary: surface API/network failures as a message.
        return f"❌ **An unexpected error occurred during extraction**: {e}"


# === 3. Simplified Gradio UI ===
with gr.Blocks(title="Founder Name Extraction Tool", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔎 Founder Name Extraction")
    gr.Markdown(
        "A tool to find the names of company founders. "
        "**Step 1:** Enter your API key and company name. "
        "**Step 2:** Search for articles. "
        "**Step 3:** Extract founders' names from the results."
    )

    # Hidden state to pass text from search to extraction
    search_results_for_ai = gr.State("")

    with gr.Row():
        api_key_input = gr.Textbox(
            label="Anthropic API Key",
            placeholder="sk-ant-...",
            type="password",
            scale=2
        )
        name_input = gr.Textbox(
            label="Company Name",
            placeholder="e.g., 'OpenAI', 'SpaceX'",
            scale=2
        )
        search_btn = gr.Button("2. 🔍 Search for Articles (Uses DuckDuckGo)", variant="primary", scale=1)

    with gr.Row():
        extract_btn = gr.Button("3. 📊 Extract Founders from Search Results", variant="secondary")

    # Display both sections without tabs
    gr.Markdown("### Search Results")
    output_search = gr.Markdown()

    gr.Markdown("### Founder Intelligence Report")
    output_extract = gr.Markdown(value="*Waiting for extraction...*")

    # --- Event Wiring ---
    # Search button populates the search results and the hidden state
    search_btn.click(
        fn=search_workflow,
        inputs=[name_input, api_key_input],
        outputs=[output_search, search_results_for_ai],
        show_progress="full"
    )

    # Extract button uses the hidden state to populate the extraction
    extract_btn.click(
        fn=extraction_workflow,
        inputs=[search_results_for_ai, name_input, api_key_input],
        outputs=[output_extract],
        show_progress="full"
    )

demo.queue()

if __name__ == "__main__":
    demo.launch(show_error=True)