Spaces:
Sleeping
Sleeping
File size: 7,698 Bytes
cdb081c a34296b b3950a6 3238b9e cdb081c 725cd97 cdb081c 725cd97 39fcc34 5aafe64 ddd51b5 5aafe64 cdb081c 39fcc34 3238b9e cdb081c 5aafe64 cdb081c 5aafe64 41dc56e cdb081c 054936e cdb081c 054936e cdb081c 3238b9e cdb081c 725cd97 cdb081c 41dc56e cdb081c 3238b9e 39fcc34 cdb081c ddd51b5 cdb081c 5aafe64 cdb081c 39fcc34 cdb081c 054936e cdb081c 5aafe64 3238b9e cdb081c 39fcc34 5aafe64 cdb081c 5aafe64 cdb081c 5aafe64 cdb081c 41dc56e 3238b9e 5aafe64 3238b9e cdb081c 41dc56e 7c6b357 94df9d2 41dc56e cdb081c 5aafe64 1a9c6ca 725cd97 39fcc34 94df9d2 41dc56e cdb081c 39fcc34 cdb081c 1a9c6ca 39fcc34 1a9c6ca cdb081c 5aafe64 39fcc34 cdb081c 41dc56e 5aafe64 1a9c6ca cdb081c 5aafe64 39fcc34 cdb081c 5aafe64 8230bce 196bf92 3238b9e 054936e 196bf92 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
import gradio as gr
import requests
import re
from duckduckgo_search import DDGS
import anthropic
import os
import json
# Initialize clients
# Module-level Anthropic client configured from the ANTHROPIC_API_KEY
# environment variable (os.getenv yields None when the variable is unset).
# NOTE(review): nothing in the visible code reads `anthropic_client`;
# extraction_workflow builds its own client from the user-supplied key.
# Confirm no other module relies on this before removing it.
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
# === 1. Simplified Search Workflow ===
def _collect_articles(ddgs, keywords: str, label: str, max_results: int = 4, **search_options):
    """Run one DuckDuckGo text search and format every hit for display and for the AI.

    Args:
        ddgs: An open ``DDGS`` session.
        keywords: Query string passed to ``ddgs.text``.
        label: Tag placed in each heading, e.g. ``"Recent"`` or ``"Historical"``.
        max_results: Upper bound on the number of hits requested.
        **search_options: Extra keyword arguments forwarded to ``ddgs.text``
            (for example ``timelimit='y'``).

    Returns:
        A ``(display_blocks, ai_blocks)`` pair of lists of strings: markdown
        snippets for the UI and plain-text snippets for the extraction prompt.
    """
    display_blocks = []
    ai_blocks = []
    # `or []` guards against the search returning None instead of a sequence.
    for res in ddgs.text(keywords=keywords, max_results=max_results, **search_options) or []:
        title = res.get('title', 'No Title')
        url = res.get('href', '#')
        body = res.get('body', 'No snippet available.')
        display_blocks.append(f"### ({label}) {title}\n**Source**: [{url}]({url})\n\n{body}\n")
        ai_blocks.append(f"Article ({label}):\nTitle: {title}\nContent: {body}\n\n")
    return display_blocks, ai_blocks


def search_workflow(name: str, api_key: str, progress=gr.Progress()):
    """Search DuckDuckGo for founder-related articles about a company.

    Fetches up to 8 results: up to 4 restricted by ``timelimit='y'`` (recent)
    and up to 4 with no time restriction (historical).

    Args:
        name: Company name; it is quoted inside both search queries.
        api_key: Anthropic API key. It is only checked for presence here; the
            search itself does not use it.
        progress: Gradio progress callback used to report status to the UI.

    Returns:
        A ``(markdown, raw_text)`` tuple. ``markdown`` is the report shown to
        the user (or an error/info message) and ``raw_text`` is the plain-text
        digest handed to the extraction step. ``raw_text`` is ``""`` whenever
        validation fails, the search raises, or nothing is found.
    """
    if not name or not name.strip():
        return "β Please enter a company name.", ""
    if not api_key or not api_key.strip():
        return "β Please enter your Anthropic API key.", ""

    progress(0, desc="Starting search...")

    # Define search queries
    recent_keywords = f'"{name}" founder news'
    historical_keywords = f'"{name}" founder history origin'

    all_articles_markdown = []
    ai_blocks = []
    # Broad catch is deliberate: this is the UI boundary, and any search
    # failure (network, rate limit, library error) is reported as a message.
    try:
        with DDGS(timeout=20) as ddgs:
            # timelimit='y' limits results to the past year.
            progress(0.1, desc="Searching for recent articles...")
            shown, fed = _collect_articles(ddgs, recent_keywords, "Recent", timelimit='y')
            all_articles_markdown.extend(shown)
            ai_blocks.extend(fed)

            progress(0.5, desc="Searching for historical articles...")
            shown, fed = _collect_articles(ddgs, historical_keywords, "Historical")
            all_articles_markdown.extend(shown)
            ai_blocks.extend(fed)
    except Exception as e:
        return f"β An error occurred during search: {e}", ""

    if not all_articles_markdown:
        return "[INFO] No articles found for that company.", ""

    progress(1.0, desc="Search complete!")
    final_markdown = f"## Found {len(all_articles_markdown)} Articles\n\n" + "\n---\n".join(all_articles_markdown)
    return final_markdown, "".join(ai_blocks)
# === 2. Simplified Extraction Workflow ===
def extraction_workflow(raw_text: str, company_name: str, api_key: str, progress=gr.Progress()) -> str:
    """Ask the Anthropic model to extract founder names from article snippets.

    Args:
        raw_text: Plain-text article digest produced by the search step. Only
            the first 20,000 characters are sent to the model.
        company_name: Company the snippets are about; interpolated into the prompt.
        api_key: User-supplied Anthropic API key used to build the client.
        progress: Gradio progress callback used to report status to the UI.

    Returns:
        A markdown string: a fenced, pretty-printed JSON object on success,
        otherwise a warning or error message. This function never raises; any
        exception from the API call or response handling is turned into a message.
    """
    if not raw_text or not raw_text.strip():
        return "β Please run a search first to get text to analyze."
    if not api_key or not api_key.strip():
        return "β Please enter your Anthropic API key."

    progress(0, desc="Preparing prompt for AI...")
    prompt = f"""From the provided article snippets about "{company_name}", extract the names of individuals explicitly identified as a founder.
Return a single, valid JSON object with the structure: {{"founders": [{{"name": "Founder's Name", "evidence": "A brief quote or context."}}]}}
If no founders are mentioned, return an empty list: {{"founders": []}}.
Do not add any text outside the JSON object.
ARTICLES:
---
{raw_text[:20000]}
---
"""
    # Broad catch is deliberate: this is the UI boundary, so API and parsing
    # failures are shown to the user instead of propagating.
    try:
        progress(0.5, desc="Sending request to AI model...")
        # Create client with user's API key
        client = anthropic.Anthropic(api_key=api_key)
        message = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            temperature=0.0,
            messages=[{"role": "user", "content": prompt}],
        )

        # Guard against an empty or unexpected response shape before indexing,
        # which avoids a 'list index out of range' error.
        if not (message and message.content and isinstance(message.content, list)):
            return "β **API Error**: The AI model returned an empty or invalid response."
        text_block = message.content[0]
        if not hasattr(text_block, 'text'):
            return "β **API Error**: The AI model returned an empty or invalid response."
        json_text = text_block.text

        # The model may wrap the object in extra text; grab the outermost {...}.
        match = re.search(r'\{.*\}', json_text, re.DOTALL)
        if not match:
            return f"β οΈ **AI Warning**: The model did not return a JSON object.\n\n{json_text}"

        clean_json = match.group(0)
        try:
            parsed_json = json.loads(clean_json)
        except json.JSONDecodeError:
            return f"β οΈ **AI Warning**: The model returned malformed JSON.\n\n{clean_json}"

        # ensure_ascii=False keeps non-ASCII names readable in the report
        # instead of rendering them as \uXXXX escapes.
        formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
        progress(1.0, desc="Extraction complete!")
        return f"```json\n{formatted_json}\n```"
    except Exception as e:
        return f"β **An unexpected error occurred during extraction**: {e}"
# === 3. Simplified Gradio UI ===
with gr.Blocks(title="Founder Name Extraction Tool", theme=gr.themes.Soft()) as demo:
    # Page header and usage instructions.
    gr.Markdown("# π Founder Name Extraction")
    gr.Markdown("A tool to find the names of company founders. **Step 1:** Enter your API key and company name. **Step 2:** Search for articles. **Step 3:** Extract founders' names from the results.")

    # Invisible state that carries the article text from the search step
    # to the extraction step.
    search_results_for_ai = gr.State("")

    # Input row: credentials, company name, and the search trigger.
    with gr.Row():
        api_key_input = gr.Textbox(label="Anthropic API Key", placeholder="sk-ant-...", type="password", scale=2)
        name_input = gr.Textbox(label="Company Name", placeholder="e.g., 'OpenAI', 'SpaceX'", scale=2)
        search_btn = gr.Button("2. π Search for Articles (Uses DuckDuckGo)", variant="primary", scale=1)

    with gr.Row():
        extract_btn = gr.Button("3. π Extract Founders from Search Results", variant="secondary")

    # Both result panes are stacked on the page (no tabs).
    gr.Markdown("### Search Results")
    output_search = gr.Markdown()
    gr.Markdown("### Founder Intelligence Report")
    output_extract = gr.Markdown(value="*Waiting for extraction...*")

    # --- Event wiring ---
    # Searching fills the visible results pane and the hidden state.
    search_btn.click(
        fn=search_workflow,
        inputs=[name_input, api_key_input],
        outputs=[output_search, search_results_for_ai],
        show_progress="full",
    )
    # Extraction reads the hidden state and writes the founder report.
    extract_btn.click(
        fn=extraction_workflow,
        inputs=[search_results_for_ai, name_input, api_key_input],
        outputs=[output_extract],
        show_progress="full",
    )

demo.queue()

if __name__ == "__main__":
    demo.launch(show_error=True)
|