File size: 7,698 Bytes
cdb081c
a34296b
 
 
b3950a6
 
3238b9e
 
cdb081c
 
725cd97
cdb081c
725cd97
39fcc34
5aafe64
ddd51b5
5aafe64
cdb081c
 
39fcc34
 
 
3238b9e
cdb081c
 
 
 
 
 
 
 
5aafe64
cdb081c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5aafe64
41dc56e
cdb081c
054936e
cdb081c
 
054936e
cdb081c
3238b9e
cdb081c
725cd97
cdb081c
41dc56e
 
cdb081c
3238b9e
39fcc34
cdb081c
ddd51b5
cdb081c
5aafe64
cdb081c
39fcc34
 
 
cdb081c
 
054936e
cdb081c
 
 
5aafe64
 
 
 
 
 
 
3238b9e
cdb081c
39fcc34
 
 
 
 
5aafe64
cdb081c
5aafe64
 
 
cdb081c
 
 
5aafe64
 
 
cdb081c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41dc56e
3238b9e
5aafe64
3238b9e
 
cdb081c
41dc56e
7c6b357
 
94df9d2
41dc56e
cdb081c
5aafe64
1a9c6ca
725cd97
39fcc34
 
 
 
 
 
 
 
 
 
 
94df9d2
41dc56e
cdb081c
39fcc34
cdb081c
1a9c6ca
 
 
 
 
39fcc34
1a9c6ca
 
 
cdb081c
5aafe64
39fcc34
cdb081c
 
41dc56e
5aafe64
1a9c6ca
cdb081c
5aafe64
39fcc34
cdb081c
 
5aafe64
8230bce
196bf92
 
3238b9e
054936e
196bf92
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import gradio as gr
import requests
import re
from duckduckgo_search import DDGS
import anthropic
import os
import json

# Module-level Anthropic client, configured from the ANTHROPIC_API_KEY
# environment variable (the lookup yields None when the variable is unset).
# NOTE(review): nothing in the visible part of this file references this
# object; extraction_workflow builds its own client from the key typed into
# the UI. Confirm whether this client is still needed.
anthropic_client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))

# === 1. Simplified Search Workflow ===

def _format_search_results(results, label: str):
    """Render search hits as display Markdown and as plain text for the AI prompt.

    Args:
        results: Iterable of result mappings. The ``title``, ``href`` and
            ``body`` keys are read; a missing, ``None`` or empty value is
            replaced by a placeholder.
        label: Tag shown next to every article, e.g. ``"Recent"``.

    Returns:
        A ``(markdown_blocks, ai_blocks)`` pair of lists holding one entry per
        result, in the order the results were given.
    """
    markdown_blocks = []
    ai_blocks = []
    for res in results:
        # `or` (rather than a .get default) also covers keys that are present
        # but hold None or an empty string.
        title = res.get('title') or 'No Title'
        url = res.get('href') or '#'
        body = res.get('body') or 'No snippet available.'

        markdown_blocks.append(
            f"### ({label}) {title}\n**Source**: [{url}]({url})\n\n{body}\n"
        )
        ai_blocks.append(f"Article ({label}):\nTitle: {title}\nContent: {body}\n\n")
    return markdown_blocks, ai_blocks


def search_workflow(name: str, api_key: str, progress=gr.Progress()):
    """
    Search DuckDuckGo for founder-related articles about a company.

    Two searches are run, each capped at 4 results: one restricted to the past
    year ("Recent") and one without a time limit ("Historical"). Up to 8
    articles are therefore returned; fewer if the searches yield fewer hits.

    Args:
        name: Company name typed by the user. Surrounding whitespace and
            embedded double quotes are removed before it is placed inside the
            quoted search phrase.
        api_key: Anthropic API key typed by the user. It is only checked for
            presence here.
        progress: Gradio progress tracker used to report search stages.

    Returns:
        A ``(markdown, raw_text)`` tuple. ``markdown`` is the formatted article
        list for display and ``raw_text`` is the plain-text version meant for
        the extraction step. On invalid input, a search error, or no results,
        ``markdown`` is a status message and ``raw_text`` is ``""``.
    """
    # Normalise the name: drop double quotes (they would terminate the quoted
    # search phrase early) and collapse runs of whitespace.
    company = " ".join((name or "").replace('"', ' ').split())
    if not company:
        return "❌ Please enter a company name.", ""

    # NOTE(review): the key is not used by the search itself; the check is kept
    # from the original, presumably so a missing key is reported before the
    # extraction step -- confirm.
    if not api_key or not api_key.strip():
        return "❌ Please enter your Anthropic API key.", ""

    progress(0, desc="Starting search...")

    # Define search queries
    recent_keywords = f'"{company}" founder news'
    historical_keywords = f'"{company}" founder history origin'

    all_articles_markdown = []
    ai_text_parts = []

    try:
        with DDGS(timeout=20) as ddgs:
            # --- Fetch up to 4 recent articles (timelimit='y' = past year) ---
            progress(0.1, desc="Searching for recent articles...")
            recent_results = ddgs.text(keywords=recent_keywords, max_results=4, timelimit='y') or []
            markdown_blocks, ai_blocks = _format_search_results(recent_results, "Recent")
            all_articles_markdown.extend(markdown_blocks)
            ai_text_parts.extend(ai_blocks)

            # --- Fetch up to 4 articles with no time restriction ---
            progress(0.5, desc="Searching for historical articles...")
            historical_results = ddgs.text(keywords=historical_keywords, max_results=4) or []
            markdown_blocks, ai_blocks = _format_search_results(historical_results, "Historical")
            all_articles_markdown.extend(markdown_blocks)
            ai_text_parts.extend(ai_blocks)

    except Exception as e:
        # UI boundary: report any search failure to the user instead of raising.
        return f"❌ An error occurred during search: {e}", ""

    if not all_articles_markdown:
        return "[INFO] No articles found for that company.", ""

    progress(1.0, desc="Search complete!")

    final_markdown = f"## Found {len(all_articles_markdown)} Articles\n\n" + "\n---\n".join(all_articles_markdown)

    return final_markdown, "".join(ai_text_parts)


# === 2. Simplified Extraction Workflow ===

def extraction_workflow(raw_text: str, company_name: str, api_key: str, progress=gr.Progress()):
    """
    Ask the Anthropic model to extract founder names from article snippets.

    Args:
        raw_text: Plain-text article snippets produced by the search step.
            Only the first 20000 characters are sent to the model.
        company_name: Company the snippets are about; interpolated into the
            prompt.
        api_key: Anthropic API key supplied by the user. Surrounding
            whitespace is stripped before it is used.
        progress: Gradio progress tracker used to report extraction stages.

    Returns:
        A Markdown string: a fenced ``json`` block with the parsed result on
        success, otherwise a message describing what went wrong (missing
        input, empty or non-JSON model response, malformed JSON, or any
        exception raised during the request). This function does not raise;
        every failure is reported through the returned string.
    """
    if not raw_text or not raw_text.strip():
        return "❌ Please run a search first to get text to analyze."

    if not api_key or not api_key.strip():
        return "❌ Please enter your Anthropic API key."

    progress(0, desc="Preparing prompt for AI...")

    prompt = f"""From the provided article snippets about "{company_name}", extract the names of individuals explicitly identified as a founder.
Return a single, valid JSON object with the structure: {{"founders": [{{"name": "Founder's Name", "evidence": "A brief quote or context."}}]}}
If no founders are mentioned, return an empty list: {{"founders": []}}.
Do not add any text outside the JSON object.

ARTICLES:
---
{raw_text[:20000]}
---
"""
    try:
        progress(0.5, desc="Sending request to AI model...")

        # Build the client from the user's key; strip it so a key pasted with
        # a trailing space or newline still authenticates.
        client = anthropic.Anthropic(api_key=api_key.strip())

        message = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            temperature=0.0,
            messages=[{"role": "user", "content": prompt}]
        )

        # Use the first content block that actually carries text, rather than
        # assuming it is always the block at index 0.
        blocks = getattr(message, "content", None)
        if not isinstance(blocks, list):
            blocks = []
        json_text = next(
            (block.text for block in blocks if isinstance(getattr(block, "text", None), str)),
            None,
        )
        if json_text is None:
            return "❌ **API Error**: The AI model returned an empty or invalid response."

        # Take everything from the first '{' to the last '}' so surrounding
        # chatter or code fences around the JSON object are ignored.
        match = re.search(r'\{.*\}', json_text, re.DOTALL)
        if not match:
            return f"⚠️ **AI Warning**: The model did not return a JSON object.\n\n{json_text}"

        clean_json = match.group(0)
        try:
            parsed_json = json.loads(clean_json)
        except json.JSONDecodeError:
            return f"⚠️ **AI Warning**: The model returned malformed JSON.\n\n{clean_json}"

        # ensure_ascii=False keeps non-ASCII names (e.g. accented letters)
        # readable instead of emitting \uXXXX escapes in the UI.
        formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
        progress(1.0, desc="Extraction complete!")
        return f"```json\n{formatted_json}\n```"

    except Exception as e:
        # UI boundary: surface any request or processing failure as a message.
        return f"❌ **An unexpected error occurred during extraction**: {e}"


# === 3. Simplified Gradio UI ===

# Build the Gradio interface: inputs and search button in one row, the extract
# button in a second row, then the two Markdown output areas.
with gr.Blocks(title="Founder Name Extraction Tool", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔎 Founder Name Extraction")
    gr.Markdown("A tool to find the names of company founders. **Step 1:** Enter your API key and company name. **Step 2:** Search for articles. **Step 3:** Extract founders' names from the results.")
    
    # Hidden state to pass text from search to extraction
    search_results_for_ai = gr.State("")
    
    with gr.Row():
        api_key_input = gr.Textbox(
            label="Anthropic API Key", 
            placeholder="sk-ant-...", 
            type="password",
            scale=2
        )
        name_input = gr.Textbox(
            label="Company Name", 
            placeholder="e.g., 'OpenAI', 'SpaceX'", 
            scale=2
        )
        search_btn = gr.Button("2. 🔍 Search for Articles (Uses DuckDuckGo)", variant="primary", scale=1)
    
    with gr.Row():
        extract_btn = gr.Button("3. 📊 Extract Founders from Search Results", variant="secondary")
    
    # Display both sections without tabs
    gr.Markdown("### Search Results")
    output_search = gr.Markdown()
    
    gr.Markdown("### Founder Intelligence Report")
    output_extract = gr.Markdown(value="*Waiting for extraction...*")
    
    # --- Event Wiring ---
    # Search button populates the search results and the hidden state
    search_btn.click(
        fn=search_workflow,
        # Order must match search_workflow(name, api_key).
        inputs=[name_input, api_key_input],
        outputs=[output_search, search_results_for_ai],
        show_progress="full"
    )
    
    # Extract button uses the hidden state to populate the extraction
    extract_btn.click(
        fn=extraction_workflow,
        # Order must match extraction_workflow(raw_text, company_name, api_key).
        inputs=[search_results_for_ai, name_input, api_key_input],
        outputs=[output_extract],
        show_progress="full"
    )

# Turn on Gradio's event queue for the app; this runs at import time, before
# any launch.
demo.queue()

# Start the server only when this file is executed as a script, so importing
# the module does not launch it.
if __name__ == "__main__":
    # show_error=True: per Gradio's launch() documentation, errors raised in
    # handlers are shown in the browser UI.
    demo.launch(show_error=True)