File size: 6,940 Bytes
065339b
b76c831
 
 
065339b
b8a3699
 
b76c831
b8a3699
b76c831
 
 
b8a3699
b76c831
 
b8a3699
b76c831
 
b8a3699
b76c831
 
b8a3699
 
 
b76c831
b8a3699
b76c831
 
b8a3699
 
b76c831
 
b8a3699
b76c831
 
 
 
 
 
 
 
 
b8a3699
b76c831
 
 
 
b8a3699
b76c831
b8a3699
 
b76c831
 
b8a3699
b76c831
 
 
 
 
 
 
 
b8a3699
b76c831
 
 
 
b8a3699
b76c831
b8a3699
 
 
 
b76c831
b8a3699
 
065339b
b8a3699
 
 
 
 
 
2ab9f5f
b76c831
b8a3699
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ab9f5f
b8a3699
 
 
 
 
2ab9f5f
b8a3699
 
 
 
 
 
 
 
 
 
 
 
b76c831
3c6afdf
b8a3699
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ab9f5f
065339b
b8a3699
d0bd726
2ab9f5f
065339b
 
b8a3699
 
 
2ab9f5f
d0bd726
b8a3699
d0bd726
 
2ab9f5f
065339b
b76c831
065339b
b8a3699
 
 
 
 
 
 
b76c831
065339b
 
2ab9f5f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import gradio as gr
import requests
import time
from duckduckgo_search import DDGS

# === Model functions ===

def search_articles(name: str) -> str:
    """Search DuckDuckGo for up to three results mentioning *name*.

    The query combines the quoted name with an OR-group of ownership-related
    keywords. The query and the raw results are printed for debugging.

    Args:
        name: Business or project name to look up.

    Returns:
        The results formatted as numbered Markdown entries (title, source
        URL, body) separated by blank lines; a "No articles found" message
        when the search returns nothing; or an "[ERROR] ..." string when the
        search or the formatting raises.
    """
    role_terms = ['founders', 'partners', 'funders', 'owners']
    joined_terms = " OR ".join(role_terms)
    search_query = f'"{name}" ({joined_terms}) site:news'
    try:
        print(f"[DEBUG] Search query: {search_query}")
        with DDGS() as client:
            hits = list(client.text(search_query, max_results=3))
            print(f"[DEBUG] Raw results: {hits}")
        if not hits:
            return f"No articles found for {name}"

        # One Markdown entry per hit; missing fields fall back to placeholders.
        return "\n\n".join(
            f"**{rank}. {hit.get('title', 'No Title')}**\n"
            f"Source: {hit.get('href', 'No URL')}\n"
            f"{hit.get('body', 'No Body')}\n"
            for rank, hit in enumerate(hits, 1)
        )
    except Exception as exc:
        # Failures are reported as text because the result goes straight
        # into a UI textbox.
        return f"[ERROR] Search failed: {str(exc)}"


def extract_entities(search_results: str, timeout: float = 120.0) -> str:
    """Extract person and organization names using the Mistral 7B endpoint.

    Sends a prompt containing *search_results* to the hosted generation
    endpoint and returns the model's answer.

    Args:
        search_results: Text to scan for person and organization names.
        timeout: Seconds to wait for the endpoint before giving up. Without
            a timeout a stalled endpoint would block the caller forever.

    Returns:
        The "response" field of the endpoint's JSON reply, or
        "No entities extracted" when that field is absent. On a non-200
        status or any exception (including a timeout) an "[ERROR] ..."
        string is returned instead of raising.
    """
    modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
    prompt = f"""Extract all person names and organization names from the following text.
Format as:
PERSON: [name]
ORG: [organization name]
Text: {search_results}"""
    try:
        response = requests.post(
            modal_endpoint,
            json={"prompt": prompt, "max_tokens": 500, "temperature": 0.1},
            timeout=timeout,
        )
        if response.status_code != 200:
            return f"[ERROR] API Error: {response.status_code}"
        return response.json().get("response", "No entities extracted")
    except Exception as e:
        # Deliberately broad: callers display the returned string in the UI,
        # so network, timeout and JSON-decoding failures are all reported
        # as text rather than propagated.
        return f"[ERROR] Extraction failed: {str(e)}"


def find_full_names(search_results: str, entities: str, timeout: float = 120.0) -> str:
    """Resolve full names and roles for *entities* using the Mistral 7B endpoint.

    Sends a prompt containing the entity list and the search results to the
    hosted generation endpoint and returns the model's answer.

    Args:
        search_results: Article text that provides the context.
        entities: Entity list to resolve.
        timeout: Seconds to wait for the endpoint before giving up. Without
            a timeout a stalled endpoint would block the caller forever.

    Returns:
        The "response" field of the endpoint's JSON reply, or
        "No full names found" when that field is absent. On a non-200
        status or any exception (including a timeout) an "[ERROR] ..."
        string is returned instead of raising.
    """
    modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
    prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
Entities: {entities}
Search Results: {search_results}
Provide full names with their roles/titles where mentioned."""
    try:
        response = requests.post(
            modal_endpoint,
            json={"prompt": prompt, "max_tokens": 300, "temperature": 0.1},
            timeout=timeout,
        )
        if response.status_code != 200:
            return f"[ERROR] API Error: {response.status_code}"
        return response.json().get("response", "No full names found")
    except Exception as e:
        # Deliberately broad: callers display the returned string in the UI,
        # so network, timeout and JSON-decoding failures are all reported
        # as text rather than propagated.
        return f"[ERROR] Full name extraction failed: {str(e)}"


# === Gradio interface ===

def process_name_with_progress(name: str, progress=gr.Progress()):
    """Run search, entity extraction and full-name resolution, streaming updates.

    This is a generator: each yield is a
    ``(search_log, entities, full_names)`` tuple. The search log accumulates
    "[DEBUG]" timing lines together with the formatted articles.

    Later steps are skipped when an earlier one produced nothing usable
    (search error, no articles, or extraction error), so error text is never
    sent to the language model as if it were article content.

    Args:
        name: Business or project name typed by the user. ``None``, empty
            and whitespace-only values yield "No name provided".
        progress: Gradio progress tracker, called with a fraction and a
            description before each step.

    Yields:
        Tuples of three strings: search log, extracted entities, full names.
    """
    if not name or not name.strip():
        yield "No name provided", "", ""
        return

    query = name.strip()
    search_results = ""
    entities = ""
    full_names = ""

    try:
        # Step 1: Search
        progress(0.1, desc="Searching for articles...")
        search_results += f"[DEBUG] Starting search for: {name}\n"
        yield search_results, "", ""

        search_start = time.time()
        articles_output = search_articles(query)
        search_time = time.time() - search_start

        search_results += f"[DEBUG] Search completed in {search_time:.2f}s\n"
        search_results += f"{articles_output}\n"
        yield search_results, "", ""

        # search_articles reports failure or an empty result as text; do not
        # feed that text to the model as though it were an article.
        if str(articles_output).startswith(("[ERROR]", "No articles found")):
            skipped = "[SKIPPED] Search returned no usable articles"
            progress(1.0, desc="Complete!")
            yield search_results, skipped, skipped
            return

        # Step 2: Extract entities
        progress(0.5, desc="Extracting entities...")
        search_results += "[DEBUG] Starting entity extraction...\n"
        yield search_results, "[DEBUG] Extracting entities...", ""

        extract_start = time.time()
        entities = extract_entities(articles_output)
        extract_time = time.time() - extract_start
        search_results += f"[DEBUG] Entity extraction completed in {extract_time:.2f}s\n"
        yield search_results, entities, ""

        # Without entities there is nothing to resolve.
        if str(entities).startswith("[ERROR]"):
            progress(1.0, desc="Complete!")
            yield search_results, entities, "[SKIPPED] Entity extraction failed"
            return

        # Step 3: Full names
        progress(0.8, desc="Finding full names...")
        search_results += "[DEBUG] Starting full name resolution...\n"
        yield search_results, entities, "[DEBUG] Resolving full names..."

        names_start = time.time()
        full_names = find_full_names(articles_output, entities)
        names_time = time.time() - names_start
        search_results += f"[DEBUG] Full name extraction completed in {names_time:.2f}s\n"

        progress(1.0, desc="Complete!")
        yield search_results, entities, full_names

    except Exception as e:
        # Keep whatever was produced so far and fill the remaining outputs
        # with the error so the user sees where the pipeline stopped.
        error_msg = f"[ERROR] {str(e)}"
        yield search_results + error_msg, entities or error_msg, full_names or error_msg


def process_name_simple(name: str):
    """Run the three pipeline steps once, without progress streaming.

    Intended for isolated testing: timings for each step and for the whole
    run are printed to stdout, and a single result tuple is returned.

    Later steps are skipped when an earlier one produced nothing usable
    (search error, no articles, or extraction error), so error text is never
    sent to the language model as if it were article content.

    Args:
        name: Business or project name typed by the user. ``None``, empty
            and whitespace-only values return "No name provided", matching
            the streaming handler.

    Returns:
        A ``(search_results, entities, full_names)`` tuple of strings.
    """
    if not name or not name.strip():
        return "No name provided", "", ""

    query = name.strip()
    print(f"Starting process for: {name}")
    total_start = time.time()

    print("Step 1: Searching articles...")
    search_start = time.time()
    search_results = search_articles(query)
    search_time = time.time() - search_start
    print(f"Search completed in: {search_time:.2f}s")

    # search_articles reports failure or an empty result as text; do not
    # feed that text to the model as though it were an article.
    if str(search_results).startswith(("[ERROR]", "No articles found")):
        skipped = "[SKIPPED] Search returned no usable articles"
        print(f"Total time: {time.time() - total_start:.2f}s")
        return search_results, skipped, skipped

    print("Step 2: Extracting entities...")
    extract_start = time.time()
    entities = extract_entities(search_results)
    extract_time = time.time() - extract_start
    print(f"Entity extraction in: {extract_time:.2f}s")

    # Without entities there is nothing to resolve.
    if str(entities).startswith("[ERROR]"):
        print(f"Total time: {time.time() - total_start:.2f}s")
        return search_results, entities, "[SKIPPED] Entity extraction failed"

    print("Step 3: Finding full names...")
    names_start = time.time()
    full_names = find_full_names(search_results, entities)
    names_time = time.time() - names_start
    print(f"Full name resolution in: {names_time:.2f}s")

    total_time = time.time() - total_start
    print(f"Total time: {total_time:.2f}s")

    return search_results, entities, full_names


# === Gradio UI ===

# Build the page: a header, an input row with two trigger buttons, and three
# read-only result boxes shared by both buttons.
with gr.Blocks(title="Name Research Tool") as demo:
    gr.Markdown("# 🔎 Name Research Tool")
    gr.Markdown("Enter a business or project name to search for related articles and extract key entities.")

    # Input row: the name textbox, with the two buttons stacked in a column.
    with gr.Row():
        name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
        with gr.Column():
            search_btn = gr.Button("Search (Real-time)", variant="primary")
            debug_btn = gr.Button("Search (Debug Mode)", variant="secondary")

    # Result boxes, in the same order as the tuples the handlers produce:
    # (search log, entities, full names).
    with gr.Column():
        output1 = gr.Textbox(label="Search Results (with debug)", lines=10, max_lines=30)
        output2 = gr.Textbox(label="Extracted Entities", lines=5, max_lines=10)
        output3 = gr.Textbox(label="Full Names", lines=5, max_lines=10)

    # Streaming handler: process_name_with_progress is a generator, so it
    # produces several result tuples per click.
    search_btn.click(
        fn=process_name_with_progress,
        inputs=[name_input],
        outputs=[output1, output2, output3]
    )

    # Plain handler: process_name_simple returns one tuple and prints its
    # timings to stdout.
    debug_btn.click(
        fn=process_name_simple,
        inputs=[name_input],
        outputs=[output1, output2, output3]
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()