dygoo committed on
Commit
b76c831
·
verified ·
1 Parent(s): 6bc66b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -6
app.py CHANGED
@@ -1,19 +1,163 @@
1
  import gradio as gr
2
- from model import search_articles, extract_entities, find_full_names
 
 
 
 
 
3
 
4
- def process_name(name: str):
5
- """Process name through search and entity extraction pipeline"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  if not name.strip():
7
  return "", "", ""
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Search for articles
 
 
10
  search_results = search_articles(name.strip())
 
 
11
 
12
  # Extract entities from search results
 
 
13
  entities = extract_entities(search_results)
 
 
14
 
15
  # Find full names of entities
 
 
16
  full_names = find_full_names(search_results, entities)
 
 
 
 
 
17
 
18
  return search_results, entities, full_names
19
 
@@ -24,20 +168,28 @@ with gr.Blocks(title="Name Research Tool") as demo:
24
 
25
  with gr.Row():
26
  name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
27
- search_btn = gr.Button("Search", variant="primary")
 
 
28
 
29
  with gr.Column():
30
  output1 = gr.Textbox(label="Search Results", lines=10, max_lines=20)
31
  output2 = gr.Textbox(label="Extracted Entities", lines=5, max_lines=10)
32
  output3 = gr.Textbox(label="Full Names", lines=5, max_lines=10)
33
 
 
34
  search_btn.click(
35
- fn=process_name,
36
  inputs=[name_input],
37
  outputs=[output1, output2, output3]
38
  )
39
 
40
- # TODO: Add CSV upload functionality here
 
 
 
 
 
41
 
42
  if __name__ == "__main__":
43
  demo.launch()
 
1
  import gradio as gr
2
+ import requests
3
+ import re
4
+ from typing import List, Dict
5
+ import os
6
+ import time
7
+ from duckduckgo_search import DDGS
8
 
9
+ # Model functions
10
def search_articles(name: str) -> str:
    """Look up news articles mentioning *name* via DuckDuckGo.

    The query combines the quoted name with an OR-group of ownership-related
    keywords and requests at most three text results.

    Returns:
        The results rendered as numbered entries (bold title, source URL,
        snippet) separated by blank lines; a "No articles found" message when
        the search comes back empty; or a "Search failed" message carrying the
        exception text when anything raises.
    """
    role_terms = ['founders', 'partners', 'funders', 'owners']
    query = f'"{name}" ({" OR ".join(role_terms)}) site:news'
    try:
        with DDGS() as client:
            hits = list(client.text(query, max_results=3))
        if not hits:
            return f"No articles found for {name}"
        # One formatted entry per hit, numbered from 1.
        formatted = [
            f"**{rank}. {hit['title']}**\n"
            f"Source: {hit['href']}\n"
            f"{hit['body']}\n"
            for rank, hit in enumerate(hits, 1)
        ]
        return "\n\n".join(formatted)
    except Exception as e:
        # Best-effort: the UI shows the failure text instead of crashing.
        return f"Search failed: {str(e)}"
28
+
29
def extract_entities(search_results: str) -> str:
    """Ask the remote LLM endpoint to list person and organization names.

    Args:
        search_results: Free text (normally the formatted article snippets)
            to scan for entities.

    Returns:
        The endpoint's "response" field on HTTP 200 (or "No entities
        extracted" if that field is missing), "API Error: <status>" for any
        other status code, or "Extraction failed: <reason>" if the request
        or JSON decoding raises. Blank input returns "No entities extracted"
        without contacting the endpoint.
    """
    # Nothing to analyse: skip the network round-trip rather than sending an
    # empty prompt for the model to fill in on its own.
    if not search_results or not search_results.strip():
        return "No entities extracted"

    modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
    # requests has no default timeout; without one a stalled endpoint would
    # block the UI callback indefinitely. Generous to allow for cold starts.
    request_timeout_s = 120

    prompt = (
        "Extract all person names and organization names from the following text.\n"
        "Format as:\n"
        "PERSON: [name]\n"
        "ORG: [organization name]\n"
        "\n"
        f"Text: {search_results}"
    )

    try:
        response = requests.post(
            modal_endpoint,
            json={
                "prompt": prompt,
                "max_tokens": 500,
                "temperature": 0.1,
            },
            timeout=request_timeout_s,
        )
        if response.status_code == 200:
            return response.json().get("response", "No entities extracted")
        return f"API Error: {response.status_code}"
    except Exception as e:
        # Best-effort: surface the failure (including timeouts) as text.
        return f"Extraction failed: {str(e)}"
55
+
56
def find_full_names(search_results: str, entities: str) -> str:
    """Ask the remote LLM endpoint for full names and roles of the entities.

    Args:
        search_results: The article text used as supporting context.
        entities: The entity list (as text) whose full names/roles are wanted.

    Returns:
        The endpoint's "response" field on HTTP 200 (or "No full names found"
        if that field is missing), "API Error: <status>" for any other status
        code, or "Full name extraction failed: <reason>" if the request or
        JSON decoding raises. A blank entity list returns "No full names
        found" without contacting the endpoint.
    """
    # With no entities there is nothing to resolve; avoid a pointless call.
    if not entities or not entities.strip():
        return "No full names found"

    modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
    # requests has no default timeout; without one a stalled endpoint would
    # block the UI callback indefinitely. Generous to allow for cold starts.
    request_timeout_s = 120

    prompt = (
        "Based on the search results, find the full names and titles/roles for these entities:\n"
        "\n"
        f"Entities: {entities}\n"
        "\n"
        f"Search Results: {search_results}\n"
        "\n"
        "Provide full names with their roles/titles where mentioned."
    )

    try:
        response = requests.post(
            modal_endpoint,
            json={
                "prompt": prompt,
                "max_tokens": 300,
                "temperature": 0.1,
            },
            timeout=request_timeout_s,
        )
        if response.status_code == 200:
            return response.json().get("response", "No full names found")
        return f"API Error: {response.status_code}"
    except Exception as e:
        # Best-effort: surface the failure (including timeouts) as text.
        return f"Full name extraction failed: {str(e)}"
83
+
84
+ # Gradio interface functions
85
def process_name_with_progress(name: str, progress=gr.Progress()):
    """Run the search -> entity extraction -> full-name pipeline, streaming updates.

    This is a generator: each ``yield`` produces a
    ``(search_results, entities, full_names)`` tuple for the three output
    textboxes, so partial results appear as each stage finishes.

    Args:
        name: The business or project name to research.
        progress: Gradio progress tracker, called with a fraction and a
            description before each stage.

    Yields:
        Tuples of three strings. A blank ``name`` yields one tuple of empty
        strings. If a stage raises, the final tuple carries whatever was
        produced so far, with an "Error: ..." message in the unfilled slots.
    """
    if not name.strip():
        # Because this function contains `yield`, `return "", "", ""` would
        # only set StopIteration.value and emit nothing, leaving stale text
        # in the outputs. Yield the blanks explicitly, then stop.
        yield "", "", ""
        return

    # Initialize outputs so the error handler can report partial progress.
    search_results = ""
    entities = ""
    full_names = ""

    try:
        # Step 1: Search for articles
        progress(0.1, desc="Searching for articles...")
        yield "Searching for articles...", "", ""

        search_start = time.time()
        search_results = search_articles(name.strip())
        search_time = time.time() - search_start
        print(f"Search took: {search_time:.2f} seconds")

        progress(0.4, desc="Articles found! Extracting entities...")
        yield search_results, "Extracting entities from articles...", ""

        # Step 2: Extract entities from search results
        extract_start = time.time()
        entities = extract_entities(search_results)
        extract_time = time.time() - extract_start
        print(f"Entity extraction took: {extract_time:.2f} seconds")

        progress(0.7, desc="Entities extracted! Finding full names...")
        yield search_results, entities, "Finding full names and roles..."

        # Step 3: Find full names of entities
        names_start = time.time()
        full_names = find_full_names(search_results, entities)
        names_time = time.time() - names_start
        print(f"Full name extraction took: {names_time:.2f} seconds")

        progress(1.0, desc="Complete!")
        yield search_results, entities, full_names

    except Exception as e:
        # Keep completed stages visible; show the error only in empty slots.
        error_msg = f"Error: {str(e)}"
        yield search_results or error_msg, entities or error_msg, full_names or error_msg
129
+
130
def process_name_simple(name: str):
    """Run the three pipeline stages in one shot, printing per-stage timings.

    Non-streaming counterpart of the progress version, intended for spotting
    which stage is slow. Returns ``(search_results, entities, full_names)``;
    a blank ``name`` returns three empty strings without running anything.
    """
    query = name.strip()
    if not query:
        return "", "", ""

    def _timed(announce, done_label, stage, *stage_args):
        # Announce the stage, run it, then report its wall-clock duration.
        print(announce)
        began = time.time()
        outcome = stage(*stage_args)
        elapsed = time.time() - began
        print(f"{done_label} completed in: {elapsed:.2f} seconds")
        return outcome

    print(f"Starting process for: {name}")
    pipeline_began = time.time()

    # Stage 1: article search
    search_results = _timed(
        "Step 1: Searching articles...", "Search", search_articles, query
    )

    # Stage 2: entity extraction over the search output
    entities = _timed(
        "Step 2: Extracting entities...", "Entity extraction",
        extract_entities, search_results,
    )

    # Stage 3: full names / roles for the extracted entities
    full_names = _timed(
        "Step 3: Finding full names...", "Full name extraction",
        find_full_names, search_results, entities,
    )

    pipeline_elapsed = time.time() - pipeline_began
    print(f"Total process time: {pipeline_elapsed:.2f} seconds")

    return search_results, entities, full_names
163
 
 
168
 
169
  with gr.Row():
170
  name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
171
+ with gr.Column():
172
+ search_btn = gr.Button("Search (Real-time)", variant="primary")
173
+ debug_btn = gr.Button("Search (Debug Mode)", variant="secondary")
174
 
175
  with gr.Column():
176
  output1 = gr.Textbox(label="Search Results", lines=10, max_lines=20)
177
  output2 = gr.Textbox(label="Extracted Entities", lines=5, max_lines=10)
178
  output3 = gr.Textbox(label="Full Names", lines=5, max_lines=10)
179
 
180
+ # Real-time search with progress
181
  search_btn.click(
182
+ fn=process_name_with_progress,
183
  inputs=[name_input],
184
  outputs=[output1, output2, output3]
185
  )
186
 
187
+ # Debug search with timing info
188
+ debug_btn.click(
189
+ fn=process_name_simple,
190
+ inputs=[name_input],
191
+ outputs=[output1, output2, output3]
192
+ )
193
 
194
  if __name__ == "__main__":
195
  demo.launch()