dygoo committed on
Commit
cd3dba4
·
verified ·
1 Parent(s): a5a5de8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -95
app.py CHANGED
@@ -3,34 +3,27 @@ import requests
3
  import time
4
  from duckduckgo_search import DDGS
5
 
6
- # === Model functions ===
7
-
8
  def search_articles(name: str) -> str:
9
- """Search for 3 newspaper articles containing the name and keywords using DuckDuckGo"""
10
  keywords = ['founders', 'partners', 'funders', 'owners']
11
  search_query = f'"{name}" ({" OR ".join(keywords)}) site:news'
12
  try:
13
- print(f"[DEBUG] Search query: {search_query}")
14
  with DDGS() as ddgs:
15
  results = list(ddgs.text(search_query, max_results=3))
16
- print(f"[DEBUG] Raw results: {results}")
17
  if not results:
18
  return f"No articles found for {name}"
19
-
20
  articles = []
21
  for i, result in enumerate(results, 1):
22
- article = f"**{i}. {result.get('title', 'No Title')}**\n"
23
- article += f"Source: {result.get('href', 'No URL')}\n"
24
- article += f"{result.get('body', 'No Body')}\n"
25
  articles.append(article)
26
-
27
  return "\n\n".join(articles)
28
  except Exception as e:
29
- return f"[ERROR] Search failed: {str(e)}"
30
-
31
 
 
32
  def extract_entities(search_results: str) -> str:
33
- """Extract entities using Mistral 7B endpoint"""
34
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
35
  prompt = f"""Extract all person names and organization names from the following text.
36
  Format as:
@@ -40,18 +33,17 @@ Text: {search_results}"""
40
  try:
41
  response = requests.post(
42
  modal_endpoint,
43
- json={"prompt": prompt, "max_tokens": 500, "temperature": 0.1}
 
44
  )
45
  if response.status_code == 200:
46
  return response.json().get("response", "No entities extracted")
47
  else:
48
- return f"[ERROR] API Error: {response.status_code}"
49
  except Exception as e:
50
- return f"[ERROR] Extraction failed: {str(e)}"
51
-
52
 
53
  def find_full_names(search_results: str, entities: str) -> str:
54
- """Find full names using Mistral 7B endpoint"""
55
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
56
  prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
57
  Entities: {entities}
@@ -60,100 +52,59 @@ Provide full names with their roles/titles where mentioned."""
60
  try:
61
  response = requests.post(
62
  modal_endpoint,
63
- json={"prompt": prompt, "max_tokens": 300, "temperature": 0.1}
 
64
  )
65
  if response.status_code == 200:
66
  return response.json().get("response", "No full names found")
67
  else:
68
- return f"[ERROR] API Error: {response.status_code}"
69
  except Exception as e:
70
- return f"[ERROR] Full name extraction failed: {str(e)}"
71
-
72
-
73
- # === Gradio interface ===
74
 
 
75
  def process_name_with_progress(name: str, progress=gr.Progress()):
76
  if not name.strip():
77
- return "", "", ""
78
-
79
- # Initialize outputs
80
- search_results = ""
81
- entities = ""
82
- full_names = ""
83
 
84
  try:
85
- # Step 1: Search for articles
86
- progress(0.1, desc="Searching for articles...")
87
- yield "", "", "" # Initial blank yield to reset UI
88
-
89
- search_start = time.time()
90
  search_results = search_articles(name.strip())
91
- search_time = time.time() - search_start
92
- print(f"Search took: {search_time:.2f} seconds")
93
 
94
- progress(0.4, desc="Articles found! Extracting entities...")
95
- yield search_results, "", "" # Only show real search results
96
-
97
- # Step 2: Extract entities from search results
98
- extract_start = time.time()
99
  entities = extract_entities(search_results)
100
- extract_time = time.time() - extract_start
101
- print(f"Entity extraction took: {extract_time:.2f} seconds")
102
-
103
- progress(0.7, desc="Entities extracted! Finding full names...")
104
- yield search_results, entities, "" # Show real entities
105
 
106
- # Step 3: Find full names of entities
107
- names_start = time.time()
108
  full_names = find_full_names(search_results, entities)
109
- names_time = time.time() - names_start
110
- print(f"Full name extraction took: {names_time:.2f} seconds")
111
 
112
- progress(1.0, desc="Complete!")
113
- yield search_results, entities, full_names # Final result
114
 
115
  except Exception as e:
116
- error_msg = f"Error: {str(e)}"
117
- yield search_results or error_msg, entities or error_msg, full_names or error_msg
118
-
119
 
 
120
  def process_name_simple(name: str):
121
- """Basic version without progress bar - for isolated testing"""
122
  if not name.strip():
123
- return "", "", ""
124
-
125
- print(f"Starting process for: {name}")
126
- total_start = time.time()
127
-
128
- print("Step 1: Searching articles...")
129
- search_start = time.time()
130
- search_results = search_articles(name.strip())
131
- search_time = time.time() - search_start
132
- print(f"Search completed in: {search_time:.2f}s")
133
-
134
- print("Step 2: Extracting entities...")
135
- extract_start = time.time()
136
- entities = extract_entities(search_results)
137
- extract_time = time.time() - extract_start
138
- print(f"Entity extraction in: {extract_time:.2f}s")
139
-
140
- print("Step 3: Finding full names...")
141
- names_start = time.time()
142
- full_names = find_full_names(search_results, entities)
143
- names_time = time.time() - names_start
144
- print(f"Full name resolution in: {names_time:.2f}s")
145
-
146
- total_time = time.time() - total_start
147
- print(f"Total time: {total_time:.2f}s")
148
-
149
- return search_results, entities, full_names
150
-
151
-
152
- # === Gradio UI ===
153
 
 
154
  with gr.Blocks(title="Name Research Tool") as demo:
155
- gr.Markdown("# 🔎 Name Research Tool")
156
- gr.Markdown("Enter a business or project name to search for related articles and extract key entities.")
157
 
158
  with gr.Row():
159
  name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
@@ -161,22 +112,25 @@ with gr.Blocks(title="Name Research Tool") as demo:
161
  search_btn = gr.Button("Search (Real-time)", variant="primary")
162
  debug_btn = gr.Button("Search (Debug Mode)", variant="secondary")
163
 
164
- with gr.Column():
165
- output1 = gr.Textbox(label="Search Results (with debug)", lines=10, max_lines=30)
166
- output2 = gr.Textbox(label="Extracted Entities", lines=5, max_lines=10)
167
- output3 = gr.Textbox(label="Full Names", lines=5, max_lines=10)
 
 
168
 
169
  search_btn.click(
170
  fn=process_name_with_progress,
171
  inputs=[name_input],
172
- outputs=[output1, output2, output3]
173
  )
174
 
175
  debug_btn.click(
176
  fn=process_name_simple,
177
  inputs=[name_input],
178
- outputs=[output1, output2, output3]
179
  )
180
 
181
  if __name__ == "__main__":
182
  demo.launch()
 
 
3
  import time
4
  from duckduckgo_search import DDGS
5
 
6
# DuckDuckGo search
def search_articles(name: str) -> str:
    """Search DuckDuckGo for up to three articles that mention ``name``.

    The query is the quoted name combined with an OR-group of ownership
    keywords (founders, partners, funders, owners) and ``site:news``.

    Args:
        name: Business or project name to look up.

    Returns:
        The hits formatted as numbered blocks (title, source URL, body)
        separated by blank lines; ``"No articles found for <name>"`` when the
        search returns nothing; or ``"Search failed: <reason>"`` when the
        search raises. This function never raises to its caller.
    """
    keywords = ['founders', 'partners', 'funders', 'owners']
    search_query = f'"{name}" ({" OR ".join(keywords)}) site:news'
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(search_query, max_results=3))

        if not results:
            return f"No articles found for {name}"

        articles = []
        for i, result in enumerate(results, 1):
            # Use .get with placeholders: a single hit that lacks a field
            # should not turn the whole search into a "Search failed" message.
            title = result.get('title', 'No Title')
            href = result.get('href', 'No URL')
            body = result.get('body', 'No Body')
            articles.append(f"**{i}. {title}**\nSource: {href}\n{body}\n")

        return "\n\n".join(articles)
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Search failed: {e}"
 
24
 
25
+ # Call your Modal Mistral endpoint
26
  def extract_entities(search_results: str) -> str:
 
27
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
28
  prompt = f"""Extract all person names and organization names from the following text.
29
  Format as:
 
33
  try:
34
  response = requests.post(
35
  modal_endpoint,
36
+ json={"prompt": prompt, "max_tokens": 500, "temperature": 0.1},
37
+ timeout=30
38
  )
39
  if response.status_code == 200:
40
  return response.json().get("response", "No entities extracted")
41
  else:
42
+ return f"API Error: {response.status_code}"
43
  except Exception as e:
44
+ return f"Extraction failed: {str(e)}"
 
45
 
46
  def find_full_names(search_results: str, entities: str) -> str:
 
47
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
48
  prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
49
  Entities: {entities}
 
52
  try:
53
  response = requests.post(
54
  modal_endpoint,
55
+ json={"prompt": prompt, "max_tokens": 300, "temperature": 0.1},
56
+ timeout=30
57
  )
58
  if response.status_code == 200:
59
  return response.json().get("response", "No full names found")
60
  else:
61
+ return f"API Error: {response.status_code}"
62
  except Exception as e:
63
+ return f"Full name extraction failed: {str(e)}"
 
 
 
64
 
65
# Pipeline with separate status
def process_name_with_progress(name: str, progress=gr.Progress()):
    """Run search -> entity extraction -> full-name lookup, streaming updates.

    This is a generator: every ``yield`` is a 4-tuple
    ``(search_results, entities, full_names, status)`` so the caller can show
    each stage's output as soon as it is available.

    Args:
        name: Business or project name typed by the user. ``None``, empty and
            whitespace-only values are rejected with a status message.
        progress: Progress callback, invoked with a fraction in ``[0, 1]``
            after each stage is announced.

    Yields:
        Tuples of four strings as described above. If a stage raises, the
        outputs of stages that already completed are kept; the remaining
        slots and the status carry ``"Error: <reason>"``.
    """
    # Guard against None as well as blank input so the check itself cannot raise.
    query = name.strip() if name else ""
    if not query:
        yield "", "", "", "Please enter a name"
        return

    # Pre-initialise so the error path can report whatever was completed.
    search_results = entities = full_names = ""
    try:
        yield "", "", "", "Searching for articles..."
        progress(0.1)
        search_results = search_articles(query)

        yield search_results, "", "", "Extracting entities..."
        progress(0.4)
        entities = extract_entities(search_results)

        yield search_results, entities, "", "Finding full names..."
        progress(0.7)
        full_names = find_full_names(search_results, entities)

        progress(1.0)
        yield search_results, entities, full_names, "Done!"

    except Exception as e:
        error = f"Error: {e}"
        # Keep results from finished stages instead of overwriting them.
        yield (
            search_results or error,
            entities or error,
            full_names or error,
            error,
        )
 
90
 
91
# Simple non-streamed debug version
def process_name_simple(name: str):
    """Run the three pipeline stages in one go and return the final result.

    Args:
        name: Business or project name typed by the user. ``None``, empty and
            whitespace-only values are rejected with a status message.

    Returns:
        A 4-tuple ``(search_results, entities, full_names, status)`` of
        strings. ``status`` is ``"Done!"`` on success. If a stage raises, the
        outputs of stages that already completed are kept; the remaining
        slots and the status carry ``"Error: <reason>"``.
    """
    # Guard against None as well as blank input so the check itself cannot raise.
    query = name.strip() if name else ""
    if not query:
        return "", "", "", "Please enter a name"

    # Pre-initialise so the error path can report whatever was completed.
    search_results = entities = full_names = ""
    try:
        search_results = search_articles(query)
        entities = extract_entities(search_results)
        full_names = find_full_names(search_results, entities)
        return search_results, entities, full_names, "Done!"
    except Exception as e:
        error = f"Error: {e}"
        # Keep results from finished stages instead of overwriting them.
        return (
            search_results or error,
            entities or error,
            full_names or error,
            error,
        )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
+ # UI layout
105
  with gr.Blocks(title="Name Research Tool") as demo:
106
+ gr.Markdown("# 🔍 Name Research Tool")
107
+ gr.Markdown("Enter a business or project name to search related articles and extract key entities.")
108
 
109
  with gr.Row():
110
  name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
 
112
  search_btn = gr.Button("Search (Real-time)", variant="primary")
113
  debug_btn = gr.Button("Search (Debug Mode)", variant="secondary")
114
 
115
+ with gr.Row():
116
+ with gr.Column():
117
+ output1 = gr.Textbox(label="Search Results", lines=10)
118
+ output2 = gr.Textbox(label="Extracted Entities", lines=5)
119
+ output3 = gr.Textbox(label="Full Names & Roles", lines=5)
120
+ status = gr.Textbox(label="Status", interactive=False)
121
 
122
  search_btn.click(
123
  fn=process_name_with_progress,
124
  inputs=[name_input],
125
+ outputs=[output1, output2, output3, status]
126
  )
127
 
128
  debug_btn.click(
129
  fn=process_name_simple,
130
  inputs=[name_input],
131
+ outputs=[output1, output2, output3, status]
132
  )
133
 
134
  if __name__ == "__main__":
135
  demo.launch()
136
+