dygoo committed on
Commit
b8a3699
·
verified ·
1 Parent(s): 63b8e63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -40
app.py CHANGED
@@ -3,26 +3,34 @@ import requests
3
  import time
4
  from duckduckgo_search import DDGS
5
 
6
- # --- Model Functions ---
 
7
  def search_articles(name: str) -> str:
 
8
  keywords = ['founders', 'partners', 'funders', 'owners']
9
  search_query = f'"{name}" ({" OR ".join(keywords)}) site:news'
10
  try:
 
11
  with DDGS() as ddgs:
12
  results = list(ddgs.text(search_query, max_results=3))
 
13
  if not results:
14
  return f"No articles found for {name}"
 
15
  articles = []
16
  for i, result in enumerate(results, 1):
17
- article = f"**{i}. {result['title']}**\n"
18
- article += f"Source: {result['href']}\n"
19
- article += f"{result['body']}\n"
20
  articles.append(article)
 
21
  return "\n\n".join(articles)
22
  except Exception as e:
23
- return f"Search failed: {str(e)}"
 
24
 
25
  def extract_entities(search_results: str) -> str:
 
26
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
27
  prompt = f"""Extract all person names and organization names from the following text.
28
  Format as:
@@ -32,17 +40,18 @@ Text: {search_results}"""
32
  try:
33
  response = requests.post(
34
  modal_endpoint,
35
- json={"prompt": prompt, "max_tokens": 500, "temperature": 0.1},
36
- timeout=180 # Increased timeout
37
  )
38
  if response.status_code == 200:
39
  return response.json().get("response", "No entities extracted")
40
  else:
41
- return f"API Error: {response.status_code}"
42
  except Exception as e:
43
- return f"Extraction failed: {str(e)}"
 
44
 
45
  def find_full_names(search_results: str, entities: str) -> str:
 
46
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
47
  prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
48
  Entities: {entities}
@@ -51,64 +60,130 @@ Provide full names with their roles/titles where mentioned."""
51
  try:
52
  response = requests.post(
53
  modal_endpoint,
54
- json={"prompt": prompt, "max_tokens": 300, "temperature": 0.1},
55
- timeout=180 # Increased timeout
56
  )
57
  if response.status_code == 200:
58
  return response.json().get("response", "No full names found")
59
  else:
60
- return f"API Error: {response.status_code}"
61
  except Exception as e:
62
- return f"Full name extraction failed: {str(e)}"
 
 
 
63
 
64
- # --- Pipeline Function with Progress & Status ---
65
- def process_name_with_progress(name: str, progress=gr.Progress(track_tqdm=True)):
66
  if not name.strip():
67
- return "", "", "", "Please enter a name."
 
 
 
 
 
68
 
69
  try:
70
- progress(0.1)
71
- yield "", "", "", "🔍 Searching for articles..."
72
-
73
- search_results = search_articles(name.strip())
74
- progress(0.4)
75
- yield search_results, "", "", "📄 Extracting entities from articles..."
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- entities = extract_entities(search_results)
78
- progress(0.7)
79
- yield search_results, entities, "", "🧠 Finding full names and roles..."
 
 
80
 
81
- full_names = find_full_names(search_results, entities)
82
- progress(1.0)
83
- yield search_results, entities, full_names, " Complete!"
 
 
 
 
 
 
 
 
 
84
 
85
  except Exception as e:
86
- err = f" Error: {str(e)}"
87
- yield search_results if 'search_results' in locals() else "", \
88
- entities if 'entities' in locals() else "", \
89
- full_names if 'full_names' in locals() else "", \
90
- err
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- # --- Gradio Interface ---
93
  with gr.Blocks(title="Name Research Tool") as demo:
94
- gr.Markdown("# Name Research Tool")
95
  gr.Markdown("Enter a business or project name to search for related articles and extract key entities.")
96
 
97
  with gr.Row():
98
  name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
99
- search_btn = gr.Button("Search", variant="primary")
 
 
100
 
101
  with gr.Column():
102
- output1 = gr.Textbox(label="Search Results", lines=10, max_lines=20)
103
  output2 = gr.Textbox(label="Extracted Entities", lines=5, max_lines=10)
104
  output3 = gr.Textbox(label="Full Names", lines=5, max_lines=10)
105
- status_output = gr.Textbox(label="Status / Progress", lines=1, interactive=False)
106
 
107
- # Search with progress
108
  search_btn.click(
109
  fn=process_name_with_progress,
110
  inputs=[name_input],
111
- outputs=[output1, output2, output3, status_output]
 
 
 
 
 
 
112
  )
113
 
114
  if __name__ == "__main__":
 
3
  import time
4
  from duckduckgo_search import DDGS
5
 
6
+ # === Model functions ===
7
+
8
  def search_articles(name: str) -> str:
9
+ """Search for 3 newspaper articles containing the name and keywords using DuckDuckGo"""
10
  keywords = ['founders', 'partners', 'funders', 'owners']
11
  search_query = f'"{name}" ({" OR ".join(keywords)}) site:news'
12
  try:
13
+ print(f"[DEBUG] Search query: {search_query}")
14
  with DDGS() as ddgs:
15
  results = list(ddgs.text(search_query, max_results=3))
16
+ print(f"[DEBUG] Raw results: {results}")
17
  if not results:
18
  return f"No articles found for {name}"
19
+
20
  articles = []
21
  for i, result in enumerate(results, 1):
22
+ article = f"**{i}. {result.get('title', 'No Title')}**\n"
23
+ article += f"Source: {result.get('href', 'No URL')}\n"
24
+ article += f"{result.get('body', 'No Body')}\n"
25
  articles.append(article)
26
+
27
  return "\n\n".join(articles)
28
  except Exception as e:
29
+ return f"[ERROR] Search failed: {str(e)}"
30
+
31
 
32
  def extract_entities(search_results: str) -> str:
33
+ """Extract entities using Mistral 7B endpoint"""
34
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
35
  prompt = f"""Extract all person names and organization names from the following text.
36
  Format as:
 
40
  try:
41
  response = requests.post(
42
  modal_endpoint,
43
+ json={"prompt": prompt, "max_tokens": 500, "temperature": 0.1}
 
44
  )
45
  if response.status_code == 200:
46
  return response.json().get("response", "No entities extracted")
47
  else:
48
+ return f"[ERROR] API Error: {response.status_code}"
49
  except Exception as e:
50
+ return f"[ERROR] Extraction failed: {str(e)}"
51
+
52
 
53
  def find_full_names(search_results: str, entities: str) -> str:
54
+ """Find full names using Mistral 7B endpoint"""
55
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
56
  prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
57
  Entities: {entities}
 
60
  try:
61
  response = requests.post(
62
  modal_endpoint,
63
+ json={"prompt": prompt, "max_tokens": 300, "temperature": 0.1}
 
64
  )
65
  if response.status_code == 200:
66
  return response.json().get("response", "No full names found")
67
  else:
68
+ return f"[ERROR] API Error: {response.status_code}"
69
  except Exception as e:
70
+ return f"[ERROR] Full name extraction failed: {str(e)}"
71
+
72
+
73
+ # === Gradio interface ===
74
 
75
+ def process_name_with_progress(name: str, progress=gr.Progress()):
76
+ """Process name with streamed debug updates to help diagnose issues"""
77
  if not name.strip():
78
+ yield "No name provided", "", ""
79
+ return
80
+
81
+ search_results = ""
82
+ entities = ""
83
+ full_names = ""
84
 
85
  try:
86
+ # Step 1: Search
87
+ progress(0.1, desc="Searching for articles...")
88
+ search_results += f"[DEBUG] Starting search for: {name}\n"
89
+ yield search_results, "", ""
90
+
91
+ search_start = time.time()
92
+ articles_output = search_articles(name.strip())
93
+ search_time = time.time() - search_start
94
+
95
+ search_results += f"[DEBUG] Search completed in {search_time:.2f}s\n"
96
+ search_results += f"{articles_output}\n"
97
+ yield search_results, "", ""
98
+
99
+ # Step 2: Extract entities
100
+ progress(0.5, desc="Extracting entities...")
101
+ search_results += "[DEBUG] Starting entity extraction...\n"
102
+ yield search_results, "[DEBUG] Extracting entities...", ""
103
 
104
+ extract_start = time.time()
105
+ entities = extract_entities(articles_output)
106
+ extract_time = time.time() - extract_start
107
+ search_results += f"[DEBUG] Entity extraction completed in {extract_time:.2f}s\n"
108
+ yield search_results, entities, ""
109
 
110
+ # Step 3: Full names
111
+ progress(0.8, desc="Finding full names...")
112
+ search_results += "[DEBUG] Starting full name resolution...\n"
113
+ yield search_results, entities, "[DEBUG] Resolving full names..."
114
+
115
+ names_start = time.time()
116
+ full_names = find_full_names(articles_output, entities)
117
+ names_time = time.time() - names_start
118
+ search_results += f"[DEBUG] Full name extraction completed in {names_time:.2f}s\n"
119
+
120
+ progress(1.0, desc="Complete!")
121
+ yield search_results, entities, full_names
122
 
123
  except Exception as e:
124
+ error_msg = f"[ERROR] {str(e)}"
125
+ yield search_results + error_msg, entities or error_msg, full_names or error_msg
126
+
127
+
128
+ def process_name_simple(name: str):
129
+ """Basic version without progress bar - for isolated testing"""
130
+ if not name.strip():
131
+ return "", "", ""
132
+
133
+ print(f"Starting process for: {name}")
134
+ total_start = time.time()
135
+
136
+ print("Step 1: Searching articles...")
137
+ search_start = time.time()
138
+ search_results = search_articles(name.strip())
139
+ search_time = time.time() - search_start
140
+ print(f"Search completed in: {search_time:.2f}s")
141
+
142
+ print("Step 2: Extracting entities...")
143
+ extract_start = time.time()
144
+ entities = extract_entities(search_results)
145
+ extract_time = time.time() - extract_start
146
+ print(f"Entity extraction in: {extract_time:.2f}s")
147
+
148
+ print("Step 3: Finding full names...")
149
+ names_start = time.time()
150
+ full_names = find_full_names(search_results, entities)
151
+ names_time = time.time() - names_start
152
+ print(f"Full name resolution in: {names_time:.2f}s")
153
+
154
+ total_time = time.time() - total_start
155
+ print(f"Total time: {total_time:.2f}s")
156
+
157
+ return search_results, entities, full_names
158
+
159
+
160
+ # === Gradio UI ===
161
 
 
162
  with gr.Blocks(title="Name Research Tool") as demo:
163
+ gr.Markdown("# 🔎 Name Research Tool")
164
  gr.Markdown("Enter a business or project name to search for related articles and extract key entities.")
165
 
166
  with gr.Row():
167
  name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
168
+ with gr.Column():
169
+ search_btn = gr.Button("Search (Real-time)", variant="primary")
170
+ debug_btn = gr.Button("Search (Debug Mode)", variant="secondary")
171
 
172
  with gr.Column():
173
+ output1 = gr.Textbox(label="Search Results (with debug)", lines=10, max_lines=30)
174
  output2 = gr.Textbox(label="Extracted Entities", lines=5, max_lines=10)
175
  output3 = gr.Textbox(label="Full Names", lines=5, max_lines=10)
 
176
 
 
177
  search_btn.click(
178
  fn=process_name_with_progress,
179
  inputs=[name_input],
180
+ outputs=[output1, output2, output3]
181
+ )
182
+
183
+ debug_btn.click(
184
+ fn=process_name_simple,
185
+ inputs=[name_input],
186
+ outputs=[output1, output2, output3]
187
  )
188
 
189
  if __name__ == "__main__":