dygoo committed on
Commit
d0bd726
·
verified ·
1 Parent(s): cd3dba4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -41
app.py CHANGED
@@ -1,12 +1,20 @@
1
  import gradio as gr
2
  import requests
 
 
 
3
  import time
4
  from duckduckgo_search import DDGS
5
 
6
- # DuckDuckGo search
 
 
 
7
  def search_articles(name: str) -> str:
 
8
  keywords = ['founders', 'partners', 'funders', 'owners']
9
  search_query = f'"{name}" ({" OR ".join(keywords)}) site:news'
 
10
  try:
11
  with DDGS() as ddgs:
12
  results = list(ddgs.text(search_query, max_results=3))
@@ -22,19 +30,21 @@ def search_articles(name: str) -> str:
22
  except Exception as e:
23
  return f"Search failed: {str(e)}"
24
 
25
- # Call your Modal Mistral endpoint
26
  def extract_entities(search_results: str) -> str:
 
27
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
 
28
  prompt = f"""Extract all person names and organization names from the following text.
29
  Format as:
30
  PERSON: [name]
31
  ORG: [organization name]
32
  Text: {search_results}"""
 
33
  try:
34
  response = requests.post(
35
  modal_endpoint,
36
  json={"prompt": prompt, "max_tokens": 500, "temperature": 0.1},
37
- timeout=30
38
  )
39
  if response.status_code == 200:
40
  return response.json().get("response", "No entities extracted")
@@ -44,16 +54,19 @@ Text: {search_results}"""
44
  return f"Extraction failed: {str(e)}"
45
 
46
  def find_full_names(search_results: str, entities: str) -> str:
 
47
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
 
48
  prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
49
  Entities: {entities}
50
  Search Results: {search_results}
51
  Provide full names with their roles/titles where mentioned."""
 
52
  try:
53
  response = requests.post(
54
  modal_endpoint,
55
  json={"prompt": prompt, "max_tokens": 300, "temperature": 0.1},
56
- timeout=30
57
  )
58
  if response.status_code == 200:
59
  return response.json().get("response", "No full names found")
@@ -62,73 +75,81 @@ Provide full names with their roles/titles where mentioned."""
62
  except Exception as e:
63
  return f"Full name extraction failed: {str(e)}"
64
 
65
- # Pipeline with separate status
66
- def process_name_with_progress(name: str, progress=gr.Progress()):
 
 
 
67
  if not name.strip():
68
- yield "", "", "", "Please enter a name"
69
- return
 
 
 
 
70
 
71
  try:
72
- yield "", "", "", "Searching for articles..."
73
- progress(0.1)
74
  search_results = search_articles(name.strip())
 
75
 
76
- yield search_results, "", "", "Extracting entities..."
77
- progress(0.4)
78
  entities = extract_entities(search_results)
 
79
 
80
- yield search_results, entities, "", "Finding full names..."
81
- progress(0.7)
82
  full_names = find_full_names(search_results, entities)
83
-
84
- progress(1.0)
85
- yield search_results, entities, full_names, "Done!"
86
 
87
  except Exception as e:
88
- error = f"Error: {str(e)}"
89
- yield error, error, error, error
90
 
91
- # Simple non-streamed debug version
92
  def process_name_simple(name: str):
 
93
  if not name.strip():
94
- return "", "", "", "Please enter a name"
95
- try:
96
- search_results = search_articles(name.strip())
97
- entities = extract_entities(search_results)
98
- full_names = find_full_names(search_results, entities)
99
- return search_results, entities, full_names, "Done!"
100
- except Exception as e:
101
- error = f"Error: {str(e)}"
102
- return error, error, error, error
 
103
 
104
- # UI layout
105
  with gr.Blocks(title="Name Research Tool") as demo:
106
  gr.Markdown("# 🔍 Name Research Tool")
107
- gr.Markdown("Enter a business or project name to search related articles and extract key entities.")
108
 
109
  with gr.Row():
110
  name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
111
  with gr.Column():
112
- search_btn = gr.Button("Search (Real-time)", variant="primary")
113
- debug_btn = gr.Button("Search (Debug Mode)", variant="secondary")
114
 
115
- with gr.Row():
116
- with gr.Column():
117
- output1 = gr.Textbox(label="Search Results", lines=10)
118
- output2 = gr.Textbox(label="Extracted Entities", lines=5)
119
- output3 = gr.Textbox(label="Full Names & Roles", lines=5)
120
- status = gr.Textbox(label="Status", interactive=False)
121
 
 
122
  search_btn.click(
123
  fn=process_name_with_progress,
124
  inputs=[name_input],
125
- outputs=[output1, output2, output3, status]
126
  )
127
 
 
128
  debug_btn.click(
129
  fn=process_name_simple,
130
  inputs=[name_input],
131
- outputs=[output1, output2, output3, status]
132
  )
133
 
134
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import requests
3
+ import re
4
+ from typing import List, Dict
5
+ import os
6
  import time
7
  from duckduckgo_search import DDGS
8
 
9
+ # ----------------------------
10
+ # Model functions
11
+ # ----------------------------
12
+
13
  def search_articles(name: str) -> str:
14
+ """Search for 3 newspaper articles containing the name and keywords using DuckDuckGo"""
15
  keywords = ['founders', 'partners', 'funders', 'owners']
16
  search_query = f'"{name}" ({" OR ".join(keywords)}) site:news'
17
+
18
  try:
19
  with DDGS() as ddgs:
20
  results = list(ddgs.text(search_query, max_results=3))
 
30
  except Exception as e:
31
  return f"Search failed: {str(e)}"
32
 
 
33
  def extract_entities(search_results: str) -> str:
34
+ """Extract entities using Mistral 7B endpoint"""
35
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
36
+
37
  prompt = f"""Extract all person names and organization names from the following text.
38
  Format as:
39
  PERSON: [name]
40
  ORG: [organization name]
41
  Text: {search_results}"""
42
+
43
  try:
44
  response = requests.post(
45
  modal_endpoint,
46
  json={"prompt": prompt, "max_tokens": 500, "temperature": 0.1},
47
+ timeout=90 # Increased timeout
48
  )
49
  if response.status_code == 200:
50
  return response.json().get("response", "No entities extracted")
 
54
  return f"Extraction failed: {str(e)}"
55
 
56
  def find_full_names(search_results: str, entities: str) -> str:
57
+ """Find full names using Mistral 7B endpoint"""
58
  modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
59
+
60
  prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
61
  Entities: {entities}
62
  Search Results: {search_results}
63
  Provide full names with their roles/titles where mentioned."""
64
+
65
  try:
66
  response = requests.post(
67
  modal_endpoint,
68
  json={"prompt": prompt, "max_tokens": 300, "temperature": 0.1},
69
+ timeout=90 # Increased timeout
70
  )
71
  if response.status_code == 200:
72
  return response.json().get("response", "No full names found")
 
75
  except Exception as e:
76
  return f"Full name extraction failed: {str(e)}"
77
 
78
+ # ----------------------------
79
+ # Gradio Interface Logic
80
+ # ----------------------------
81
+
82
+ def process_name_with_progress(name: str, progress=gr.Progress(track_tqdm=True)):
83
  if not name.strip():
84
+ yield "", "", ""
85
+
86
+ # Start with all outputs empty
87
+ search_results = ""
88
+ entities = ""
89
+ full_names = ""
90
 
91
  try:
92
+ # Step 1: Search
93
+ progress(0.1, desc="Searching for articles...")
94
  search_results = search_articles(name.strip())
95
+ yield search_results, "", ""
96
 
97
+ # Step 2: Extract Entities
98
+ progress(0.4, desc="Extracting entities...")
99
  entities = extract_entities(search_results)
100
+ yield search_results, entities, ""
101
 
102
+ # Step 3: Resolve Full Names
103
+ progress(0.7, desc="Finding full names...")
104
  full_names = find_full_names(search_results, entities)
105
+ progress(1.0, desc="Done.")
106
+ yield search_results, entities, full_names
 
107
 
108
  except Exception as e:
109
+ error_msg = f"Error: {str(e)}"
110
+ yield search_results or error_msg, entities or error_msg, full_names or error_msg
111
 
 
112
  def process_name_simple(name: str):
113
+ """Simple debugging version (no progress bars)"""
114
  if not name.strip():
115
+ return "", "", ""
116
+
117
+ search_results = search_articles(name.strip())
118
+ entities = extract_entities(search_results)
119
+ full_names = find_full_names(search_results, entities)
120
+ return search_results, entities, full_names
121
+
122
+ # ----------------------------
123
+ # Gradio UI
124
+ # ----------------------------
125
 
 
126
  with gr.Blocks(title="Name Research Tool") as demo:
127
  gr.Markdown("# 🔍 Name Research Tool")
128
+ gr.Markdown("Enter a business or project name to search for related articles and extract key entities.")
129
 
130
  with gr.Row():
131
  name_input = gr.Textbox(label="Name", placeholder="Enter business or project name")
132
  with gr.Column():
133
+ search_btn = gr.Button("🔎 Search (Real-time)", variant="primary")
134
+ debug_btn = gr.Button("🐞 Search (Debug Mode)", variant="secondary")
135
 
136
+ with gr.Column():
137
+ output1 = gr.Textbox(label="Search Results", lines=10, max_lines=20)
138
+ output2 = gr.Textbox(label="Extracted Entities", lines=5, max_lines=10)
139
+ output3 = gr.Textbox(label="Full Names", lines=5, max_lines=10)
 
 
140
 
141
+ # Real-time (streamed) version
142
  search_btn.click(
143
  fn=process_name_with_progress,
144
  inputs=[name_input],
145
+ outputs=[output1, output2, output3]
146
  )
147
 
148
+ # Debug version (no progress)
149
  debug_btn.click(
150
  fn=process_name_simple,
151
  inputs=[name_input],
152
+ outputs=[output1, output2, output3]
153
  )
154
 
155
  if __name__ == "__main__":