sagarnildass commited on
Commit
24df625
·
verified ·
1 Parent(s): efb7371

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Deep Research Assistant Agent
3
- emoji: 🏃
4
- colorFrom: green
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.29.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Deep_Research_Assistant_Agent
3
+ app_file: deep_research.py
 
 
4
  sdk: gradio
5
+ sdk_version: 4.44.1
 
 
6
  ---
 
 
__pycache__/clarifier_agent.cpython-39.pyc ADDED
Binary file (905 Bytes). View file
 
__pycache__/coordinator_agent.cpython-39.pyc ADDED
Binary file (2.67 kB). View file
 
__pycache__/email_agent.cpython-39.pyc ADDED
Binary file (1.52 kB). View file
 
__pycache__/planner_agent.cpython-39.pyc ADDED
Binary file (1.06 kB). View file
 
__pycache__/research_manager.cpython-39.pyc ADDED
Binary file (4.68 kB). View file
 
__pycache__/search_agent.cpython-39.pyc ADDED
Binary file (1.02 kB). View file
 
__pycache__/writer_agent.cpython-39.pyc ADDED
Binary file (1.24 kB). View file
 
clarifier_agent.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from agents import Agent
3
+
4
class ClarifyingQuestions(BaseModel):
    # Structured output schema the clarifier agent is asked to fill in.
    questions: list[str]
    """Three clarifying questions to better understand the user's query."""


# Agent that turns a raw research query into clarifying questions.
clarifier_agent = Agent(
    output_type=ClarifyingQuestions,
    model="gpt-4o-mini",
    name="ClarifierAgent",
    instructions=(
        "You are a research assistant. "
        "Your task is to ask 3 clarifying questions that help refine and understand "
        "a research query better. "
        "After the user answers them, hand off control to the Research Coordinator to perform the full research."
    ),
)
deep_research.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # deep_research.py
2
+
3
+ import gradio as gr
4
+ from dotenv import load_dotenv
5
+ from clarifier_agent import clarifier_agent
6
+ from research_manager import ResearchManagerAgent
7
+ from agents import Runner
8
+ from collections import defaultdict
9
+ from datetime import datetime
10
+ import time
11
+ import logging
12
+
13
+ load_dotenv(override=True)
14
+
15
+ # --- Rate Limiter ---
16
class RateLimiter:
    """In-memory, per-user limiter combining a sliding window with a daily quota.

    With the defaults a user may make 2 requests per 60-second window and
    10 requests per UTC calendar day. All state is held in plain dictionaries
    on the instance, so it is lost whenever the process restarts.
    """

    def __init__(self, max_requests=2, time_window=60, daily_quota=10):
        """Create a limiter.

        Args:
            max_requests: Requests allowed inside one ``time_window``.
            time_window: Length of the sliding window, in seconds.
            daily_quota: Requests allowed per UTC day.
        """
        self.max_requests = max_requests
        self.time_window = time_window  # seconds
        # user_id -> timestamps (time.time()) of requests still inside the window
        self.request_history = defaultdict(list)
        self.daily_quota = daily_quota
        # user_id -> {'date': 'YYYY-MM-DD', 'count': requests accepted that day}
        self.daily_counts = defaultdict(lambda: {'date': self._today(), 'count': 0})

    def _today(self):
        """Return the current UTC date formatted as ``YYYY-MM-DD``."""
        # time.gmtime() is UTC; this avoids the deprecated datetime.utcnow().
        return time.strftime('%Y-%m-%d', time.gmtime())

    def is_rate_limited(self, user_id):
        """Return True if ``user_id`` already used up the current window.

        When the request is allowed (False is returned) it is also recorded,
        so every non-limited call counts toward the window.
        """
        now = time.time()
        # Drop timestamps that have aged out of the sliding window.
        recent = [t for t in self.request_history[user_id] if now - t < self.time_window]
        self.request_history[user_id] = recent
        if len(recent) >= self.max_requests:
            return True
        recent.append(now)
        return False

    def is_quota_exceeded(self, user_id):
        """Return True if ``user_id`` has used up today's quota.

        When the request is allowed (False is returned) the user's daily
        counter is incremented. The counter resets on the first call of a new
        UTC day.
        """
        today = self._today()
        user_quota = self.daily_counts[user_id]
        if user_quota['date'] != today:
            # First request on a new day: start counting from zero again.
            user_quota['date'] = today
            user_quota['count'] = 0
        if user_quota['count'] >= self.daily_quota:
            return True
        # The dict is mutated in place, so no re-assignment into daily_counts is needed.
        user_quota['count'] += 1
        return False
49
+
50
# Single limiter instance shared by every request handler in this module.
rate_limiter = RateLimiter()

# Module-level logger with DEBUG enabled on it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
53
+
54
async def get_user_id(request: gr.Request = None):
    """Derive the identifier used as the rate-limiting key for a request.

    The first entry of the ``X-Forwarded-For`` header is preferred; otherwise
    the host of ``request.client`` is used. If neither yields a non-empty
    value (or no request is given) the shared key ``"default_user"`` is
    returned.

    NOTE(review): ``X-Forwarded-For`` is client-controllable unless a trusted
    proxy overwrites it, so this key can be spoofed — confirm the deployment.
    """
    user_id = "default_user"
    if request is not None:
        try:
            forwarded = request.headers.get("X-Forwarded-For")
            if forwarded:
                candidate = forwarded.split(",")[0].strip()
            else:
                candidate = getattr(request.client, 'host', None)
            # Keep the fallback when the header entry is blank or the host is missing.
            if candidate:
                user_id = candidate
        except Exception:
            # Best effort only: fall back to the shared key, but leave a trace in the log.
            logger.debug("[RateLimiter] could not determine client address", exc_info=True)
    logger.debug(f"[RateLimiter] user_id={user_id}")
    return user_id
67
+
68
+ # Step 1 — Generate clarifying questions
69
async def get_clarifying_questions(query, request: gr.Request = None):
    """Ask the clarifier agent for questions about ``query``.

    Always returns a list of exactly three strings, one per clarifying-question
    textbox this handler is wired to. When the caller is rate limited or over
    quota, the first string carries the message and the other two are empty.
    """
    user_id = await get_user_id(request)
    if rate_limiter.is_rate_limited(user_id):
        return ["Rate limit exceeded. Please wait a minute.", "", ""]
    if rate_limiter.is_quota_exceeded(user_id):
        return ["Daily quota exceeded. Try again tomorrow.", "", ""]

    result = await Runner.run(clarifier_agent, input=query)
    # The model is asked for three questions but the schema does not enforce it:
    # truncate or pad so the number of values always matches the three outputs.
    questions = list(result.final_output.questions)[:3]
    questions += [""] * (3 - len(questions))
    return questions
78
+
79
+ # Step 2 — Run full research pipeline via coordinator agent (handoff style)
80
async def run_with_handoff(query, q1, q2, q3, a1, a2, a3, send_email_flag, recipient_email, request: gr.Request = None):
    """Stream the research pipeline's progress messages and final report.

    Async generator: yields a single refusal message when the caller is rate
    limited or over quota, otherwise re-yields every chunk produced by
    ``ResearchManagerAgent().run(...)``.
    """
    caller = await get_user_id(request)

    # Both gates record the request when they let it through.
    if rate_limiter.is_rate_limited(caller):
        yield "Rate limit exceeded. Please wait a minute."
        return
    if rate_limiter.is_quota_exceeded(caller):
        yield "You have reached your daily quota. Try again tomorrow."
        return

    manager = ResearchManagerAgent()
    progress = manager.run(
        query,
        [q1, q2, q3],
        [a1, a2, a3],
        send_email_flag=send_email_flag,
        recipient_email=recipient_email,
    )
    async for update in progress:
        yield update
99
+
100
# Gradio front end: query -> clarifying questions -> answers -> streamed report.
with gr.Blocks(theme=gr.themes.Default(primary_hue="sky")) as ui:
    gr.Markdown("# 🔍 Deep Research Agent (Clarify ➡️ Research ➡️ Email)")

    query = gr.Textbox(label="🔎 What would you like to research?")

    get_questions_btn = gr.Button("Generate Clarifying Questions", variant="primary")

    # Read-only boxes that display the generated questions.
    clar_q1, clar_q2, clar_q3 = (
        gr.Textbox(label=f"Clarifying Question {idx}", interactive=False) for idx in (1, 2, 3)
    )

    # Editable boxes for the user's answers.
    answer_1, answer_2, answer_3 = (
        gr.Textbox(label=f"Your Answer to Q{idx}") for idx in (1, 2, 3)
    )

    send_email_checkbox = gr.Checkbox(label="📧 Send Report via Email?")
    email_box = gr.Textbox(label="Recipient Email", visible=False)

    # The recipient box is only visible while the checkbox is ticked.
    send_email_checkbox.change(
        fn=lambda checked: gr.update(visible=checked),
        inputs=send_email_checkbox,
        outputs=email_box,
    )

    submit_answers_btn = gr.Button("✅ Submit & Run Full Research")
    report = gr.Markdown(label="📄 Research Report")

    # Step 1: fill the three question boxes, then blank the report area.
    step_one = get_questions_btn.click(
        fn=get_clarifying_questions,
        inputs=query,
        outputs=[clar_q1, clar_q2, clar_q3],
    )
    step_one.then(lambda *_: "", outputs=report)

    # Step 2: run the research pipeline and stream its output into the report.
    submit_answers_btn.click(
        fn=run_with_handoff,
        inputs=[
            query,
            clar_q1, clar_q2, clar_q3,
            answer_1, answer_2, answer_3,
            send_email_checkbox, email_box,
        ],
        outputs=report,
    )

ui.launch(inbrowser=True)
email_agent.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import requests
4
+ from agents import Agent, function_tool
5
+
6
@function_tool
def send_email(subject: str, html_body: str, to: str):
    """Send out an email with the given subject and HTML body to a specified recipient using Mailgun"""
    # NOTE(review): the docstring above is kept verbatim because function_tool
    # presumably exposes it to the model as the tool description — confirm
    # before rewording it.
    #
    # Returns a dict with "status" ("success" or "failure") and "response"
    # (the Mailgun response body, or an error description). Never raises for
    # missing configuration or network errors, so the calling agent always
    # receives a result it can report.
    api_key = os.environ.get('MAILGUN_API_KEY')
    domain = os.environ.get('MAILGUN_DOMAIN')

    if not all([api_key, domain, to]):
        return {"status": "failure", "response": "Missing configuration or recipient"}

    try:
        response = requests.post(
            f'https://api.mailgun.net/v3/{domain}/messages',
            # HTTP Basic auth with user "api"; requests builds the header.
            auth=('api', api_key),
            data={
                'from': f'Research Agent <mailgun@{domain}>',
                'to': to,
                'subject': subject,
                'html': html_body,
            },
            # Without a timeout a stalled connection would block the tool call indefinitely.
            timeout=30,
        )
    except requests.RequestException as exc:
        return {"status": "failure", "response": f"Request to Mailgun failed: {exc}"}

    return {
        "status": "success" if response.status_code == 200 else "failure",
        "response": response.text,
    }
33
+
34
# System prompt for the email agent (three lines joined by newlines).
INSTRUCTIONS = (
    "You are able to send a nicely formatted HTML email based on a detailed report.\n"
    "You will be provided with a detailed report and a recipient email. Use your tool to send one email,\n"
    "providing the report as HTML with an appropriate subject line."
)

# Agent whose only tool is send_email.
email_agent = Agent(
    model="gpt-4o-mini",
    name="Email agent",
    tools=[send_email],
    instructions=INSTRUCTIONS,
)
planner_agent.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from agents import Agent
3
+
4
# Number of search terms the planner is told to produce.
HOW_MANY_SEARCHES = 5

# System prompt for the planner; embeds HOW_MANY_SEARCHES.
INSTRUCTIONS = (
    "You are a helpful research assistant. Given a query, come up with a set of web searches "
    f"to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for."
)


class WebSearchItem(BaseModel):
    # One planned search: the term plus the reason it is worth running.
    reason: str
    "Your reasoning for why this search is important to the query."

    query: str
    "The search term to use for the web search."


class WebSearchPlan(BaseModel):
    # Structured output of the planner agent.
    searches: list[WebSearchItem]
    """A list of web searches to perform to best answer the query."""


# Agent that converts a query (plus clarifications) into a WebSearchPlan.
planner_agent = Agent(
    output_type=WebSearchPlan,
    model="gpt-4o-mini",
    instructions=INSTRUCTIONS,
    name="PlannerAgent",
)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ requests
2
+ python-dotenv
3
+ gradio
4
+ pypdf
5
+ openai
6
+ openai-agents
research_manager.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agents import Runner, trace, gen_trace_id
2
+ from search_agent import search_agent
3
+ from planner_agent import planner_agent, WebSearchItem, WebSearchPlan
4
+ from writer_agent import writer_agent, ReportData
5
+ from email_agent import email_agent
6
+ import asyncio
7
+ from typing import Optional
8
+
9
class ResearchManagerAgent:
    """Runs the research pipeline: plan searches, search, write report, optionally email it."""

    async def run(
        self,
        query: str,
        clarifying_questions: list[str],
        clarifying_answers: list[str],
        send_email_flag: bool = False,
        recipient_email: Optional[str] = None,
    ):
        """Run the deep research process using user-provided clarification answers.

        Async generator. Yields human-readable status strings while the
        pipeline progresses and, as its last item, the report's markdown.

        Args:
            query: The original research question.
            clarifying_questions: Questions previously shown to the user.
            clarifying_answers: The user's answers, paired with the questions by position.
            send_email_flag: Whether the report should also be emailed.
            recipient_email: Address to send to; the email step is skipped when empty.
        """
        trace_id = gen_trace_id()
        with trace("Research trace", trace_id=trace_id):
            trace_line = f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}"
            print(trace_line)
            yield trace_line
            yield "Planning search based on clarifications..."

            print(f"Clarifying questions: {clarifying_questions}")
            print(f"Clarifying answers: {clarifying_answers}")

            # Plan searches using clarifications and user answers
            search_plan = await self.plan_searches(query, clarifying_questions, clarifying_answers)

            yield "Searches planned, starting to search..."
            search_results = await self.perform_searches(search_plan)

            yield "Searches complete, writing report..."
            report = await self.write_report(query, search_results)

            # Exactly one status is reported for the email step: either it was
            # sent or it was skipped.
            if send_email_flag and recipient_email:
                yield f"Sending report to {recipient_email}..."
                await self.send_email(report, recipient_email)
                yield "Email sent"
            else:
                yield "Skipping email step"

            yield report.markdown_report

    async def plan_searches(self, query: str, questions: list[str], answers: list[str]) -> WebSearchPlan:
        """Ask the planner agent for a search plan based on the query and the clarifications."""
        print("Planning searches...")

        # Combine clarifying Q&A into a structured prompt; zip() pairs them by
        # position and stops at the shorter list.
        clarifying_context = "\n".join(
            f"Q: {q}\nA: {a}" for q, a in zip(questions, answers)
        )
        final_prompt = f"Query: {query}\nClarifications:\n{clarifying_context}"

        result = await Runner.run(
            planner_agent,
            input=final_prompt,
        )
        plan = result.final_output_as(WebSearchPlan)
        print(f"Will perform {len(plan.searches)} searches")
        return plan

    async def perform_searches(self, search_plan: WebSearchPlan) -> list[str]:
        """Run all planned searches concurrently and return the summaries that succeeded.

        Results are collected in completion order; failed searches (``None``)
        are dropped.
        """
        print("Searching...")
        tasks = [asyncio.create_task(self.search(item)) for item in search_plan.searches]
        results = []
        for num_completed, finished in enumerate(asyncio.as_completed(tasks), start=1):
            result = await finished
            if result is not None:
                results.append(result)
            print(f"Searching... {num_completed}/{len(tasks)} completed")
        print("Finished searching")
        return results

    async def search(self, item: WebSearchItem) -> Optional[str]:
        """Perform a single web search; return its summary, or None if it failed."""
        input_text = f"Search term: {item.query}\nReason for searching: {item.reason}"
        try:
            result = await Runner.run(
                search_agent,
                input_text,
            )
            return str(result.final_output)
        except Exception as e:
            # Deliberately best effort: one failed search must not abort the whole run.
            print(f"Search failed: {e}")
            return None

    async def write_report(self, query: str, search_results: list[str]) -> ReportData:
        """Ask the writer agent to produce the report from the search summaries."""
        print("Thinking about report...")
        input_text = f"Original query: {query}\nSummarized search results: {search_results}"
        result = await Runner.run(
            writer_agent,
            input_text,
        )
        print("Finished writing report")
        return result.final_output_as(ReportData)

    async def send_email(self, report: ReportData, recipient_email: str) -> None:
        """Ask the email agent to send the report to ``recipient_email``."""
        email_prompt = (
            "Send the following report as an email.\n"
            f"To: {recipient_email}\n"
            "Body (HTML):\n"
            f"{report.markdown_report}\n"
        )
        print(f"Sending email to: {recipient_email}")
        await Runner.run(email_agent, input=email_prompt)
        print("✅ Email sent")
search_agent.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agents import Agent, WebSearchTool, ModelSettings
2
+
3
# System prompt for the search agent. The wording is reproduced exactly as
# written, one sentence per literal.
INSTRUCTIONS = (
    "You are a research assistant. "
    "Given a search term, you search the web for that term and produce a concise summary of the results. "
    "The summary must 2-3 paragraphs and less than 300 words. "
    "Capture the main points. "
    "Write succintly, no need to have complete sentences or good grammar. "
    "This will be consumed by someone synthesizing a report, so its vital you capture the essence and ignore any fluff. "
    "Do not include any additional commentary other than the summary itself."
)

# Agent configured with a web search tool and tool_choice="required".
search_agent = Agent(
    model="gpt-4o-mini",
    model_settings=ModelSettings(tool_choice="required"),
    tools=[WebSearchTool(search_context_size="low")],
    instructions=INSTRUCTIONS,
    name="Search agent",
)
writer_agent.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from agents import Agent
3
+
4
# System prompt for the report writer, one sentence per literal.
INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and flow of the report. "
    "Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. "
    "Aim for 5-10 pages of content, at least 1000 words."
)


class ReportData(BaseModel):
    # Structured output of the writer agent.
    short_summary: str
    """A short 2-3 sentence summary of the findings."""

    markdown_report: str
    """The final report"""

    follow_up_questions: list[str]
    """Suggested topics to research further"""


# Agent that turns the query and search summaries into a ReportData.
writer_agent = Agent(
    output_type=ReportData,
    model="gpt-4o-mini",
    instructions=INSTRUCTIONS,
    name="WriterAgent",
)