import json
import os
from datetime import datetime, timedelta
from typing import Any, Dict, List

from dotenv import load_dotenv
from openai import OpenAI

from email_scraper import (
    scrape_emails_from_sender,
    scrape_emails_by_text_search,
    _load_email_db,
    _save_email_db,
    _is_date_in_range,
)
from schemas import (
    FetchEmailsParams,
    ShowEmailParams,
    AnalyzeEmailsParams,
    DraftReplyParams,
    SendReplyParams,
)

# Load environment variables from a .env file (expects OPENAI_API_KEY).
load_dotenv()

# Initialize the OpenAI client.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)

def extract_query_info(query: str) -> Dict[str, str]:
    """
    Use an LLM to extract sender information and a date range from a user query.
    Returns {"sender_keyword": "company/sender name", "start_date": "DD-MMM-YYYY", "end_date": "DD-MMM-YYYY"}.
    """
    today_str = datetime.today().strftime("%d-%b-%Y")
    five_days_ago = (datetime.today() - timedelta(days=5)).strftime("%d-%b-%Y")

    system_prompt = f"""
You are a query parser for email search. Today is {today_str}.
Given a user query, extract the sender/company keyword and date range. Return _only_ valid JSON with:
{{
  "sender_keyword": "keyword or company name to search for",
  "start_date": "DD-MMM-YYYY",
  "end_date": "DD-MMM-YYYY"
}}
Rules:
1. Extract sender keywords from phrases like "from swiggy", "swiggy emails", "mails from amazon", etc.
2. If no time is mentioned, use the last 5 days: {five_days_ago} to {today_str}
3. Interpret relative dates as:
   - "today" → {today_str} to {today_str}
   - "yesterday" → 1 day ago to 1 day ago
   - "last week" → 7 days ago to {today_str}
   - "last month" → 30 days ago to {today_str}
   - "last N days" → N days ago to {today_str}
Examples:
- "show me mails for last week from swiggy"
  → {{"sender_keyword": "swiggy", "start_date": "01-Jun-2025", "end_date": "{today_str}"}}
- "emails from amazon yesterday"
  → {{"sender_keyword": "amazon", "start_date": "06-Jun-2025", "end_date": "06-Jun-2025"}}
- "show flipkart emails"
  → {{"sender_keyword": "flipkart", "start_date": "{five_days_ago}", "end_date": "{today_str}"}}
Return _only_ the JSON object, no extra text.
"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0.0,
        messages=messages,
    )
    content = resp.choices[0].message.content.strip()

    # Try a direct parse; if the model added fluff, strip down to the JSON block.
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        start = content.find("{")
        end = content.rfind("}") + 1
        return json.loads(content[start:end])

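
# Example (hypothetical; the real output depends on today's date):
#   extract_query_info("emails from swiggy in the last 3 days")
#   -> {"sender_keyword": "swiggy", "start_date": "<3 days ago>", "end_date": "<today>"}
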
def fetch_emails(query: str) -> Dict:
    """
    Fetch emails based on a natural-language query that contains sender information
    and a date range. Uses text-based search and returns only summary information,
    not full content.

    Args:
        query: The natural-language query (e.g., "show me mails for last week from swiggy").

    Returns:
        Dict with query_info, email_summary, analysis, and email_count.
    """
    # Extract the sender keyword and date range from the query.
    query_info = extract_query_info(query)
    sender_keyword = query_info.get("sender_keyword", "")
    start_date = query_info.get("start_date")
    end_date = query_info.get("end_date")

    print(f"Searching for emails with keyword '{sender_keyword}' between {start_date} and {end_date}")

    # Use the text-based search function.
    full_emails = scrape_emails_by_text_search(sender_keyword, start_date, end_date)
    if not full_emails:
        return {
            "query_info": query_info,
            "email_summary": [],
            "analysis": {
                "summary": f"No emails found for '{sender_keyword}' in the specified date range.",
                "insights": [],
            },
            "email_count": 0,
        }

    # Create a summary version without the full content.
    email_summary = []
    for email in full_emails:
        email_summary.append({
            "date": email.get("date"),
            "time": email.get("time"),
            "subject": email.get("subject"),
            "from": email.get("from", "Unknown Sender"),
            "message_id": email.get("message_id"),
            # Note: 'content' is deliberately omitted to keep the response clean.
        })

    # Auto-analyze the emails for insights; the full emails are used for analysis
    # but are not returned to the caller.
    analysis = analyze_emails(full_emails)

    return {
        "query_info": query_info,
        "email_summary": email_summary,
        "analysis": analysis,
        "email_count": len(full_emails),
    }

def show_email(message_id: str) -> Dict:
    """
    Retrieve the full email record (date, time, subject, content, etc.)
    from the local cache by message_id.
    """
    # _load_email_db returns { sender_email: { "emails": [...], "last_scraped": ... }, ... }
    db = _load_email_db()

    # Search each sender's email list for a matching message_id.
    for sender_data in db.values():
        for email in sender_data.get("emails", []):
            if email.get("message_id") == message_id:
                return email

    # Not found: raise so the caller can surface a clear error.
    raise ValueError(f"No email found with message_id '{message_id}'")

def draft_reply(email: Dict, tone: str) -> str:
    """Draft a reply to the given email in the requested tone."""
    # TODO: call an LLM to generate the reply; return a dummy reply for now.
    # Emails in this module are keyed by 'message_id', not 'id'.
    message_id = email.get("message_id")
    print(f"Drafting reply for email {message_id} with tone: {tone}")
    return f"Drafted reply for email {message_id} with tone {tone}."

def send_reply(message_id: str, reply_body: str) -> Dict:
    """Send a reply via SMTP / the Gmail API. Placeholder: logs and reports a stubbed send (see the sketch below)."""
    # TODO: wire up a real SMTP or Gmail API send.
    print(f"Sending reply to message {message_id} with body: {reply_body}")
    return {"message_id": message_id, "status": "sent (stub)"}

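
# A minimal sketch of what a real send could look like, using the stdlib's
# smtplib. This helper is hypothetical and not wired into TOOL_MAPPING; the
# server address and the SMTP_USER / SMTP_PASSWORD environment variables are
# assumptions for illustration (a Gmail API send would work equally well).
import smtplib
from email.message import EmailMessage


def _send_via_smtp_sketch(to_addr: str, subject: str, body: str) -> None:
    msg = EmailMessage()
    msg["From"] = os.getenv("SMTP_USER", "")
    msg["To"] = to_addr
    msg["Subject"] = subject
    msg.set_content(body)
    # smtp.gmail.com:465 (implicit SSL) is one common choice of server.
    with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:
        smtp.login(os.getenv("SMTP_USER", ""), os.getenv("SMTP_PASSWORD", ""))
        smtp.send_message(msg)
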
def analyze_emails(emails: List[Dict]) -> Dict:
    """
    Summarize and extract insights from a list of emails.

    Returns a dict with this schema:
    {
        "summary": str,         # a concise overview of all emails
        "insights": [str, ...]  # list of key observations or stats
    }
    """
    if not emails:
        return {"summary": "No emails to analyze.", "insights": []}

    # 1) Create a simplified view of each email for analysis (content truncated to a preview).
    simplified_emails = []
    for email in emails:
        simplified_emails.append({
            "date": email.get("date"),
            "time": email.get("time"),
            "subject": email.get("subject"),
            "from": email.get("from", "Unknown Sender"),
            "content_preview": (email.get("content", "")[:200] + "...") if email.get("content") else "",
        })
    emails_payload = json.dumps(simplified_emails, ensure_ascii=False)

    # 2) Build the LLM prompt.
    system_prompt = """
You are an expert email analyst. You will be given a JSON array of email objects,
each with keys: date, time, subject, from, content_preview.
Your job is to produce _only_ valid JSON with two fields:
1. summary: a 1–2 sentence high-level overview of these emails.
2. insights: a list of 3–5 bullet-style observations or statistics
   (e.g. "5 emails from Swiggy", "mostly promotional content", "received over 3 days").
Focus on metadata like senders, subjects, dates, and patterns rather than detailed content analysis.
Output exactly:
{
  "summary": "...",
  "insights": ["...", "...", ...]
}
"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Here are the emails:\n{emails_payload}"},
    ]

    # 3) Call the LLM.
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0.0,
        messages=messages,
    )

    # 4) Parse and return; if the model added extra text, extract the JSON block.
    content = response.choices[0].message.content.strip()
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        start = content.find("{")
        end = content.rfind("}") + 1
        return json.loads(content[start:end])

TOOL_MAPPING = {
    "fetch_emails": fetch_emails,
    "show_email": show_email,
    "analyze_emails": analyze_emails,
    "draft_reply": draft_reply,
    "send_reply": send_reply,
}
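

# Quick manual check of the dispatch table. The query is hypothetical; running
# this requires a valid OPENAI_API_KEY and a populated local email cache.
if __name__ == "__main__":
    result = TOOL_MAPPING["fetch_emails"]("show me mails for last week from swiggy")
    print(json.dumps(result, indent=2, ensure_ascii=False))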