Spaces:
Sleeping
Sleeping
from schemas import ( | |
FetchEmailsParams, | |
ShowEmailParams, | |
AnalyzeEmailsParams, | |
DraftReplyParams, | |
SendReplyParams, | |
) | |
from typing import Any, Dict | |
from email_scraper import scrape_emails_from_sender, scrape_emails_by_text_search, _load_email_db, _save_email_db, _is_date_in_range | |
from datetime import datetime, timedelta | |
from typing import List | |
from openai import OpenAI | |
import json | |
from dotenv import load_dotenv | |
import os | |
# Populate the process environment from a local .env file before any lookups.
load_dotenv()

# Shared OpenAI client used by the LLM-backed helpers in this module.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)
def extract_query_info(query: str) -> Dict[str, str]:
    """
    Use an LLM to extract sender information and a date range from a user query.

    Args:
        query: Natural-language request, e.g. "show me mails for last week from swiggy".

    Returns:
        The JSON object produced by the model, parsed with ``json.loads``. The prompt
        asks for {"sender_keyword": "company/sender name", "start_date": "DD-MMM-YYYY",
        "end_date": "DD-MMM-YYYY"}, but the keys are not validated here.

    Raises:
        json.JSONDecodeError: If the model's reply contains no parseable JSON object.
    """
    # Read the clock once so every date in the prompt derives from the same "today".
    now = datetime.today()

    def _days_ago(days: int) -> str:
        return (now - timedelta(days=days)).strftime("%d-%b-%Y")

    today_str = _days_ago(0)
    yesterday_str = _days_ago(1)
    five_days_ago = _days_ago(5)
    week_ago_str = _days_ago(7)

    # The example dates are computed rather than hard-coded so that they stay
    # consistent with the "Today is ..." line and the relative-date rules.
    system_prompt = f"""
You are a query parser for email search. Today is {today_str}.
Given a user query, extract the sender/company keyword and date range. Return _only_ valid JSON with:
{{
"sender_keyword": "keyword or company name to search for",
"start_date": "DD-MMM-YYYY",
"end_date": "DD-MMM-YYYY"
}}
Rules:
1. Extract sender keywords from phrases like "from swiggy", "swiggy emails", "mails from amazon", etc.
2. If no time is mentioned, use last 5 days: {five_days_ago} to {today_str}
3. Interpret relative dates as:
- "today" → {today_str} to {today_str}
- "yesterday" → 1 day ago to 1 day ago
- "last week" → 7 days ago to {today_str}
- "last month" → 30 days ago to {today_str}
- "last N days" → N days ago to {today_str}
Examples:
- "show me mails for last week from swiggy"
→ {{"sender_keyword": "swiggy", "start_date": "{week_ago_str}", "end_date": "{today_str}"}}
- "emails from amazon yesterday"
→ {{"sender_keyword": "amazon", "start_date": "{yesterday_str}", "end_date": "{yesterday_str}"}}
- "show flipkart emails"
→ {{"sender_keyword": "flipkart", "start_date": "{five_days_ago}", "end_date": "{today_str}"}}
Return _only_ the JSON object—no extra text.
"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0.0,
        messages=messages,
    )

    # The SDK types message.content as optional; treat a missing body as empty
    # text so the failure surfaces as a JSON error instead of an AttributeError.
    content = (resp.choices[0].message.content or "").strip()

    # Try a direct parse; if the model added fluff, strip to the JSON block.
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        start = content.find("{")
        end = content.rfind("}")
        if start == -1 or end < start:
            raise json.JSONDecodeError(
                "No JSON object found in model response", content, 0
            )
        return json.loads(content[start:end + 1])
def fetch_emails(query: str) -> Dict:
    """
    Run a natural-language email search and return header-level results plus an analysis.

    The query is parsed by ``extract_query_info``, the search itself is delegated to
    ``scrape_emails_by_text_search``, and matching emails are passed to
    ``analyze_emails``. Full message content is never included in the return value.

    Args:
        query: The natural language query (e.g., "show me mails for last week from swiggy")

    Returns:
        Dict with the keys query_info, email_summary, analysis, and email_count.
    """
    # Parse the request into a sender keyword and a date window.
    query_info = extract_query_info(query)
    sender_keyword = query_info.get("sender_keyword", "")
    start_date, end_date = query_info.get("start_date"), query_info.get("end_date")
    print(f"Searching for emails with keyword '{sender_keyword}' between {start_date} and {end_date}")

    matched = scrape_emails_by_text_search(sender_keyword, start_date, end_date)

    if matched:
        # Header-only view of each message; 'content' is deliberately left out.
        headers = [
            {
                "date": message.get("date"),
                "time": message.get("time"),
                "subject": message.get("subject"),
                "from": message.get("from", "Unknown Sender"),
                "message_id": message.get("message_id"),
            }
            for message in matched
        ]
        return {
            "query_info": query_info,
            "email_summary": headers,
            # The analysis sees the full records even though they are not returned.
            "analysis": analyze_emails(matched),
            "email_count": len(matched),
        }

    # Nothing matched: return the same shape with an explanatory summary.
    return {
        "query_info": query_info,
        "email_summary": [],
        "analysis": {"summary": f"No emails found for '{sender_keyword}' in the specified date range.", "insights": []},
        "email_count": 0,
    }
def show_email(message_id: str) -> Dict:
    """
    Look up a single cached email by its message_id and return the stored record.

    Args:
        message_id: Identifier compared against each record's "message_id" field.

    Returns:
        The first cached email dict whose "message_id" equals ``message_id``.

    Raises:
        ValueError: If no cached email carries that message_id.
    """
    cache = _load_email_db()  # returns { sender_email: { "emails": [...], "last_scraped": ... }, ... }

    # Walk every sender's stored emails lazily, stopping at the first hit.
    candidates = (
        record
        for sender_entry in cache.values()
        for record in sender_entry.get("emails", [])
    )
    found = next(
        (record for record in candidates if record.get("message_id") == message_id),
        None,
    )
    if found is None:
        raise ValueError(f"No email found with message_id '{message_id}'")
    return found
def draft_reply(email: Dict, tone: str) -> str:
    """
    Produce a reply draft for an email in the requested tone.

    Placeholder implementation: no LLM call is made yet; the function logs the
    request to stdout and returns a fixed-format string.

    Args:
        email: Email record. An "id" key is used when present (the original
            behaviour); otherwise the "message_id" key is used, which is the
            identifier carried by the email records elsewhere in this module.
        tone: Desired tone of the reply, echoed into the placeholder text.

    Returns:
        The placeholder draft text.

    Raises:
        KeyError: If the record has neither an "id" nor a "message_id" key.
    """
    # Records built in this module are keyed by "message_id", so requiring
    # "id" alone would reject them; "id" stays first for existing callers.
    for key in ("id", "message_id"):
        if key in email:
            email_id = email[key]
            break
    else:
        raise KeyError("email record has neither 'id' nor 'message_id'")

    # TODO: call the LLM to generate the reply; a dummy reply is returned for now.
    print(f"Drafting reply for email {email_id} with tone: {tone}")
    return f"Drafted reply for email {email_id} with tone {tone}."
def send_reply(message_id: str, reply_body: str) -> Dict:
    """
    Stub for sending a reply to the message identified by ``message_id``.

    Nothing is actually sent yet (SMTP / Gmail API delivery is still to be
    written): the request is only printed to stdout, and the function returns
    None even though the signature is annotated with ``Dict``.

    Args:
        message_id: Identifier of the message being replied to.
        reply_body: Text of the reply.
    """
    notice = f"Sending reply to message {message_id} with body: {reply_body}"
    print(notice)
    return None
def analyze_emails(emails: List[Dict]) -> Dict:
    """
    Summarize and extract insights from a list of emails using an LLM.

    Each email is reduced to its date, time, subject, sender and a short content
    preview before being sent to the model.

    Args:
        emails: Email records; the keys read are "date", "time", "subject",
            "from" and "content" (all optional).

    Returns:
        For an empty input, {"summary": "No emails to analyze.", "insights": []}.
        Otherwise the JSON object produced by the model, which the prompt asks
        to follow this schema (not validated here):
        {
            "summary": str,          # a concise overview of all emails
            "insights": [str, ...]   # list of key observations or stats
        }

    Raises:
        json.JSONDecodeError: If the model's reply contains no parseable JSON object.
    """
    if not emails:
        return {"summary": "No emails to analyze.", "insights": []}

    # 1) Create a simplified email summary for analysis (without full content)
    preview_limit = 200
    simplified_emails = []
    for email in emails:
        body = email.get("content") or ""
        # Only add the ellipsis when text was actually cut off, so short
        # messages are not presented to the model as truncated.
        if len(body) > preview_limit:
            preview = body[:preview_limit] + "..."
        else:
            preview = body
        simplified_emails.append(
            {
                "date": email.get("date"),
                "time": email.get("time"),
                "subject": email.get("subject"),
                "from": email.get("from", "Unknown Sender"),
                "content_preview": preview,
            }
        )
    emails_payload = json.dumps(simplified_emails, ensure_ascii=False)

    # 2) Build the LLM prompt
    system_prompt = """
You are an expert email analyst. You will be given a JSON array of email objects,
each with keys: date, time, subject, from, content_preview.
Your job is to produce _only_ valid JSON with two fields:
1. summary: a 1–2 sentence high-level overview of these emails.
2. insights: a list of 3–5 bullet-style observations or statistics
(e.g. "5 emails from Swiggy", "mostly promotional content", "received over 3 days").
Focus on metadata like senders, subjects, dates, and patterns rather than detailed content analysis.
Output exactly:
{
"summary": "...",
"insights": ["...", "...", ...]
}
"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Here are the emails:\n{emails_payload}"},
    ]

    # 3) Call the LLM
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0.0,
        messages=messages,
    )

    # 4) Parse and return
    # The SDK types message.content as optional; treat a missing body as empty
    # text so the failure surfaces as a JSON error instead of an AttributeError.
    content = (response.choices[0].message.content or "").strip()
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        # In case the model outputs extra text, extract the JSON block
        start = content.find("{")
        end = content.rfind("}")
        if start == -1 or end < start:
            raise json.JSONDecodeError(
                "No JSON object found in model response", content, 0
            )
        return json.loads(content[start:end + 1])
# Lookup table from tool name (string) to the function implementing it.
# NOTE(review): presumably consumed by a tool-dispatch layer outside this file — confirm.
TOOL_MAPPING = dict(
    fetch_emails=fetch_emails,
    show_email=show_email,
    analyze_emails=analyze_emails,
    draft_reply=draft_reply,
    send_reply=send_reply,
)