# File: MailQuery/server/query_parser.py
# Origin: devangshrivastava, commit 9b40609 ("first commit"), 5.92 kB
#!/usr/bin/env python3
"""
Query Parser with Intent Classification and Name-to-Email Resolution
"""
import json
import os
from datetime import datetime, timedelta
from openai import OpenAI
from typing import Dict, Optional, Tuple
from dotenv import load_dotenv
# Load environment variables from the .env file before the client below
# reads OPENAI_API_KEY from the environment.
load_dotenv()
# Module-level OpenAI client used by _llm(); os.getenv yields None when
# OPENAI_API_KEY is not set.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# JSON files used for persistence. The paths are relative, so they are
# resolved against the process's current working directory.
NAME_MAPPING_FILE = "name_mapping.json"  # lowercased name -> email address
EMAIL_DB_FILE = "email_db.json"
def _llm(messages, model="gpt-4o-mini", temperature=0):
    """Send a chat-completion request and return the reply text.

    Args:
        messages: Chat messages in the OpenAI format (a list of dicts with
            "role" and "content" keys).
        model: Name of the chat model to query.
        temperature: Sampling temperature passed through to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the response carries no text.
    """
    rsp = client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=messages,
    )
    # The message content is nullable in the API response; calling
    # .strip() on None would raise AttributeError.
    content = rsp.choices[0].message.content
    return content.strip() if content else ""
def _load_name_mapping() -> Dict[str, str]:
    """Load the name-to-email mapping from NAME_MAPPING_FILE.

    Returns:
        The mapping stored in the file, or an empty dict when the file is
        missing, unreadable, not valid UTF-8/JSON, or does not hold a
        JSON object.
    """
    # Open directly rather than checking os.path.exists first: the file
    # could disappear between the check and the open.
    try:
        with open(NAME_MAPPING_FILE, "r", encoding="utf-8") as f:
            mapping = json.load(f)
    except (OSError, ValueError):
        # OSError covers missing/unreadable files; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers index and iterate the result as a dict; any other JSON type
    # (list, string, number) is treated as "no mapping".
    return mapping if isinstance(mapping, dict) else {}
def _save_name_mapping(mapping: Dict[str, str]):
    """Write the name-to-email mapping to NAME_MAPPING_FILE as JSON.

    Args:
        mapping: Name -> email pairs to persist.

    Raises:
        TypeError: If the mapping contains values JSON cannot serialize.
        OSError: If the file cannot be opened or written.
    """
    # Serialize before opening: mode "w" truncates the file immediately,
    # so a serialization error raised mid-dump would otherwise wipe out
    # the previously saved mapping.
    payload = json.dumps(mapping, indent=2)
    with open(NAME_MAPPING_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
def _load_email_db() -> Dict:
    """Load the email database from EMAIL_DB_FILE.

    Returns:
        The JSON object stored in the file, or an empty dict when the file
        is missing, unreadable, not valid UTF-8/JSON, or does not hold a
        JSON object.
    """
    # Open directly rather than checking os.path.exists first: the file
    # could disappear between the check and the open.
    try:
        with open(EMAIL_DB_FILE, "r", encoding="utf-8") as f:
            db = json.load(f)
    except (OSError, ValueError):
        # OSError covers missing/unreadable files; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # The declared return type is a dict; reject any other JSON type.
    return db if isinstance(db, dict) else {}
def _save_email_db(db: Dict):
    """Write the email database to EMAIL_DB_FILE as JSON.

    Args:
        db: The database contents to persist.

    Raises:
        TypeError: If the database contains values JSON cannot serialize.
        OSError: If the file cannot be opened or written.
    """
    # Serialize before opening: mode "w" truncates the file immediately,
    # so a serialization error raised mid-dump would otherwise wipe out
    # the previously saved database.
    payload = json.dumps(db, indent=2)
    with open(EMAIL_DB_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
def extract_query_info(query: str) -> Dict:
    """Extract the sender intent and date range from a user query via the LLM.

    Args:
        query: Free-text request such as "emails from amazon last month".

    Returns:
        The JSON object produced by the model. The prompt asks for the keys
        "sender_intent", "start_date" and "end_date" (dates formatted as
        DD-MMM-YYYY); their presence is not verified here.

    Raises:
        ValueError: If the reply contains no JSON object, or the object is
            malformed (json.JSONDecodeError is a ValueError subclass).
    """
    today_str = datetime.today().strftime("%d-%b-%Y")
    system_prompt = f"""
You are an email query parser. Today is {today_str}.
Given a user query, extract:
1. sender_intent: The person/entity they want emails from (could be name or email)
2. start_date and end_date: Date range in DD-MMM-YYYY format
For relative dates:
- "last week" = 7 days ago to today
- "yesterday" = yesterday only
- "last month" = 30 days ago to today
- "last 3 days" = 3 days ago to today
Examples:
- "emails from dev agarwal last week" → sender_intent: "dev agarwal"
- "show amazon emails from last month" → sender_intent: "amazon"
- "emails from john@example.com yesterday" → sender_intent: "john@example.com"
Return ONLY valid JSON:
{{
"sender_intent": "extracted name or email",
"start_date": "DD-MMM-YYYY",
"end_date": "DD-MMM-YYYY"
}}
"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    raw = _llm(messages)
    # Despite the "ONLY valid JSON" instruction, a reply may arrive wrapped
    # in Markdown code fences or with a short preamble. Parse only the
    # outermost {...} span so those wrappers do not break decoding.
    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"LLM response contains no JSON object: {raw!r}")
    return json.loads(raw[start:end + 1])
def resolve_sender_email(sender_intent: str) -> Tuple[Optional[str], bool]:
    """Resolve a sender intent (name or email) to an email address.

    Resolution order: a literal email address, then an exact match on the
    stored name, then the first stored name that contains, or is contained
    in, the intent.

    Args:
        sender_intent: Name or email address extracted from the user query.

    Returns:
        A tuple (email_address, needs_user_input). email_address is None and
        needs_user_input is True when nothing could be resolved.
    """
    # A missing or blank intent can never be resolved. Without this guard
    # the substring scan below would return the first stored address,
    # because the empty string is a substring of every name.
    if not isinstance(sender_intent, str):
        return None, True
    normalized_intent = sender_intent.lower().strip()
    if not normalized_intent:
        return None, True

    # Already an email address: use it as given (normalized).
    if "@" in normalized_intent:
        return normalized_intent, False

    name_mapping = _load_name_mapping()

    # Exact match on the stored (lowercased) name.
    if normalized_intent in name_mapping:
        return name_mapping[normalized_intent], False

    # Partial match in either direction; the first hit in file order wins.
    for name, email in name_mapping.items():
        known = name.lower().strip()
        # A blank key is contained in every intent and would hijack
        # every lookup, so it is ignored.
        if not known:
            continue
        if normalized_intent in known or known in normalized_intent:
            return email, False

    # No match found: the caller has to ask the user for the address.
    return None, True
def store_name_email_mapping(name: str, email: str):
    """Persist a name -> email pair in the name mapping file.

    Both values are lowercased and stripped of surrounding whitespace
    before being stored; an existing entry for the same name is replaced.
    """
    known = _load_name_mapping()
    address = email.lower().strip()
    key = name.lower().strip()
    known.update({key: address})
    _save_name_mapping(known)
def parse_email_query(query: str) -> Dict:
    """Parse an email query and report what the caller should do next.

    Returns a dict whose "status" is one of:
      - "ready_to_scrape": the sender resolved to an address
        ("resolved_email" is included);
      - "need_email_input": the sender is unknown and the user must supply
        an address;
      - "error": any exception was raised while parsing ("error" holds its
        text).
    """
    try:
        # Step 1: ask the LLM for the sender intent and the date range.
        parsed = extract_query_info(query)
        intent = parsed["sender_intent"]
        date_from = parsed["start_date"]
        date_to = parsed["end_date"]

        # Step 2: map the intent to a concrete address, if one is known.
        address, ask_user = resolve_sender_email(intent)

        if not ask_user:
            # Everything needed for scraping is available.
            return {
                "status": "ready_to_scrape",
                "sender_intent": intent,
                "resolved_email": address,
                "start_date": date_from,
                "end_date": date_to,
                "message": f"Found email: {address} for '{intent}'",
            }

        # Unknown sender: hand the question back to the user.
        return {
            "status": "need_email_input",
            "sender_intent": intent,
            "start_date": date_from,
            "end_date": date_to,
            "message": f"I don't have an email address for '{intent}'. Please provide the email address.",
        }
    except Exception as exc:
        # Boundary handler: every failure is reported as a structured
        # error instead of propagating to the caller.
        return {
            "status": "error",
            "error": str(exc),
            "message": "Failed to parse query",
        }
# Manual smoke test: run the module directly to exercise the parser.
# Each query goes through parse_email_query, which calls the OpenAI API.
if __name__ == "__main__":
    # One query per path: a person's name, a company name, a literal email
    # address (resolved without a mapping lookup), and a name that is
    # presumably not in the mapping yet.
    test_queries = [
        "Show me emails from dev agarwal last week",
        "emails from amazon in the last month",
        "get john@example.com emails yesterday",
        "emails from new person last 3 days",
    ]
    for query in test_queries:
        print(f"\nQuery: {query}")
        result = parse_email_query(query)
        print(f"Result: {json.dumps(result, indent=2)}")