Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3 | |
""" | |
Query Parser with Intent Classification and Name-to-Email Resolution | |
""" | |
import json | |
import os | |
from datetime import datetime, timedelta | |
from openai import OpenAI | |
from typing import Dict, Optional, Tuple | |
from dotenv import load_dotenv # <-- Add this | |
# Load environment variables (such as OPENAI_API_KEY) from a .env file
load_dotenv()
# Module-level OpenAI client used by _llm(); the API key is read from the
# OPENAI_API_KEY environment variable
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# JSON files used for persistence. The paths are relative, so they are
# resolved against the current working directory.
NAME_MAPPING_FILE = "name_mapping.json"
EMAIL_DB_FILE = "email_db.json"
def _llm(messages, model="gpt-4o-mini", temperature=0):
    """Send a chat-completion request and return the reply text.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
        model: Name of the chat model to query.
        temperature: Sampling temperature passed through to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        removed.

    Raises:
        ValueError: If the response has no choices or no text content.
    """
    rsp = client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=messages,
    )
    if not rsp.choices:
        raise ValueError("LLM response contained no choices")
    content = rsp.choices[0].message.content
    # The content field is optional in the API response; fail with a clear
    # message instead of an AttributeError from calling .strip() on None.
    if content is None:
        raise ValueError("LLM response contained no text content")
    return content.strip()
def _load_name_mapping() -> Dict[str, str]:
    """Load the name-to-email mapping from ``NAME_MAPPING_FILE``.

    Returns:
        The stored mapping, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not hold a JSON object.
    """
    try:
        with open(NAME_MAPPING_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers index and iterate the result as a dict, so any other JSON
    # value (list, string, null, ...) is treated like an empty mapping.
    return data if isinstance(data, dict) else {}
def _save_name_mapping(mapping: Dict[str, str]):
    """Write the name-to-email mapping to ``NAME_MAPPING_FILE`` as JSON.

    Args:
        mapping: Name -> email pairs to persist.

    Raises:
        TypeError: If ``mapping`` contains values that are not JSON
            serializable.
        OSError: If the file cannot be written.
    """
    # Serialize before opening the file: mode "w" truncates immediately, so a
    # serialization error during the dump would otherwise wipe the old data.
    payload = json.dumps(mapping, indent=2)
    with open(NAME_MAPPING_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
def _load_email_db() -> Dict:
    """Load the email database from ``EMAIL_DB_FILE``.

    Returns:
        The stored database, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not hold a JSON object.
    """
    try:
        with open(EMAIL_DB_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # The declared return type is a dict; treat any other JSON value as an
    # empty database rather than handing callers an unexpected type.
    return data if isinstance(data, dict) else {}
def _save_email_db(db: Dict):
    """Write the email database to ``EMAIL_DB_FILE`` as JSON.

    Args:
        db: Database contents to persist.

    Raises:
        TypeError: If ``db`` contains values that are not JSON serializable.
        OSError: If the file cannot be written.
    """
    # Serialize before opening the file: mode "w" truncates immediately, so a
    # serialization error during the dump would otherwise wipe the old data.
    payload = json.dumps(db, indent=2)
    with open(EMAIL_DB_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
def extract_query_info(query: str) -> Dict:
    """
    Extract the sender intent and date range from a user query using the LLM.

    Args:
        query: Free-text request, e.g. 'emails from amazon last week'.

    Returns:
        The JSON object produced by the model. The prompt asks for the keys
        "sender_intent", "start_date" and "end_date" (DD-MMM-YYYY).

    Raises:
        ValueError: If the reply is not valid JSON (json.JSONDecodeError is a
            ValueError subclass) or is not a JSON object.
    """
    today_str = datetime.today().strftime("%d-%b-%Y")
    system_prompt = f"""
You are an email query parser. Today is {today_str}.
Given a user query, extract:
1. sender_intent: The person/entity they want emails from (could be name or email)
2. start_date and end_date: Date range in DD-MMM-YYYY format
For relative dates:
- "last week" = 7 days ago to today
- "yesterday" = yesterday only
- "last month" = 30 days ago to today
- "last 3 days" = 3 days ago to today
Examples:
- "emails from dev agarwal last week" → sender_intent: "dev agarwal"
- "show amazon emails from last month" → sender_intent: "amazon"
- "emails from [email protected] yesterday" → sender_intent: "[email protected]"
Return ONLY valid JSON:
{{
"sender_intent": "extracted name or email",
"start_date": "DD-MMM-YYYY",
"end_date": "DD-MMM-YYYY"
}}
"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    reply = _llm(messages)
    # Models sometimes wrap the JSON in Markdown code fences or add a short
    # preamble despite the instruction; keep only the outermost {...} span.
    start, end = reply.find("{"), reply.rfind("}")
    if start != -1 and end > start:
        reply = reply[start:end + 1]
    parsed = json.loads(reply)
    # Callers index the result by key, so reject any non-object JSON value.
    if not isinstance(parsed, dict):
        raise ValueError("LLM reply is not a JSON object")
    return parsed
def resolve_sender_email(sender_intent: str) -> Tuple[Optional[str], bool]:
    """
    Resolve a sender intent (a name or an email address) to an email address.

    Args:
        sender_intent: Name or email address extracted from the user query.

    Returns:
        (email_address, needs_user_input). email_address is None and
        needs_user_input is True when nothing could be resolved, including
        when the intent is empty or blank.
    """
    # Normalize once (trimmed, lowercase) for every comparison below
    normalized_intent = sender_intent.strip().lower() if sender_intent else ""
    # A blank intent is a substring of every stored name, so without this
    # guard the partial match below would return the first mapping entry.
    if not normalized_intent:
        return None, True
    # Check if it's already an email address
    if "@" in normalized_intent:
        return normalized_intent, False
    # Load name mapping
    name_mapping = _load_name_mapping()
    # Check direct match
    if normalized_intent in name_mapping:
        return name_mapping[normalized_intent], False
    # Check partial matches (substring in either direction). Blank keys are
    # skipped because "" is contained in every string.
    for name, email in name_mapping.items():
        candidate = name.lower().strip()
        if candidate and (
            normalized_intent in candidate or candidate in normalized_intent
        ):
            return email, False
    # No match found
    return None, True
def store_name_email_mapping(name: str, email: str):
    """
    Store a new name to email mapping (both lowercased and trimmed).

    Args:
        name: Display name to use as the lookup key.
        email: Email address to associate with the name.

    Raises:
        ValueError: If name or email is empty or only whitespace.
    """
    key = name.lower().strip()
    address = email.lower().strip()
    # A blank key is a substring of every query, so the partial-match lookup
    # in resolve_sender_email would return it for any sender; a blank address
    # is useless as a resolution result. Reject both before touching the file.
    if not key or not address:
        raise ValueError("name and email must be non-empty")
    name_mapping = _load_name_mapping()
    name_mapping[key] = address
    _save_name_mapping(name_mapping)
def parse_email_query(query: str) -> Dict:
    """
    Parse an email query end to end and describe the next step.

    The query is first broken down into a sender intent and a date range,
    then the sender is resolved to an email address. The returned dict always
    carries "status" and "message"; status is "ready_to_scrape",
    "need_email_input" or "error".
    """
    try:
        # Step 1: pull the sender intent and the date range out of the query
        extracted = extract_query_info(query)
        intent = extracted["sender_intent"]
        date_from = extracted["start_date"]
        date_to = extracted["end_date"]
        # Step 2: map the intent to an email address
        resolved, unresolved = resolve_sender_email(intent)
        if not unresolved:
            # Address known: the caller can go ahead and scrape
            return {
                "status": "ready_to_scrape",
                "sender_intent": intent,
                "resolved_email": resolved,
                "start_date": date_from,
                "end_date": date_to,
                "message": f"Found email: {resolved} for '{intent}'",
            }
        # Address unknown: the user has to supply it
        return {
            "status": "need_email_input",
            "sender_intent": intent,
            "start_date": date_from,
            "end_date": date_to,
            "message": f"I don't have an email address for '{intent}'. Please provide the email address.",
        }
    except Exception as exc:
        # Boundary handler: any failure is reported as an error result
        return {
            "status": "error",
            "error": str(exc),
            "message": "Failed to parse query",
        }
# Manual smoke run of the parser
if __name__ == "__main__":
    # Sample queries covering a known name, a company, an address and an unknown sender
    sample_queries = (
        "Show me emails from dev agarwal last week",
        "emails from amazon in the last month",
        "get [email protected] emails yesterday",
        "emails from new person last 3 days",
    )
    for text in sample_queries:
        print(f"\nQuery: {text}")
        parsed = parse_email_query(text)
        print(f"Result: {json.dumps(parsed, indent=2)}")