|
|
|
""" |
|
Query Parser with Intent Classification and Name-to-Email Resolution |
|
""" |
|
|
|
import json |
|
import os |
|
from datetime import datetime, timedelta |
|
from openai import OpenAI |
|
from typing import Dict, Optional, Tuple |
|
from dotenv import load_dotenv |
|
|
|
|
|
# Pull settings from a local .env file into the process environment
# before anything below reads them.
load_dotenv()

# Shared OpenAI client used by every LLM call in this module; the key is
# taken from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# JSON files used for persistence (bare names, so they resolve against the
# current working directory).
NAME_MAPPING_FILE = "name_mapping.json"
EMAIL_DB_FILE = "email_db.json"
|
|
|
def _llm(messages, model="gpt-4o-mini", temperature=0):
    """Send a chat-completion request and return the reply text.

    Args:
        messages: Chat messages, each a dict with ``role`` and ``content``.
        model: Name of the OpenAI chat model to query.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the reply carries no text content.
    """
    response = client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=messages,
    )
    # ``content`` is nullable in the response schema; normalise it to ""
    # so this helper never fails with ``None.strip()``.
    content = response.choices[0].message.content
    return (content or "").strip()
|
|
|
def _load_name_mapping() -> Dict[str, str]:
    """Load the name-to-email mapping from ``NAME_MAPPING_FILE``.

    Returns:
        The mapping stored in the JSON file, or an empty dict when the file
        is missing, unreadable, undecodable, not valid JSON, or does not
        hold a JSON object.
    """
    try:
        # Opening directly makes a separate existence check unnecessary:
        # a missing file surfaces as OSError and is handled below.
        with open(NAME_MAPPING_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError as well as
        # UnicodeDecodeError raised for files with invalid bytes.
        return {}
    # Callers index the result by key, so a list/scalar payload is unusable.
    return data if isinstance(data, dict) else {}
|
|
|
def _save_name_mapping(mapping: Dict[str, str]):
    """Write the name-to-email mapping to ``NAME_MAPPING_FILE`` as JSON.

    Args:
        mapping: Mapping to persist; it replaces the file's previous content.

    Raises:
        TypeError, ValueError: If ``mapping`` cannot be encoded as JSON.
        OSError: If the file cannot be opened or written.
    """
    # Serialise first: mode "w" truncates the file on open, so encoding
    # inside the ``with`` block would wipe the existing mapping whenever
    # json raised part-way through.
    payload = json.dumps(mapping, indent=2)
    with open(NAME_MAPPING_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
|
|
|
def _load_email_db() -> Dict:
    """Load the email database from ``EMAIL_DB_FILE``.

    Returns:
        The JSON object stored in the file, or an empty dict when the file
        is missing, unreadable, undecodable, not valid JSON, or does not
        hold a JSON object.
    """
    try:
        # Opening directly makes a separate existence check unnecessary:
        # a missing file surfaces as OSError and is handled below.
        with open(EMAIL_DB_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError as well as
        # UnicodeDecodeError raised for files with invalid bytes.
        return {}
    # The declared return type is a dict; reject any other JSON payload.
    return data if isinstance(data, dict) else {}
|
|
|
def _save_email_db(db: Dict):
    """Write the email database to ``EMAIL_DB_FILE`` as JSON.

    Args:
        db: Database contents to persist; replaces the file's previous
            content.

    Raises:
        TypeError, ValueError: If ``db`` cannot be encoded as JSON.
        OSError: If the file cannot be opened or written.
    """
    # Serialise first: mode "w" truncates the file on open, so encoding
    # inside the ``with`` block would wipe the existing database whenever
    # json raised part-way through.
    payload = json.dumps(db, indent=2)
    with open(EMAIL_DB_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
|
|
|
def extract_query_info(query: str) -> Dict:
    """Extract the sender intent and date range from a user query via the LLM.

    Args:
        query: Free-text request, e.g. "emails from amazon last month".

    Returns:
        The JSON object decoded from the model's reply. The prompt asks for
        the keys ``sender_intent``, ``start_date`` and ``end_date`` (dates
        in DD-MMM-YYYY format); the reply is not validated beyond decoding.

    Raises:
        json.JSONDecodeError: If the reply contains no parseable JSON.
    """
    # Today's date is given to the model so it can resolve relative ranges.
    today_str = datetime.today().strftime("%d-%b-%Y")

    # NOTE(review): the address in the third example reads
    # "[email protected]", which looks like scraper redaction of a real
    # address -- restore the intended example address.
    system_prompt = f"""
    You are an email query parser. Today is {today_str}.

    Given a user query, extract:
    1. sender_intent: The person/entity they want emails from (could be name or email)
    2. start_date and end_date: Date range in DD-MMM-YYYY format

    For relative dates:
    - "last week" = 7 days ago to today
    - "yesterday" = yesterday only
    - "last month" = 30 days ago to today
    - "last 3 days" = 3 days ago to today

    Examples:
    - "emails from dev agarwal last week" → sender_intent: "dev agarwal"
    - "show amazon emails from last month" → sender_intent: "amazon"
    - "emails from [email protected] yesterday" → sender_intent: "[email protected]"

    Return ONLY valid JSON:
    {{
        "sender_intent": "extracted name or email",
        "start_date": "DD-MMM-YYYY",
        "end_date": "DD-MMM-YYYY"
    }}
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]

    reply = _llm(messages)

    # Models frequently wrap the JSON in a Markdown fence or add a short
    # preamble despite the instruction. Keep only the outermost {...} span
    # so json.loads sees just the object; a bare JSON object is unchanged.
    start = reply.find("{")
    end = reply.rfind("}")
    if start != -1 and end > start:
        reply = reply[start:end + 1]

    return json.loads(reply)
|
|
|
def resolve_sender_email(sender_intent: str) -> Tuple[Optional[str], bool]:
    """Resolve a sender intent (a name or an address) to an email address.

    Resolution order:
      1. An intent containing "@" is treated as an address and returned
         lower-cased and stripped.
      2. Exact match of the normalised intent against the stored mapping.
      3. Substring match in either direction against the stored names.

    Args:
        sender_intent: Name or email address extracted from the query.
            ``None`` or blank input is treated as "nothing to resolve".

    Returns:
        ``(email_address, needs_user_input)``: the resolved address and
        ``False``, or ``(None, True)`` when the user must supply an address.
    """
    normalized_intent = (sender_intent or "").lower().strip()

    # An empty string is a substring of every stored name, so without this
    # guard a blank intent would resolve to the first mapping entry.
    if not normalized_intent:
        return None, True

    # Already an address: no lookup needed.
    if "@" in normalized_intent:
        return normalized_intent, False

    name_mapping = _load_name_mapping()

    # Exact match on the normalised name.
    if normalized_intent in name_mapping:
        return name_mapping[normalized_intent], False

    # Partial match: the intent is part of a stored name or vice versa.
    for name, email in name_mapping.items():
        known = name.lower().strip()
        # Skip blank keys; they would match every intent.
        if known and (normalized_intent in known or known in normalized_intent):
            return email, False

    return None, True
|
|
|
def store_name_email_mapping(name: str, email: str):
    """Persist a name-to-email association.

    Both values are lower-cased and stripped of surrounding whitespace
    before being stored; an existing entry for the same name is replaced.

    Args:
        name: Display name to use as the lookup key.
        email: Email address to associate with ``name``.
    """
    known = _load_name_mapping()
    address = email.lower().strip()
    key = name.lower().strip()
    known[key] = address
    _save_name_mapping(known)
|
|
|
def parse_email_query(query: str) -> Dict:
    """Parse an email query and report what the caller should do next.

    Args:
        query: Free-text request from the user.

    Returns:
        A dict whose ``status`` is one of:
          * ``"ready_to_scrape"`` - sender resolved; includes
            ``resolved_email`` plus the extracted intent and date range.
          * ``"need_email_input"`` - no address known for the sender;
            includes the extracted intent and date range.
          * ``"error"`` - any exception raised while parsing; includes the
            exception text under ``error``.
        Every variant carries a human-readable ``message``.
    """
    try:
        parsed = extract_query_info(query)
        sender_intent = parsed["sender_intent"]
        start_date = parsed["start_date"]
        end_date = parsed["end_date"]

        email_address, needs_input = resolve_sender_email(sender_intent)

        # Sender resolved: everything needed for scraping is available.
        if not needs_input:
            return {
                "status": "ready_to_scrape",
                "sender_intent": sender_intent,
                "resolved_email": email_address,
                "start_date": start_date,
                "end_date": end_date,
                "message": f"Found email: {email_address} for '{sender_intent}'",
            }

        # Unknown sender: hand back what was extracted and ask for an address.
        return {
            "status": "need_email_input",
            "sender_intent": sender_intent,
            "start_date": start_date,
            "end_date": end_date,
            "message": f"I don't have an email address for '{sender_intent}'. Please provide the email address.",
        }
    except Exception as e:
        # Boundary handler: callers receive a status dict, never an exception.
        return {
            "status": "error",
            "error": str(e),
            "message": "Failed to parse query",
        }
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: push a few sample queries through the parser and
    # print each structured result.
    sample_queries = (
        "Show me emails from dev agarwal last week",
        "emails from amazon in the last month",
        "get [email protected] emails yesterday",
        "emails from new person last 3 days",
    )

    for sample in sample_queries:
        print(f"\nQuery: {sample}")
        outcome = parse_email_query(sample)
        print(f"Result: {json.dumps(outcome, indent=2)}")