#!/usr/bin/env python3
"""
Query Parser with Intent Classification and Name-to-Email Resolution

Turns a free-form request such as "emails from dev agarwal last week" into a
structured dict: the sender the user means, the date range, and either a
resolved email address or a request for the user to supply one.
"""

import json
import os
from datetime import datetime, timedelta
from typing import Dict, Optional, Tuple

from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables (e.g. OPENAI_API_KEY) from a .env file
load_dotenv()

# Initialize OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# File paths (relative to the current working directory)
NAME_MAPPING_FILE = "name_mapping.json"
EMAIL_DB_FILE = "email_db.json"


def _llm(messages, model="gpt-4o-mini", temperature=0):
    """Call the OpenAI chat completions API and return the reply text.

    Args:
        messages: Chat messages in the OpenAI format
            (list of {"role": ..., "content": ...} dicts).
        model: Model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        removed; an empty string if the API returned no text content.
    """
    rsp = client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=messages,
    )
    # `content` can be None (e.g. refusals); avoid AttributeError on .strip()
    content = rsp.choices[0].message.content
    return (content or "").strip()


def _load_json_dict(path: str) -> Dict:
    """Read a JSON object from *path*.

    Returns an empty dict when the file is missing, unreadable, not valid
    JSON, or when its top-level value is not a JSON object.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file.
        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}


def _save_json(path: str, data: Dict) -> None:
    """Write *data* to *path* as indented JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)


def _load_name_mapping() -> Dict[str, str]:
    """Load name to email mapping from JSON file ({} if unavailable)."""
    return _load_json_dict(NAME_MAPPING_FILE)


def _save_name_mapping(mapping: Dict[str, str]):
    """Save name to email mapping to JSON file"""
    _save_json(NAME_MAPPING_FILE, mapping)


def _load_email_db() -> Dict:
    """Load email database from JSON file ({} if unavailable)."""
    return _load_json_dict(EMAIL_DB_FILE)


def _save_email_db(db: Dict):
    """Save email database"""
    _save_json(EMAIL_DB_FILE, db)


def _extract_json_object(text: str) -> str:
    """Return the outermost ``{...}`` span of *text*.

    Chat models sometimes wrap the requested JSON in Markdown code fences or
    add a sentence around it; slicing from the first "{" to the last "}"
    tolerates both. If no such span exists the text is returned unchanged so
    that the caller's JSON parser reports the real error.
    """
    start = text.find("{")
    end = text.rfind("}")
    if start != -1 and end > start:
        return text[start:end + 1]
    return text


def extract_query_info(query: str) -> Dict:
    """
    Extract intent and date range from user query using LLM

    Args:
        query: The user's natural-language request.

    Returns:
        The JSON object produced by the model. The prompt asks for the keys
        "sender_intent", "start_date" and "end_date" (dates formatted as
        DD-MMM-YYYY); their presence is not validated here.

    Raises:
        ValueError: If the model reply is not valid JSON
            (json.JSONDecodeError is a ValueError subclass) or is not a
            JSON object.
    """
    today_str = datetime.today().strftime("%d-%b-%Y")

    system_prompt = f"""
    You are an email query parser. Today is {today_str}.

    Given a user query, extract:
    1. sender_intent: The person/entity they want emails from (could be name or email)
    2. start_date and end_date: Date range in DD-MMM-YYYY format

    For relative dates:
    - "last week" = 7 days ago to today
    - "yesterday" = yesterday only
    - "last month" = 30 days ago to today
    - "last 3 days" = 3 days ago to today

    Examples:
    - "emails from dev agarwal last week" → sender_intent: "dev agarwal"
    - "show amazon emails from last month" → sender_intent: "amazon"
    - "emails from john@company.com yesterday" → sender_intent: "john@company.com"

    Return ONLY valid JSON:
    {{
        "sender_intent": "extracted name or email",
        "start_date": "DD-MMM-YYYY",
        "end_date": "DD-MMM-YYYY"
    }}
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]

    result = _llm(messages)
    parsed = json.loads(_extract_json_object(result))
    if not isinstance(parsed, dict):
        raise ValueError("LLM response is not a JSON object")
    return parsed


def resolve_sender_email(sender_intent: str) -> Tuple[Optional[str], bool]:
    """
    Resolve sender intent to actual email address

    Resolution order: the intent itself if it contains "@", then an exact
    (case-insensitive) name match in the stored mapping, then the first
    stored name that contains, or is contained in, the intent.

    Args:
        sender_intent: A name or an email address.

    Returns: (email_address, needs_user_input)
        ``(email, False)`` when resolved, ``(None, True)`` when the user must
        supply an address (including when the intent is empty or not a
        string).
    """
    if not isinstance(sender_intent, str):
        return None, True

    # Normalize the intent (lowercase, trimmed) for comparison
    normalized_intent = sender_intent.strip().lower()

    # An empty intent is a substring of every name, so it would otherwise
    # "match" the first mapping entry below.
    if not normalized_intent:
        return None, True

    # Check if it's already an email address
    if "@" in normalized_intent:
        return normalized_intent, False

    # Load name mapping
    name_mapping = _load_name_mapping()

    # Check direct match
    if normalized_intent in name_mapping:
        return name_mapping[normalized_intent], False

    # Check partial matches (substring in either direction)
    for name, email in name_mapping.items():
        known_name = name.lower().strip()
        if not known_name:
            # A blank stored key is a substring of any intent; ignore it.
            continue
        if normalized_intent in known_name or known_name in normalized_intent:
            return email, False

    # No match found
    return None, True


def store_name_email_mapping(name: str, email: str):
    """Store new name to email mapping

    Both values are lowercased and trimmed before being written to
    NAME_MAPPING_FILE; an existing entry for the same name is overwritten.
    """
    name_mapping = _load_name_mapping()
    name_mapping[name.lower().strip()] = email.lower().strip()
    _save_name_mapping(name_mapping)


def parse_email_query(query: str) -> Dict:
    """
    Main function to parse email query

    Args:
        query: The user's natural-language request.

    Returns:
        A dict whose "status" is one of:
        - "ready_to_scrape": sender resolved; includes "resolved_email".
        - "need_email_input": no address known for the sender.
        - "error": extraction or resolution raised; includes "error".
        The first two also carry "sender_intent", "start_date", "end_date"
        and a human-readable "message".
    """
    # Boundary of the parser: any failure (API error, malformed model
    # output, missing keys) is reported as an error payload, not raised.
    try:
        # Step 1: Extract intent and dates
        query_info = extract_query_info(query)
        sender_intent = query_info["sender_intent"]
        start_date = query_info["start_date"]
        end_date = query_info["end_date"]

        # Step 2: Resolve sender email
        email_address, needs_input = resolve_sender_email(sender_intent)
    except Exception as e:
        return {
            "status": "error",
            "error": str(e),
            "message": "Failed to parse query"
        }

    if needs_input:
        # Need to ask user for email address
        return {
            "status": "need_email_input",
            "sender_intent": sender_intent,
            "start_date": start_date,
            "end_date": end_date,
            "message": f"I don't have an email address for '{sender_intent}'. Please provide the email address."
        }

    # Ready to proceed with email scraping
    return {
        "status": "ready_to_scrape",
        "sender_intent": sender_intent,
        "resolved_email": email_address,
        "start_date": start_date,
        "end_date": end_date,
        "message": f"Found email: {email_address} for '{sender_intent}'"
    }


# Test the parser
if __name__ == "__main__":
    # Test cases
    test_queries = [
        "Show me emails from dev agarwal last week",
        "emails from amazon in the last month",
        "get john@company.com emails yesterday",
        "emails from new person last 3 days",
    ]

    for query in test_queries:
        print(f"\nQuery: {query}")
        result = parse_email_query(query)
        print(f"Result: {json.dumps(result, indent=2)}")