Da-123 committed on
Commit
f61da97
·
verified ·
1 Parent(s): b0ee7e5
.gitignore CHANGED
@@ -1,5 +1,6 @@
1
  .env
2
  myenv/
 
3
 
4
  __pycache__/
5
  *.py[cod]
 
1
  .env
2
  myenv/
3
+ venv/
4
 
5
  __pycache__/
6
  *.py[cod]
agentic_implementation/email_scraper.py CHANGED
@@ -19,17 +19,102 @@ load_dotenv()
19
  # Email credentials
20
  APP_PASSWORD = os.getenv("APP_PASSWORD")
21
  EMAIL_ID = os.getenv("EMAIL_ID")
 
22
  EMAIL_DB_FILE = "email_db.json"
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def _imap_connect():
25
  """Connect to Gmail IMAP server"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  try:
 
27
  mail = imaplib.IMAP4_SSL("imap.gmail.com")
28
- mail.login(EMAIL_ID, APP_PASSWORD)
29
- mail.select('"[Gmail]/All Mail"')
 
 
 
 
 
 
 
 
 
30
  return mail
 
 
 
 
 
 
 
 
 
31
  except Exception as e:
32
- print(f"IMAP connection failed: {e}")
 
 
 
 
33
  raise
34
 
35
  def _email_to_clean_text(msg):
@@ -249,6 +334,123 @@ def scrape_emails_from_sender(sender_email: str, start_date: str, end_date: str)
249
  print(f"Email scraping failed: {e}")
250
  raise
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  # Test the scraper
253
  if __name__ == "__main__":
254
  # Test scraping
 
19
  # Email credentials
20
  APP_PASSWORD = os.getenv("APP_PASSWORD")
21
  EMAIL_ID = os.getenv("EMAIL_ID")
22
+ print("EMAIL_ID: ", EMAIL_ID)
23
  EMAIL_DB_FILE = "email_db.json"
24
 
25
def validate_email_setup():
    """Validate the local email configuration and report problems on stdout.

    The check passes only when a ``.env`` file exists in the current working
    directory, the module-level ``EMAIL_ID`` looks like a Gmail address, the
    module-level ``APP_PASSWORD`` is 16 characters without spaces, and the
    ``OPENAI_API_KEY`` environment variable is set.

    Returns:
        bool: True when no issue was found, False otherwise.
    """
    print("=== Email Setup Validation ===")

    # Check .env file existence
    env_file_exists = os.path.exists('.env')
    print(f".env file exists: {'✅ Yes' if env_file_exists else '❌ No'}")

    if not env_file_exists:
        print("❌ No .env file found! Create one with:")
        print(" EMAIL_ID=your_email@gmail.com")
        print(" APP_PASSWORD=your_16_char_app_password")
        print(" OPENAI_API_KEY=your_openai_key")
        return False

    # Check environment variables
    issues = []

    if not EMAIL_ID:
        issues.append("EMAIL_ID not set or empty")
    elif '@' not in EMAIL_ID:
        issues.append("EMAIL_ID doesn't look like an email address")
    elif not EMAIL_ID.endswith('@gmail.com'):
        issues.append("EMAIL_ID should be a Gmail address (@gmail.com)")

    if not APP_PASSWORD:
        issues.append("APP_PASSWORD not set or empty")
    elif ' ' in APP_PASSWORD:
        # Tested before the length check: a 16-character password pasted with
        # separator spaces is longer than 16, so the length message would
        # otherwise hide this more actionable hint.
        issues.append("APP_PASSWORD should not contain spaces (remove spaces from app password)")
    elif len(APP_PASSWORD) != 16:
        issues.append(f"APP_PASSWORD should be 16 characters, got {len(APP_PASSWORD)}")

    if not os.getenv("OPENAI_API_KEY"):
        issues.append("OPENAI_API_KEY not set (needed for query processing)")

    if issues:
        print("❌ Issues found:")
        for issue in issues:
            print(f" - {issue}")
        return False

    print("✅ All credentials look good!")
    return True
68
+
69
def _discard_imap_connection(mail):
    """Best-effort release of an IMAP connection that failed during setup."""
    if mail is None:
        return
    try:
        mail.logout()
    except (imaplib.IMAP4.error, OSError):
        # LOGOUT could not be sent; close the socket directly instead.
        try:
            mail.shutdown()
        except OSError:
            pass


def _imap_connect():
    """Connect to the Gmail IMAP server and select "[Gmail]/All Mail".

    Prints step-by-step diagnostics to stdout. Credentials are read from the
    module-level ``EMAIL_ID`` and ``APP_PASSWORD`` values.

    Returns:
        imaplib.IMAP4_SSL: a logged-in connection with the mailbox selected.

    Raises:
        Exception: when ``EMAIL_ID`` or ``APP_PASSWORD`` is missing.
        imaplib.IMAP4.error: when login fails or the mailbox cannot be
            selected.
        Exception: any other connection error is re-raised after printing
            troubleshooting hints.
    """
    print("=== IMAP Connection Debug ===")

    # Check if environment variables are loaded
    print(f"EMAIL_ID loaded: {'✅ Yes' if EMAIL_ID else '❌ No (None/Empty)'}")
    print(f"APP_PASSWORD loaded: {'✅ Yes' if APP_PASSWORD else '❌ No (None/Empty)'}")

    if EMAIL_ID:
        print(f"Email ID: {EMAIL_ID[:5]}...@{EMAIL_ID.split('@')[1] if '@' in EMAIL_ID else 'INVALID'}")
    if APP_PASSWORD:
        print(f"App Password length: {len(APP_PASSWORD)} characters")
        print(f"App Password format: {'✅ Looks correct (16 chars)' if len(APP_PASSWORD) == 16 else f'❌ Expected 16 chars, got {len(APP_PASSWORD)}'}")

    if not EMAIL_ID or not APP_PASSWORD:
        error_msg = "Missing credentials in environment variables!"
        print(f"❌ {error_msg}")
        raise Exception(error_msg)

    mail = None
    try:
        print("🔄 Attempting IMAP SSL connection to imap.gmail.com:993...")
        mail = imaplib.IMAP4_SSL("imap.gmail.com")
        print("✅ SSL connection established")

        print("🔄 Attempting login...")
        result = mail.login(EMAIL_ID, APP_PASSWORD)
        print(f"✅ Login successful: {result}")

        print("🔄 Selecting mailbox: [Gmail]/All Mail...")
        result = mail.select('"[Gmail]/All Mail"')
        # select() reports a refused mailbox through its status instead of
        # raising, so check it before announcing success.
        if result[0] != 'OK':
            raise imaplib.IMAP4.error(f"Could not select [Gmail]/All Mail: {result}")
        print(f"✅ Mailbox selected: {result}")

        print("=== IMAP Connection Successful ===")
        return mail

    except imaplib.IMAP4.error as e:
        print(f"❌ IMAP Error: {e}")
        print("💡 Possible causes:")
        print(" - App Password is incorrect or expired")
        print(" - 2FA not enabled on Gmail account")
        print(" - IMAP access not enabled in Gmail settings")
        print(" - Gmail account locked or requires security verification")
        # Do not leave a half-initialised connection open for the caller.
        _discard_imap_connection(mail)
        raise
    except Exception as e:
        print(f" Connection Error: {e}")
        print("💡 Possible causes:")
        print(" - Network connectivity issues")
        print(" - Gmail IMAP server temporarily unavailable")
        print(" - Firewall blocking IMAP port 993")
        _discard_imap_connection(mail)
        raise
119
 
120
  def _email_to_clean_text(msg):
 
334
  print(f"Email scraping failed: {e}")
335
  raise
336
 
337
def _imap_quoted(value):
    """Escape backslashes and double quotes for use inside an IMAP quoted string."""
    return value.replace("\\", "\\\\").replace('"', '\\"')


def scrape_emails_by_text_search(keyword: str, start_date: str, end_date: str) -> List[Dict]:
    """
    Scrape emails containing a specific keyword (like company name) within date range.

    Runs three IMAP searches (FROM, SUBJECT and BODY) for the keyword, merges
    the matching ids, fetches each message, and keeps those whose subject,
    sender or cleaned content really contains the keyword (case-insensitive)
    and whose date falls in the requested range.

    Args:
        keyword: Text to look for in sender, subject or body.
        start_date: First day of the range, formatted "DD-Mon-YYYY".
        end_date: Last day of the range (inclusive), formatted "DD-Mon-YYYY".

    Returns:
        A list of dicts with keys date, time, subject, from, content
        (truncated to 2000 characters) and message_id, newest first.

    Raises:
        Exception: when setup validation fails, or when connecting or
            searching fails (re-raised after logging).
    """
    print(f"Searching emails containing '{keyword}' between {start_date} and {end_date}")

    # Validate setup first
    if not validate_email_setup():
        raise Exception("Email setup validation failed. Please check your .env file and credentials.")

    mail = None
    try:
        mail = _imap_connect()

        # Prepare IMAP search criteria with text search
        start_imap = _date_to_imap_format(start_date)
        # Add one day to end_date for BEFORE criteria (IMAP BEFORE is exclusive)
        end_dt = datetime.strptime(end_date, "%d-%b-%Y") + timedelta(days=1)
        end_imap = end_dt.strftime("%d-%b-%Y")

        # The keyword is embedded in a quoted string, so a raw quote or
        # backslash would otherwise corrupt the search command.
        safe_keyword = _imap_quoted(keyword)
        date_clause = f'SINCE "{start_imap}" BEFORE "{end_imap}"'
        search_criteria_list = [
            f'{field} "{safe_keyword}" {date_clause}'
            for field in ("FROM", "SUBJECT", "BODY")
        ]

        all_email_ids = set()

        # Search with multiple criteria to catch emails containing the keyword
        for search_criteria in search_criteria_list:
            try:
                print(f"IMAP search: {search_criteria}")
                status, data = mail.search(None, search_criteria)
                if status == 'OK' and data[0]:
                    email_ids = data[0].split()
                    all_email_ids.update(email_ids)
                    print(f"Found {len(email_ids)} emails with this criteria")
            except Exception as e:
                # One failing criterion should not abort the other searches.
                print(f"Search criteria failed: {search_criteria}, error: {e}")
                continue

        print(f"Total unique emails found: {len(all_email_ids)}")
        scraped_emails = []
        keyword_lower = keyword.lower()

        # Process each email
        for i, email_id in enumerate(all_email_ids):
            try:
                print(f"Processing email {i+1}/{len(all_email_ids)}")

                # Fetch email
                status, msg_data = mail.fetch(email_id, "(RFC822)")
                if status != 'OK':
                    continue

                # Parse email
                msg = message_from_bytes(msg_data[0][1])

                # Extract information
                subject = msg.get("Subject", "No Subject")
                from_header = msg.get("From", "Unknown Sender")
                content = _email_to_clean_text(msg)

                # Check if the keyword is actually present (case-insensitive)
                if not any(keyword_lower in text.lower() for text in [subject, from_header, content]):
                    continue

                # Parse date; fall back to today's date when the header is
                # missing or cannot be interpreted.
                email_date = datetime.today().strftime("%d-%b-%Y")
                email_time = "00:00:00"
                date_header = msg.get("Date", "")
                if date_header:
                    try:
                        dt_obj = parsedate_to_datetime(date_header)
                        # Convert to IST
                        ist_dt = dt_obj.astimezone(ZoneInfo("Asia/Kolkata"))
                        email_date = ist_dt.strftime("%d-%b-%Y")
                        email_time = ist_dt.strftime("%H:%M:%S")
                    except Exception:
                        # Keep the fallback values assigned above.
                        pass

                # Double-check date range
                if not _is_date_in_range(email_date, start_date, end_date):
                    continue

                # Get message ID for deduplication
                message_id = msg.get("Message-ID", f"missing-{email_id.decode()}")

                scraped_emails.append({
                    "date": email_date,
                    "time": email_time,
                    "subject": subject,
                    "from": from_header,
                    "content": content[:2000],  # Limit content length
                    "message_id": message_id
                })

            except Exception as e:
                print(f"Error processing email {email_id}: {e}")
                continue

        # Sort by date (newest first)
        scraped_emails.sort(
            key=lambda x: datetime.strptime(f"{x['date']} {x['time']}", "%d-%b-%Y %H:%M:%S"),
            reverse=True,
        )

        print(f"Successfully processed {len(scraped_emails)} emails containing '{keyword}'")
        return scraped_emails

    except Exception as e:
        print(f"Email text search failed: {e}")
        raise
    finally:
        # Always release the connection, including on failure paths.
        if mail is not None:
            try:
                mail.logout()
            except Exception as logout_error:
                print(f"IMAP logout failed: {logout_error}")
453
+
454
  # Test the scraper
455
  if __name__ == "__main__":
456
  # Test scraping
agentic_implementation/name_mapping.json CHANGED
@@ -1,3 +1,4 @@
1
  {
2
- "dev agarwal": "[email protected]"
 
3
  }
 
1
  {
2
+ "dev agarwal": "[email protected]",
3
+ "axis bank": "[email protected]"
4
  }
agentic_implementation/re_act.py CHANGED
@@ -26,7 +26,7 @@ NAME_MAPPING_FILE = "name_mapping.json"
26
  SYSTEM_PLAN_PROMPT = """
27
  You are an email assistant agent. You have access to the following actions:
28
 
29
- • fetch_emails - fetch emails based on sender and date criteria (includes date extraction)
30
  • show_email - display specific email content
31
  • analyze_emails - analyze email patterns or content
32
  • draft_reply - create a reply to an email
@@ -44,11 +44,11 @@ When the user gives you a query, output _only_ valid JSON of this form:
44
  }
45
 
46
  Rules:
47
- - Use "fetch_emails" when you need to retrieve emails (it automatically handles date extraction)
48
  - The final entry _must_ be "done"
49
  - If no tool is needed, return `{"plan":["done"]}`
50
 
51
- Example: For "show me emails from dev today" → ["fetch_emails", "done"]
52
  """
53
 
54
  SYSTEM_VALIDATOR_TEMPLATE = """
@@ -182,31 +182,15 @@ def think(
182
  ) -> Tuple[bool, Optional[PlanStep], Optional[str]]:
183
  """
184
  Fill in parameters or skip based on the action:
185
- - fetch_emails: extract sender and pass the raw query for date extraction
186
  - others: ask the LLM validator for params
187
 
188
  Returns: (should_execute, updated_step, user_prompt_if_needed)
189
  """
190
- # 1) fetch_emails → extract sender and pass query for internal date extraction
191
  if step.action == "fetch_emails":
192
- # Extract sender using LLM
193
- sender_info = extract_sender_info(user_query)
194
- sender_intent = sender_info.get("sender_intent", "")
195
-
196
- if not sender_intent:
197
- return False, None, None
198
-
199
- # Resolve sender to email address
200
- email_address, needs_input = resolve_sender_email(sender_intent)
201
-
202
- if needs_input:
203
- # Need user input for email address
204
- prompt_msg = f"I don't have an email address for '{sender_intent}'. Please provide the email address:"
205
- return False, None, prompt_msg
206
-
207
  params = FetchEmailsParams(
208
- email=email_address,
209
- query=user_query # Pass the full query for date extraction
210
  )
211
  return True, PlanStep(action="fetch_emails", parameters=params), None
212
 
 
26
  SYSTEM_PLAN_PROMPT = """
27
  You are an email assistant agent. You have access to the following actions:
28
 
29
+ • fetch_emails - fetch emails using text search with sender keywords and date extraction (e.g., "swiggy emails last week")
30
  • show_email - display specific email content
31
  • analyze_emails - analyze email patterns or content
32
  • draft_reply - create a reply to an email
 
44
  }
45
 
46
  Rules:
47
+ - Use "fetch_emails" for text-based email search (automatically extracts sender keywords and dates)
48
  - The final entry _must_ be "done"
49
  - If no tool is needed, return `{"plan":["done"]}`
50
 
51
+ Example: For "show me emails from swiggy today" → ["fetch_emails", "done"]
52
  """
53
 
54
  SYSTEM_VALIDATOR_TEMPLATE = """
 
182
  ) -> Tuple[bool, Optional[PlanStep], Optional[str]]:
183
  """
184
  Fill in parameters or skip based on the action:
185
+ - fetch_emails: pass the raw query for text-based search and date extraction
186
  - others: ask the LLM validator for params
187
 
188
  Returns: (should_execute, updated_step, user_prompt_if_needed)
189
  """
190
+ # 1) fetch_emails → pass the full query for text-based search and date extraction
191
  if step.action == "fetch_emails":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  params = FetchEmailsParams(
193
+ query=user_query # Pass the full query for keyword and date extraction
 
194
  )
195
  return True, PlanStep(action="fetch_emails", parameters=params), None
196
 
agentic_implementation/schemas.py CHANGED
@@ -6,8 +6,7 @@ from typing import List, Literal, Optional, Union
6
 
7
 
8
  class FetchEmailsParams(BaseModel):
9
- email: str
10
- query: str # Changed from start_date/end_date to query for internal date extraction
11
 
12
 
13
  class ShowEmailParams(BaseModel):
 
6
 
7
 
8
  class FetchEmailsParams(BaseModel):
9
+ query: str # Natural language query with sender and date info (e.g., "show me mails for last week from swiggy")
 
10
 
11
 
12
  class ShowEmailParams(BaseModel):
agentic_implementation/tools.py CHANGED
@@ -6,8 +6,8 @@ from schemas import (
6
  SendReplyParams,
7
  )
8
  from typing import Any, Dict
9
- from email_scraper import scrape_emails_from_sender, _load_email_db, _save_email_db, _is_date_in_range
10
- from datetime import datetime
11
  from typing import List
12
  from openai import OpenAI
13
  import json
@@ -22,40 +22,48 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
22
  client = OpenAI(api_key=OPENAI_API_KEY)
23
 
24
 
25
- def extract_date_range(query: str) -> Dict[str, str]:
26
  """
27
- Use an LLM to extract a date range from a user query.
28
- Returns {"start_date":"DD-MMM-YYYY","end_date":"DD-MMM-YYYY"}.
29
  """
30
  today_str = datetime.today().strftime("%d-%b-%Y")
 
 
31
  system_prompt = f"""
32
- You are a date‐range extractor. Today is {today_str}.
33
-
34
- Given a user query (in natural language), return _only_ valid JSON with:
35
- {{
36
- "start_date": "DD-MMM-YYYY",
37
- "end_date": "DD-MMM-YYYY"
38
- }}
39
-
40
- Interpret relative dates as:
41
- - "today" → {today_str} to {today_str}
42
- - "yesterday" 1 day ago to 1 day ago
43
- - "last week" → 7 days ago to {today_str}
44
- - "last month" → 30 days ago to {today_str}
45
- - "last N days" → N days ago to {today_str}
 
 
 
 
46
 
47
  Examples:
48
- - "emails from dev agarwal last week"
49
- → {{ "start_date": "01-Jun-2025", "end_date": "{today_str}" }}
50
- - "show me emails yesterday"
51
- → {{ "start_date": "06-Jun-2025", "end_date": "06-Jun-2025" }}
 
 
52
 
53
  Return _only_ the JSON object—no extra text.
54
  """
55
 
56
  messages = [
57
- {"role": "system", "content": system_prompt},
58
- {"role": "user", "content": query}
59
  ]
60
  resp = client.chat.completions.create(
61
  model="gpt-4o-mini",
@@ -73,31 +81,58 @@ Return _only_ the JSON object—no extra text.
73
  return json.loads(content[start:end])
74
 
75
 
76
- def fetch_emails(email: str, query: str) -> Dict:
77
  """
78
- Fetch emails from a sender within a date range extracted from the query.
79
- Now returns both date info and emails.
80
 
81
  Args:
82
- email: The sender's email address
83
- query: The original user query (for date extraction)
84
 
85
  Returns:
86
- Dict with date_info and emails
87
  """
88
- # Extract date range from query
89
- date_info = extract_date_range(query)
90
- start_date = date_info.get("start_date")
91
- end_date = date_info.get("end_date")
 
 
 
 
 
 
92
 
93
- # Fetch emails using the existing scraper
94
- emails = scrape_emails_from_sender(email, start_date, end_date)
 
 
 
 
 
95
 
96
- # Return both date info and emails
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  return {
98
- "date_info": date_info,
99
- "emails": emails,
100
- "email_count": len(emails)
 
101
  }
102
 
103
 
@@ -141,18 +176,34 @@ def analyze_emails(emails: List[Dict]) -> Dict:
141
  "insights": [str, ...] # list of key observations or stats
142
  }
143
  """
144
- # 1) Prepare the email payload
145
- emails_payload = json.dumps(emails, ensure_ascii=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  # 2) Build the LLM prompt
148
  system_prompt = """
149
  You are an expert email analyst. You will be given a JSON array of email objects,
150
- each with keys: date, time, subject, content, message_id.
151
 
152
  Your job is to produce _only_ valid JSON with two fields:
153
  1. summary: a 1–2 sentence high-level overview of these emails.
154
  2. insights: a list of 3–5 bullet-style observations or statistics
155
- (e.g. "2 job offers found", "overall positive tone", "next action: reply").
 
 
156
 
157
  Output exactly:
158
 
 
6
  SendReplyParams,
7
  )
8
  from typing import Any, Dict
9
+ from email_scraper import scrape_emails_from_sender, scrape_emails_by_text_search, _load_email_db, _save_email_db, _is_date_in_range
10
+ from datetime import datetime, timedelta
11
  from typing import List
12
  from openai import OpenAI
13
  import json
 
22
  client = OpenAI(api_key=OPENAI_API_KEY)
23
 
24
 
25
+ def extract_query_info(query: str) -> Dict[str, str]:
26
  """
27
+ Use an LLM to extract sender information and date range from a user query.
28
+ Returns {"sender_keyword": "company/sender name", "start_date":"DD-MMM-YYYY","end_date":"DD-MMM-YYYY"}.
29
  """
30
  today_str = datetime.today().strftime("%d-%b-%Y")
31
+ five_days_ago = (datetime.today() - timedelta(days=5)).strftime("%d-%b-%Y")
32
+
33
  system_prompt = f"""
34
+ You are a query parser for email search. Today is {today_str}.
35
+
36
+ Given a user query, extract the sender/company keyword and date range. Return _only_ valid JSON with:
37
+ {{
38
+ "sender_keyword": "keyword or company name to search for",
39
+ "start_date": "DD-MMM-YYYY",
40
+ "end_date": "DD-MMM-YYYY"
41
+ }}
42
+
43
+ Rules:
44
+ 1. Extract sender keywords from phrases like "from swiggy", "swiggy emails", "mails from amazon", etc.
45
+ 2. If no time is mentioned, use last 5 days: {five_days_ago} to {today_str}
46
+ 3. Interpret relative dates as:
47
+ - "today" → {today_str} to {today_str}
48
+ - "yesterday" → 1 day ago to 1 day ago
49
+ - "last week" → 7 days ago to {today_str}
50
+ - "last month" → 30 days ago to {today_str}
51
+ - "last N days" → N days ago to {today_str}
52
 
53
  Examples:
54
+ - "show me mails for last week from swiggy"
55
+ → {{"sender_keyword": "swiggy", "start_date": "01-Jun-2025", "end_date": "{today_str}"}}
56
+ - "emails from amazon yesterday"
57
+ → {{"sender_keyword": "amazon", "start_date": "06-Jun-2025", "end_date": "06-Jun-2025"}}
58
+ - "show flipkart emails"
59
+ → {{"sender_keyword": "flipkart", "start_date": "{five_days_ago}", "end_date": "{today_str}"}}
60
 
61
  Return _only_ the JSON object—no extra text.
62
  """
63
 
64
  messages = [
65
+ {"role": "system", "content": system_prompt},
66
+ {"role": "user", "content": query}
67
  ]
68
  resp = client.chat.completions.create(
69
  model="gpt-4o-mini",
 
81
  return json.loads(content[start:end])
82
 
83
 
84
def fetch_emails(query: str) -> Dict:
    """
    Fetch emails based on a natural language query that contains sender information and date range.

    The query is parsed with extract_query_info, the mailbox is searched with
    scrape_emails_by_text_search, and the matches are passed to
    analyze_emails. Only per-email metadata is returned, not the content.

    Args:
        query: The natural language query (e.g., "show me mails for last week from swiggy")

    Returns:
        Dict with query_info, email_summary, analysis, and email_count
    """
    # Extract sender keyword and date range from query
    query_info = extract_query_info(query)

    # The parser may omit a field or return null for it. Fall back to an
    # empty keyword and the last 5 days, the same default window the parser
    # prompt describes, so the scraper never receives None.
    today = datetime.today()
    sender_keyword = query_info.get("sender_keyword") or ""
    start_date = query_info.get("start_date") or (today - timedelta(days=5)).strftime("%d-%b-%Y")
    end_date = query_info.get("end_date") or today.strftime("%d-%b-%Y")
    # Report the values that were actually used for the search.
    query_info = {
        **query_info,
        "sender_keyword": sender_keyword,
        "start_date": start_date,
        "end_date": end_date,
    }

    print(f"Searching for emails with keyword '{sender_keyword}' between {start_date} and {end_date}")

    # Use the new text-based search function
    full_emails = scrape_emails_by_text_search(sender_keyword, start_date, end_date)

    if not full_emails:
        return {
            "query_info": query_info,
            "email_summary": [],
            "analysis": {"summary": f"No emails found for '{sender_keyword}' in the specified date range.", "insights": []},
            "email_count": 0
        }

    # Create summary version without full content to keep the response clean
    email_summary = [
        {
            "date": email.get("date"),
            "time": email.get("time"),
            "subject": email.get("subject"),
            "from": email.get("from", "Unknown Sender"),
            "message_id": email.get("message_id"),
        }
        for email in full_emails
    ]

    # Auto-analyze the emails for insights
    analysis = analyze_emails(full_emails)  # Use full emails for analysis but don't return them

    # Return summary info with analysis
    return {
        "query_info": query_info,
        "email_summary": email_summary,
        "analysis": analysis,
        "email_count": len(full_emails)
    }
137
 
138
 
 
176
  "insights": [str, ...] # list of key observations or stats
177
  }
178
  """
179
+ if not emails:
180
+ return {"summary": "No emails to analyze.", "insights": []}
181
+
182
+ # 1) Create a simplified email summary for analysis (without full content)
183
+ simplified_emails = []
184
+ for email in emails:
185
+ simplified_email = {
186
+ "date": email.get("date"),
187
+ "time": email.get("time"),
188
+ "subject": email.get("subject"),
189
+ "from": email.get("from", "Unknown Sender"),
190
+ "content_preview": email.get("content", "")[:200] + "..." if email.get("content") else ""
191
+ }
192
+ simplified_emails.append(simplified_email)
193
+
194
+ emails_payload = json.dumps(simplified_emails, ensure_ascii=False)
195
 
196
  # 2) Build the LLM prompt
197
  system_prompt = """
198
  You are an expert email analyst. You will be given a JSON array of email objects,
199
+ each with keys: date, time, subject, from, content_preview.
200
 
201
  Your job is to produce _only_ valid JSON with two fields:
202
  1. summary: a 1–2 sentence high-level overview of these emails.
203
  2. insights: a list of 3–5 bullet-style observations or statistics
204
+ (e.g. "5 emails from Swiggy", "mostly promotional content", "received over 3 days").
205
+
206
+ Focus on metadata like senders, subjects, dates, and patterns rather than detailed content analysis.
207
 
208
  Output exactly:
209