GuglielmoTor committed on
Commit
f3ff19a
·
verified ·
1 Parent(s): 49c7360

Update eb_agent_module.py

Browse files
Files changed (1) hide show
  1. eb_agent_module.py +39 -3
eb_agent_module.py CHANGED
@@ -351,7 +351,9 @@ class EmployerBrandingAgent:
351
  if not self.all_dataframes or not GEMINI_API_KEY:
352
  logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
353
  return
354
-
 
 
355
  try:
356
  # Configure LiteLLM with Gemini
357
  llm = LiteLLM(
@@ -408,8 +410,11 @@ class EmployerBrandingAgent:
408
  description_parts.append("Key columns: " + "; ".join(column_descriptions))
409
 
410
  # Add specific context for employer branding
 
411
  if name.lower() in ['follower_stats', 'followers']:
412
- description_parts.append("This data tracks LinkedIn company page follower growth and demographics for employer branding analysis.")
 
 
413
  elif name.lower() in ['posts', 'post_stats']:
414
  description_parts.append("This data contains LinkedIn post performance metrics for employer branding content analysis.")
415
  elif name.lower() in ['mentions', 'brand_mentions']:
@@ -445,6 +450,37 @@ class EmployerBrandingAgent:
445
 
446
  def _get_dataframes_summary(self) -> str:
447
  return get_all_schemas_representation(self.all_dataframes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
 
449
  def _build_system_prompt(self) -> str:
450
  """Enhanced system prompt that works with PandasAI integration"""
@@ -544,7 +580,7 @@ class EmployerBrandingAgent:
544
  # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
545
  if len(self.pandas_dfs) == 1:
546
  df = list(self.pandas_dfs.values())[0]
547
- logging.info(f"Using single DataFrame for query: {df}")
548
  pandas_response = df.chat(query)
549
  else:
550
  # For multiple dataframes, use pai.chat with all dfs
 
351
  if not self.all_dataframes or not GEMINI_API_KEY:
352
  logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
353
  return
354
+
355
+ self._preprocess_dataframes_for_pandas_ai()
356
+
357
  try:
358
  # Configure LiteLLM with Gemini
359
  llm = LiteLLM(
 
410
  description_parts.append("Key columns: " + "; ".join(column_descriptions))
411
 
412
  # Add specific context for employer branding
413
+ # Special handling for follower_stats
414
  if name.lower() in ['follower_stats', 'followers']:
415
+ description_parts.append("This data tracks LinkedIn company page follower growth and demographics. For monthly growth data, use the 'extracted_date' column for date-based queries instead of trying to cast 'category_name' as a date.")
416
+ if 'extracted_date' in df.columns:
417
+ description_parts.append("The 'extracted_date' column contains properly formatted dates (YYYY-MM-DD) extracted from category_name for follower_gains_monthly records.")
418
  elif name.lower() in ['posts', 'post_stats']:
419
  description_parts.append("This data contains LinkedIn post performance metrics for employer branding content analysis.")
420
  elif name.lower() in ['mentions', 'brand_mentions']:
 
450
 
451
  def _get_dataframes_summary(self) -> str:
452
  return get_all_schemas_representation(self.all_dataframes)
453
+
454
def _preprocess_dataframes_for_pandas_ai(self):
    """Add an ``extracted_date`` column to follower dataframes before PandasAI analysis.

    Every entry of ``self.all_dataframes`` whose name (case-insensitive) is
    ``follower_stats`` or ``followers`` and that has both a ``category_name``
    and a ``follower_count_type`` column is replaced, under the same key, by
    a copy carrying an extra ``extracted_date`` column:

    * rows whose ``follower_count_type`` equals ``'follower_gains_monthly'``
      and whose ``category_name`` text has the shape ``YYYY-MM-DD`` get that
      text (only the shape is checked, not calendar validity);
    * every other row gets ``None``.

    The dataframe objects originally stored in the mapping are not mutated;
    only the mapping entry is rebound to the augmented copy. Other entries,
    and follower frames lacking either column, are left as they are.

    Returns:
        None. The method works by updating ``self.all_dataframes``.
    """
    # Function-scope import: the original only imported ``re`` locally, so
    # this keeps the block independent of the module's import header.
    import re

    if not self.all_dataframes:
        return

    # Compiled once instead of once per row.
    date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}$')

    # Iterate over a snapshot because entries are rebound inside the loop.
    for name, df in list(self.all_dataframes.items()):
        if name.lower() not in ('follower_stats', 'followers'):
            continue
        if 'category_name' not in df.columns or 'follower_count_type' not in df.columns:
            continue

        # Build the new column as a plain list. Unlike a row-wise
        # ``DataFrame.apply``, this also works for an empty dataframe
        # (``apply(axis=1)`` on an empty frame yields a DataFrame, which
        # cannot be assigned to a single column).
        extracted_dates = [
            category_text
            if count_type == 'follower_gains_monthly' and date_pattern.match(category_text)
            else None
            for count_type, category_text in zip(
                df['follower_count_type'], map(str, df['category_name'])
            )
        ]

        # Work on a copy so the caller's original dataframe stays untouched.
        df_copy = df.copy()
        df_copy['extracted_date'] = extracted_dates

        # Update the dataframe in our collection.
        self.all_dataframes[name] = df_copy

        logging.info(f"Preprocessed {name} dataframe for date handling")
484
 
485
  def _build_system_prompt(self) -> str:
486
  """Enhanced system prompt that works with PandasAI integration"""
 
580
  # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
581
  if len(self.pandas_dfs) == 1:
582
  df = list(self.pandas_dfs.values())[0]
583
+ logging.info(f"Using single DataFrame for query with shape: {df.df.shape}")
584
  pandas_response = df.chat(query)
585
  else:
586
  # For multiple dataframes, use pai.chat with all dfs