Update eb_agent_module.py
eb_agent_module.py  CHANGED  (+39 -3)
@@ -351,7 +351,9 @@ class EmployerBrandingAgent:
         if not self.all_dataframes or not GEMINI_API_KEY:
             logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
             return
-
+
+        self._preprocess_dataframes_for_pandas_ai()
+
         try:
             # Configure LiteLLM with Gemini
             llm = LiteLLM(

@@ -408,8 +410,11 @@ class EmployerBrandingAgent:
             description_parts.append("Key columns: " + "; ".join(column_descriptions))

             # Add specific context for employer branding
+            # Special handling for follower_stats
             if name.lower() in ['follower_stats', 'followers']:
-                description_parts.append("This data tracks LinkedIn company page follower growth and demographics for
+                description_parts.append("This data tracks LinkedIn company page follower growth and demographics. For monthly growth data, use the 'extracted_date' column for date-based queries instead of trying to cast 'category_name' as a date.")
+                if 'extracted_date' in df.columns:
+                    description_parts.append("The 'extracted_date' column contains properly formatted dates (YYYY-MM-DD) extracted from category_name for follower_gains_monthly records.")
             elif name.lower() in ['posts', 'post_stats']:
                 description_parts.append("This data contains LinkedIn post performance metrics for employer branding content analysis.")
             elif name.lower() in ['mentions', 'brand_mentions']:

@@ -445,6 +450,37 @@ class EmployerBrandingAgent:

     def _get_dataframes_summary(self) -> str:
         return get_all_schemas_representation(self.all_dataframes)
+
+    def _preprocess_dataframes_for_pandas_ai(self):
+        """Preprocess dataframes to handle date casting issues before PandasAI analysis"""
+        if not self.all_dataframes:
+            return
+
+        for name, df in self.all_dataframes.items():
+            if name.lower() in ['follower_stats', 'followers']:
+                # Create a copy to avoid modifying original data
+                df_copy = df.copy()
+
+                # Handle category_name column that contains dates for follower_gains_monthly
+                if 'category_name' in df_copy.columns and 'follower_count_type' in df_copy.columns:
+                    # Create a proper date column for date-based queries
+                    def extract_date_from_category(row):
+                        if row.get('follower_count_type') == 'follower_gains_monthly':
+                            category_name = str(row.get('category_name', ''))
+                            # Check if it matches YYYY-MM-DD format
+                            import re
+                            date_pattern = r'^\d{4}-\d{2}-\d{2}$'
+                            if re.match(date_pattern, category_name):
+                                return category_name
+                        return None
+
+                    # Add extracted_date column for cleaner date operations
+                    df_copy['extracted_date'] = df_copy.apply(extract_date_from_category, axis=1)
+
+                    # Update the dataframe in our collection
+                    self.all_dataframes[name] = df_copy
+
+                logging.info(f"Preprocessed {name} dataframe for date handling")

     def _build_system_prompt(self) -> str:
         """Enhanced system prompt that works with PandasAI integration"""
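The date-extraction logic added above can be exercised on its own. The sketch below is a minimal standalone version, not the module's code: the sample rows are hypothetical, while the column names, the 'follower_gains_monthly' value, and the YYYY-MM-DD pattern come from the patch.

import re
import pandas as pd

def extract_date_from_category(row):
    """Mirror of the patched helper: return category_name only for monthly-gain rows in YYYY-MM-DD form."""
    if row.get('follower_count_type') == 'follower_gains_monthly':
        category_name = str(row.get('category_name', ''))
        if re.match(r'^\d{4}-\d{2}-\d{2}$', category_name):
            return category_name
    return None

# Hypothetical sample rows shaped like the follower_stats dataframe
follower_stats = pd.DataFrame({
    'follower_count_type': ['follower_gains_monthly', 'follower_gains_monthly', 'follower_geo'],
    'category_name': ['2024-01-01', '2024-02-01', 'Germany'],
})
follower_stats['extracted_date'] = follower_stats.apply(extract_date_from_category, axis=1)
# extracted_date is '2024-01-01' / '2024-02-01' for the monthly rows and None for 'Germany'

A vectorized variant (e.g. Series.str.match combined with where) would avoid the row-wise apply; the row-wise form used in the patch is easier to follow and keeps the monthly-gain check and the date check in one place.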
@@ -544,7 +580,7 @@ class EmployerBrandingAgent:
             # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
             if len(self.pandas_dfs) == 1:
                 df = list(self.pandas_dfs.values())[0]
-                logging.info(f"Using single DataFrame for query: {df}")
+                logging.info(f"Using single DataFrame for query with shape: {df.df.shape}")
                 pandas_response = df.chat(query)
             else:
                 # For multiple dataframes, use pai.chat with all dfs
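For context, the logging change sits in the query path that routes a prompt either to a single wrapped DataFrame or to all of them at once. The sketch below is a hypothetical, simplified version of that dispatch, not the module's actual method; it assumes pandasai is imported as pai, that the wrapped frames live in a dict like self.pandas_dfs, and (as the patched log line does) that each wrapper exposes the underlying pandas frame as .df.

import logging
import pandasai as pai

def run_query(pandas_dfs: dict, query: str):
    """Route a natural-language query to one PandasAI DataFrame or to all of them."""
    if len(pandas_dfs) == 1:
        df = next(iter(pandas_dfs.values()))
        # Log the shape of the wrapped pandas frame rather than the whole object
        logging.info(f"Using single DataFrame for query with shape: {df.df.shape}")
        return df.chat(query)
    # For multiple dataframes, pass them all to pai.chat, as the surrounding comment suggests
    logging.info(f"Using {len(pandas_dfs)} DataFrames for query")
    return pai.chat(query, *pandas_dfs.values())

Logging the shape instead of interpolating the DataFrame itself keeps large tables out of the logs, which appears to be the point of the one-line change.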