Spaces:
Running
Running
Update eb_agent_module.py
Browse files- eb_agent_module.py +46 -71
eb_agent_module.py
CHANGED
@@ -8,10 +8,8 @@ import textwrap
|
|
8 |
from datetime import datetime
|
9 |
from typing import Dict, List, Optional, Union, Any
|
10 |
import traceback
|
11 |
-
|
12 |
-
from
|
13 |
-
from pandasai.responses.response_parser import ResponseParser
|
14 |
-
from pandasai.middlewares.base import BaseMiddleware
|
15 |
|
16 |
# Configure logging
|
17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
|
@@ -104,19 +102,6 @@ else:
|
|
104 |
if not GENAI_AVAILABLE:
|
105 |
logging.warning("Google GenAI library not available.")
|
106 |
|
107 |
-
# --- Custom PandasAI Middleware for Better Integration ---
|
108 |
-
class EmployerBrandingMiddleware(BaseMiddleware):
|
109 |
-
"""Custom middleware to enhance PandasAI responses with HR context"""
|
110 |
-
|
111 |
-
def run(self, code: str, **kwargs) -> str:
|
112 |
-
"""Add HR-friendly context to generated code"""
|
113 |
-
# Add comments to make code more understandable
|
114 |
-
enhanced_code = f"""
|
115 |
-
# HR Analytics Query Processing
|
116 |
-
# This code analyzes your LinkedIn employer branding data
|
117 |
-
{code}
|
118 |
-
"""
|
119 |
-
return enhanced_code
|
120 |
|
121 |
# --- Utility function to get DataFrame schema representation ---
|
122 |
def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
|
@@ -333,7 +318,7 @@ class AdvancedRAGSystem:
|
|
333 |
logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
|
334 |
return ""
|
335 |
|
336 |
-
class
|
337 |
def __init__(self,
|
338 |
all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
|
339 |
rag_documents_df: Optional[pd.DataFrame] = None,
|
@@ -362,56 +347,38 @@ class EnhancedEmployerBrandingAgent:
|
|
362 |
logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
|
363 |
|
364 |
def _initialize_pandas_agent(self):
|
365 |
-
"""Initialize PandasAI
|
366 |
if not self.all_dataframes or not GEMINI_API_KEY:
|
367 |
logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
|
368 |
return
|
369 |
-
|
370 |
try:
|
371 |
-
#
|
372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
for name, df in self.all_dataframes.items():
|
374 |
-
#
|
375 |
df_description = self._generate_dataframe_description(name, df)
|
376 |
-
|
377 |
-
|
378 |
-
name=name,
|
379 |
-
description=df_description
|
380 |
-
)
|
381 |
-
smart_dfs.append(smart_df)
|
382 |
-
|
383 |
-
# Configure PandasAI with Gemini
|
384 |
-
pandas_llm = GoogleGemini(
|
385 |
-
api_token=GEMINI_API_KEY,
|
386 |
-
model=self.llm_model_name,
|
387 |
-
temperature=0.7,
|
388 |
-
top_p=0.95,
|
389 |
-
top_k=40,
|
390 |
-
max_output_tokens=4096
|
391 |
-
)
|
392 |
-
|
393 |
-
# Create agent with enhanced configuration
|
394 |
-
self.pandas_agent = Agent(
|
395 |
-
dfs=smart_dfs,
|
396 |
-
config={
|
397 |
-
"llm": pandas_llm,
|
398 |
-
"verbose": True,
|
399 |
-
"enable_cache": True,
|
400 |
-
"save_charts": True,
|
401 |
-
"save_charts_path": "charts/",
|
402 |
-
"custom_whitelisted_dependencies": ["matplotlib", "seaborn", "plotly"],
|
403 |
-
"middlewares": [EmployerBrandingMiddleware()],
|
404 |
-
"response_parser": ResponseParser,
|
405 |
-
"max_retries": 3,
|
406 |
-
"conversational": True
|
407 |
-
}
|
408 |
-
)
|
409 |
|
410 |
-
|
|
|
411 |
|
412 |
except Exception as e:
|
413 |
-
logging.error(f"Failed to initialize PandasAI
|
414 |
self.pandas_agent = None
|
|
|
415 |
|
416 |
def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
|
417 |
"""Generate a descriptive summary for PandasAI to better understand the data"""
|
@@ -520,18 +487,18 @@ class EnhancedEmployerBrandingAgent:
|
|
520 |
3. **Business Impact**: What this means for employer branding strategy
|
521 |
4. **Recommendations**: Specific, prioritized action items
|
522 |
5. **Next Steps**: Follow-up suggestions or questions
|
523 |
-
|
524 |
-
## Example Language Patterns:
|
525 |
-
- Instead of "DataFrame shows" → "Your LinkedIn data reveals"
|
526 |
-
- Instead of "follower_count column" → "follower growth metrics"
|
527 |
-
- Instead of "engagement_rate variable" → "post engagement performance"
|
528 |
-
- Instead of "dataset analysis" → "performance review"
|
529 |
|
530 |
## Key Behaviors:
|
531 |
- **Data-Driven**: Always ground insights in actual data analysis when possible
|
532 |
- **Visual When Helpful**: Suggest or create charts that make data more understandable
|
533 |
- **Proactive**: Identify related insights the user might find valuable
|
534 |
- **Honest About Limitations**: Clearly state when data doesn't support certain analyses
|
|
|
|
|
|
|
|
|
|
|
|
|
535 |
|
536 |
Your goal remains to be a trusted partner, but now with powerful data analysis capabilities that enable deeper, more accurate insights for data-driven employer branding decisions.
|
537 |
""").strip()
|
@@ -564,13 +531,21 @@ class EnhancedEmployerBrandingAgent:
|
|
564 |
|
565 |
async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
|
566 |
"""Generate response using PandasAI for data queries"""
|
567 |
-
if not self.pandas_agent:
|
568 |
-
return "Data analysis not available - PandasAI
|
569 |
|
570 |
try:
|
571 |
-
# Use PandasAI to analyze the data
|
572 |
logging.info(f"Processing data query with PandasAI: {query[:100]}...")
|
573 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
574 |
|
575 |
# Check if response is meaningful
|
576 |
if pandas_response and str(pandas_response).strip():
|
@@ -846,12 +821,12 @@ class EnhancedEmployerBrandingAgent:
|
|
846 |
|
847 |
# --- Helper Functions for External Integration ---
|
848 |
def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
|
849 |
-
rag_docs: Optional[pd.DataFrame] = None) ->
|
850 |
"""Factory function to create a new agent instance"""
|
851 |
logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
|
852 |
-
return
|
853 |
|
854 |
-
async def initialize_agent_async(agent:
|
855 |
"""Async helper to initialize an agent instance"""
|
856 |
logging.info("Initializing agent via async helper function.")
|
857 |
return await agent.initialize()
|
|
|
8 |
from datetime import datetime
|
9 |
from typing import Dict, List, Optional, Union, Any
|
10 |
import traceback
|
11 |
+
import pandasai as pai
|
12 |
+
from pandasai_litellm import LiteLLM
|
|
|
|
|
13 |
|
14 |
# Configure logging
|
15 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
|
|
|
102 |
if not GENAI_AVAILABLE:
|
103 |
logging.warning("Google GenAI library not available.")
|
104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
# --- Utility function to get DataFrame schema representation ---
|
107 |
def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
|
|
|
318 |
logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
|
319 |
return ""
|
320 |
|
321 |
+
class EmployerBrandingAgent:
|
322 |
def __init__(self,
|
323 |
all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
|
324 |
rag_documents_df: Optional[pd.DataFrame] = None,
|
|
|
347 |
logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
|
348 |
|
349 |
def _initialize_pandas_agent(self):
|
350 |
+
"""Initialize PandasAI with enhanced configuration"""
|
351 |
if not self.all_dataframes or not GEMINI_API_KEY:
|
352 |
logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
|
353 |
return
|
354 |
+
|
355 |
try:
|
356 |
+
# Configure LiteLLM with Gemini
|
357 |
+
llm = LiteLLM(model="gemini-2.5-flash-preview-05-20")
|
358 |
+
|
359 |
+
# Set PandasAI configuration
|
360 |
+
pai.config.set({
|
361 |
+
"llm": llm,
|
362 |
+
"temperature": 0.7,
|
363 |
+
"verbose": True,
|
364 |
+
"enable_cache": True
|
365 |
+
})
|
366 |
+
|
367 |
+
# Store dataframes for chat queries (we'll use them directly)
|
368 |
+
self.pandas_dfs = {}
|
369 |
for name, df in self.all_dataframes.items():
|
370 |
+
# Convert to PandasAI DataFrame with description
|
371 |
df_description = self._generate_dataframe_description(name, df)
|
372 |
+
pandas_df = pai.DataFrame(df, description=df_description)
|
373 |
+
self.pandas_dfs[name] = pandas_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
|
375 |
+
self.pandas_agent = True # Flag to indicate PandasAI is ready
|
376 |
+
logging.info(f"PandasAI initialized successfully with {len(self.pandas_dfs)} DataFrames")
|
377 |
|
378 |
except Exception as e:
|
379 |
+
logging.error(f"Failed to initialize PandasAI: {e}", exc_info=True)
|
380 |
self.pandas_agent = None
|
381 |
+
self.pandas_dfs = {}
|
382 |
|
383 |
def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
|
384 |
"""Generate a descriptive summary for PandasAI to better understand the data"""
|
|
|
487 |
3. **Business Impact**: What this means for employer branding strategy
|
488 |
4. **Recommendations**: Specific, prioritized action items
|
489 |
5. **Next Steps**: Follow-up suggestions or questions
|
|
|
|
|
|
|
|
|
|
|
|
|
490 |
|
491 |
## Key Behaviors:
|
492 |
- **Data-Driven**: Always ground insights in actual data analysis when possible
|
493 |
- **Visual When Helpful**: Suggest or create charts that make data more understandable
|
494 |
- **Proactive**: Identify related insights the user might find valuable
|
495 |
- **Honest About Limitations**: Clearly state when data doesn't support certain analyses
|
496 |
+
|
497 |
+
## Example Language Patterns:
|
498 |
+
- Instead of "DataFrame shows" → "Your LinkedIn data reveals"
|
499 |
+
- Instead of "follower_count column" → "follower growth metrics"
|
500 |
+
- Instead of "engagement_rate variable" → "post engagement performance"
|
501 |
+
- Instead of "dataset analysis" → "performance review"
|
502 |
|
503 |
Your goal remains to be a trusted partner, but now with powerful data analysis capabilities that enable deeper, more accurate insights for data-driven employer branding decisions.
|
504 |
""").strip()
|
|
|
531 |
|
532 |
async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
|
533 |
"""Generate response using PandasAI for data queries"""
|
534 |
+
if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
|
535 |
+
return "Data analysis not available - PandasAI not initialized.", False
|
536 |
|
537 |
try:
|
|
|
538 |
logging.info(f"Processing data query with PandasAI: {query[:100]}...")
|
539 |
+
|
540 |
+
# Use the first available dataframe for single-df queries
|
541 |
+
# For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
|
542 |
+
if len(self.pandas_dfs) == 1:
|
543 |
+
df = list(self.pandas_dfs.values())[0]
|
544 |
+
pandas_response = df.chat(query)
|
545 |
+
else:
|
546 |
+
# For multiple dataframes, use pai.chat with all dfs
|
547 |
+
dfs = list(self.pandas_dfs.values())
|
548 |
+
pandas_response = pai.chat(query, *dfs)
|
549 |
|
550 |
# Check if response is meaningful
|
551 |
if pandas_response and str(pandas_response).strip():
|
|
|
821 |
|
822 |
# --- Helper Functions for External Integration ---
|
823 |
def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
|
824 |
+
rag_docs: Optional[pd.DataFrame] = None) -> EmployerBrandingAgent:
|
825 |
"""Factory function to create a new agent instance"""
|
826 |
logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
|
827 |
+
return EmployerBrandingAgent(all_dataframes=dataframes, rag_documents_df=rag_docs)
|
828 |
|
829 |
+
async def initialize_agent_async(agent: EmployerBrandingAgent) -> bool:
|
830 |
"""Async helper to initialize an agent instance"""
|
831 |
logging.info("Initializing agent via async helper function.")
|
832 |
return await agent.initialize()
|