Spaces:
Running
Running
Update eb_agent_module.py
Browse files- eb_agent_module.py +78 -40
eb_agent_module.py
CHANGED
@@ -8,8 +8,10 @@ import textwrap
|
|
8 |
from datetime import datetime
|
9 |
from typing import Dict, List, Optional, Union, Any
|
10 |
import traceback
|
11 |
-
|
12 |
-
from
|
|
|
|
|
13 |
|
14 |
# Configure logging
|
15 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
|
@@ -102,6 +104,19 @@ else:
|
|
102 |
if not GENAI_AVAILABLE:
|
103 |
logging.warning("Google GenAI library not available.")
|
104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
# --- Utility function to get DataFrame schema representation ---
|
107 |
def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
|
@@ -318,7 +333,7 @@ class AdvancedRAGSystem:
|
|
318 |
logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
|
319 |
return ""
|
320 |
|
321 |
-
class
|
322 |
def __init__(self,
|
323 |
all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
|
324 |
rag_documents_df: Optional[pd.DataFrame] = None,
|
@@ -347,38 +362,56 @@ class EmployerBrandingAgent:
|
|
347 |
logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
|
348 |
|
349 |
def _initialize_pandas_agent(self):
|
350 |
-
"""Initialize PandasAI with enhanced configuration"""
|
351 |
if not self.all_dataframes or not GEMINI_API_KEY:
|
352 |
logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
|
353 |
return
|
354 |
-
|
355 |
try:
|
356 |
-
#
|
357 |
-
|
358 |
-
|
359 |
-
# Set PandasAI configuration
|
360 |
-
pai.config.set({
|
361 |
-
"llm": llm,
|
362 |
-
"temperature": 0.7,
|
363 |
-
"verbose": True,
|
364 |
-
"enable_cache": True
|
365 |
-
})
|
366 |
-
|
367 |
-
# Store dataframes for chat queries (we'll use them directly)
|
368 |
-
self.pandas_dfs = {}
|
369 |
for name, df in self.all_dataframes.items():
|
370 |
-
#
|
371 |
df_description = self._generate_dataframe_description(name, df)
|
372 |
-
|
373 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
|
375 |
-
|
376 |
-
logging.info(f"PandasAI initialized successfully with {len(self.pandas_dfs)} DataFrames")
|
377 |
|
378 |
except Exception as e:
|
379 |
-
logging.error(f"Failed to initialize PandasAI: {e}", exc_info=True)
|
380 |
self.pandas_agent = None
|
381 |
-
self.pandas_dfs = {}
|
382 |
|
383 |
def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
|
384 |
"""Generate a descriptive summary for PandasAI to better understand the data"""
|
@@ -460,6 +493,13 @@ class EmployerBrandingAgent:
|
|
460 |
2. **Business Context Translation**: Convert technical analysis results into HR-friendly insights
|
461 |
3. **Actionable Recommendations**: Provide specific, implementable strategies based on data findings
|
462 |
4. **Educational Guidance**: Help users understand both the data insights and the LinkedIn analytics concepts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
463 |
|
464 |
## Communication Style:
|
465 |
- **Natural and Conversational**: Maintain a warm, supportive tone as a helpful colleague
|
@@ -480,6 +520,12 @@ class EmployerBrandingAgent:
|
|
480 |
3. **Business Impact**: What this means for employer branding strategy
|
481 |
4. **Recommendations**: Specific, prioritized action items
|
482 |
5. **Next Steps**: Follow-up suggestions or questions
|
|
|
|
|
|
|
|
|
|
|
|
|
483 |
|
484 |
## Key Behaviors:
|
485 |
- **Data-Driven**: Always ground insights in actual data analysis when possible
|
@@ -518,21 +564,13 @@ class EmployerBrandingAgent:
|
|
518 |
|
519 |
async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
|
520 |
"""Generate response using PandasAI for data queries"""
|
521 |
-
if not self.pandas_agent
|
522 |
-
return "Data analysis not available - PandasAI not initialized.", False
|
523 |
|
524 |
try:
|
|
|
525 |
logging.info(f"Processing data query with PandasAI: {query[:100]}...")
|
526 |
-
|
527 |
-
# Use the first available dataframe for single-df queries
|
528 |
-
# For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
|
529 |
-
if len(self.pandas_dfs) == 1:
|
530 |
-
df = list(self.pandas_dfs.values())[0]
|
531 |
-
pandas_response = df.chat(query)
|
532 |
-
else:
|
533 |
-
# For multiple dataframes, use pai.chat with all dfs
|
534 |
-
dfs = list(self.pandas_dfs.values())
|
535 |
-
pandas_response = pai.chat(query, *dfs)
|
536 |
|
537 |
# Check if response is meaningful
|
538 |
if pandas_response and str(pandas_response).strip():
|
@@ -808,12 +846,12 @@ class EmployerBrandingAgent:
|
|
808 |
|
809 |
# --- Helper Functions for External Integration ---
|
810 |
def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
|
811 |
-
rag_docs: Optional[pd.DataFrame] = None) ->
|
812 |
"""Factory function to create a new agent instance"""
|
813 |
logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
|
814 |
-
return
|
815 |
|
816 |
-
async def initialize_agent_async(agent:
|
817 |
"""Async helper to initialize an agent instance"""
|
818 |
logging.info("Initializing agent via async helper function.")
|
819 |
return await agent.initialize()
|
|
|
8 |
from datetime import datetime
|
9 |
from typing import Dict, List, Optional, Union, Any
|
10 |
import traceback
|
11 |
+
from pandasai import Agent, SmartDataframe
|
12 |
+
from pandasai.llm import GoogleGemini
|
13 |
+
from pandasai.responses.response_parser import ResponseParser
|
14 |
+
from pandasai.middlewares.base import BaseMiddleware
|
15 |
|
16 |
# Configure logging
|
17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
|
|
|
104 |
if not GENAI_AVAILABLE:
|
105 |
logging.warning("Google GenAI library not available.")
|
106 |
|
107 |
+
# --- Custom PandasAI Middleware for Better Integration ---
|
108 |
+
class EmployerBrandingMiddleware(BaseMiddleware):
    """PandasAI middleware that labels generated code with an HR-analytics banner."""

    def run(self, code: str, **kwargs) -> str:
        """Return ``code`` preceded by two explanatory comment lines.

        Args:
            code: The generated code text to annotate.
            **kwargs: Accepted by the signature but not used in this method.

        Returns:
            A string made of a leading newline, the two banner comments,
            ``code`` itself, and a trailing newline.
        """
        banner = "\n".join((
            "# HR Analytics Query Processing",
            "# This code analyzes your LinkedIn employer branding data",
        ))
        # Same layout as a triple-quoted template: empty first line, banner,
        # the code, then a closing newline.
        return f"\n{banner}\n{code}\n"
|
120 |
|
121 |
# --- Utility function to get DataFrame schema representation ---
|
122 |
def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
|
|
|
333 |
logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
|
334 |
return ""
|
335 |
|
336 |
+
class EnhancedEmployerBrandingAgent:
|
337 |
def __init__(self,
|
338 |
all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
|
339 |
rag_documents_df: Optional[pd.DataFrame] = None,
|
|
|
362 |
logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
|
363 |
|
364 |
def _initialize_pandas_agent(self):
    """Create the PandasAI ``Agent`` over every DataFrame held by this instance.

    When there are no DataFrames or no Gemini API key, a warning is logged
    and the method returns without touching ``self.pandas_agent``. If any
    step of the setup raises, the error is logged with its traceback and
    ``self.pandas_agent`` is set to ``None``.
    """
    if not (self.all_dataframes and GEMINI_API_KEY):
        logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
        return

    try:
        # Wrap each raw DataFrame, attaching its name and a generated
        # description as metadata for PandasAI.
        wrapped_frames = [
            SmartDataframe(
                frame,
                name=frame_name,
                description=self._generate_dataframe_description(frame_name, frame),
            )
            for frame_name, frame in self.all_dataframes.items()
        ]

        # Gemini-backed LLM used by PandasAI.
        gemini_llm = GoogleGemini(
            api_token=GEMINI_API_KEY,
            model=self.llm_model_name,
            temperature=0.7,
            top_p=0.95,
            top_k=40,
            max_output_tokens=4096,
        )

        agent_config = {
            "llm": gemini_llm,
            "verbose": True,
            "enable_cache": True,
            "save_charts": True,
            "save_charts_path": "charts/",
            "custom_whitelisted_dependencies": ["matplotlib", "seaborn", "plotly"],
            "middlewares": [EmployerBrandingMiddleware()],
            # The parser class itself is passed, not an instance.
            "response_parser": ResponseParser,
            "max_retries": 3,
            "conversational": True,
        }
        self.pandas_agent = Agent(dfs=wrapped_frames, config=agent_config)

        logging.info(f"PandasAI agent initialized successfully with {len(wrapped_frames)} DataFrames")

    except Exception as e:
        # Best-effort setup: record the failure and leave the agent unset.
        logging.error(f"Failed to initialize PandasAI agent: {e}", exc_info=True)
        self.pandas_agent = None
|
|
|
415 |
|
416 |
def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
|
417 |
"""Generate a descriptive summary for PandasAI to better understand the data"""
|
|
|
493 |
2. **Business Context Translation**: Convert technical analysis results into HR-friendly insights
|
494 |
3. **Actionable Recommendations**: Provide specific, implementable strategies based on data findings
|
495 |
4. **Educational Guidance**: Help users understand both the data insights and the LinkedIn analytics concepts
|
496 |
+
|
497 |
+
## CRITICAL COMMUNICATION RULES:
|
498 |
+
- **NEVER show code, technical commands, or programming syntax**
|
499 |
+
- **NEVER mention dataset names, column names, or technical data structure details**
|
500 |
+
- **NEVER reference DataFrames, schemas, or database terminology**
|
501 |
+
- **Always speak in business terms**: refer to "your LinkedIn data", "follower metrics", "engagement data", etc.
|
502 |
+
- **Focus on insights, not methods**: explain what the data shows, not how it was analyzed
|
503 |
|
504 |
## Communication Style:
|
505 |
- **Natural and Conversational**: Maintain a warm, supportive tone as a helpful colleague
|
|
|
520 |
3. **Business Impact**: What this means for employer branding strategy
|
521 |
4. **Recommendations**: Specific, prioritized action items
|
522 |
5. **Next Steps**: Follow-up suggestions or questions
|
523 |
+
|
524 |
+
## Example Language Patterns:
|
525 |
+
- Instead of "DataFrame shows" → "Your LinkedIn data reveals"
|
526 |
+
- Instead of "follower_count column" → "follower growth metrics"
|
527 |
+
- Instead of "engagement_rate variable" → "post engagement performance"
|
528 |
+
- Instead of "dataset analysis" → "performance review"
|
529 |
|
530 |
## Key Behaviors:
|
531 |
- **Data-Driven**: Always ground insights in actual data analysis when possible
|
|
|
564 |
|
565 |
async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
|
566 |
"""Generate response using PandasAI for data queries"""
|
567 |
+
if not self.pandas_agent:
|
568 |
+
return "Data analysis not available - PandasAI agent not initialized.", False
|
569 |
|
570 |
try:
|
571 |
+
# Use PandasAI to analyze the data
|
572 |
logging.info(f"Processing data query with PandasAI: {query[:100]}...")
|
573 |
+
pandas_response = self.pandas_agent.chat(query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
574 |
|
575 |
# Check if response is meaningful
|
576 |
if pandas_response and str(pandas_response).strip():
|
|
|
846 |
|
847 |
# --- Helper Functions for External Integration ---
|
848 |
def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
                          rag_docs: Optional[pd.DataFrame] = None) -> EnhancedEmployerBrandingAgent:
    """Build and return a fresh ``EnhancedEmployerBrandingAgent``.

    Args:
        dataframes: Forwarded to the agent as ``all_dataframes``.
        rag_docs: Forwarded to the agent as ``rag_documents_df``.

    Returns:
        The newly constructed agent.
    """
    logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
    new_agent = EnhancedEmployerBrandingAgent(
        all_dataframes=dataframes,
        rag_documents_df=rag_docs,
    )
    return new_agent
|
853 |
|
854 |
+
async def initialize_agent_async(agent: EnhancedEmployerBrandingAgent) -> bool:
    """Log the request, await ``agent.initialize()``, and return its result.

    Args:
        agent: The agent instance whose ``initialize`` coroutine is awaited.

    Returns:
        Whatever ``agent.initialize()`` resolves to.
    """
    logging.info("Initializing agent via async helper function.")
    init_result = await agent.initialize()
    return init_result
|