GuglielmoTor committed on
Commit
76fa038
·
verified ·
1 Parent(s): 9605646

Update eb_agent_module.py

Browse files
Files changed (1) hide show
  1. eb_agent_module.py +78 -40
eb_agent_module.py CHANGED
@@ -8,8 +8,10 @@ import textwrap
8
  from datetime import datetime
9
  from typing import Dict, List, Optional, Union, Any
10
  import traceback
11
- import pandasai as pai
12
- from pandasai_litellm import LiteLLM
 
 
13
 
14
  # Configure logging
15
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
@@ -102,6 +104,19 @@ else:
102
  if not GENAI_AVAILABLE:
103
  logging.warning("Google GenAI library not available.")
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  # --- Utility function to get DataFrame schema representation ---
107
  def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
@@ -318,7 +333,7 @@ class AdvancedRAGSystem:
318
  logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
319
  return ""
320
 
321
- class EmployerBrandingAgent:
322
  def __init__(self,
323
  all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
324
  rag_documents_df: Optional[pd.DataFrame] = None,
@@ -347,38 +362,56 @@ class EmployerBrandingAgent:
347
  logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
348
 
349
  def _initialize_pandas_agent(self):
350
- """Initialize PandasAI with enhanced configuration"""
351
  if not self.all_dataframes or not GEMINI_API_KEY:
352
  logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
353
  return
354
-
355
  try:
356
- # Configure LiteLLM with Gemini
357
- llm = LiteLLM(model="gemini-2.5-flash-preview-05-20")
358
-
359
- # Set PandasAI configuration
360
- pai.config.set({
361
- "llm": llm,
362
- "temperature": 0.7,
363
- "verbose": True,
364
- "enable_cache": True
365
- })
366
-
367
- # Store dataframes for chat queries (we'll use them directly)
368
- self.pandas_dfs = {}
369
  for name, df in self.all_dataframes.items():
370
- # Convert to PandasAI DataFrame with description
371
  df_description = self._generate_dataframe_description(name, df)
372
- pandas_df = pai.DataFrame(df, description=df_description)
373
- self.pandas_dfs[name] = pandas_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
- self.pandas_agent = True # Flag to indicate PandasAI is ready
376
- logging.info(f"PandasAI initialized successfully with {len(self.pandas_dfs)} DataFrames")
377
 
378
  except Exception as e:
379
- logging.error(f"Failed to initialize PandasAI: {e}", exc_info=True)
380
  self.pandas_agent = None
381
- self.pandas_dfs = {}
382
 
383
  def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
384
  """Generate a descriptive summary for PandasAI to better understand the data"""
@@ -460,6 +493,13 @@ class EmployerBrandingAgent:
460
  2. **Business Context Translation**: Convert technical analysis results into HR-friendly insights
461
  3. **Actionable Recommendations**: Provide specific, implementable strategies based on data findings
462
  4. **Educational Guidance**: Help users understand both the data insights and the LinkedIn analytics concepts
 
 
 
 
 
 
 
463
 
464
  ## Communication Style:
465
  - **Natural and Conversational**: Maintain a warm, supportive tone as a helpful colleague
@@ -480,6 +520,12 @@ class EmployerBrandingAgent:
480
  3. **Business Impact**: What this means for employer branding strategy
481
  4. **Recommendations**: Specific, prioritized action items
482
  5. **Next Steps**: Follow-up suggestions or questions
 
 
 
 
 
 
483
 
484
  ## Key Behaviors:
485
  - **Data-Driven**: Always ground insights in actual data analysis when possible
@@ -518,21 +564,13 @@ class EmployerBrandingAgent:
518
 
519
  async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
520
  """Generate response using PandasAI for data queries"""
521
- if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
522
- return "Data analysis not available - PandasAI not initialized.", False
523
 
524
  try:
 
525
  logging.info(f"Processing data query with PandasAI: {query[:100]}...")
526
-
527
- # Use the first available dataframe for single-df queries
528
- # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
529
- if len(self.pandas_dfs) == 1:
530
- df = list(self.pandas_dfs.values())[0]
531
- pandas_response = df.chat(query)
532
- else:
533
- # For multiple dataframes, use pai.chat with all dfs
534
- dfs = list(self.pandas_dfs.values())
535
- pandas_response = pai.chat(query, *dfs)
536
 
537
  # Check if response is meaningful
538
  if pandas_response and str(pandas_response).strip():
@@ -808,12 +846,12 @@ class EmployerBrandingAgent:
808
 
809
  # --- Helper Functions for External Integration ---
810
  def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
811
- rag_docs: Optional[pd.DataFrame] = None) -> EmployerBrandingAgent:
812
  """Factory function to create a new agent instance"""
813
  logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
814
- return EmployerBrandingAgent(all_dataframes=dataframes, rag_documents_df=rag_docs)
815
 
816
- async def initialize_agent_async(agent: EmployerBrandingAgent) -> bool:
817
  """Async helper to initialize an agent instance"""
818
  logging.info("Initializing agent via async helper function.")
819
  return await agent.initialize()
 
8
  from datetime import datetime
9
  from typing import Dict, List, Optional, Union, Any
10
  import traceback
11
+ from pandasai import Agent, SmartDataframe
12
+ from pandasai.llm import GoogleGemini
13
+ from pandasai.responses.response_parser import ResponseParser
14
+ from pandasai.middlewares.base import BaseMiddleware
15
 
16
  # Configure logging
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
 
104
  if not GENAI_AVAILABLE:
105
  logging.warning("Google GenAI library not available.")
106
 
107
+ # --- Custom PandasAI Middleware for Better Integration ---
108
class EmployerBrandingMiddleware(BaseMiddleware):
    """Custom middleware to enhance PandasAI responses with HR context.

    The only hook implemented is ``run``, which returns the code it is given
    with two explanatory comment lines placed in front of it.
    """

    # Comment lines placed ahead of every code snippet passed through ``run``.
    _HEADER_LINES = (
        "# HR Analytics Query Processing",
        "# This code analyzes your LinkedIn employer branding data",
    )

    def run(self, code: str, **kwargs) -> str:
        """Add HR-friendly context comments to generated code.

        Args:
            code: Source text of the generated snippet.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``code`` preceded by a blank line and the header comments, and
            followed by a trailing newline.
        """
        # Assembled from explicit lines rather than from an indented
        # triple-quoted template, so no indentation from this source file can
        # end up in front of the first statement of ``code`` (which would make
        # the result fail with "unexpected indent" when executed).
        return "\n".join(("", *self._HEADER_LINES, code, ""))
120
 
121
  # --- Utility function to get DataFrame schema representation ---
122
  def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
 
333
  logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
334
  return ""
335
 
336
+ class EnhancedEmployerBrandingAgent:
337
  def __init__(self,
338
  all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
339
  rag_documents_df: Optional[pd.DataFrame] = None,
 
362
  logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
363
 
364
  def _initialize_pandas_agent(self):
365
+ """Initialize PandasAI Agent with enhanced configuration"""
366
  if not self.all_dataframes or not GEMINI_API_KEY:
367
  logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
368
  return
369
+
370
  try:
371
+ # Convert DataFrames to SmartDataframes with descriptive names
372
+ smart_dfs = []
 
 
 
 
 
 
 
 
 
 
 
373
  for name, df in self.all_dataframes.items():
374
+ # Add metadata to help PandasAI understand the data better
375
  df_description = self._generate_dataframe_description(name, df)
376
+ smart_df = SmartDataframe(
377
+ df,
378
+ name=name,
379
+ description=df_description
380
+ )
381
+ smart_dfs.append(smart_df)
382
+
383
+ # Configure PandasAI with Gemini
384
+ pandas_llm = GoogleGemini(
385
+ api_token=GEMINI_API_KEY,
386
+ model=self.llm_model_name,
387
+ temperature=0.7,
388
+ top_p=0.95,
389
+ top_k=40,
390
+ max_output_tokens=4096
391
+ )
392
+
393
+ # Create agent with enhanced configuration
394
+ self.pandas_agent = Agent(
395
+ dfs=smart_dfs,
396
+ config={
397
+ "llm": pandas_llm,
398
+ "verbose": True,
399
+ "enable_cache": True,
400
+ "save_charts": True,
401
+ "save_charts_path": "charts/",
402
+ "custom_whitelisted_dependencies": ["matplotlib", "seaborn", "plotly"],
403
+ "middlewares": [EmployerBrandingMiddleware()],
404
+ "response_parser": ResponseParser,
405
+ "max_retries": 3,
406
+ "conversational": True
407
+ }
408
+ )
409
 
410
+ logging.info(f"PandasAI agent initialized successfully with {len(smart_dfs)} DataFrames")
 
411
 
412
  except Exception as e:
413
+ logging.error(f"Failed to initialize PandasAI agent: {e}", exc_info=True)
414
  self.pandas_agent = None
 
415
 
416
  def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
417
  """Generate a descriptive summary for PandasAI to better understand the data"""
 
493
  2. **Business Context Translation**: Convert technical analysis results into HR-friendly insights
494
  3. **Actionable Recommendations**: Provide specific, implementable strategies based on data findings
495
  4. **Educational Guidance**: Help users understand both the data insights and the LinkedIn analytics concepts
496
+
497
+ ## CRITICAL COMMUNICATION RULES:
498
+ - **NEVER show code, technical commands, or programming syntax**
499
+ - **NEVER mention dataset names, column names, or technical data structure details**
500
+ - **NEVER reference DataFrames, schemas, or database terminology**
501
+ - **Always speak in business terms**: refer to "your LinkedIn data", "follower metrics", "engagement data", etc.
502
+ - **Focus on insights, not methods**: explain what the data shows, not how it was analyzed
503
 
504
  ## Communication Style:
505
  - **Natural and Conversational**: Maintain a warm, supportive tone as a helpful colleague
 
520
  3. **Business Impact**: What this means for employer branding strategy
521
  4. **Recommendations**: Specific, prioritized action items
522
  5. **Next Steps**: Follow-up suggestions or questions
523
+
524
+ ## Example Language Patterns:
525
+ - Instead of "DataFrame shows" → "Your LinkedIn data reveals"
526
+ - Instead of "follower_count column" → "follower growth metrics"
527
+ - Instead of "engagement_rate variable" → "post engagement performance"
528
+ - Instead of "dataset analysis" → "performance review"
529
 
530
  ## Key Behaviors:
531
  - **Data-Driven**: Always ground insights in actual data analysis when possible
 
564
 
565
  async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
566
  """Generate response using PandasAI for data queries"""
567
+ if not self.pandas_agent:
568
+ return "Data analysis not available - PandasAI agent not initialized.", False
569
 
570
  try:
571
+ # Use PandasAI to analyze the data
572
  logging.info(f"Processing data query with PandasAI: {query[:100]}...")
573
+ pandas_response = self.pandas_agent.chat(query)
 
 
 
 
 
 
 
 
 
574
 
575
  # Check if response is meaningful
576
  if pandas_response and str(pandas_response).strip():
 
846
 
847
  # --- Helper Functions for External Integration ---
848
  def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
849
+ rag_docs: Optional[pd.DataFrame] = None) -> EnhancedEmployerBrandingAgent:
850
  """Factory function to create a new agent instance"""
851
  logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
852
+ return EnhancedEmployerBrandingAgent(all_dataframes=dataframes, rag_documents_df=rag_docs)
853
 
854
async def initialize_agent_async(agent: EnhancedEmployerBrandingAgent) -> bool:
    """Async helper to initialize an agent instance.

    Logs the attempt, awaits ``agent.initialize()`` and returns its result
    unchanged.

    Args:
        agent: The agent to initialize; it must expose an awaitable
            ``initialize()`` method.

    Returns:
        Whatever ``agent.initialize()`` returns.
    """
    logging.info("Initializing agent via async helper function.")
    return await agent.initialize()