GuglielmoTor committed on
Commit
205df41
·
verified ·
1 Parent(s): 76fa038

Update eb_agent_module.py

Browse files
Files changed (1) hide show
  1. eb_agent_module.py +46 -71
eb_agent_module.py CHANGED
@@ -8,10 +8,8 @@ import textwrap
8
  from datetime import datetime
9
  from typing import Dict, List, Optional, Union, Any
10
  import traceback
11
- from pandasai import Agent, SmartDataframe
12
- from pandasai.llm import GoogleGemini
13
- from pandasai.responses.response_parser import ResponseParser
14
- from pandasai.middlewares.base import BaseMiddleware
15
 
16
  # Configure logging
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
@@ -104,19 +102,6 @@ else:
104
  if not GENAI_AVAILABLE:
105
  logging.warning("Google GenAI library not available.")
106
 
107
- # --- Custom PandasAI Middleware for Better Integration ---
108
- class EmployerBrandingMiddleware(BaseMiddleware):
109
- """Custom middleware to enhance PandasAI responses with HR context"""
110
-
111
- def run(self, code: str, **kwargs) -> str:
112
- """Add HR-friendly context to generated code"""
113
- # Add comments to make code more understandable
114
- enhanced_code = f"""
115
- # HR Analytics Query Processing
116
- # This code analyzes your LinkedIn employer branding data
117
- {code}
118
- """
119
- return enhanced_code
120
 
121
  # --- Utility function to get DataFrame schema representation ---
122
  def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
@@ -333,7 +318,7 @@ class AdvancedRAGSystem:
333
  logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
334
  return ""
335
 
336
- class EnhancedEmployerBrandingAgent:
337
  def __init__(self,
338
  all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
339
  rag_documents_df: Optional[pd.DataFrame] = None,
@@ -362,56 +347,38 @@ class EnhancedEmployerBrandingAgent:
362
  logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
363
 
364
  def _initialize_pandas_agent(self):
365
- """Initialize PandasAI Agent with enhanced configuration"""
366
  if not self.all_dataframes or not GEMINI_API_KEY:
367
  logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
368
  return
369
-
370
  try:
371
- # Convert DataFrames to SmartDataframes with descriptive names
372
- smart_dfs = []
 
 
 
 
 
 
 
 
 
 
 
373
  for name, df in self.all_dataframes.items():
374
- # Add metadata to help PandasAI understand the data better
375
  df_description = self._generate_dataframe_description(name, df)
376
- smart_df = SmartDataframe(
377
- df,
378
- name=name,
379
- description=df_description
380
- )
381
- smart_dfs.append(smart_df)
382
-
383
- # Configure PandasAI with Gemini
384
- pandas_llm = GoogleGemini(
385
- api_token=GEMINI_API_KEY,
386
- model=self.llm_model_name,
387
- temperature=0.7,
388
- top_p=0.95,
389
- top_k=40,
390
- max_output_tokens=4096
391
- )
392
-
393
- # Create agent with enhanced configuration
394
- self.pandas_agent = Agent(
395
- dfs=smart_dfs,
396
- config={
397
- "llm": pandas_llm,
398
- "verbose": True,
399
- "enable_cache": True,
400
- "save_charts": True,
401
- "save_charts_path": "charts/",
402
- "custom_whitelisted_dependencies": ["matplotlib", "seaborn", "plotly"],
403
- "middlewares": [EmployerBrandingMiddleware()],
404
- "response_parser": ResponseParser,
405
- "max_retries": 3,
406
- "conversational": True
407
- }
408
- )
409
 
410
- logging.info(f"PandasAI agent initialized successfully with {len(smart_dfs)} DataFrames")
 
411
 
412
  except Exception as e:
413
- logging.error(f"Failed to initialize PandasAI agent: {e}", exc_info=True)
414
  self.pandas_agent = None
 
415
 
416
  def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
417
  """Generate a descriptive summary for PandasAI to better understand the data"""
@@ -520,18 +487,18 @@ class EnhancedEmployerBrandingAgent:
520
  3. **Business Impact**: What this means for employer branding strategy
521
  4. **Recommendations**: Specific, prioritized action items
522
  5. **Next Steps**: Follow-up suggestions or questions
523
-
524
- ## Example Language Patterns:
525
- - Instead of "DataFrame shows" → "Your LinkedIn data reveals"
526
- - Instead of "follower_count column" → "follower growth metrics"
527
- - Instead of "engagement_rate variable" → "post engagement performance"
528
- - Instead of "dataset analysis" → "performance review"
529
 
530
  ## Key Behaviors:
531
  - **Data-Driven**: Always ground insights in actual data analysis when possible
532
  - **Visual When Helpful**: Suggest or create charts that make data more understandable
533
  - **Proactive**: Identify related insights the user might find valuable
534
  - **Honest About Limitations**: Clearly state when data doesn't support certain analyses
 
 
 
 
 
 
535
 
536
  Your goal remains to be a trusted partner, but now with powerful data analysis capabilities that enable deeper, more accurate insights for data-driven employer branding decisions.
537
  """).strip()
@@ -564,13 +531,21 @@ class EnhancedEmployerBrandingAgent:
564
 
565
  async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
566
  """Generate response using PandasAI for data queries"""
567
- if not self.pandas_agent:
568
- return "Data analysis not available - PandasAI agent not initialized.", False
569
 
570
  try:
571
- # Use PandasAI to analyze the data
572
  logging.info(f"Processing data query with PandasAI: {query[:100]}...")
573
- pandas_response = self.pandas_agent.chat(query)
 
 
 
 
 
 
 
 
 
574
 
575
  # Check if response is meaningful
576
  if pandas_response and str(pandas_response).strip():
@@ -846,12 +821,12 @@ class EnhancedEmployerBrandingAgent:
846
 
847
  # --- Helper Functions for External Integration ---
848
  def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
849
- rag_docs: Optional[pd.DataFrame] = None) -> EnhancedEmployerBrandingAgent:
850
  """Factory function to create a new agent instance"""
851
  logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
852
- return EnhancedEmployerBrandingAgent(all_dataframes=dataframes, rag_documents_df=rag_docs)
853
 
854
- async def initialize_agent_async(agent: EnhancedEmployerBrandingAgent) -> bool:
855
  """Async helper to initialize an agent instance"""
856
  logging.info("Initializing agent via async helper function.")
857
  return await agent.initialize()
 
8
  from datetime import datetime
9
  from typing import Dict, List, Optional, Union, Any
10
  import traceback
11
+ import pandasai as pai
12
+ from pandasai_litellm import LiteLLM
 
 
13
 
14
  # Configure logging
15
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
 
102
  if not GENAI_AVAILABLE:
103
  logging.warning("Google GenAI library not available.")
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  # --- Utility function to get DataFrame schema representation ---
107
  def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
 
318
  logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
319
  return ""
320
 
321
+ class EmployerBrandingAgent:
322
  def __init__(self,
323
  all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
324
  rag_documents_df: Optional[pd.DataFrame] = None,
 
347
  logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
348
 
349
  def _initialize_pandas_agent(self):
350
+ """Initialize PandasAI with enhanced configuration"""
351
  if not self.all_dataframes or not GEMINI_API_KEY:
352
  logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
353
  return
354
+
355
  try:
356
+ # Configure LiteLLM with Gemini
357
+ llm = LiteLLM(model="gemini-2.5-flash-preview-05-20")
358
+
359
+ # Set PandasAI configuration
360
+ pai.config.set({
361
+ "llm": llm,
362
+ "temperature": 0.7,
363
+ "verbose": True,
364
+ "enable_cache": True
365
+ })
366
+
367
+ # Store dataframes for chat queries (we'll use them directly)
368
+ self.pandas_dfs = {}
369
  for name, df in self.all_dataframes.items():
370
+ # Convert to PandasAI DataFrame with description
371
  df_description = self._generate_dataframe_description(name, df)
372
+ pandas_df = pai.DataFrame(df, description=df_description)
373
+ self.pandas_dfs[name] = pandas_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
+ self.pandas_agent = True # Flag to indicate PandasAI is ready
376
+ logging.info(f"PandasAI initialized successfully with {len(self.pandas_dfs)} DataFrames")
377
 
378
  except Exception as e:
379
+ logging.error(f"Failed to initialize PandasAI: {e}", exc_info=True)
380
  self.pandas_agent = None
381
+ self.pandas_dfs = {}
382
 
383
  def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
384
  """Generate a descriptive summary for PandasAI to better understand the data"""
 
487
  3. **Business Impact**: What this means for employer branding strategy
488
  4. **Recommendations**: Specific, prioritized action items
489
  5. **Next Steps**: Follow-up suggestions or questions
 
 
 
 
 
 
490
 
491
  ## Key Behaviors:
492
  - **Data-Driven**: Always ground insights in actual data analysis when possible
493
  - **Visual When Helpful**: Suggest or create charts that make data more understandable
494
  - **Proactive**: Identify related insights the user might find valuable
495
  - **Honest About Limitations**: Clearly state when data doesn't support certain analyses
496
+
497
+ ## Example Language Patterns:
498
+ - Instead of "DataFrame shows" → "Your LinkedIn data reveals"
499
+ - Instead of "follower_count column" → "follower growth metrics"
500
+ - Instead of "engagement_rate variable" → "post engagement performance"
501
+ - Instead of "dataset analysis" → "performance review"
502
 
503
  Your goal remains to be a trusted partner, but now with powerful data analysis capabilities that enable deeper, more accurate insights for data-driven employer branding decisions.
504
  """).strip()
 
531
 
532
  async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
533
  """Generate response using PandasAI for data queries"""
534
+ if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
535
+ return "Data analysis not available - PandasAI not initialized.", False
536
 
537
  try:
 
538
  logging.info(f"Processing data query with PandasAI: {query[:100]}...")
539
+
540
+ # Use the first available dataframe for single-df queries
541
+ # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
542
+ if len(self.pandas_dfs) == 1:
543
+ df = list(self.pandas_dfs.values())[0]
544
+ pandas_response = df.chat(query)
545
+ else:
546
+ # For multiple dataframes, use pai.chat with all dfs
547
+ dfs = list(self.pandas_dfs.values())
548
+ pandas_response = pai.chat(query, *dfs)
549
 
550
  # Check if response is meaningful
551
  if pandas_response and str(pandas_response).strip():
 
821
 
822
  # --- Helper Functions for External Integration ---
823
  def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
824
+ rag_docs: Optional[pd.DataFrame] = None) -> EmployerBrandingAgent:
825
  """Factory function to create a new agent instance"""
826
  logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
827
+ return EmployerBrandingAgent(all_dataframes=dataframes, rag_documents_df=rag_docs)
828
 
829
+ async def initialize_agent_async(agent: EmployerBrandingAgent) -> bool:
830
  """Async helper to initialize an agent instance"""
831
  logging.info("Initializing agent via async helper function.")
832
  return await agent.initialize()