GuglielmoTor committed on
Commit
0533496
·
verified ·
1 Parent(s): 45dda21

Update eb_agent_module.py

Browse files
Files changed (1) hide show
  1. eb_agent_module.py +36 -48
eb_agent_module.py CHANGED
@@ -8,10 +8,8 @@ import textwrap
8
  from datetime import datetime
9
  from typing import Dict, List, Optional, Union, Any
10
  import traceback
11
- from pandasai import Agent, SmartDataframe
12
- from pandasai.llm import GoogleGemini
13
- from pandasai.responses.response_parser import ResponseParser
14
- from pandasai.middlewares.base import BaseMiddleware
15
 
16
  # Configure logging
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
@@ -362,56 +360,38 @@ class EnhancedEmployerBrandingAgent:
362
  logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
363
 
364
  def _initialize_pandas_agent(self):
365
- """Initialize PandasAI Agent with enhanced configuration"""
366
  if not self.all_dataframes or not GEMINI_API_KEY:
367
  logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
368
  return
369
-
370
  try:
371
- # Convert DataFrames to SmartDataframes with descriptive names
372
- smart_dfs = []
 
 
 
 
 
 
 
 
 
 
 
373
  for name, df in self.all_dataframes.items():
374
- # Add metadata to help PandasAI understand the data better
375
  df_description = self._generate_dataframe_description(name, df)
376
- smart_df = SmartDataframe(
377
- df,
378
- name=name,
379
- description=df_description
380
- )
381
- smart_dfs.append(smart_df)
382
-
383
- # Configure PandasAI with Gemini
384
- pandas_llm = GoogleGemini(
385
- api_token=GEMINI_API_KEY,
386
- model=self.llm_model_name,
387
- temperature=0.7,
388
- top_p=0.95,
389
- top_k=40,
390
- max_output_tokens=4096
391
- )
392
-
393
- # Create agent with enhanced configuration
394
- self.pandas_agent = Agent(
395
- dfs=smart_dfs,
396
- config={
397
- "llm": pandas_llm,
398
- "verbose": True,
399
- "enable_cache": True,
400
- "save_charts": True,
401
- "save_charts_path": "charts/",
402
- "custom_whitelisted_dependencies": ["matplotlib", "seaborn", "plotly"],
403
- "middlewares": [EmployerBrandingMiddleware()],
404
- "response_parser": ResponseParser,
405
- "max_retries": 3,
406
- "conversational": True
407
- }
408
- )
409
 
410
- logging.info(f"PandasAI agent initialized successfully with {len(smart_dfs)} DataFrames")
 
411
 
412
  except Exception as e:
413
- logging.error(f"Failed to initialize PandasAI agent: {e}", exc_info=True)
414
  self.pandas_agent = None
 
415
 
416
  def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
417
  """Generate a descriptive summary for PandasAI to better understand the data"""
@@ -551,13 +531,21 @@ class EnhancedEmployerBrandingAgent:
551
 
552
  async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
553
  """Generate response using PandasAI for data queries"""
554
- if not self.pandas_agent:
555
- return "Data analysis not available - PandasAI agent not initialized.", False
556
 
557
  try:
558
- # Use PandasAI to analyze the data
559
  logging.info(f"Processing data query with PandasAI: {query[:100]}...")
560
- pandas_response = self.pandas_agent.chat(query)
 
 
 
 
 
 
 
 
 
561
 
562
  # Check if response is meaningful
563
  if pandas_response and str(pandas_response).strip():
 
8
  from datetime import datetime
9
  from typing import Dict, List, Optional, Union, Any
10
  import traceback
11
+ import pandasai as pai
12
+ from pandasai_litellm import LiteLLM
 
 
13
 
14
  # Configure logging
15
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
 
360
  logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
361
 
362
  def _initialize_pandas_agent(self):
363
+ """Initialize PandasAI with enhanced configuration"""
364
  if not self.all_dataframes or not GEMINI_API_KEY:
365
  logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
366
  return
367
+
368
  try:
369
+ # Configure LiteLLM with Gemini
370
+ llm = LiteLLM(model="gemini-2.5-flash-preview-05-20")
371
+
372
+ # Set PandasAI configuration
373
+ pai.config.set({
374
+ "llm": llm,
375
+ "temperature": 0.7,
376
+ "verbose": True,
377
+ "enable_cache": True
378
+ })
379
+
380
+ # Store dataframes for chat queries (we'll use them directly)
381
+ self.pandas_dfs = {}
382
  for name, df in self.all_dataframes.items():
383
+ # Convert to PandasAI DataFrame with description
384
  df_description = self._generate_dataframe_description(name, df)
385
+ pandas_df = pai.DataFrame(df, description=df_description)
386
+ self.pandas_dfs[name] = pandas_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
+ self.pandas_agent = True # Flag to indicate PandasAI is ready
389
+ logging.info(f"PandasAI initialized successfully with {len(self.pandas_dfs)} DataFrames")
390
 
391
  except Exception as e:
392
+ logging.error(f"Failed to initialize PandasAI: {e}", exc_info=True)
393
  self.pandas_agent = None
394
+ self.pandas_dfs = {}
395
 
396
  def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
397
  """Generate a descriptive summary for PandasAI to better understand the data"""
 
531
 
532
  async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
533
  """Generate response using PandasAI for data queries"""
534
+ if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
535
+ return "Data analysis not available - PandasAI not initialized.", False
536
 
537
  try:
 
538
  logging.info(f"Processing data query with PandasAI: {query[:100]}...")
539
+
540
+ # Use the first available dataframe for single-df queries
541
+ # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
542
+ if len(self.pandas_dfs) == 1:
543
+ df = list(self.pandas_dfs.values())[0]
544
+ pandas_response = df.chat(query)
545
+ else:
546
+ # For multiple dataframes, use pai.chat with all dfs
547
+ dfs = list(self.pandas_dfs.values())
548
+ pandas_response = pai.chat(query, *dfs)
549
 
550
  # Check if response is meaningful
551
  if pandas_response and str(pandas_response).strip():