Update eb_agent_module.py

eb_agent_module.py  CHANGED  (+36 -48)
@@ -8,10 +8,8 @@ import textwrap
 from datetime import datetime
 from typing import Dict, List, Optional, Union, Any
 import traceback
-from pandasai import Agent, SmartDataframe
-from pandasai.llm import GoogleGemini
-from pandasai.responses.response_parser import ResponseParser
-from pandasai.middlewares.base import BaseMiddleware
+import pandasai as pai
+from pandasai_litellm import LiteLLM
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
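(Dependency note, not part of the diff: the new imports target the PandasAI v3 line and its LiteLLM wrapper. Assuming the PyPI distribution names match the import names, the Space's requirements would need the v3 pre-release of pandasai plus pandasai-litellm, e.g. pip install --pre pandasai pandasai-litellm.)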
@@ -362,56 +360,38 @@ class EnhancedEmployerBrandingAgent:
         logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
 
     def _initialize_pandas_agent(self):
-        """Initialize PandasAI agent"""
+        """Initialize PandasAI with enhanced configuration"""
         if not self.all_dataframes or not GEMINI_API_KEY:
             logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
             return
-
+
         try:
-            # Create SmartDataframe objects with descriptions
-            smart_dfs = []
+            # Configure LiteLLM with Gemini
+            llm = LiteLLM(model="gemini-2.5-flash-preview-05-20")
+
+            # Set PandasAI configuration
+            pai.config.set({
+                "llm": llm,
+                "temperature": 0.7,
+                "verbose": True,
+                "enable_cache": True
+            })
+
+            # Store dataframes for chat queries (we'll use them directly)
+            self.pandas_dfs = {}
             for name, df in self.all_dataframes.items():
-                # Create a SmartDataframe with name and description
+                # Convert to PandasAI DataFrame with description
                 df_description = self._generate_dataframe_description(name, df)
-
-                smart_df = SmartDataframe(
-                    df,
-                    name=name,
-                    description=df_description
-                )
-                smart_dfs.append(smart_df)
-
-            # Configure PandasAI with Gemini
-            pandas_llm = GoogleGemini(
-                api_token=GEMINI_API_KEY,
-                model=self.llm_model_name,
-                temperature=0.7,
-                top_p=0.95,
-                top_k=40,
-                max_output_tokens=4096
-            )
-
-            # Create agent with enhanced configuration
-            self.pandas_agent = Agent(
-                dfs=smart_dfs,
-                config={
-                    "llm": pandas_llm,
-                    "verbose": True,
-                    "enable_cache": True,
-                    "save_charts": True,
-                    "save_charts_path": "charts/",
-                    "custom_whitelisted_dependencies": ["matplotlib", "seaborn", "plotly"],
-                    "middlewares": [EmployerBrandingMiddleware()],
-                    "response_parser": ResponseParser,
-                    "max_retries": 3,
-                    "conversational": True
-                }
-            )
+                pandas_df = pai.DataFrame(df, description=df_description)
+                self.pandas_dfs[name] = pandas_df
 
-
+            self.pandas_agent = True  # Flag to indicate PandasAI is ready
+            logging.info(f"PandasAI initialized successfully with {len(self.pandas_dfs)} DataFrames")
 
         except Exception as e:
-            logging.error(f"Failed to initialize PandasAI agent: {e}", exc_info=True)
+            logging.error(f"Failed to initialize PandasAI: {e}", exc_info=True)
             self.pandas_agent = None
+            self.pandas_dfs = {}
 
     def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
         """Generate a descriptive summary for PandasAI to better understand the data"""
@@ -551,13 +531,21 @@ class EnhancedEmployerBrandingAgent:
 
     async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
         """Generate response using PandasAI for data queries"""
-        if not self.pandas_agent:
-            return "Data analysis not available - PandasAI agent not initialized.", False
+        if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
+            return "Data analysis not available - PandasAI not initialized.", False
 
         try:
-            # Use PandasAI to analyze the data
             logging.info(f"Processing data query with PandasAI: {query[:100]}...")
-            pandas_response = self.pandas_agent.chat(query)
+
+            # Use the first available dataframe for single-df queries
+            # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
+            if len(self.pandas_dfs) == 1:
+                df = list(self.pandas_dfs.values())[0]
+                pandas_response = df.chat(query)
+            else:
+                # For multiple dataframes, use pai.chat with all dfs
+                dfs = list(self.pandas_dfs.values())
+                pandas_response = pai.chat(query, *dfs)
 
             # Check if response is meaningful
             if pandas_response and str(pandas_response).strip():
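For reviewers unfamiliar with the PandasAI v3 API this commit moves to, here is a minimal, self-contained sketch of the same flow outside the agent class: configure the LLM once with pai.config.set, wrap plain pandas DataFrames in pai.DataFrame with a description, then ask questions via DataFrame.chat (single frame) or pai.chat (several frames). The sample data and variable names are illustrative only; the sketch assumes the pandasai v3 beta and pandasai-litellm packages are installed and that a Gemini key is available to LiteLLM through the environment.

import pandas as pd
import pandasai as pai
from pandasai_litellm import LiteLLM

# One global configuration replaces the per-Agent config dict used in v2.
pai.config.set({
    "llm": LiteLLM(model="gemini-2.5-flash-preview-05-20"),
    "verbose": True,
    "enable_cache": True,
})

# Wrap plain pandas DataFrames and give PandasAI natural-language descriptions.
followers = pai.DataFrame(
    pd.DataFrame({"month": ["Jan", "Feb", "Mar"], "followers": [1200, 1450, 1700]}),
    description="Monthly LinkedIn follower counts (illustrative sample data)",
)
posts = pai.DataFrame(
    pd.DataFrame({"month": ["Jan", "Feb", "Mar"], "posts": [8, 11, 9]}),
    description="Posts published per month (illustrative sample data)",
)

# Single-dataframe question: call .chat on the frame itself.
print(followers.chat("Which month had the largest follower growth?"))

# Multi-dataframe question: pass all frames to pai.chat, as _generate_pandas_response does.
print(pai.chat("Is follower growth related to posting volume?", followers, posts))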
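Net effect of the change: the v2-style stack (SmartDataframe objects, a GoogleGemini LLM, and an Agent carrying middleware, response-parser, chart-saving, and dependency-whitelist settings) is replaced by one global pai.config.set call plus a dict of pai.DataFrame objects kept in self.pandas_dfs. self.pandas_agent is now only a readiness flag, and _generate_pandas_response dispatches to DataFrame.chat for a single frame or pai.chat(query, *dfs) when several are loaded. Note that the EmployerBrandingMiddleware, ResponseParser, and chart-saving options from the old configuration are dropped rather than ported, and the Gemini model is now hard-coded to gemini-2.5-flash-preview-05-20 instead of following self.llm_model_name.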