Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Community

GuglielmoTor committed on May 26

Commit

205df41

verified ·

1 Parent(s): 76fa038

Update eb_agent_module.py

Browse files

Files changed (1) hide show

eb_agent_module.py +46 -71

eb_agent_module.py CHANGED Viewed

@@ -8,10 +8,8 @@ import textwrap
 from datetime import datetime
 from typing import Dict, List, Optional, Union, Any
 import traceback
-from pandasai import Agent, SmartDataframe
-from pandasai.llm import GoogleGemini
-from pandasai.responses.response_parser import ResponseParser
-from pandasai.middlewares.base import BaseMiddleware
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
@@ -104,19 +102,6 @@ else:
     if not GENAI_AVAILABLE:
         logging.warning("Google GenAI library not available.")
-# --- Custom PandasAI Middleware for Better Integration ---
-class EmployerBrandingMiddleware(BaseMiddleware):
-    """Custom middleware to enhance PandasAI responses with HR context"""
-    def run(self, code: str, **kwargs) -> str:
-        """Add HR-friendly context to generated code"""
-        # Add comments to make code more understandable
-        enhanced_code = f"""
-# HR Analytics Query Processing
-# This code analyzes your LinkedIn employer branding data
-{code}
-"""
-        return enhanced_code
 # --- Utility function to get DataFrame schema representation ---
 def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
@@ -333,7 +318,7 @@ class AdvancedRAGSystem:
             logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
             return ""
-class EnhancedEmployerBrandingAgent:
     def __init__(self,
                  all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
                  rag_documents_df: Optional[pd.DataFrame] = None,
@@ -362,56 +347,38 @@ class EnhancedEmployerBrandingAgent:
         logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
     def _initialize_pandas_agent(self):
-        """Initialize PandasAI Agent with enhanced configuration"""
         if not self.all_dataframes or not GEMINI_API_KEY:
             logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
             return
         try:
-            # Convert DataFrames to SmartDataframes with descriptive names
-            smart_dfs = []
             for name, df in self.all_dataframes.items():
-                # Add metadata to help PandasAI understand the data better
                 df_description = self._generate_dataframe_description(name, df)
-                smart_df = SmartDataframe(
-                    df,
-                    name=name,
-                    description=df_description
-                )
-                smart_dfs.append(smart_df)
-            # Configure PandasAI with Gemini
-            pandas_llm = GoogleGemini(
-                api_token=GEMINI_API_KEY,
-                model=self.llm_model_name,
-                temperature=0.7,
-                top_p=0.95,
-                top_k=40,
-                max_output_tokens=4096
-            )
-            # Create agent with enhanced configuration
-            self.pandas_agent = Agent(
-                dfs=smart_dfs,
-                config={
-                    "llm": pandas_llm,
-                    "verbose": True,
-                    "enable_cache": True,
-                    "save_charts": True,
-                    "save_charts_path": "charts/",
-                    "custom_whitelisted_dependencies": ["matplotlib", "seaborn", "plotly"],
-                    "middlewares": [EmployerBrandingMiddleware()],
-                    "response_parser": ResponseParser,
-                    "max_retries": 3,
-                    "conversational": True
-                }
-            )
-            logging.info(f"PandasAI agent initialized successfully with {len(smart_dfs)} DataFrames")
         except Exception as e:
-            logging.error(f"Failed to initialize PandasAI agent: {e}", exc_info=True)
             self.pandas_agent = None
     def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
         """Generate a descriptive summary for PandasAI to better understand the data"""
@@ -520,18 +487,18 @@ class EnhancedEmployerBrandingAgent:
         3. **Business Impact**: What this means for employer branding strategy
         4. **Recommendations**: Specific, prioritized action items
         5. **Next Steps**: Follow-up suggestions or questions
-        ## Example Language Patterns:
-        - Instead of "DataFrame shows" → "Your LinkedIn data reveals"
-        - Instead of "follower_count column" → "follower growth metrics"
-        - Instead of "engagement_rate variable" → "post engagement performance"
-        - Instead of "dataset analysis" → "performance review"
         ## Key Behaviors:
         - **Data-Driven**: Always ground insights in actual data analysis when possible
         - **Visual When Helpful**: Suggest or create charts that make data more understandable
         - **Proactive**: Identify related insights the user might find valuable
         - **Honest About Limitations**: Clearly state when data doesn't support certain analyses
         Your goal remains to be a trusted partner, but now with powerful data analysis capabilities that enable deeper, more accurate insights for data-driven employer branding decisions.
         """).strip()
@@ -564,13 +531,21 @@ class EnhancedEmployerBrandingAgent:
     async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
         """Generate response using PandasAI for data queries"""
-        if not self.pandas_agent:
-            return "Data analysis not available - PandasAI agent not initialized.", False
         try:
-            # Use PandasAI to analyze the data
             logging.info(f"Processing data query with PandasAI: {query[:100]}...")
-            pandas_response = self.pandas_agent.chat(query)
             # Check if response is meaningful
             if pandas_response and str(pandas_response).strip():
@@ -846,12 +821,12 @@ class EnhancedEmployerBrandingAgent:
 # --- Helper Functions for External Integration ---
 def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
-                          rag_docs: Optional[pd.DataFrame] = None) -> EnhancedEmployerBrandingAgent:
     """Factory function to create a new agent instance"""
     logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
-    return EnhancedEmployerBrandingAgent(all_dataframes=dataframes, rag_documents_df=rag_docs)
-async def initialize_agent_async(agent: EnhancedEmployerBrandingAgent) -> bool:
     """Async helper to initialize an agent instance"""
     logging.info("Initializing agent via async helper function.")
     return await agent.initialize()

 from datetime import datetime
 from typing import Dict, List, Optional, Union, Any
 import traceback
+import pandasai as pai
+from pandasai_litellm import LiteLLM
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
     if not GENAI_AVAILABLE:
         logging.warning("Google GenAI library not available.")
 # --- Utility function to get DataFrame schema representation ---
 def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
             logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
             return ""
+class EmployerBrandingAgent:
     def __init__(self,
                  all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
                  rag_documents_df: Optional[pd.DataFrame] = None,
         logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
     def _initialize_pandas_agent(self):
+        """Initialize PandasAI with enhanced configuration"""
         if not self.all_dataframes or not GEMINI_API_KEY:
             logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
             return
         try:
+            # Configure LiteLLM with Gemini
+            llm = LiteLLM(model="gemini-2.5-flash-preview-05-20")
+            # Set PandasAI configuration
+            pai.config.set({
+                "llm": llm,
+                "temperature": 0.7,
+                "verbose": True,
+                "enable_cache": True
+            })
+            # Store dataframes for chat queries (we'll use them directly)
+            self.pandas_dfs = {}
             for name, df in self.all_dataframes.items():
+                # Convert to PandasAI DataFrame with description
                 df_description = self._generate_dataframe_description(name, df)
+                pandas_df = pai.DataFrame(df, description=df_description)
+                self.pandas_dfs[name] = pandas_df
+            self.pandas_agent = True  # Flag to indicate PandasAI is ready
+            logging.info(f"PandasAI initialized successfully with {len(self.pandas_dfs)} DataFrames")
         except Exception as e:
+            logging.error(f"Failed to initialize PandasAI: {e}", exc_info=True)
             self.pandas_agent = None
+            self.pandas_dfs = {}
     def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
         """Generate a descriptive summary for PandasAI to better understand the data"""
         3. **Business Impact**: What this means for employer branding strategy
         4. **Recommendations**: Specific, prioritized action items
         5. **Next Steps**: Follow-up suggestions or questions
         ## Key Behaviors:
         - **Data-Driven**: Always ground insights in actual data analysis when possible
         - **Visual When Helpful**: Suggest or create charts that make data more understandable
         - **Proactive**: Identify related insights the user might find valuable
         - **Honest About Limitations**: Clearly state when data doesn't support certain analyses
+        ## Example Language Patterns:
+        - Instead of "DataFrame shows" → "Your LinkedIn data reveals"
+        - Instead of "follower_count column" → "follower growth metrics"
+        - Instead of "engagement_rate variable" → "post engagement performance"
+        - Instead of "dataset analysis" → "performance review"
         Your goal remains to be a trusted partner, but now with powerful data analysis capabilities that enable deeper, more accurate insights for data-driven employer branding decisions.
         """).strip()
     async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
         """Generate response using PandasAI for data queries"""
+        if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
+            return "Data analysis not available - PandasAI not initialized.", False
         try:
             logging.info(f"Processing data query with PandasAI: {query[:100]}...")
+            # Use the first available dataframe for single-df queries
+            # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
+            if len(self.pandas_dfs) == 1:
+                df = list(self.pandas_dfs.values())[0]
+                pandas_response = df.chat(query)
+            else:
+                # For multiple dataframes, use pai.chat with all dfs
+                dfs = list(self.pandas_dfs.values())
+                pandas_response = pai.chat(query, *dfs)
             # Check if response is meaningful
             if pandas_response and str(pandas_response).strip():
 # --- Helper Functions for External Integration ---
 def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
+                          rag_docs: Optional[pd.DataFrame] = None) -> EmployerBrandingAgent:
     """Factory function to create a new agent instance"""
     logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
+    return EmployerBrandingAgent(all_dataframes=dataframes, rag_documents_df=rag_docs)
+async def initialize_agent_async(agent: EmployerBrandingAgent) -> bool:
     """Async helper to initialize an agent instance"""
     logging.info("Initializing agent via async helper function.")
     return await agent.initialize()