Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Files Community

GuglielmoTor committed on May 26

Commit

76fa038

verified ·

1 Parent(s): 9605646

Update eb_agent_module.py

Browse files

Files changed (1) hide show

eb_agent_module.py +78 -40

eb_agent_module.py CHANGED Viewed

@@ -8,8 +8,10 @@ import textwrap
 from datetime import datetime
 from typing import Dict, List, Optional, Union, Any
 import traceback
-import pandasai as pai
-from pandasai_litellm import LiteLLM
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
@@ -102,6 +104,19 @@ else:
     if not GENAI_AVAILABLE:
         logging.warning("Google GenAI library not available.")
 # --- Utility function to get DataFrame schema representation ---
 def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
@@ -318,7 +333,7 @@ class AdvancedRAGSystem:
             logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
             return ""
-class EmployerBrandingAgent:
     def __init__(self,
                  all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
                  rag_documents_df: Optional[pd.DataFrame] = None,
@@ -347,38 +362,56 @@ class EmployerBrandingAgent:
         logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
     def _initialize_pandas_agent(self):
-        """Initialize PandasAI with enhanced configuration"""
         # Wraps every entry of `self.all_dataframes` in a `pai.DataFrame` and keeps
         # the wrappers in `self.pandas_dfs`. In this version `self.pandas_agent` is
         # only a readiness flag: True on success, None on failure.
         #
         # Early exit: when there are no dataframes or GEMINI_API_KEY is falsy, a
         # warning is logged and neither `self.pandas_agent` nor `self.pandas_dfs`
         # is assigned on this path.
         if not self.all_dataframes or not GEMINI_API_KEY:
             logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
             return
         try:
             # NOTE(review): GEMINI_API_KEY is checked above but is not passed to
             # `LiteLLM(...)` here - presumably it is read from the environment; confirm.
-            # Configure LiteLLM with Gemini
-            llm = LiteLLM(model="gemini-2.5-flash-preview-05-20")
             # `pai.config.set` is called on the pandasai module itself, not on
             # this agent instance.
-            # Set PandasAI configuration
-            pai.config.set({
-                "llm": llm,
-                "temperature": 0.7,
-                "verbose": True,
-                "enable_cache": True
-            })
-            # Store dataframes for chat queries (we'll use them directly)
-            self.pandas_dfs = {}
             for name, df in self.all_dataframes.items():
-                # Convert to PandasAI DataFrame with description
                 df_description = self._generate_dataframe_description(name, df)
-                pandas_df = pai.DataFrame(df, description=df_description)
-                self.pandas_dfs[name] = pandas_df
-            self.pandas_agent = True  # Flag to indicate PandasAI is ready
-            logging.info(f"PandasAI initialized successfully with {len(self.pandas_dfs)} DataFrames")
         # Any exception raised above is logged with its traceback and the
         # PandasAI state is reset, so the failure does not propagate to the caller.
         except Exception as e:
-            logging.error(f"Failed to initialize PandasAI: {e}", exc_info=True)
             self.pandas_agent = None
-            self.pandas_dfs = {}
     def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
         """Generate a descriptive summary for PandasAI to better understand the data"""
@@ -460,6 +493,13 @@ class EmployerBrandingAgent:
         2. **Business Context Translation**: Convert technical analysis results into HR-friendly insights
         3. **Actionable Recommendations**: Provide specific, implementable strategies based on data findings
         4. **Educational Guidance**: Help users understand both the data insights and the LinkedIn analytics concepts
         ## Communication Style:
         - **Natural and Conversational**: Maintain a warm, supportive tone as a helpful colleague
@@ -480,6 +520,12 @@ class EmployerBrandingAgent:
         3. **Business Impact**: What this means for employer branding strategy
         4. **Recommendations**: Specific, prioritized action items
         5. **Next Steps**: Follow-up suggestions or questions
         ## Key Behaviors:
         - **Data-Driven**: Always ground insights in actual data analysis when possible
@@ -518,21 +564,13 @@ class EmployerBrandingAgent:
     async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
         """Generate response using PandasAI for data queries"""
-        if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
-            return "Data analysis not available - PandasAI not initialized.", False
         try:
             logging.info(f"Processing data query with PandasAI: {query[:100]}...")
-            # Use the first available dataframe for single-df queries
-            # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
-            if len(self.pandas_dfs) == 1:
-                df = list(self.pandas_dfs.values())[0]
-                pandas_response = df.chat(query)
-            else:
-                # For multiple dataframes, use pai.chat with all dfs
-                dfs = list(self.pandas_dfs.values())
-                pandas_response = pai.chat(query, *dfs)
             # Check if response is meaningful
             if pandas_response and str(pandas_response).strip():
@@ -808,12 +846,12 @@ class EmployerBrandingAgent:
 # --- Helper Functions for External Integration ---
 def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
-                          rag_docs: Optional[pd.DataFrame] = None) -> EmployerBrandingAgent:
     """Factory function to create a new agent instance"""
     # Forwards `dataframes` as `all_dataframes` and `rag_docs` as
     # `rag_documents_df`; both default to None. The agent is only constructed
     # here - `initialize()` is not called.
     # NOTE(review): the log message below names "EnhancedEmployerBrandingAgent"
     # while this version instantiates `EmployerBrandingAgent`.
     logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
-    return EmployerBrandingAgent(all_dataframes=dataframes, rag_documents_df=rag_docs)
-async def initialize_agent_async(agent: EmployerBrandingAgent) -> bool:
     """Async helper to initialize an agent instance"""
     # Logs, then awaits `agent.initialize()` and returns its result unchanged.
     # Exceptions raised by `initialize()` are not caught here.
     logging.info("Initializing agent via async helper function.")
     return await agent.initialize()

 from datetime import datetime
 from typing import Dict, List, Optional, Union, Any
 import traceback
+from pandasai import Agent, SmartDataframe
+from pandasai.llm import GoogleGemini
+from pandasai.responses.response_parser import ResponseParser
+from pandasai.middlewares.base import BaseMiddleware
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
     if not GENAI_AVAILABLE:
         logging.warning("Google GenAI library not available.")
+# --- Custom PandasAI Middleware for Better Integration ---
+class EmployerBrandingMiddleware(BaseMiddleware):
+    """Custom middleware to enhance PandasAI responses with HR context"""
     # NOTE(review): confirm that `pandasai.middlewares.base` exports
     # `BaseMiddleware` in the pandasai version this Space pins.
+    def run(self, code: str, **kwargs) -> str:
+        """Add HR-friendly context to generated code"""
         # `code` is the generated source to wrap; `**kwargs` is accepted but not
         # used. The returned string starts with a newline, then two `#` header
         # lines, then `code` verbatim, then a trailing newline.
+        # Prepend a comment-only header; the code itself is inserted unchanged
+        enhanced_code = f"""
+# HR Analytics Query Processing
+# This code analyzes your LinkedIn employer branding data
+{code}
+"""
+        return enhanced_code
 # --- Utility function to get DataFrame schema representation ---
 def get_df_schema_representation(df: pd.DataFrame, df_name: str) -> str:
             logging.error(f"Error during RAG retrieval for query '{query[:50]}...': {e}", exc_info=True)
             return ""
+class EnhancedEmployerBrandingAgent:
     def __init__(self,
                  all_dataframes: Optional[Dict[str, pd.DataFrame]] = None,
                  rag_documents_df: Optional[pd.DataFrame] = None,
         logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
     def _initialize_pandas_agent(self):
+        """Initialize PandasAI Agent with enhanced configuration"""
         # Wraps every entry of `self.all_dataframes` in a `SmartDataframe`, builds a
         # `GoogleGemini` LLM and stores a PandasAI `Agent` over all of them in
         # `self.pandas_agent`. On any exception `self.pandas_agent` is set to None.
         #
         # Early exit: when there are no dataframes or GEMINI_API_KEY is falsy, a
         # warning is logged and `self.pandas_agent` is NOT assigned on this path.
         # NOTE(review): confirm `__init__` gives `self.pandas_agent` a default,
         # otherwise later `self.pandas_agent` reads may raise AttributeError.
         if not self.all_dataframes or not GEMINI_API_KEY:
             logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
             return
         try:
+            # Convert DataFrames to SmartDataframes with descriptive names
+            smart_dfs = []
             for name, df in self.all_dataframes.items():
+                # Add metadata to help PandasAI understand the data better
                 df_description = self._generate_dataframe_description(name, df)
+                smart_df = SmartDataframe(
+                    df,
+                    name=name,
+                    description=df_description
+                )
+                smart_dfs.append(smart_df)
             # NOTE(review): verify these keyword names (in particular `api_token`)
             # against the `GoogleGemini` constructor of the installed pandasai; a
             # mismatch would raise inside this `try` and only surface as the
             # "Failed to initialize" log entry below.
+            # Configure PandasAI with Gemini
+            pandas_llm = GoogleGemini(
+                api_token=GEMINI_API_KEY,
+                model=self.llm_model_name,
+                temperature=0.7,
+                top_p=0.95,
+                top_k=40,
+                max_output_tokens=4096
+            )
             # `response_parser` receives the `ResponseParser` class itself, while
             # `middlewares` receives an `EmployerBrandingMiddleware` instance.
+            # Create agent with enhanced configuration
+            self.pandas_agent = Agent(
+                dfs=smart_dfs,
+                config={
+                    "llm": pandas_llm,
+                    "verbose": True,
+                    "enable_cache": True,
+                    "save_charts": True,
+                    "save_charts_path": "charts/",
+                    "custom_whitelisted_dependencies": ["matplotlib", "seaborn", "plotly"],
+                    "middlewares": [EmployerBrandingMiddleware()],
+                    "response_parser": ResponseParser,
+                    "max_retries": 3,
+                    "conversational": True
+                }
+            )
+            logging.info(f"PandasAI agent initialized successfully with {len(smart_dfs)} DataFrames")
         # Any exception raised above is logged with its traceback and the agent
         # is disabled, so the failure does not propagate to the caller.
         except Exception as e:
+            logging.error(f"Failed to initialize PandasAI agent: {e}", exc_info=True)
             self.pandas_agent = None
     def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
         """Generate a descriptive summary for PandasAI to better understand the data"""
         2. **Business Context Translation**: Convert technical analysis results into HR-friendly insights
         3. **Actionable Recommendations**: Provide specific, implementable strategies based on data findings
         4. **Educational Guidance**: Help users understand both the data insights and the LinkedIn analytics concepts
+        ## CRITICAL COMMUNICATION RULES:
+        - **NEVER show code, technical commands, or programming syntax**
+        - **NEVER mention dataset names, column names, or technical data structure details**
+        - **NEVER reference DataFrames, schemas, or database terminology**
+        - **Always speak in business terms**: refer to "your LinkedIn data", "follower metrics", "engagement data", etc.
+        - **Focus on insights, not methods**: explain what the data shows, not how it was analyzed
         ## Communication Style:
         - **Natural and Conversational**: Maintain a warm, supportive tone as a helpful colleague
         3. **Business Impact**: What this means for employer branding strategy
         4. **Recommendations**: Specific, prioritized action items
         5. **Next Steps**: Follow-up suggestions or questions
+        ## Example Language Patterns:
+        - Instead of "DataFrame shows" → "Your LinkedIn data reveals"
+        - Instead of "follower_count column" → "follower growth metrics"
+        - Instead of "engagement_rate variable" → "post engagement performance"
+        - Instead of "dataset analysis" → "performance review"
         ## Key Behaviors:
         - **Data-Driven**: Always ground insights in actual data analysis when possible
     async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
         """Generate response using PandasAI for data queries"""
+        if not self.pandas_agent:
+            return "Data analysis not available - PandasAI agent not initialized.", False
         try:
+            # Use PandasAI to analyze the data
             logging.info(f"Processing data query with PandasAI: {query[:100]}...")
+            pandas_response = self.pandas_agent.chat(query)
             # Check if response is meaningful
             if pandas_response and str(pandas_response).strip():
 # --- Helper Functions for External Integration ---
 def create_agent_instance(dataframes: Optional[Dict[str, pd.DataFrame]] = None,
+                          rag_docs: Optional[pd.DataFrame] = None) -> EnhancedEmployerBrandingAgent:
     """Factory function to create a new agent instance"""
     # Forwards `dataframes` as `all_dataframes` and `rag_docs` as
     # `rag_documents_df`; both default to None. The agent is only constructed
     # here - `initialize()` is not called.
     logging.info("Creating new EnhancedEmployerBrandingAgent instance via helper function.")
+    return EnhancedEmployerBrandingAgent(all_dataframes=dataframes, rag_documents_df=rag_docs)
+async def initialize_agent_async(agent: EnhancedEmployerBrandingAgent) -> bool:
     """Async helper to initialize an agent instance"""
     # Logs, then awaits `agent.initialize()` and returns its result unchanged.
     # Exceptions raised by `initialize()` are not caught here.
     logging.info("Initializing agent via async helper function.")
     return await agent.initialize()