Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Files Community

GuglielmoTor committed on May 26

Commit

11fceba

verified ·

1 Parent(s): 2b8fb33

Update eb_agent_module.py

Browse files

Files changed (1) hide show

eb_agent_module.py +66 -70

eb_agent_module.py CHANGED Viewed

@@ -580,78 +580,74 @@ class EmployerBrandingAgent:
         else:
             return "general"
-    async def _generate_pandas_response(self, query: str) -> Tuple[Optional[str], Optional[str], bool]:
-        """
-        Generate response using PandasAI for data queries.
-
-        Args:
-            query: The user's question; it is passed unchanged to ``pai.chat``
-                together with every DataFrame held in ``self.pandas_dfs``.
-
-        Returns: (textual_output_from_pandas_ai, chart_file_path, success_flag)
-            When the tool is not initialized, no data is loaded, or any
-            exception occurs, the text is a user-facing message, the chart
-            path is None and the flag is False. Exceptions are logged here
-            and never propagated to the caller.
-        """
-        if not self.pandas_agent or not self.pandas_dfs: # Check if pandas_dfs is populated
-            return "Data analysis tool (PandasAI) is not initialized or no data is loaded.", None, False
-        latest_chart_path: Optional[str] = None
-        textual_pandas_response: Optional[str] = None
-        pandas_response_raw: Any = None
-        try:
-            logging.info(f"Processing data query with PandasAI: '{query[:100]}...' using {len(self.pandas_dfs)} DataFrame(s).")
-            # PandasAI's `chat` method can take multiple DataFrames.
-            # The `pai.DataFrame` objects are already stored in self.pandas_dfs.
-            # We pass the original pandas.DataFrame objects wrapped in pai.DataFrame to the agent.
-            # The `chat` method is called on these pai.DataFrame objects or globally with `pai.chat`.
-            dfs_to_query = list(self.pandas_dfs.values()) # List of pai.DataFrame objects
-            if not dfs_to_query:
-                 return "No dataframes available for PandasAI to query.", None, False
-            # Use pai.chat() for potentially multiple dataframes
-            # The *dfs_to_query unpacks the list of pai.DataFrame objects
-            # The call runs in a worker thread so the event loop is not blocked while it executes.
-            pandas_response_raw = await asyncio.to_thread(pai.chat, query, *dfs_to_query)
-            # Check if a chart was generated and saved by PandasAI
-            # PandasAI should save charts to the path specified in its config.
-            charts_dir = pai.config.save_charts_path # Get configured path
-            if charts_dir and os.path.exists(charts_dir):
-                # Look for the most recently created chart file
-                # NOTE(review): the newest file in the directory is taken without checking
-                # that it was written by this query, so a chart left by an earlier query
-                # would also be picked up here.
-                chart_files = [os.path.join(charts_dir, f) for f in os.listdir(charts_dir) if f.endswith(('.png', '.jpg', '.jpeg', '.svg'))]
                 if chart_files:
-                    chart_files.sort(key=os.path.getmtime, reverse=True)
-                    latest_chart_path = chart_files[0] # Full path
-                    logging.info(f"Chart detected/generated by PandasAI: {latest_chart_path}")
-            # Determine the textual part of the PandasAI response
-            if isinstance(pandas_response_raw, str):
-                # If the raw response IS the chart path, then text is minimal.
-                # This can happen if PandasAI's primary output for a query is a chart.
-                if latest_chart_path and pandas_response_raw == latest_chart_path:
-                    textual_pandas_response = "A chart was generated to answer your query."
-                else:
-                    textual_pandas_response = pandas_response_raw.strip()
-            elif isinstance(pandas_response_raw, (int, float, bool)):
-                textual_pandas_response = str(pandas_response_raw)
-            elif isinstance(pandas_response_raw, pd.DataFrame):
-                # If PandasAI returns a DataFrame, summarize it.
-                # Avoid sending overly long strings to the LLM.
-                textual_pandas_response = f"PandasAI returned a data table with {len(pandas_response_raw)} rows. Here are the first few entries:\n{pandas_response_raw.head(3).to_string()}"
-            elif pandas_response_raw is not None: # Other types
-                textual_pandas_response = str(pandas_response_raw).strip()
-            # If no textual response formed yet, but a chart exists, make a note.
-            if not textual_pandas_response and latest_chart_path:
-                textual_pandas_response = "A chart was generated as a result of the analysis."
-            elif textual_pandas_response is None and latest_chart_path is None: # No output at all
-                 textual_pandas_response = "PandasAI processed the query but did not return a specific textual result or chart."
-            return textual_pandas_response, latest_chart_path, True
-        except Exception as e:
-            # Broad catch on purpose: any tool failure is turned into a message for the user.
-            logging.error(f"Error during PandasAI processing for query '{query[:100]}...': {e}", exc_info=True)
-            # Try to provide a more user-friendly error message if possible
-            error_msg = f"I encountered an issue while analyzing the data with the data tool: {type(e).__name__}."
-            if "duckdb" in str(e).lower() and "binder error" in str(e).lower():
-                error_msg += " This might be due to a mismatch in data types or an unsupported operation on the data."
-            return error_msg, None, False
     async def _generate_enhanced_response(self, query: str, pandas_result: str = "", query_type: str = "general") -> str:
         """Generate enhanced response combining PandasAI results with RAG context"""

         else:
             return "general"
+    async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
+        """Generate a response to a data query using PandasAI.
+
+        Args:
+            query: Natural-language question to run against the DataFrames
+                stored in ``self.pandas_dfs``.
+
+        Returns:
+            A ``(response_text, success)`` pair. On success the text is the
+            PandasAI answer, followed by a "Chart Generated" note when a chart
+            file was produced. On failure the text is a user-facing message
+            and ``success`` is False. Exceptions are logged and never raised,
+            so callers can always unpack the pair.
+        """
+        if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
+            return "Data analysis not available - PandasAI not initialized.", False
+        # An empty mapping would otherwise reach pai.chat() with no DataFrames at all.
+        if not self.pandas_dfs:
+            return "Data analysis not available - no data is loaded.", False
+        try:
+            # Imported inside the try so a missing plotting backend is reported
+            # through the same error path as any other failure.
+            import time
+            import matplotlib.pyplot as plt
+            logging.info(f"Processing data query with PandasAI: {query[:100]}...")
+            # Close figures left over from earlier queries to avoid conflicts.
+            plt.close('all')
+            dfs = list(self.pandas_dfs.values())
+            # Remember when the query started so that only chart files written
+            # during this query are reported further down.
+            query_started = time.time()
+            # The chat calls are synchronous; run them in a worker thread so the
+            # event loop is not blocked while PandasAI works.
+            if len(dfs) == 1:
+                df = dfs[0]
+                # NOTE(review): the wrapper may or may not expose `.df`; fall back to
+                # the object itself so a logging line can never fail the query.
+                shape = getattr(getattr(df, 'df', df), 'shape', 'unknown')
+                logging.info(f"Using single DataFrame for query with shape: {shape}")
+                pandas_response = await asyncio.to_thread(df.chat, query)
+            else:
+                # For multiple dataframes, use pai.chat with all dfs
+                pandas_response = await asyncio.to_thread(pai.chat, query, *dfs)
+            # Handle different response types
+            chart_extensions = ('.png', '.jpg', '.jpeg', '.svg')
+            response_text = ""
+            chart_info = ""
+            plot_path = getattr(pandas_response, 'plot_path', None)
+            if isinstance(pandas_response, str) and pandas_response.endswith(chart_extensions):
+                # Response is a chart path
+                chart_info = f"\n\n📊 **Chart Generated**: {os.path.basename(pandas_response)}\nChart saved at: {pandas_response}"
+                response_text = "Analysis completed with visualization"
+                logging.info(f"Chart generated: {pandas_response}")
+            elif plot_path:
+                # Response object has plot path
+                chart_info = f"\n\n📊 **Chart Generated**: {os.path.basename(plot_path)}\nChart saved at: {plot_path}"
+                response_text = getattr(pandas_response, 'text', str(pandas_response))
+                logging.info(f"Chart generated: {plot_path}")
+            else:
+                # Check for any new chart files in the charts directory
+                if os.path.exists(self.charts_dir):
+                    chart_paths = [
+                        os.path.join(self.charts_dir, f)
+                        for f in os.listdir(self.charts_dir)
+                        if f.endswith(chart_extensions)
+                    ]
+                    if chart_paths:
+                        chart_path = max(chart_paths, key=os.path.getmtime)
+                        # One second of slack absorbs coarse file-system timestamps;
+                        # anything older was written by an earlier query.
+                        if os.path.getmtime(chart_path) >= query_started - 1.0:
+                            chart_info = f"\n\n📊 **Chart Generated**: {os.path.basename(chart_path)}\nChart saved at: {chart_path}"
+                            logging.info(f"Chart generated: {chart_path}")
+                # Handle text response. Compare against None rather than relying on
+                # truthiness: a DataFrame result has no truth value and a numeric
+                # answer of 0 is still an answer.
+                if pandas_response is not None and str(pandas_response).strip():
+                    response_text = str(pandas_response).strip()
+                else:
+                    response_text = "Analysis completed"
+            final_response = response_text + chart_info
+            return final_response, True
+        except Exception as e:
+            logging.error(f"Error in PandasAI processing: {e}", exc_info=True)
+            # Try to provide a more helpful error message
+            if "Invalid output" in str(e) and "plot save path" in str(e):
+                return "I tried to create a visualization but encountered a formatting issue. Please try rephrasing your request or ask for specific data without requesting a chart.", False
+            # Every other failure must still produce the (text, success) pair;
+            # falling off the end here would hand None to the caller.
+            return f"I encountered an issue while analyzing the data: {type(e).__name__}.", False
     async def _generate_enhanced_response(self, query: str, pandas_result: str = "", query_type: str = "general") -> str:
         """Generate enhanced response combining PandasAI results with RAG context"""