GuglielmoTor committed on
Commit
11fceba
·
verified ·
1 Parent(s): 2b8fb33

Update eb_agent_module.py

Browse files
Files changed (1) hide show
  1. eb_agent_module.py +66 -70
eb_agent_module.py CHANGED
@@ -580,78 +580,74 @@ class EmployerBrandingAgent:
580
  else:
581
  return "general"
582
 
583
async def _generate_pandas_response(self, query: str) -> Tuple[Optional[str], Optional[str], bool]:
    """
    Generate response using PandasAI for data queries.

    The query is sent to ``pai.chat`` together with every frame stored in
    ``self.pandas_dfs``; the call runs in a worker thread via
    ``asyncio.to_thread``. The charts directory configured on
    ``pai.config.save_charts_path`` is snapshotted before and after the call,
    so only a chart file that was created or modified by *this* query is
    reported (a leftover chart from an earlier query is ignored).

    Args:
        query: Natural-language data question forwarded to PandasAI.

    Returns: (textual_output_from_pandas_ai, chart_file_path, success_flag)
        ``chart_file_path`` is ``None`` when no new chart was detected.
        On failure the first element is a user-facing error message and the
        flag is ``False``; exceptions are logged, never propagated.
    """
    if not self.pandas_agent or not self.pandas_dfs:  # Check if pandas_dfs is populated
        return "Data analysis tool (PandasAI) is not initialized or no data is loaded.", None, False

    chart_extensions = ('.png', '.jpg', '.jpeg', '.svg')

    def _snapshot_charts(directory: Optional[str]) -> dict:
        """Map every chart file found in *directory* to its modification time."""
        if not directory or not os.path.isdir(directory):
            return {}
        snapshot = {}
        for name in os.listdir(directory):
            if not name.endswith(chart_extensions):
                continue
            path = os.path.join(directory, name)
            try:
                snapshot[path] = os.path.getmtime(path)
            except OSError:
                # File vanished between listdir() and getmtime(); ignore it.
                continue
        return snapshot

    latest_chart_path: Optional[str] = None
    textual_pandas_response: Optional[str] = None
    pandas_response_raw: Any = None

    try:
        logging.info(f"Processing data query with PandasAI: '{query[:100]}...' using {len(self.pandas_dfs)} DataFrame(s).")

        # The guard clause above guarantees this list is non-empty.
        dfs_to_query = list(self.pandas_dfs.values())

        # Record which charts already exist so stale files are not mistaken
        # for output of this query.
        charts_dir = pai.config.save_charts_path
        charts_before = _snapshot_charts(charts_dir)

        # pai.chat accepts several frames; run it off the event loop thread.
        pandas_response_raw = await asyncio.to_thread(pai.chat, query, *dfs_to_query)

        charts_after = _snapshot_charts(charts_dir)
        fresh_charts = [
            path for path, mtime in charts_after.items()
            if charts_before.get(path) != mtime  # new file, or overwritten in place
        ]
        if fresh_charts:
            latest_chart_path = max(fresh_charts, key=charts_after.__getitem__)  # Full path
            logging.info(f"Chart detected/generated by PandasAI: {latest_chart_path}")

        # Determine the textual part of the PandasAI response
        if isinstance(pandas_response_raw, str):
            # If the raw response IS the chart path, then text is minimal.
            if latest_chart_path and pandas_response_raw == latest_chart_path:
                textual_pandas_response = "A chart was generated to answer your query."
            else:
                textual_pandas_response = pandas_response_raw.strip()
        elif isinstance(pandas_response_raw, (int, float, bool)):
            textual_pandas_response = str(pandas_response_raw)
        elif isinstance(pandas_response_raw, pd.DataFrame):
            # Summarize tables instead of sending overly long strings to the LLM.
            textual_pandas_response = f"PandasAI returned a data table with {len(pandas_response_raw)} rows. Here are the first few entries:\n{pandas_response_raw.head(3).to_string()}"
        elif pandas_response_raw is not None:  # Other types
            textual_pandas_response = str(pandas_response_raw).strip()

        # If no textual response formed yet, but a chart exists, make a note.
        if not textual_pandas_response and latest_chart_path:
            textual_pandas_response = "A chart was generated as a result of the analysis."
        elif textual_pandas_response is None and latest_chart_path is None:  # No output at all
            textual_pandas_response = "PandasAI processed the query but did not return a specific textual result or chart."

        return textual_pandas_response, latest_chart_path, True

    except Exception as e:
        logging.error(f"Error during PandasAI processing for query '{query[:100]}...': {e}", exc_info=True)
        # Try to provide a more user-friendly error message if possible
        error_msg = f"I encountered an issue while analyzing the data with the data tool: {type(e).__name__}."
        error_text = str(e).lower()
        if "duckdb" in error_text and "binder error" in error_text:
            error_msg += " This might be due to a mismatch in data types or an unsupported operation on the data."
        return error_msg, None, False
 
 
655
 
656
  async def _generate_enhanced_response(self, query: str, pandas_result: str = "", query_type: str = "general") -> str:
657
  """Generate enhanced response combining PandasAI results with RAG context"""
 
580
  else:
581
  return "general"
582
 
583
async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
    """Generate response using PandasAI for data queries.

    With exactly one frame in ``self.pandas_dfs`` the query goes to that
    frame's ``chat`` method; otherwise all frames are passed to ``pai.chat``.
    If the response is (or carries) a chart path, or a chart file in
    ``self.charts_dir`` was modified within the last 30 seconds, a
    "Chart Generated" note is appended to the returned text.

    Args:
        query: Natural-language data question forwarded to PandasAI.

    Returns:
        ``(response_text, success_flag)``. Every failure path returns a
        user-facing message with ``False``; exceptions are logged, never
        propagated, and the coroutine never returns ``None``.
    """
    # An empty frame mapping is as unusable as a missing one.
    if not self.pandas_agent or not getattr(self, 'pandas_dfs', None):
        return "Data analysis not available - PandasAI not initialized.", False

    import time

    chart_extensions = ('.png', '.jpg', '.jpeg', '.svg')
    recent_chart_window_s = 30  # charts newer than this are attributed to the current query

    try:
        logging.info(f"Processing data query with PandasAI: {query[:100]}...")

        # Clear any existing matplotlib figures to avoid conflicts. This is
        # best-effort cleanup: a missing matplotlib must not abort the query.
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            plt = None
        if plt is not None:
            plt.close('all')  # closing every figure also discards the current one

        # NOTE(review): chat() is called directly inside this coroutine; if it
        # blocks, the event loop is blocked for the duration - confirm intended.
        dfs = list(self.pandas_dfs.values())
        if len(dfs) == 1:
            df = dfs[0]
            logging.info(f"Using single DataFrame for query with shape: {df.df.shape}")
            pandas_response = df.chat(query)
        else:
            # For multiple dataframes, use pai.chat with all dfs
            pandas_response = pai.chat(query, *dfs)

        response_text = ""
        chart_info = ""
        plot_path = getattr(pandas_response, 'plot_path', None)

        if isinstance(pandas_response, str) and pandas_response.endswith(chart_extensions):
            # Response is a chart path
            chart_info = f"\n\n📊 **Chart Generated**: {os.path.basename(pandas_response)}\nChart saved at: {pandas_response}"
            response_text = "Analysis completed with visualization"
            logging.info(f"Chart generated: {pandas_response}")
        elif plot_path:
            # Response object has plot path
            chart_info = f"\n\n📊 **Chart Generated**: {os.path.basename(plot_path)}\nChart saved at: {plot_path}"
            response_text = getattr(pandas_response, 'text', str(pandas_response))
            logging.info(f"Chart generated: {plot_path}")
        else:
            # Check for any new chart files in the charts directory
            if os.path.isdir(self.charts_dir):
                candidates = [
                    os.path.join(self.charts_dir, name)
                    for name in os.listdir(self.charts_dir)
                    if name.endswith(chart_extensions)
                ]
                if candidates:
                    chart_path = max(candidates, key=os.path.getmtime)
                    # Only a recently written chart is assumed to come from this query.
                    if time.time() - os.path.getmtime(chart_path) < recent_chart_window_s:
                        chart_info = f"\n\n📊 **Chart Generated**: {os.path.basename(chart_path)}\nChart saved at: {chart_path}"
                        logging.info(f"Chart generated: {chart_path}")

            # Handle text response. Test against None rather than truthiness so
            # legitimate falsy answers (0, False) are kept and objects with
            # ambiguous truth values do not raise.
            text = "" if pandas_response is None else str(pandas_response).strip()
            response_text = text or "Analysis completed"

        final_response = response_text + chart_info
        return final_response, True

    except Exception as e:
        logging.error(f"Error in PandasAI processing: {e}", exc_info=True)
        # Try to provide a more helpful error message
        if "Invalid output" in str(e) and "plot save path" in str(e):
            return "I tried to create a visualization but encountered a formatting issue. Please try rephrasing your request or ask for specific data without requesting a chart.", False
        # Fallback so callers always receive the documented (text, flag) tuple.
        return f"I encountered an issue while analyzing the data: {type(e).__name__}. Please try rephrasing your question.", False
650
+
651
 
652
  async def _generate_enhanced_response(self, query: str, pandas_result: str = "", query_type: str = "general") -> str:
653
  """Generate enhanced response combining PandasAI results with RAG context"""