Spaces:
Running
Running
Update eb_agent_module.py
Browse files- eb_agent_module.py +103 -102
eb_agent_module.py
CHANGED
@@ -649,113 +649,114 @@ class EmployerBrandingAgent:
|
|
649 |
else:
|
650 |
return "general"
|
651 |
|
652 |
-
# Replace the _generate_pandas_response method and everything after it with this properly indented code:
|
653 |
-
|
654 |
-
async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
|
655 |
-
"""Generate response using PandasAI with enhanced error handling"""
|
656 |
-
if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
|
657 |
-
return "Data analysis not available - PandasAI not initialized.", False
|
658 |
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
import matplotlib.pyplot as plt
|
664 |
-
plt.clf()
|
665 |
-
plt.close('all')
|
666 |
-
|
667 |
-
# Enhanced query processing based on content
|
668 |
-
processed_query = query
|
669 |
-
|
670 |
-
# Add helpful context for common chart requests
|
671 |
-
if any(word in query.lower() for word in ['chart', 'graph', 'plot', 'visualize']):
|
672 |
-
if 'monthly' in query.lower() and 'follower' in query.lower():
|
673 |
-
processed_query += """.
|
674 |
-
Use the monthly gains data (follower_count_type='follower_gains_monthly')
|
675 |
-
and use the extracted_date or month_name column for the x-axis.
|
676 |
-
Make sure to filter out any null dates and sort by date.
|
677 |
-
Create a clear line chart showing the trend over time."""
|
678 |
-
elif 'cumulative' in query.lower() and 'follower' in query.lower():
|
679 |
-
processed_query += """.
|
680 |
-
Use the cumulative data (follower_count_type='follower_count_cumulative')
|
681 |
-
and create a chart showing the total follower growth over time."""
|
682 |
-
|
683 |
-
# Execute the query
|
684 |
-
if len(self.pandas_dfs) == 1:
|
685 |
-
df = list(self.pandas_dfs.values())[0]
|
686 |
-
logging.info(f"Using single DataFrame for query with shape: {df.df.shape}")
|
687 |
-
pandas_response = df.chat(processed_query)
|
688 |
-
else:
|
689 |
-
dfs = list(self.pandas_dfs.values())
|
690 |
-
pandas_response = pai.chat(processed_query, *dfs)
|
691 |
-
|
692 |
-
# Enhanced response processing
|
693 |
-
response_text = ""
|
694 |
-
chart_info = ""
|
695 |
-
|
696 |
-
# Check for chart generation
|
697 |
-
chart_path = None
|
698 |
-
|
699 |
-
# Method 1: Direct path response
|
700 |
-
if isinstance(pandas_response, str) and pandas_response.endswith(('.png', '.jpg', '.jpeg', '.svg')):
|
701 |
-
chart_path = pandas_response
|
702 |
-
response_text = "Analysis completed with visualization"
|
703 |
|
704 |
-
|
705 |
-
|
706 |
-
chart_path = pandas_response.plot_path
|
707 |
-
response_text = getattr(pandas_response, 'text', str(pandas_response))
|
708 |
-
|
709 |
-
# Method 3: Check charts directory for new files
|
710 |
-
else:
|
711 |
-
if os.path.exists(self.charts_dir):
|
712 |
-
# Get all chart files sorted by modification time
|
713 |
-
chart_files = []
|
714 |
-
for f in os.listdir(self.charts_dir):
|
715 |
-
if f.endswith(('.png', '.jpg', '.jpeg', '.svg')):
|
716 |
-
full_path = os.path.join(self.charts_dir, f)
|
717 |
-
chart_files.append((full_path, os.path.getmtime(full_path)))
|
718 |
-
|
719 |
-
if chart_files:
|
720 |
-
# Sort by modification time (newest first)
|
721 |
-
chart_files.sort(key=lambda x: x[1], reverse=True)
|
722 |
-
latest_chart_path, latest_time = chart_files[0]
|
723 |
-
|
724 |
-
# Check if created in last 60 seconds
|
725 |
-
import time
|
726 |
-
if time.time() - latest_time < 60:
|
727 |
-
chart_path = latest_chart_path
|
728 |
-
logging.info(f"Found recent chart: {chart_path}")
|
729 |
|
730 |
-
#
|
731 |
-
|
732 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
733 |
else:
|
734 |
-
|
735 |
-
|
736 |
-
|
737 |
-
|
738 |
-
|
739 |
-
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
-
|
744 |
-
|
745 |
-
|
746 |
-
|
747 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
748 |
|
749 |
-
|
750 |
-
|
751 |
-
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
|
|
|
|
|
|
|
|
|
759 |
|
760 |
async def _generate_enhanced_response(self, query: str, pandas_result: str = "", query_type: str = "general") -> str:
|
761 |
"""Generate enhanced response combining PandasAI results with RAG context"""
|
|
|
649 |
else:
|
650 |
return "general"
|
651 |
|
652 |
+
# Replace the _generate_pandas_response method and everything after it with this properly indented code:
|
|
|
|
|
|
|
|
|
|
|
653 |
|
654 |
+
async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
    """Run a data query through PandasAI and describe the outcome as text.

    The query is optionally extended with plotting hints, sent to the single
    loaded DataFrame (or to ``pai.chat`` when several are loaded), and the
    result is turned into a message. When a chart file can be located, a short
    note with its name and path is appended to the message.

    Args:
        query: The user's question in natural language.

    Returns:
        A ``(message, success)`` tuple. ``success`` is ``False`` when no
        agent/DataFrames are available or when any exception was raised
        while processing; ``message`` then holds a user-facing explanation.
    """
    import time

    # An empty DataFrame mapping is treated like a missing one: there is
    # nothing to send to PandasAI in either case.
    if not self.pandas_agent or not getattr(self, 'pandas_dfs', None):
        return "Data analysis not available - PandasAI not initialized.", False

    chart_extensions = ('.png', '.jpg', '.jpeg', '.svg')
    recent_chart_window = 60  # seconds; charts older than this are ignored

    try:
        logging.info(f"Processing data query with PandasAI: {query[:100]}...")

        # Drop figures left over from earlier queries so they cannot leak
        # into the chart produced for this one.
        import matplotlib.pyplot as plt
        plt.close('all')

        # Add plotting hints for the follower charts users ask for most.
        lowered = query.lower()
        processed_query = query
        wants_chart = any(
            word in lowered for word in ('chart', 'graph', 'plot', 'visualize')
        )
        if wants_chart and 'follower' in lowered:
            if 'monthly' in lowered:
                processed_query += (
                    ".\n"
                    "Use the monthly gains data (follower_count_type='follower_gains_monthly')\n"
                    "and use the extracted_date or month_name column for the x-axis.\n"
                    "Make sure to filter out any null dates and sort by date.\n"
                    "Create a clear line chart showing the trend over time."
                )
            elif 'cumulative' in lowered:
                processed_query += (
                    ".\n"
                    "Use the cumulative data (follower_count_type='follower_count_cumulative')\n"
                    "and create a chart showing the total follower growth over time."
                )

        # NOTE(review): the chat calls below are made without ``await``;
        # presumably they block the event loop while PandasAI runs - confirm
        # and consider offloading if that matters for callers.
        frames = list(self.pandas_dfs.values())
        if len(frames) == 1:
            df = frames[0]
            logging.info(f"Using single DataFrame for query with shape: {df.df.shape}")
            pandas_response = df.chat(processed_query)
        else:
            pandas_response = pai.chat(processed_query, *frames)

        chart_path = None

        if isinstance(pandas_response, str) and pandas_response.endswith(chart_extensions):
            # Method 1: the response itself is the path of the chart file.
            chart_path = pandas_response
            response_text = "Analysis completed with visualization"
        elif getattr(pandas_response, 'plot_path', None):
            # Method 2: a response object that carries the chart path.
            chart_path = pandas_response.plot_path
            response_text = str(getattr(pandas_response, 'text', pandas_response))
        else:
            # Method 3: fall back to the newest image in the charts
            # directory, accepted only if it was modified very recently.
            if os.path.isdir(self.charts_dir):
                candidates = [
                    os.path.join(self.charts_dir, name)
                    for name in os.listdir(self.charts_dir)
                    if name.endswith(chart_extensions)
                ]
                if candidates:
                    newest = max(candidates, key=os.path.getmtime)
                    if time.time() - os.path.getmtime(newest) < recent_chart_window:
                        chart_path = newest
                        logging.info(f"Found recent chart: {chart_path}")

            # Compare against None instead of truth-testing the response:
            # a DataFrame/Series result raises on truth-testing, and a
            # legitimate falsy answer such as 0 must not be discarded.
            text = "" if pandas_response is None else str(pandas_response).strip()
            response_text = text or "Analysis completed"

        chart_info = ""
        if chart_path and os.path.exists(chart_path):
            chart_info = f"\n\n📊 **Chart Generated**: {os.path.basename(chart_path)}\nChart saved at: {chart_path}"
            logging.info(f"Chart successfully generated: {chart_path}")

        return response_text + chart_info, True

    except Exception as e:
        # Top-level boundary for the whole PandasAI round trip: log the
        # traceback and translate known failure signatures into guidance.
        logging.error(f"Error in PandasAI processing: {e}", exc_info=True)

        error_str = str(e).lower()
        if "matplotlib" in error_str and "none" in error_str:
            return "I encountered a data visualization error. This might be due to missing or null values in your data. Please try asking for the raw data first, or specify which specific columns you'd like to analyze.", False
        if "strftime" in error_str:
            return "I encountered a date formatting issue. Please try asking for the data without specific date formatting, or ask me to show the raw data structure first.", False
        if "ambiguous" in error_str:
            return "I encountered an ambiguous data type issue. Please try being more specific about which data you'd like to analyze (e.g., 'show monthly follower gains' vs 'show cumulative followers').", False
        return f"Error processing data query: {e}", False
|
759 |
+
|
760 |
|
761 |
async def _generate_enhanced_response(self, query: str, pandas_result: str = "", query_type: str = "general") -> str:
|
762 |
"""Generate enhanced response combining PandasAI results with RAG context"""
|