Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Community

GuglielmoTor committed on May 27

Commit

e7eee4a

verified ·

1 Parent(s): cd58acb

Update eb_agent_module.py

Browse files

Files changed (1) hide show

eb_agent_module.py +95 -49

eb_agent_module.py CHANGED Viewed

@@ -187,18 +187,33 @@ class AdvancedRAGSystem:
             embed_config_payload = None
             if GENAI_AVAILABLE and hasattr(types, 'EmbedContentConfig'):
                 embed_config_payload = types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT")
             response = client.models.embed_content(
                 model=f"models/{self.embedding_model_name}" if not self.embedding_model_name.startswith("models/") else self.embedding_model_name,
-                contents=text,  # Fix: Remove the list wrapper
                 config=embed_config_payload
             )
-            # Fix: Update response parsing - use .embeddings directly (it's a list)
             if hasattr(response, 'embeddings') and isinstance(response.embeddings, list) and len(response.embeddings) > 0:
-                # Fix: Access embedding values directly from the list
-                embedding_values = response.embeddings[0]  # This is already the array/list of values
-                return np.array(embedding_values)
             else:
                 logging.error(f"Unexpected response structure")
                 return None
@@ -258,6 +273,10 @@ class AdvancedRAGSystem:
         self.is_initialized = True
     def _calculate_cosine_similarity(self, embeddings_matrix: np.ndarray, query_vector: np.ndarray) -> np.ndarray:
         if embeddings_matrix.ndim == 1:
             embeddings_matrix = embeddings_matrix.reshape(1, -1)
         if query_vector.ndim == 1:
@@ -268,7 +287,7 @@ class AdvancedRAGSystem:
         norm_matrix = np.linalg.norm(embeddings_matrix, axis=1, keepdims=True)
         normalized_embeddings_matrix = np.divide(embeddings_matrix, norm_matrix + 1e-8, where=norm_matrix!=0)
         norm_query = np.linalg.norm(query_vector, axis=1, keepdims=True)
         normalized_query_vector = np.divide(query_vector, norm_query + 1e-8, where=norm_query!=0)
@@ -681,6 +700,7 @@ class EmployerBrandingAgent:
                     and create a chart showing the total follower growth over time."""
             # Execute the query
             if len(self.pandas_dfs) == 1:
                 df = list(self.pandas_dfs.values())[0]
                 logging.info(f"Using single DataFrame for query with shape: {df.df.shape}")
@@ -689,59 +709,84 @@ class EmployerBrandingAgent:
                 dfs = list(self.pandas_dfs.values())
                 pandas_response = pai.chat(processed_query, *dfs)
-            # Enhanced response processing
             response_text = ""
-            chart_info = ""
-            # Check for chart generation
             chart_path = None
-            # Method 1: Direct path response
-            if isinstance(pandas_response, str) and pandas_response.endswith(('.png', '.jpg', '.jpeg', '.svg')):
-                chart_path = pandas_response
-                response_text = "Analysis completed with visualization"
-            # Method 2: Response object with plot path
-            elif hasattr(pandas_response, 'plot_path') and pandas_response.plot_path:
-                chart_path = pandas_response.plot_path
-                response_text = getattr(pandas_response, 'text', str(pandas_response))
-            # Method 3: Check charts directory for new files
-            else:
-                if os.path.exists(self.charts_dir):
-                    # Get all chart files sorted by modification time
-                    chart_files = []
-                    for f in os.listdir(self.charts_dir):
-                        if f.endswith(('.png', '.jpg', '.jpeg', '.svg')):
-                            full_path = os.path.join(self.charts_dir, f)
-                            chart_files.append((full_path, os.path.getmtime(full_path)))
-                    if chart_files:
-                        # Sort by modification time (newest first)
-                        chart_files.sort(key=lambda x: x[1], reverse=True)
-                        latest_chart_path, latest_time = chart_files[0]
-                        # Check if created in last 60 seconds
-                        import time
-                        if time.time() - latest_time < 60:
-                            chart_path = latest_chart_path
-                            logging.info(f"Found recent chart: {chart_path}")
-                # Handle text response
-                if pandas_response and str(pandas_response).strip():
-                    response_text = str(pandas_response).strip()
                 else:
-                    response_text = "Analysis completed"
             # Format final response
             if chart_path and os.path.exists(chart_path):
                 chart_info = f"\n\n📊 **Chart Generated**: {os.path.basename(chart_path)}\nChart saved at: {chart_path}"
                 logging.info(f"Chart successfully generated: {chart_path}")
             final_response = response_text + chart_info
-            success = True
-            return final_response, success
         except Exception as e:
             logging.error(f"Error in PandasAI processing: {e}", exc_info=True)
@@ -754,10 +799,11 @@ class EmployerBrandingAgent:
                 return "I encountered a date formatting issue. Please try asking for the data without specific date formatting, or ask me to show the raw data structure first.", False
             elif "ambiguous" in error_str:
                 return "I encountered an ambiguous data type issue. Please try being more specific about which data you'd like to analyze (e.g., 'show monthly follower gains' vs 'show cumulative followers').", False
             else:
                 return f"Error processing data query: {str(e)}", False
     async def _generate_enhanced_response(self, query: str, pandas_result: str = "", query_type: str = "general") -> str:
         """Generate enhanced response combining PandasAI results with RAG context"""
         if not self.is_ready:

             embed_config_payload = None
             if GENAI_AVAILABLE and hasattr(types, 'EmbedContentConfig'):
                 embed_config_payload = types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT")
             response = client.models.embed_content(
                 model=f"models/{self.embedding_model_name}" if not self.embedding_model_name.startswith("models/") else self.embedding_model_name,
+                contents=text,
                 config=embed_config_payload
             )
+            # Fix: Handle ContentEmbedding objects properly
             if hasattr(response, 'embeddings') and isinstance(response.embeddings, list) and len(response.embeddings) > 0:
+                embedding_obj = response.embeddings[0]
+                # Extract values from ContentEmbedding object
+                if hasattr(embedding_obj, 'values'):
+                    embedding_values = embedding_obj.values
+                elif hasattr(embedding_obj, 'embedding'):
+                    embedding_values = embedding_obj.embedding
+                elif isinstance(embedding_obj, (list, tuple)):
+                    embedding_values = embedding_obj
+                else:
+                    # Try to convert to list/array if it's a different object type
+                    try:
+                        embedding_values = list(embedding_obj)
+                    except:
+                        logging.error(f"Cannot extract embedding values from object type: {type(embedding_obj)}")
+                        return None
+                return np.array(embedding_values, dtype=np.float32)
             else:
                 logging.error(f"Unexpected response structure")
                 return None
         self.is_initialized = True
     def _calculate_cosine_similarity(self, embeddings_matrix: np.ndarray, query_vector: np.ndarray) -> np.ndarray:
+        # Ensure inputs are numpy arrays with proper dtype
+        embeddings_matrix = np.asarray(embeddings_matrix, dtype=np.float32)
+        query_vector = np.asarray(query_vector, dtype=np.float32)
         if embeddings_matrix.ndim == 1:
             embeddings_matrix = embeddings_matrix.reshape(1, -1)
         if query_vector.ndim == 1:
         norm_matrix = np.linalg.norm(embeddings_matrix, axis=1, keepdims=True)
         normalized_embeddings_matrix = np.divide(embeddings_matrix, norm_matrix + 1e-8, where=norm_matrix!=0)
         norm_query = np.linalg.norm(query_vector, axis=1, keepdims=True)
         normalized_query_vector = np.divide(query_vector, norm_query + 1e-8, where=norm_query!=0)
                     and create a chart showing the total follower growth over time."""
             # Execute the query
+            pandas_response = None
             if len(self.pandas_dfs) == 1:
                 df = list(self.pandas_dfs.values())[0]
                 logging.info(f"Using single DataFrame for query with shape: {df.df.shape}")
                 dfs = list(self.pandas_dfs.values())
                 pandas_response = pai.chat(processed_query, *dfs)
+            # Enhanced response processing with better type handling
             response_text = ""
             chart_path = None
+            # Handle different response types from PandasAI
+            try:
+                # Case 1: Direct string response (file path)
+                if isinstance(pandas_response, str):
+                    if pandas_response.endswith(('.png', '.jpg', '.jpeg', '.svg')):
+                        chart_path = pandas_response
+                        response_text = "Analysis completed with visualization"
+                    else:
+                        response_text = pandas_response
+                # Case 2: Chart object response
+                elif hasattr(pandas_response, 'value') and hasattr(pandas_response, '_get_image'):
+                    # Handle PandasAI Chart response object
+                    try:
+                        # Try to get the chart path without calling show() which causes the error
+                        if hasattr(pandas_response, 'value'):
+                            if isinstance(pandas_response.value, str) and pandas_response.value.endswith(('.png', '.jpg', '.jpeg', '.svg')):
+                                chart_path = pandas_response.value
+                                response_text = "Analysis completed with visualization"
+                            elif isinstance(pandas_response.value, dict):
+                                # Handle dict response from Chart object
+                                if 'path' in pandas_response.value:
+                                    chart_path = pandas_response.value['path']
+                                    response_text = "Analysis completed with visualization"
+                                else:
+                                    response_text = "Chart generated but path not accessible"
+                    except Exception as chart_error:
+                        logging.warning(f"Error handling chart response: {chart_error}")
+                        response_text = "Chart generated but encountered display issue"
+                # Case 3: Response with plot_path attribute
+                elif hasattr(pandas_response, 'plot_path') and pandas_response.plot_path:
+                    chart_path = pandas_response.plot_path
+                    response_text = getattr(pandas_response, 'text', "Analysis completed with visualization")
+                # Case 4: Other response types
                 else:
+                    if pandas_response is not None:
+                        response_text = str(pandas_response).strip()
+            except Exception as response_error:
+                logging.warning(f"Error processing PandasAI response: {response_error}")
+                response_text = "Analysis completed but encountered response processing issue"
+            # Fallback: Check charts directory for new files if no chart path found
+            if not chart_path and os.path.exists(self.charts_dir):
+                chart_files = []
+                for f in os.listdir(self.charts_dir):
+                    if f.endswith(('.png', '.jpg', '.jpeg', '.svg')):
+                        full_path = os.path.join(self.charts_dir, f)
+                        chart_files.append((full_path, os.path.getmtime(full_path)))
+                if chart_files:
+                    # Sort by modification time (newest first)
+                    chart_files.sort(key=lambda x: x[1], reverse=True)
+                    latest_chart_path, latest_time = chart_files[0]
+                    # Check if created in last 60 seconds
+                    import time
+                    if time.time() - latest_time < 60:
+                        chart_path = latest_chart_path
+                        logging.info(f"Found recent chart: {chart_path}")
             # Format final response
+            if not response_text:
+                response_text = "Analysis completed"
+            chart_info = ""
             if chart_path and os.path.exists(chart_path):
                 chart_info = f"\n\n📊 **Chart Generated**: {os.path.basename(chart_path)}\nChart saved at: {chart_path}"
                 logging.info(f"Chart successfully generated: {chart_path}")
             final_response = response_text + chart_info
+            return final_response, True
         except Exception as e:
             logging.error(f"Error in PandasAI processing: {e}", exc_info=True)
                 return "I encountered a date formatting issue. Please try asking for the data without specific date formatting, or ask me to show the raw data structure first.", False
             elif "ambiguous" in error_str:
                 return "I encountered an ambiguous data type issue. Please try being more specific about which data you'd like to analyze (e.g., 'show monthly follower gains' vs 'show cumulative followers').", False
+            elif "startswith" in error_str or "dict" in error_str:
+                return "I encountered a response formatting issue. The analysis may have completed but I couldn't process the result properly. Please try rephrasing your query.", False
             else:
                 return f"Error processing data query: {str(e)}", False
     async def _generate_enhanced_response(self, query: str, pandas_result: str = "", query_type: str = "general") -> str:
         """Generate enhanced response combining PandasAI results with RAG context"""
         if not self.is_ready: