Update eb_agent_module.py
eb_agent_module.py  +33 -53  CHANGED
@@ -8,8 +8,8 @@ import textwrap
 from datetime import datetime # Added for date calculations
 
 try:
-    from google import
-    from google.
+    from google import genai
+    from google.genai import types # For GenerateContentConfig, SafetySetting, HarmCategory, HarmBlockThreshold etc.
 except ImportError:
     logging.error("Google Generative AI library not found. Please install it: pip install google-generativeai", exc_info=True)
     # Define dummy classes/variables if import fails
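The new imports follow the google-genai SDK layout (from google import genai, google.genai.types), while the install hint in the except branch still names the older google-generativeai package. A minimal, hedged sketch of how these imports are typically wired together; the client construction itself sits outside this hunk, and the environment variable and model name are assumptions:

import os
from google import genai
from google.genai import types

# Hypothetical wiring; the module's real `client` is created elsewhere in the file.
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])  # assumed env var name
config = types.GenerateContentConfig(temperature=0.7, safety_settings=[])  # empty list: service-side defaults apply
response = client.models.generate_content(
    model="gemini-2.0-flash",  # placeholder model name
    contents="Summarize follower growth trends.",
    config=config,
)
print(response.text)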
@@ -30,7 +30,7 @@ except ImportError:
         BLOCK_LOW_AND_ABOVE = "BLOCK_LOW_AND_ABOVE"
         BLOCK_MEDIUM_AND_ABOVE = "BLOCK_MEDIUM_AND_ABOVE"
         BLOCK_ONLY_HIGH = "BLOCK_ONLY_HIGH" # Added for completeness, adjust if needed
-
+
 # --- Custom Exceptions ---
 class ValidationError(Exception):
     """Custom validation error for agent inputs"""
@@ -56,27 +56,9 @@ GENERATION_CONFIG_PARAMS = {
     "candidate_count": 1,
 }
 
-#
-DEFAULT_SAFETY_SETTINGS = []
-
-    DEFAULT_SAFETY_SETTINGS = [
-        {"category": types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-         "threshold": types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
-        {"category": types.HarmCategory.HARM_CATEGORY_HARASSMENT,
-         "threshold": types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
-        {"category": types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-         "threshold": types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
-        {"category": types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-         "threshold": types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
-    ]
-else: # Fallback to strings if types or enums are not properly imported
-    logging.warning("Falling back to string representations for DEFAULT_SAFETY_SETTINGS due to missing types.HarmCategory or types.HarmBlockThreshold.")
-    DEFAULT_SAFETY_SETTINGS = [
-        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-    ]
+# No safety settings by default as per user request
+DEFAULT_SAFETY_SETTINGS = []
+logging.info("Default safety settings are now empty (no explicit client-side safety settings).")
 
 
 df_rag_documents = pd.DataFrame({
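With the module-level defaults emptied, any blocking now has to be opted in by the caller. A sketch of what an explicit list could look like, reusing the dict shape and enum names from the removed block; the constant name STRICT_SAFETY_SETTINGS is illustrative, not part of the module, and such a list would be passed through the agent's safety settings parameter added below:

from google.genai import types  # as imported in the try block above

STRICT_SAFETY_SETTINGS = [
    {"category": types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
     "threshold": types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
    {"category": types.HarmCategory.HARM_CATEGORY_HARASSMENT,
     "threshold": types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
    {"category": types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
     "threshold": types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
    {"category": types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
     "threshold": types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
]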
@@ -214,18 +196,19 @@ class EmployerBrandingAgent:
                  llm_model_name: str,
                  embedding_model_name: str,
                  generation_config_dict: dict,
-
+                 safety_settings_list: list,
                  force_sandbox: bool = False):
         self.all_dataframes = {k: df.copy() for k, df in all_dataframes.items()}
         self.schemas_representation = self._get_enhanced_schemas_representation()
         self.chat_history = []
         self.llm_model_name = llm_model_name
         self.generation_config_dict = generation_config_dict
-
+        # If an empty list is passed, it means no specific safety settings are enforced by the client.
+        self.safety_settings_list = safety_settings_list if safety_settings_list is not None else []
         self.embedding_model_name = embedding_model_name
         self.rag_system = AdvancedRAGSystem(rag_documents_df, self.embedding_model_name)
         self.force_sandbox = force_sandbox
-        logging.info(f"EmployerBrandingAgent initialized. LLM: {self.llm_model_name}, Embedding: {self.embedding_model_name}.
+        logging.info(f"EmployerBrandingAgent initialized. LLM: {self.llm_model_name}, Embedding: {self.embedding_model_name}. Safety settings count: {len(self.safety_settings_list)}")
 
     def _get_date_range(self, df: pd.DataFrame) -> str:
         for col in df.columns:
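The call site that constructs the agent is not part of this diff. A hedged sketch of how the updated signature would be used; the leading keyword names are inferred from the __init__ body, and the model name string is a placeholder:

# Hypothetical call site; only the parameters visible in the hunk above are certain.
agent = EmployerBrandingAgent(
    all_dataframes=all_dataframes,
    rag_documents_df=df_rag_documents,
    llm_model_name="gemini-2.0-flash",                # placeholder
    embedding_model_name=GEMINI_EMBEDDING_MODEL_NAME,
    generation_config_dict=GENERATION_CONFIG_PARAMS,
    safety_settings_list=DEFAULT_SAFETY_SETTINGS,     # [] after this change
    force_sandbox=False,
)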
@@ -344,7 +327,7 @@ Relevant Benchmarks: {benchmarks_val}"""
         return enhanced_context
 
     async def _build_prompt_for_current_turn(self, raw_user_query: str) -> str:
-        prompt_parts = ["You are an expert Employer Branding Analyst...", "--- DETAILED DATA OVERVIEW ---", self.schemas_representation]
+        prompt_parts = ["You are an expert Employer Branding Analyst...", "--- DETAILED DATA OVERVIEW ---", self.schemas_representation]
         if self.rag_system.embeddings is not None and self.rag_system.embeddings.size > 0:
             base_rag_context = await self.rag_system.retrieve_relevant_info(raw_user_query)
             if base_rag_context:
@@ -360,28 +343,27 @@ Relevant Benchmarks: {benchmarks_val}"""
         return {"Key Findings": ["Placeholder finding 1"], "Performance Metrics": ["Placeholder metric"], "Actionable Recommendations": {"Immediate Actions (0-30 days)": ["Placeholder action"]}, "Risk Assessment": ["Placeholder risk"], "Success Metrics to Track": ["Placeholder KPI"]}
 
     async def _generate_hr_insights(self, query: str, context: str) -> str:
-        insight_prompt = f"As an expert HR analytics consultant...\n{context}\nUser Query: {query}\nPlease provide insights in this structured format:\n## Key Findings\n- ...\n..."
+        insight_prompt = f"As an expert HR analytics consultant...\n{context}\nUser Query: {query}\nPlease provide insights in this structured format:\n## Key Findings\n- ...\n..."
         if not client: return "Error: AI client not configured for generating HR insights."
         api_call_contents = [{"role": "user", "parts": [{"text": insight_prompt}]}]
 
         api_safety_settings_objects = []
-        if
-
+        # self.safety_settings_list is expected to be empty if no settings are desired
+        if types and hasattr(types, 'SafetySetting') and self.safety_settings_list:
+            for ss_item in self.safety_settings_list:
                 try:
-
-
-
-                    api_safety_settings_objects.append(
-
-
-
-        else:
-            api_safety_settings_objects = self.safety_settings_list_of_dicts
+                    api_safety_settings_objects.append(types.SafetySetting(category=ss_item['category'], threshold=ss_item['threshold']))
+                except Exception as e_ss:
+                    logging.warning(f"Could not create SafetySetting object from {ss_item} for HR insights: {e_ss}. Using raw item.")
+                    api_safety_settings_objects.append(ss_item)
+        elif self.safety_settings_list: # Fallback if types.SafetySetting not available but list is not empty
+            api_safety_settings_objects = self.safety_settings_list
+
 
         api_generation_config_obj = None
         if types and hasattr(types, 'GenerateContentConfig'):
             api_generation_config_obj = types.GenerateContentConfig(**self.generation_config_dict, safety_settings=api_safety_settings_objects)
-        else:
+        else: # Fallback if types.GenerateContentConfig is not available
             api_generation_config_obj = {**self.generation_config_dict, "safety_settings": api_safety_settings_objects}
 
         try:
@@ -418,21 +400,22 @@ Relevant Benchmarks: {benchmarks_val}"""
         logging.debug(f"Sending to GenAI. Total turns in content: {len(api_call_contents)}")
 
         api_safety_settings_objects = []
-        if
-
+        # self.safety_settings_list is expected to be empty if no settings are desired
+        if types and hasattr(types, 'SafetySetting') and self.safety_settings_list:
+            for ss_item in self.safety_settings_list:
                 try:
-
-                    api_safety_settings_objects.append(types.SafetySetting(category=ss_dict['category'], threshold=ss_dict['threshold']))
+                    api_safety_settings_objects.append(types.SafetySetting(category=ss_item['category'], threshold=ss_item['threshold']))
                 except Exception as e_ss_core:
-                    logging.warning(f"Could not create SafetySetting object from {
-                    api_safety_settings_objects.append(
-
-            api_safety_settings_objects = self.
+                    logging.warning(f"Could not create SafetySetting object from {ss_item} in core: {e_ss_core}. Using raw item.")
+                    api_safety_settings_objects.append(ss_item)
+        elif self.safety_settings_list : # Fallback if types.SafetySetting not available but list is not empty
+            api_safety_settings_objects = self.safety_settings_list
+
 
         api_generation_config_obj = None
         if types and hasattr(types, 'GenerateContentConfig'):
             api_generation_config_obj = types.GenerateContentConfig(**self.generation_config_dict, safety_settings=api_safety_settings_objects)
-        else:
+        else: # Fallback if types.GenerateContentConfig is not available
             logging.error("GenerateContentConfig type not available. API call might fail.")
             api_generation_config_obj = {**self.generation_config_dict, "safety_settings": api_safety_settings_objects}
 
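The dict-to-SafetySetting conversion is now duplicated between _generate_hr_insights and this core call path. A sketch of the shared pattern as a helper; the method name _build_safety_setting_objects is hypothetical and relies on the module's existing types and logging imports:

def _build_safety_setting_objects(self) -> list:
    # Convert the configured {"category": ..., "threshold": ...} dicts into
    # types.SafetySetting objects when the SDK types imported; otherwise pass
    # the dicts through unchanged, matching the fallback behavior in the diff.
    objects = []
    if types and hasattr(types, 'SafetySetting') and self.safety_settings_list:
        for ss_item in self.safety_settings_list:
            try:
                objects.append(types.SafetySetting(category=ss_item['category'],
                                                   threshold=ss_item['threshold']))
            except Exception as exc:
                logging.warning(f"Could not create SafetySetting from {ss_item}: {exc}. Using raw item.")
                objects.append(ss_item)
    elif self.safety_settings_list:  # types.SafetySetting unavailable; pass dicts through
        objects = self.safety_settings_list
    return objects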
@@ -471,7 +454,6 @@ Relevant Benchmarks: {benchmarks_val}"""
             return self._get_fallback_response(raw_user_query_this_turn)
 
     def _classify_query_type(self, query: str) -> str:
-        # ... (implementation unchanged)
         query_lower = query.lower()
         if any(word in query_lower for word in ['trend', 'growth', 'change', 'time']): return 'trend_analysis'
         elif any(word in query_lower for word in ['compare', 'benchmark', 'versus']): return 'comparative_analysis'
@@ -496,7 +478,6 @@ def get_all_schemas_representation(all_dataframes: dict) -> str:
             schema = f"\n--- DataFrame: {df_name} ---\nStatus: Empty\nShape: {shape}\nColumns: {columns}"
         else:
             try:
-                # Attempt to use to_markdown, with a fallback to to_string
                 sample_data_str = df.head(2).to_markdown(index=False)
             except ImportError:
                 logging.warning("`tabulate` library not found. Falling back to `to_string()` for schema representation.")
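DataFrame.to_markdown() only works when the optional tabulate package is installed, which is what the except ImportError branch above guards against. The same pattern as a standalone sketch; the function name _sample_rows is illustrative:

import logging
import pandas as pd

def _sample_rows(df: pd.DataFrame) -> str:
    # to_markdown() raises ImportError without `tabulate`; fall back to plain text.
    try:
        return df.head(2).to_markdown(index=False)
    except ImportError:
        logging.warning("`tabulate` library not found. Falling back to `to_string()`.")
        return df.head(2).to_string(index=False)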
@@ -511,7 +492,6 @@ def get_all_schemas_representation(all_dataframes: dict) -> str:
 
 
 async def test_rag_retrieval_accuracy():
-    # ... (implementation unchanged, ensure client and types are checked if used here)
     logging.info("Running RAG retrieval accuracy test...")
     test_embedding_model = GEMINI_EMBEDDING_MODEL_NAME
     if not client: