GuglielmoTor committed on
Commit e64ca65 · verified · 1 Parent(s): 911f78e

Update eb_agent_module.py

Files changed (1):
  1. eb_agent_module.py +230 -225
eb_agent_module.py CHANGED
@@ -7,145 +7,105 @@ import logging
 import numpy as np
 import textwrap

-# Attempt to import Google Generative AI and related types
 try:
-    from google import generativeai as genai # Renamed for clarity to avoid conflict
-    from google.generativeai import types as genai_types
-    # from google.generativeai import GenerationConfig # For direct use if needed
-    # from google.generativeai.types import HarmCategory, HarmBlockThreshold, SafetySetting # For direct use
-
 except ImportError:
-    print("Google Generative AI library not found. Please install it: pip install google-generativeai")
-    # Define dummy classes/functions if the import fails, to allow the rest of the script to be parsed
-    class genai: # type: ignore
-        @staticmethod
-        def configure(api_key):
-            print(f"Dummy genai.configure called with API key: {'SET' if api_key else 'NOT SET'}")
-
-        # Dummy Client and related structures
-        class Client:
-            def __init__(self, api_key=None): # api_key is optional for Client constructor
-                self.api_key = api_key
-                self.models = self._Models() # This is the service client for models
-                print(f"Dummy genai.Client initialized {'with api_key' if api_key else '(global API key expected)'}.")
-
-            class _Models: # Represents the model service client
-                async def generate_content_async(self, model=None, contents=None, generation_config=None, safety_settings=None, stream=False, tools=None, tool_config=None): # Matched real signature better
-                    print(f"Dummy genai.Client.models.generate_content_async called for model: {model} with config: {generation_config}, safety_settings: {safety_settings}, stream: {stream}")
-                    class DummyPart:
-                        def __init__(self, text): self.text = text
-                    class DummyContent:
-                        def __init__(self): self.parts = [DummyPart("# Dummy response from dummy client's async generate_content")]
-                    class DummyCandidate:
-                        def __init__(self):
-                            self.content = DummyContent()
-                            self.finish_reason = genai_types.FinishReason.STOP # Use dummy FinishReason
-                            self.safety_ratings = []
-                            self.token_count = 0
-                            self.index = 0
-                    class DummyResponse:
-                        def __init__(self):
-                            self.candidates = [DummyCandidate()]
-                            self.prompt_feedback = self._PromptFeedback()
-                            self.text = "# Dummy response text from dummy client's async generate_content"
-                        class _PromptFeedback:
-                            def __init__(self):
-                                self.block_reason = None
-                                self.safety_ratings = []
-                    return DummyResponse()
-
-                def generate_content(self, model=None, contents=None, generation_config=None, safety_settings=None, stream=False, tools=None, tool_config=None): # Matched real signature better
-                    print(f"Dummy genai.Client.models.generate_content called for model: {model} with config: {generation_config}, safety_settings: {safety_settings}, stream: {stream}")
-                    # Re-using the async dummy structure for simplicity
-                    class DummyPart:
-                        def __init__(self, text): self.text = text
-                    class DummyContent:
-                        def __init__(self): self.parts = [DummyPart("# Dummy response from dummy client's generate_content")]
-                    class DummyCandidate:
-                        def __init__(self):
-                            self.content = DummyContent()
-                            self.finish_reason = genai_types.FinishReason.STOP
-                            self.safety_ratings = []
-                            self.token_count = 0
-                            self.index = 0
-                    class DummyResponse:
-                        def __init__(self):
-                            self.candidates = [DummyCandidate()]
-                            self.prompt_feedback = self._PromptFeedback()
-                            self.text = "# Dummy response text from dummy client's generate_content"
-                        class _PromptFeedback:
-                            def __init__(self):
-                                self.block_reason = None
-                                self.safety_ratings = []
-                    return DummyResponse()
-
-        @staticmethod
-        def GenerativeModel(model_name, generation_config=None, safety_settings=None, system_instruction=None): # Kept for AdvancedRAGSystem if it uses it, or if user switches back
-            print(f"Dummy genai.GenerativeModel called for model: {model_name} (This might be unused if Client approach is preferred)")
-            # ... (rest of DummyGenerativeModel as before, for completeness) ...
-            class DummyGenerativeModel:
-                def __init__(self, model_name_in, generation_config_in, safety_settings_in, system_instruction_in):
-                    self.model_name = model_name_in
-                async def generate_content_async(self, contents, stream=False):
-                    class DummyPart:
-                        def __init__(self, text): self.text = text
-                    class DummyContent:
-                        def __init__(self): self.parts = [DummyPart(f"# Dummy response from dummy GenerativeModel ({self.model_name})")]
-                    class DummyCandidate:
-                        def __init__(self):
-                            self.content = DummyContent(); self.finish_reason = genai_types.FinishReason.STOP; self.safety_ratings = []
-                    class DummyResponse:
-                        def __init__(self):
-                            self.candidates = [DummyCandidate()]; self.prompt_feedback = None; self.text = f"# Dummy GM response"
-                    return DummyResponse()
-            return DummyGenerativeModel(model_name, generation_config, safety_settings, system_instruction)
-
-        @staticmethod
-        def embed_content(model, content, task_type, title=None):
-            print(f"Dummy genai.embed_content called for model: {model}, task_type: {task_type}, title: {title}")
-            return {"embedding": [0.1] * 768}
-
-    class genai_types: # type: ignore
-        @staticmethod
-        def GenerationConfig(**kwargs):
-            print(f"Dummy genai_types.GenerationConfig created with: {kwargs}")
-            return dict(kwargs)
-
-        @staticmethod
-        def SafetySetting(category, threshold):
-            print(f"Dummy SafetySetting created: category={category}, threshold={threshold}")
-            return {"category": category, "threshold": threshold}
-
-        class HarmCategory:
-            HARM_CATEGORY_UNSPECIFIED = "HARM_CATEGORY_UNSPECIFIED"; HARM_CATEGORY_HARASSMENT = "HARM_CATEGORY_HARASSMENT"; HARM_CATEGORY_HATE_SPEECH = "HARM_CATEGORY_HATE_SPEECH"; HARM_CATEGORY_SEXUALLY_EXPLICIT = "HARM_CATEGORY_SEXUALLY_EXPLICIT"; HARM_CATEGORY_DANGEROUS_CONTENT = "HARM_CATEGORY_DANGEROUS_CONTENT"
-        class HarmBlockThreshold:
-            BLOCK_NONE = "BLOCK_NONE"; BLOCK_LOW_AND_ABOVE = "BLOCK_LOW_AND_ABOVE"; BLOCK_MEDIUM_AND_ABOVE = "BLOCK_MEDIUM_AND_ABOVE"; BLOCK_ONLY_HIGH = "BLOCK_ONLY_HIGH"
-        class FinishReason:
-            FINISH_REASON_UNSPECIFIED = "UNSPECIFIED"; STOP = "STOP"; MAX_TOKENS = "MAX_TOKENS"; SAFETY = "SAFETY"; RECITATION = "RECITATION"; OTHER = "OTHER"
-
-        # Dummy for BlockedReason if needed by response parsing
-        class BlockedReason:
-            BLOCKED_REASON_UNSPECIFIED = "BLOCKED_REASON_UNSPECIFIED"
-            SAFETY = "SAFETY"
-            OTHER = "OTHER"

 # --- Configuration ---
 GEMINI_API_KEY = os.getenv('GEMINI_API_KEY', "")
-# User-specified model names:
-# LLM_MODEL_NAME = "gemini-2.0-flash" # Original
 LLM_MODEL_NAME = "gemini-2.0-flash"
 GEMINI_EMBEDDING_MODEL_NAME = "gemini-embedding-exp-03-07"

-# Base generation configuration for the LLM
 GENERATION_CONFIG_PARAMS = {
-    "temperature": 0.3,
-    "top_p": 1.0,
-    "top_k": 32,
-    "max_output_tokens": 8192,
 }

 # Default safety settings list for Gemini
 try:
     DEFAULT_SAFETY_SETTINGS = [
         genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_HATE_SPEECH, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
@@ -153,26 +113,29 @@ try:
         genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
         genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
     ]
-except AttributeError as e:
-    logging.warning(f"Could not define DEFAULT_SAFETY_SETTINGS using real genai_types: {e}. Using placeholder list of dicts.")
     DEFAULT_SAFETY_SETTINGS = [
         {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
         {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
     ]

 # Logging setup
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(filename)s:%(lineno)d - %(message)s')

-if GEMINI_API_KEY:
     try:
-        genai.configure(api_key=GEMINI_API_KEY)
-        logging.info(f"Gemini API key configured globally.")
     except Exception as e:
         logging.error(f"Failed to configure Gemini API globally: {e}", exc_info=True)
-else:
-    logging.warning("GEMINI_API_KEY environment variable not set. Agent will use dummy responses if real genai library is not fully mocked or if API calls fail.")


 # --- RAG Documents Definition (Example) ---
@@ -201,44 +164,58 @@ class AdvancedRAGSystem:
         self.embedding_model_name = embedding_model_name
         self.documents_df = documents_df.copy()
         self.embeddings_generated = False
-        # Check if genai.embed_content is the real one or our dummy
-        self.client_available = hasattr(genai, 'embed_content') and not (hasattr(genai.embed_content, '__func__') and genai.embed_content.__func__.__qualname__.startswith('genai.embed_content'))

-        if GEMINI_API_KEY and self.client_available:
             try:
                 self._precompute_embeddings()
                 self.embeddings_generated = True
-                logging.info(f"RAG embeddings precomputed using '{self.embedding_model_name}'.")
-            except Exception as e: logging.error(f"RAG precomputation error: {e}", exc_info=True)
         else:
-            logging.warning(f"RAG embeddings not precomputed. Key: {bool(GEMINI_API_KEY)}, embed_content_ok: {self.client_available}.")

     def _embed_fn(self, title: str, text: str) -> list[float]:
-        if not self.client_available: return [0.0] * 768
         try:
             content_to_embed = text if text else title
             if not content_to_embed: return [0.0] * 768
             return genai.embed_content(model=self.embedding_model_name, content=content_to_embed, task_type="retrieval_document", title=title if title else None)["embedding"]
         except Exception as e:
-            logging.error(f"Error in _embed_fn for '{title}': {e}", exc_info=True)
             return [0.0] * 768

     def _precompute_embeddings(self):
         if 'Embeddings' not in self.documents_df.columns: self.documents_df['Embeddings'] = pd.Series(dtype='object')
         mask = (self.documents_df['Text'].notna() & (self.documents_df['Text'] != '')) | (self.documents_df['Title'].notna() & (self.documents_df['Title'] != ''))
         if not mask.any(): logging.warning("No content for RAG embeddings."); return
         self.documents_df.loc[mask, 'Embeddings'] = self.documents_df[mask].apply(lambda row: self._embed_fn(row.get('Title', ''), row.get('Text', '')), axis=1)
-        logging.info(f"Applied RAG embedding function to {mask.sum()} rows.")

     def retrieve_relevant_info(self, query_text: str, top_k: int = 2) -> str:
-        if not self.client_available: return "\n[RAG Context]\nEmbedding client not available.\n"
-        if not self.embeddings_generated or 'Embeddings' not in self.documents_df.columns or self.documents_df['Embeddings'].isnull().all():
-            return "\n[RAG Context]\nEmbeddings not ready for RAG.\n"
         try:
             query_embedding = np.array(genai.embed_content(model=self.embedding_model_name, content=query_text, task_type="retrieval_query")["embedding"])
             valid_df = self.documents_df.dropna(subset=['Embeddings'])
-            valid_df = valid_df[valid_df['Embeddings'].apply(lambda x: isinstance(x, (list, np.ndarray)) and len(x) > 0)]
-            if valid_df.empty: return "\n[RAG Context]\nNo valid document embeddings.\n"

             doc_embeddings = np.stack(valid_df['Embeddings'].apply(np.array).values)
             if query_embedding.shape[0] != doc_embeddings.shape[1]: return "\n[RAG Context]\nEmbedding dimension mismatch.\n"
@@ -251,8 +228,8 @@ class AdvancedRAGSystem:
             passages = "".join([f"\n[RAG Context from: '{valid_df.iloc[i]['Title']}']\n{valid_df.iloc[i]['Text']}\n" for i in idx if i < len(valid_df)])
             return passages if passages else "\n[RAG Context]\nNo relevant passages found after search.\n"
         except Exception as e:
-            logging.error(f"Error in RAG retrieve_relevant_info: {e}", exc_info=True)
-            return f"\n[RAG Context]\nError during RAG retrieval: {type(e).__name__} - {e}\n"

 # --- PandasLLM Class (Gemini-Powered using genai.Client) ---
 class PandasLLM:
@@ -261,69 +238,80 @@ class PandasLLM:
                  safety_settings_list: list,
                  data_privacy=True, force_sandbox=True):
         self.llm_model_name = llm_model_name
-        self.generation_config_dict = generation_config_dict # Will be passed to API call
-        self.safety_settings_list = safety_settings_list # Will be passed to API call
         self.data_privacy = data_privacy
         self.force_sandbox = force_sandbox
         self.client = None
-        self.model_service = None # This will be client.models

-        # Check if genai.Client is the real one or our dummy
-        is_real_genai_client = hasattr(genai, 'Client') and not (hasattr(genai.Client, '__func__') and genai.Client.__func__.__qualname__.startswith('genai.Client'))

-        if not GEMINI_API_KEY and is_real_genai_client: # Real client but no API key
-            logging.warning(f"PandasLLM: GEMINI_API_KEY not set, but real 'genai.Client' seems available. API calls may fail if global config is not sufficient.")
-            # Proceed to initialize client; it might work if genai.configure() was successful without explicit key here
-            # or if the environment provides credentials in another way.

-        try:
-            self.client = genai.Client() # API key is usually set via genai.configure or environment
-            self.model_service = self.client.models
-            logging.info(f"PandasLLM: Initialized with genai.Client().models for '{self.llm_model_name}'.")
-        except Exception as e:
-            logging.error(f"Failed to initialize PandasLLM with genai.Client: {e}", exc_info=True)
-            # Fallback to dummy if real initialization fails, to prevent crashes
-            if not is_real_genai_client: # If it was already the dummy, re-initialize dummy
-                self.client = genai.Client()
-                self.model_service = self.client.models
-                logging.warning("PandasLLM: Falling back to DUMMY genai.Client due to real initialization error or it was already dummy.")

-    async def _call_gemini_api_async(self, prompt_text: str, history: list = None) -> str:
-        if not self.model_service:
-            logging.error("PandasLLM: Model service (client.models) not available. Cannot call API.")
-            return "# Error: Gemini model service not available for API call."

         gemini_history = []
-        if history: # history is agent.chat_history, which will now have 'content'
             for entry in history:
-                # Standardize role for API: 'model' for assistant responses
                 role_for_api = "model" if entry.get("role") == "assistant" else entry.get("role", "user")
-                # Get text from 'content' key
                 text_content = entry.get("content", "")
                 gemini_history.append({"role": role_for_api, "parts": [{"text": text_content}]})

         current_prompt_content = [{"role": "user", "parts": [{"text": prompt_text}]}]
         contents_for_api = gemini_history + current_prompt_content

-        # Prepare model ID (e.g., "models/gemini-2.0-flash")
         model_id_for_api = self.llm_model_name
         if not model_id_for_api.startswith("models/"):
             model_id_for_api = f"models/{model_id_for_api}"

-        # Prepare generation config object
         api_generation_config = None
         if self.generation_config_dict:
-            try:
                 api_generation_config = genai_types.GenerationConfig(**self.generation_config_dict)
             except Exception as e_cfg:
-                logging.error(f"Error creating GenerationConfig object: {e_cfg}. Using dict as fallback.")
-                api_generation_config = self.generation_config_dict # Fallback to dict

-        logging.info(f"\n--- Calling Gemini API via Client (model: {model_id_for_api}) ---\nConfig: {api_generation_config}\nSafety: {bool(self.safety_settings_list)}\nContent (last part text): {contents_for_api[-1]['parts'][0]['text'][:100]}...\n")

         try:
-            response = await self.model_service.generate_content_async(
                 model=model_id_for_api,
                 contents=contents_for_api,
                 generation_config=api_generation_config,
@@ -340,14 +328,16 @@ class PandasLLM:
             llm_output = ""
             if hasattr(response, 'text') and isinstance(response.text, str):
                 llm_output = response.text
-            elif response.candidates:
                 candidate = response.candidates[0]
                 if candidate.content and candidate.content.parts:
                     llm_output = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))

                 if not llm_output and candidate.finish_reason:
                     finish_reason_val = candidate.finish_reason
-                    finish_reason_str = str(finish_reason_val.name if hasattr(finish_reason_val, 'name') else finish_reason_val)

                     if finish_reason_str == "SAFETY":
                         safety_messages = []
@@ -366,15 +356,16 @@ class PandasLLM:
                 return f"# Error: Unexpected API response structure: {str(response)[:200]}"

             return llm_output
-
-        except genai_types.BlockedPromptException as bpe:
             logging.error(f"Prompt blocked (BlockedPromptException): {bpe}", exc_info=True)
             return f"# Error: Prompt blocked. Details: {bpe}"
-        except genai_types.StopCandidateException as sce:
             logging.error(f"Candidate stopped (StopCandidateException): {sce}", exc_info=True)
             return f"# Error: Content generation stopped. Details: {sce}"
         except Exception as e:
-            logging.error(f"Error calling Gemini API via Client: {e}", exc_info=True)
             return f"# Error during API call: {type(e).__name__} - {str(e)[:100]}."

@@ -395,10 +386,12 @@ class PandasLLM:
         except IndexError: code_to_execute = ""

         if llm_response_text.startswith("# Error:") or not code_to_execute.strip():
-            logging.warning(f"LLM error or no code: {llm_response_text[:200]}")
             if not code_to_execute.strip() and not llm_response_text.startswith("# Error:"):
-                if "```" not in llm_response_text and len(llm_response_text.strip()) > 0:
-                    logging.info(f"LLM text output in sandbox mode: {llm_response_text[:200]}")
             return llm_response_text

         logging.info(f"\n--- Code to Execute: ---\n{code_to_execute}\n----------------------\n")
@@ -409,19 +402,19 @@ class PandasLLM:
         if dataframes_dict:
             for name, df_instance in dataframes_dict.items():
                 if isinstance(df_instance, pd.DataFrame): exec_globals[f"df_{name}"] = df_instance
-                else: logging.warning(f"Item '{name}' not a DataFrame.")
         try:
             exec(code_to_execute, exec_globals, {})
             final_output_str = sys.stdout.getvalue()
             if not final_output_str.strip():
                 if not any(ln.strip() and not ln.strip().startswith("#") for ln in code_to_execute.splitlines()):
-                    return "# LLM generated only comments or empty code. No output."
-                return "# Code executed, but no print() output. Ensure print() for results."
             return final_output_str
         except Exception as e:
-            logging.error(f"Sandbox Exec Error: {e}\nCode:\n{code_to_execute}", exc_info=True)
             indented_code = textwrap.indent(code_to_execute, '# ')
-            return f"# Sandbox Exec Error: {type(e).__name__}: {e}\n# Code:\n{indented_code}"
         finally: sys.stdout = old_stdout
         else: return llm_response_text

@@ -439,8 +432,8 @@ class EmployerBrandingAgent:
         self.rag_system = AdvancedRAGSystem(rag_documents_df, embedding_model_name)
         self.all_dataframes = all_dataframes if all_dataframes else {}
         self.schemas_representation = get_all_schemas_representation(self.all_dataframes)
-        self.chat_history = [] # This will store entries like {"role": "user/assistant", "content": "text"}
-        logging.info("EmployerBrandingAgent Initialized (using Client API approach).")

     def _build_prompt(self, user_query: str, role="Employer Branding Analyst & Strategist", task_decomposition_hint=None, cot_hint=True) -> str:
         prompt = f"You are a highly skilled '{role}'. Your goal is to provide actionable employer branding insights by analyzing Pandas DataFrames and RAG documents.\n"
@@ -466,11 +459,16 @@ class EmployerBrandingAgent:
         prompt += "\n--- AVAILABLE DATA AND SCHEMAS ---\n"
         prompt += self.schemas_representation if self.schemas_representation.strip() != "No DataFrames provided." else "No DataFrames loaded.\n"

         rag_context = self.rag_system.retrieve_relevant_info(user_query)
-        meaningful_rag_keywords = ["Error", "No valid", "No relevant", "Cannot retrieve", "not available", "not generated"]
         is_meaningful_rag = bool(rag_context.strip()) and not any(keyword in rag_context for keyword in meaningful_rag_keywords)
-        if is_meaningful_rag: prompt += f"\n--- RAG CONTEXT ---\n{rag_context}\n"
-        else: prompt += "\n--- RAG CONTEXT ---\nNo specific RAG context found or RAG error.\n"

         prompt += f"\n--- USER QUERY ---\n{user_query}\n"
         if task_decomposition_hint: prompt += f"\n--- GUIDANCE ---\n{task_decomposition_hint}\n"
@@ -485,21 +483,12 @@ class EmployerBrandingAgent:
         return prompt

     async def process_query(self, user_query: str, role="Employer Branding Analyst & Strategist", task_decomposition_hint=None, cot_hint=True) -> str:
-        current_turn_history_for_llm = self.chat_history[:] # History before this turn
-
-        # Append new user message to chat_history using 'content' key
         self.chat_history.append({"role": "user", "content": user_query})
-
         full_prompt = self._build_prompt(user_query, role, task_decomposition_hint, cot_hint)
-        logging.info(f"Built prompt for query: {user_query[:100]}...")
-
-        # Pass history (which now uses 'content') to pandas_llm.query
         response_text = await self.pandas_llm.query(full_prompt, self.all_dataframes, history=current_turn_history_for_llm)
-
-        # Append new assistant message to chat_history using 'content' key
-        # Standardize role to 'assistant' for model responses in history
         self.chat_history.append({"role": "assistant", "content": response_text})
-
         MAX_HISTORY_TURNS = 5
         if len(self.chat_history) > MAX_HISTORY_TURNS * 2:
             self.chat_history = self.chat_history[-(MAX_HISTORY_TURNS * 2):]
@@ -515,12 +504,16 @@ class EmployerBrandingAgent:

 # --- Example Usage (Conceptual) ---
 async def main_test():
-    logging.info("Starting main_test for EmployerBrandingAgent...")
     df_follower_stats = pd.DataFrame({'date': pd.to_datetime(['2023-01-01']), 'country': ['USA'], 'new_followers': [10]})
     df_posts = pd.DataFrame({'post_id': [1], 'theme': ['Culture'], 'engagement_rate': [0.05]})
     test_dataframes = {"follower_stats": df_follower_stats, "posts": df_posts}

-    if not GEMINI_API_KEY: logging.warning("GEMINI_API_KEY not set. Testing with dummy functionality.")

     agent = EmployerBrandingAgent(LLM_MODEL_NAME, GENERATION_CONFIG_PARAMS, DEFAULT_SAFETY_SETTINGS, test_dataframes, df_rag_documents, GEMINI_EMBEDDING_MODEL_NAME, force_sandbox=True)

     queries = ["Which post theme has the highest average engagement rate? Provide an insight.", "Hello!"]
@@ -528,12 +521,24 @@ async def main_test():
         logging.info(f"\n\n--- Query: {query} ---")
         response = await agent.process_query(user_query=query)
         logging.info(f"--- Response for '{query}': ---\n{response}\n---------------------------\n")
-        if GEMINI_API_KEY: await asyncio.sleep(1)

 if __name__ == "__main__":
-    if GEMINI_API_KEY:
-        try: asyncio.run(main_test())
-        except RuntimeError as e:
-            if "asyncio.run() cannot be called from a running event loop" in str(e): print("Skip asyncio.run in existing loop.")
-            else: raise
-    else: print("GEMINI_API_KEY not set. Skipping main_test().")
 import numpy as np
 import textwrap

+# --- Define Dummy Classes with unique names first ---
+class _DummyGenAIClientModels: # Represents the dummy model service client
+    async def generate_content_async(self, model=None, contents=None, generation_config=None, safety_settings=None, stream=False, tools=None, tool_config=None):
+        print(f"Dummy _DummyGenAI.Client.models.generate_content_async called for model: {model}")
+        # Simplified dummy response structure
+        class DummyPart: text = "# Dummy response from _DummyGenAI async"
+        class DummyContent: parts = [DummyPart()]
+        class DummyCandidate: content = DummyContent(); finish_reason = "_DUMMY_STOP"; safety_ratings = []; token_count = 0; index = 0
+        class DummyResponse: candidates = [DummyCandidate()]; text = DummyCandidate.content.parts[0].text; prompt_feedback = None
+        return DummyResponse()
+
+    def generate_content(self, model=None, contents=None, generation_config=None, safety_settings=None, stream=False, tools=None, tool_config=None):
+        print(f"Dummy _DummyGenAI.Client.models.generate_content called for model: {model}")
+        class DummyPart: text = "# Dummy response from _DummyGenAI sync"
+        class DummyContent: parts = [DummyPart()]
+        class DummyCandidate: content = DummyContent(); finish_reason = "_DUMMY_STOP"; safety_ratings = []; token_count = 0; index = 0
+        class DummyResponse: candidates = [DummyCandidate()]; text = DummyCandidate.content.parts[0].text; prompt_feedback = None
+        return DummyResponse()
+
+class _DummyGenAIClient: # Dummy Client
+    def __init__(self, api_key=None):
+        self.api_key = api_key
+        self.models = _DummyGenAIClientModels()
+        print(f"Dummy _DummyGenAI.Client initialized {'with api_key' if api_key else '(global API key expected)'}.")
+
+class _DummyGenAIGenerativeModel:
+    def __init__(self, model_name_in, generation_config_in, safety_settings_in, system_instruction_in):
+        self.model_name = model_name_in
+        print(f"Dummy _DummyGenAIGenerativeModel initialized for {model_name_in}")
+    async def generate_content_async(self, contents, stream=False):
+        print(f"Dummy _DummyGenAIGenerativeModel.generate_content_async called for {self.model_name}")
+        class DummyPart: text = f"# Dummy response from dummy _DummyGenAIGenerativeModel ({self.model_name})"
+        class DummyContent: parts = [DummyPart()]
+        class DummyCandidate: content = DummyContent(); finish_reason = "_DUMMY_STOP"; safety_ratings = []
+        class DummyResponse: candidates = [DummyCandidate()]; prompt_feedback = None; text = DummyCandidate.content.parts[0].text
+        return DummyResponse()
+
+class _ActualDummyGenAI: # type: ignore # Renamed the main dummy class
+    Client = _DummyGenAIClient # Assign inner class
+
+    @staticmethod
+    def configure(api_key):
+        print(f"Dummy _ActualDummyGenAI.configure called with API key: {'SET' if api_key else 'NOT SET'}")
+
+    @staticmethod
+    def GenerativeModel(model_name, generation_config=None, safety_settings=None, system_instruction=None):
+        print(f"Dummy _ActualDummyGenAI.GenerativeModel called for model: {model_name}")
+        return _DummyGenAIGenerativeModel(model_name, generation_config, safety_settings, system_instruction)
+
+    @staticmethod
+    def embed_content(model, content, task_type, title=None):
+        # This print is crucial for debugging which embed_content is called
+        print(f"Dummy _ActualDummyGenAI.embed_content called for model: {model}, task_type: {task_type}, title: {title}")
+        return {"embedding": [0.1] * 768}
+
+class _ActualDummyGenAITypes: # type: ignore # Renamed the main dummy types class
+    @staticmethod
+    def GenerationConfig(**kwargs):
+        print(f"Dummy _ActualDummyGenAITypes.GenerationConfig created with: {kwargs}")
+        return dict(kwargs)
+
+    @staticmethod
+    def SafetySetting(category, threshold):
+        print(f"Dummy _ActualDummyGenAITypes.SafetySetting created: category={category}, threshold={threshold}")
+        return {"category": category, "threshold": threshold}
+
+    class HarmCategory:
+        HARM_CATEGORY_UNSPECIFIED = "HARM_CATEGORY_UNSPECIFIED"; HARM_CATEGORY_HARASSMENT = "HARM_CATEGORY_HARASSMENT"; HARM_CATEGORY_HATE_SPEECH = "HARM_CATEGORY_HATE_SPEECH"; HARM_CATEGORY_SEXUALLY_EXPLICIT = "HARM_CATEGORY_SEXUALLY_EXPLICIT"; HARM_CATEGORY_DANGEROUS_CONTENT = "HARM_CATEGORY_DANGEROUS_CONTENT"
+    class HarmBlockThreshold:
+        BLOCK_NONE = "BLOCK_NONE"; BLOCK_LOW_AND_ABOVE = "BLOCK_LOW_AND_ABOVE"; BLOCK_MEDIUM_AND_ABOVE = "BLOCK_MEDIUM_AND_ABOVE"; BLOCK_ONLY_HIGH = "BLOCK_ONLY_HIGH"
+    class FinishReason: # This should match the structure of the real FinishReason enum if possible
+        FINISH_REASON_UNSPECIFIED = "UNSPECIFIED"; STOP = "STOP"; MAX_TOKENS = "MAX_TOKENS"; SAFETY = "SAFETY"; RECITATION = "RECITATION"; OTHER = "OTHER"
+    class BlockedReason:
+        BLOCKED_REASON_UNSPECIFIED = "BLOCKED_REASON_UNSPECIFIED"; SAFETY = "SAFETY"; OTHER = "OTHER"
+
+# --- Attempt to import the real library ---
+_REAL_GENAI_LOADED = False
 try:
+    from google import generativeai as genai # This is the real 'genai'
+    from google.generativeai import types as genai_types # This is the real 'genai_types'
+    _REAL_GENAI_LOADED = True
+    logging.info("Successfully imported 'google.generativeai' library.")
 except ImportError:
+    genai = _ActualDummyGenAI() # Alias to our dummy genai class instance if import fails
+    genai_types = _ActualDummyGenAITypes() # Alias to our dummy genai_types class instance
+    logging.warning("Google Generative AI library not found. Using dummy implementations.")
+

 # --- Configuration ---
 GEMINI_API_KEY = os.getenv('GEMINI_API_KEY', "")
 LLM_MODEL_NAME = "gemini-2.0-flash"
 GEMINI_EMBEDDING_MODEL_NAME = "gemini-embedding-exp-03-07"

 GENERATION_CONFIG_PARAMS = {
+    "temperature": 0.3, "top_p": 1.0, "top_k": 32, "max_output_tokens": 8192,
 }

 # Default safety settings list for Gemini
+# Ensure genai_types used here is the one defined (real or dummy alias)
 try:
     DEFAULT_SAFETY_SETTINGS = [
         genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_HATE_SPEECH, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
         genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
         genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
     ]
+except Exception as e_safety: # Catch broader exception if dummy types are not perfect
+    logging.warning(f"Could not define DEFAULT_SAFETY_SETTINGS using genai_types: {e_safety}. Using placeholder list of dicts.")
     DEFAULT_SAFETY_SETTINGS = [
         {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
         {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
     ]

+
 # Logging setup
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(filename)s:%(lineno)d - %(message)s')

+if GEMINI_API_KEY and _REAL_GENAI_LOADED:
     try:
+        genai.configure(api_key=GEMINI_API_KEY) # genai is now consistently real or dummy
+        logging.info(f"Gemini API key configured globally (real genai active: {_REAL_GENAI_LOADED}).")
     except Exception as e:
         logging.error(f"Failed to configure Gemini API globally: {e}", exc_info=True)
+elif not GEMINI_API_KEY and _REAL_GENAI_LOADED:
+    logging.warning("GEMINI_API_KEY environment variable not set, but real library is loaded. API calls will likely fail.")
+elif not _REAL_GENAI_LOADED:
+    logging.info("Operating in DUMMY mode because 'google-generativeai' library was not found.")
+    if GEMINI_API_KEY: # Call dummy configure if key is present but library is dummy
+        genai.configure(api_key=GEMINI_API_KEY)


 # --- RAG Documents Definition (Example) ---
         self.embedding_model_name = embedding_model_name
         self.documents_df = documents_df.copy()
         self.embeddings_generated = False
+        # Use _REAL_GENAI_LOADED to determine if real client is available
+        self.real_client_available_for_rag = _REAL_GENAI_LOADED and bool(GEMINI_API_KEY)

+        if self.real_client_available_for_rag:
             try:
                 self._precompute_embeddings()
                 self.embeddings_generated = True
+                # This log should only appear if real genai.embed_content was used without printing dummy message
+                logging.info(f"RAG embeddings precomputed using REAL genai.embed_content for '{self.embedding_model_name}'.")
+            except Exception as e: logging.error(f"RAG precomputation error with real client: {e}", exc_info=True)
         else:
+            logging.warning(f"RAG embeddings not precomputed. Real GenAI loaded: {_REAL_GENAI_LOADED}, API Key set: {bool(GEMINI_API_KEY)}.")
+            # If in dummy mode, call dummy precompute to see its log
+            if not _REAL_GENAI_LOADED:
+                self._precompute_embeddings() # This will call dummy genai.embed_content

     def _embed_fn(self, title: str, text: str) -> list[float]:
+        # genai here is now consistently the real or the aliased dummy
         try:
             content_to_embed = text if text else title
             if not content_to_embed: return [0.0] * 768
+            # The call to genai.embed_content will print its own message if it's the dummy
             return genai.embed_content(model=self.embedding_model_name, content=content_to_embed, task_type="retrieval_document", title=title if title else None)["embedding"]
         except Exception as e:
+            logging.error(f"Error in _embed_fn for '{title}' (real_genai_loaded: {_REAL_GENAI_LOADED}): {e}", exc_info=True)
             return [0.0] * 768

     def _precompute_embeddings(self):
         if 'Embeddings' not in self.documents_df.columns: self.documents_df['Embeddings'] = pd.Series(dtype='object')
         mask = (self.documents_df['Text'].notna() & (self.documents_df['Text'] != '')) | (self.documents_df['Title'].notna() & (self.documents_df['Title'] != ''))
         if not mask.any(): logging.warning("No content for RAG embeddings."); return
+        # This will call _embed_fn, which calls the current 'genai.embed_content' (real or dummy)
         self.documents_df.loc[mask, 'Embeddings'] = self.documents_df[mask].apply(lambda row: self._embed_fn(row.get('Title', ''), row.get('Text', '')), axis=1)
+        logging.info(f"Applied RAG embedding function to {mask.sum()} rows (real_genai_loaded: {_REAL_GENAI_LOADED}).")
+

     def retrieve_relevant_info(self, query_text: str, top_k: int = 2) -> str:
+        if not (_REAL_GENAI_LOADED and GEMINI_API_KEY): # Check if we can use real embeddings
+            # If not using real, and dummy is active, dummy embed_content will print.
+            # If real loaded but no key, this will also be skipped for actual API call.
+            if not _REAL_GENAI_LOADED: # If in dummy mode, call dummy embed_content to see log
+                genai.embed_content(model=self.embedding_model_name, content=query_text, task_type="retrieval_query") # Call for log
+            logging.warning(f"Skipping real RAG retrieval. Real GenAI: {_REAL_GENAI_LOADED}, API Key: {bool(GEMINI_API_KEY)}")
+            return "\n[RAG Context]\nReal RAG retrieval skipped (check logs for mode).\n"
+
+        # At this point, _REAL_GENAI_LOADED and GEMINI_API_KEY are true
+        # So, genai.embed_content should be the real one.
         try:
             query_embedding = np.array(genai.embed_content(model=self.embedding_model_name, content=query_text, task_type="retrieval_query")["embedding"])
             valid_df = self.documents_df.dropna(subset=['Embeddings'])
+            valid_df = valid_df[valid_df['Embeddings'].apply(lambda x: isinstance(x, (list, np.ndarray)) and len(x) > 0 and np.any(x))] # Ensure not all zeros
+            if valid_df.empty: return "\n[RAG Context]\nNo valid document embeddings after filtering.\n"

             doc_embeddings = np.stack(valid_df['Embeddings'].apply(np.array).values)
             if query_embedding.shape[0] != doc_embeddings.shape[1]: return "\n[RAG Context]\nEmbedding dimension mismatch.\n"

             passages = "".join([f"\n[RAG Context from: '{valid_df.iloc[i]['Title']}']\n{valid_df.iloc[i]['Text']}\n" for i in idx if i < len(valid_df)])
             return passages if passages else "\n[RAG Context]\nNo relevant passages found after search.\n"
         except Exception as e:
+            logging.error(f"Error in RAG retrieve_relevant_info (real mode): {e}", exc_info=True)
+            return f"\n[RAG Context]\nError during RAG retrieval (real mode): {type(e).__name__} - {e}\n"

 # --- PandasLLM Class (Gemini-Powered using genai.Client) ---
 class PandasLLM:
                  safety_settings_list: list,
                  data_privacy=True, force_sandbox=True):
         self.llm_model_name = llm_model_name
+        self.generation_config_dict = generation_config_dict
+        self.safety_settings_list = safety_settings_list
         self.data_privacy = data_privacy
         self.force_sandbox = force_sandbox
         self.client = None
+        self.model_service = None
+
+        if _REAL_GENAI_LOADED and GEMINI_API_KEY:
+            try:
+                self.client = genai.Client() # Should be the REAL genai.Client
+                self.model_service = self.client.models
+                logging.info(f"PandasLLM: Initialized with REAL genai.Client().models for '{self.llm_model_name}'.")
+            except Exception as e:
+                logging.error(f"Failed to initialize REAL PandasLLM with genai.Client: {e}", exc_info=True)
+                # No explicit fallback to dummy here; _call_gemini_api_async will use the global dummy if self.model_service is None and _REAL_GENAI_LOADED is False
+        else:
+            logging.warning(f"PandasLLM: Not using REAL genai.Client. RealGenAILoaded: {_REAL_GENAI_LOADED}, APIKeySet: {bool(GEMINI_API_KEY)}. Will use DUMMY if library not loaded.")
+            if not _REAL_GENAI_LOADED: # If import failed, genai is already the dummy
+                self.client = genai.Client() # Instantiates _ActualDummyGenAI.Client
+                self.model_service = self.client.models # Uses _DummyGenAIClientModels
+                logging.info("PandasLLM: Initialized with DUMMY genai.Client().models because real library failed to load.")

+    async def _call_gemini_api_async(self, prompt_text: str, history: list = None) -> str:
+        # Determine if we should use the real service or expect dummy behavior
+        use_real_service = _REAL_GENAI_LOADED and GEMINI_API_KEY and self.model_service is not None
+
+        # If not using real service, and we are in dummy mode (library not loaded),
+        # self.model_service should be the dummy one.
+        # If real library loaded but no key, self.model_service might be None or real (but calls would fail).
+
+        active_model_service = self.model_service
+        if not use_real_service and not _REAL_GENAI_LOADED:
+            # Ensure we have a dummy service if we are in full dummy mode and self.model_service wasn't set
+            # This case should ideally be covered by PandasLLM.__init__
+            if active_model_service is None:
+                logging.debug("PandasLLM._call_gemini_api_async: active_model_service is None in dummy mode, attempting to get dummy service.")
+                dummy_client_instance = _ActualDummyGenAI.Client() # Get a fresh dummy client models service
+                active_model_service = dummy_client_instance.models
+
+        if not active_model_service:
+            logging.error("PandasLLM: Model service not available (real or dummy). Cannot call API.")
+            return "# Error: Gemini model service not available."

         gemini_history = []
+        if history:
             for entry in history:
                 role_for_api = "model" if entry.get("role") == "assistant" else entry.get("role", "user")
                 text_content = entry.get("content", "")
                 gemini_history.append({"role": role_for_api, "parts": [{"text": text_content}]})

         current_prompt_content = [{"role": "user", "parts": [{"text": prompt_text}]}]
         contents_for_api = gemini_history + current_prompt_content

         model_id_for_api = self.llm_model_name
         if not model_id_for_api.startswith("models/"):
             model_id_for_api = f"models/{model_id_for_api}"

         api_generation_config = None
         if self.generation_config_dict:
+            try: # genai_types is now consistently real or dummy alias
                 api_generation_config = genai_types.GenerationConfig(**self.generation_config_dict)
             except Exception as e_cfg:
+                logging.error(f"Error creating GenerationConfig object (real_loaded: {_REAL_GENAI_LOADED}): {e_cfg}. Using dict fallback.")
+                api_generation_config = self.generation_config_dict

+        logging.info(f"\n--- Calling Gemini API (model: {model_id_for_api}, RealMode: {use_real_service}) ---\nConfig: {api_generation_config}\nSafety: {bool(self.safety_settings_list)}\nContent (last part text): {contents_for_api[-1]['parts'][0]['text'][:100]}...\n")

         try:
+            # This call will use either the real model_service or the dummy one.
+            # The dummy service's methods have print statements.
+            response = await active_model_service.generate_content_async(
                 model=model_id_for_api,
                 contents=contents_for_api,
                 generation_config=api_generation_config,
             llm_output = ""
             if hasattr(response, 'text') and isinstance(response.text, str):
                 llm_output = response.text
+            elif response.candidates: # Standard way to get text from Gemini response
                 candidate = response.candidates[0]
                 if candidate.content and candidate.content.parts:
                     llm_output = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))

                 if not llm_output and candidate.finish_reason:
                     finish_reason_val = candidate.finish_reason
+                    # Try to get enum name if available (for real API) or use string (for dummy)
+                    finish_reason_str = str(finish_reason_val.name if hasattr(finish_reason_val, 'name') and not isinstance(finish_reason_val, str) else finish_reason_val)
+

                     if finish_reason_str == "SAFETY":
                         safety_messages = []

                 return f"# Error: Unexpected API response structure: {str(response)[:200]}"

             return llm_output
+
+        # Specific exceptions for the real API, might not be raised by dummy
+        except genai_types.BlockedPromptException as bpe: # type: ignore
             logging.error(f"Prompt blocked (BlockedPromptException): {bpe}", exc_info=True)
             return f"# Error: Prompt blocked. Details: {bpe}"
+        except genai_types.StopCandidateException as sce: # type: ignore
             logging.error(f"Candidate stopped (StopCandidateException): {sce}", exc_info=True)
             return f"# Error: Content generation stopped. Details: {sce}"
         except Exception as e:
+            logging.error(f"Error calling Gemini API (RealMode: {use_real_service}): {e}", exc_info=True)
             return f"# Error during API call: {type(e).__name__} - {str(e)[:100]}."

         except IndexError: code_to_execute = ""

         if llm_response_text.startswith("# Error:") or not code_to_execute.strip():
+            # Log if it's an error from LLM or if it's just non-code/comment response.
+            logging.warning(f"LLM response is an error, or no valid Python code block found for sandbox. Raw LLM response: {llm_response_text[:200]}")
             if not code_to_execute.strip() and not llm_response_text.startswith("# Error:"):
+                # If it's not an error and not code, it might be a natural language refusal or comment.
+                if "```" not in llm_response_text and len(llm_response_text.strip()) > 0: # Heuristic for non-code text
+                    logging.info(f"LLM produced text output instead of Python code in sandbox mode. Passing through: {llm_response_text[:200]}")
             return llm_response_text

         logging.info(f"\n--- Code to Execute: ---\n{code_to_execute}\n----------------------\n")

         if dataframes_dict:
             for name, df_instance in dataframes_dict.items():
                 if isinstance(df_instance, pd.DataFrame): exec_globals[f"df_{name}"] = df_instance
+                else: logging.warning(f"Item '{name}' not a DataFrame for sandbox exec.")
         try:
             exec(code_to_execute, exec_globals, {})
             final_output_str = sys.stdout.getvalue()
             if not final_output_str.strip():
                 if not any(ln.strip() and not ln.strip().startswith("#") for ln in code_to_execute.splitlines()):
+                    return "# LLM generated only comments or empty code. No output produced by sandbox."
+                return "# Code executed successfully by sandbox, but it did not produce any printed output. Ensure print() for results."
             return final_output_str
         except Exception as e:
+            logging.error(f"Sandbox Execution Error: {e}\nCode was:\n{code_to_execute}", exc_info=True)
             indented_code = textwrap.indent(code_to_execute, '# ')
+            return f"# Sandbox Execution Error: {type(e).__name__}: {e}\n# --- Code that caused error: ---\n{indented_code}"
         finally: sys.stdout = old_stdout
         else: return llm_response_text

         self.rag_system = AdvancedRAGSystem(rag_documents_df, embedding_model_name)
         self.all_dataframes = all_dataframes if all_dataframes else {}
         self.schemas_representation = get_all_schemas_representation(self.all_dataframes)
+        self.chat_history = []
+        logging.info(f"EmployerBrandingAgent Initialized (Real GenAI Loaded: {_REAL_GENAI_LOADED}).")

     def _build_prompt(self, user_query: str, role="Employer Branding Analyst & Strategist", task_decomposition_hint=None, cot_hint=True) -> str:
         prompt = f"You are a highly skilled '{role}'. Your goal is to provide actionable employer branding insights by analyzing Pandas DataFrames and RAG documents.\n"

         prompt += "\n--- AVAILABLE DATA AND SCHEMAS ---\n"
         prompt += self.schemas_representation if self.schemas_representation.strip() != "No DataFrames provided." else "No DataFrames loaded.\n"

+        # RAG retrieval will use the current state of 'genai' (real or dummy)
         rag_context = self.rag_system.retrieve_relevant_info(user_query)
+        meaningful_rag_keywords = ["Error", "No valid", "No relevant", "Cannot retrieve", "not available", "not generated", "Skipped"]
         is_meaningful_rag = bool(rag_context.strip()) and not any(keyword in rag_context for keyword in meaningful_rag_keywords)
+
+        if is_meaningful_rag:
+            prompt += f"\n--- RAG CONTEXT (Real GenAI for RAG: {self.rag_system.real_client_available_for_rag}) ---\n{rag_context}\n"
+        else:
+            prompt += f"\n--- RAG CONTEXT (Real GenAI for RAG: {self.rag_system.real_client_available_for_rag}) ---\nNo specific RAG context found, RAG error, or RAG skipped. Details: {rag_context[:100]}...\n"
+

         prompt += f"\n--- USER QUERY ---\n{user_query}\n"
         if task_decomposition_hint: prompt += f"\n--- GUIDANCE ---\n{task_decomposition_hint}\n"
         return prompt

     async def process_query(self, user_query: str, role="Employer Branding Analyst & Strategist", task_decomposition_hint=None, cot_hint=True) -> str:
+        current_turn_history_for_llm = self.chat_history[:]
         self.chat_history.append({"role": "user", "content": user_query})
         full_prompt = self._build_prompt(user_query, role, task_decomposition_hint, cot_hint)
+        logging.info(f"Built prompt for query: {user_query[:100]}... (Real GenAI: {_REAL_GENAI_LOADED})")
         response_text = await self.pandas_llm.query(full_prompt, self.all_dataframes, history=current_turn_history_for_llm)
         self.chat_history.append({"role": "assistant", "content": response_text})
         MAX_HISTORY_TURNS = 5
         if len(self.chat_history) > MAX_HISTORY_TURNS * 2:
             self.chat_history = self.chat_history[-(MAX_HISTORY_TURNS * 2):]

 # --- Example Usage (Conceptual) ---
 async def main_test():
+    # This test will reflect whether _REAL_GENAI_LOADED is true or false
+    logging.info(f"Starting main_test for EmployerBrandingAgent (Real GenAI Loaded: {_REAL_GENAI_LOADED}, API Key Set: {bool(GEMINI_API_KEY)})")
+
     df_follower_stats = pd.DataFrame({'date': pd.to_datetime(['2023-01-01']), 'country': ['USA'], 'new_followers': [10]})
     df_posts = pd.DataFrame({'post_id': [1], 'theme': ['Culture'], 'engagement_rate': [0.05]})
     test_dataframes = {"follower_stats": df_follower_stats, "posts": df_posts}

+    if not GEMINI_API_KEY and _REAL_GENAI_LOADED:
+        logging.warning("GEMINI_API_KEY not set but real library loaded. Real API calls in test will fail.")
+
     agent = EmployerBrandingAgent(LLM_MODEL_NAME, GENERATION_CONFIG_PARAMS, DEFAULT_SAFETY_SETTINGS, test_dataframes, df_rag_documents, GEMINI_EMBEDDING_MODEL_NAME, force_sandbox=True)

     queries = ["Which post theme has the highest average engagement rate? Provide an insight.", "Hello!"]

         logging.info(f"\n\n--- Query: {query} ---")
         response = await agent.process_query(user_query=query)
         logging.info(f"--- Response for '{query}': ---\n{response}\n---------------------------\n")
+        if _REAL_GENAI_LOADED and GEMINI_API_KEY: await asyncio.sleep(0.1) # Small delay for real API

 if __name__ == "__main__":
+    # Note: To test with real API, ensure GEMINI_API_KEY is set in your environment
+    # and 'google-generativeai' is installed.
+    # Otherwise, it will run in dummy mode.
+
+    # Check mode before running test
+    print(f"Script starting... Real GenAI Library Loaded: {_REAL_GENAI_LOADED}, API Key Set: {bool(GEMINI_API_KEY)}")
+
+    try:
+        asyncio.run(main_test())
+    except RuntimeError as e:
+        if "asyncio.run() cannot be called from a running event loop" in str(e):
+            print("Skipping asyncio.run(main_test()) as it seems to be in an existing event loop (e.g., Jupyter).")
+            print("If in Jupyter, you might need to 'await main_test()' in a cell after defining it.")
+        else:
+            raise
+    except Exception as e_main:
+        print(f"Error during main_test execution: {e_main}")
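
The central technique this commit introduces, defining module-level dummy stand-ins first and then aliasing them over the real library only if the import fails, can be exercised in isolation. Below is a minimal sketch of that import-or-fallback idiom; the names some_sdk, _DummyClient, and get_client are illustrative stand-ins, not identifiers from eb_agent_module.py or from any real SDK.

    import logging

    class _DummyClient:
        """Offline stand-in that mirrors the interface callers expect."""
        def generate(self, prompt: str) -> str:
            return f"# dummy response for: {prompt[:40]}"

    try:
        import some_sdk  # hypothetical real dependency
        _REAL_LOADED = True
    except ImportError:
        some_sdk = None
        _REAL_LOADED = False
        logging.warning("some_sdk not installed; falling back to _DummyClient.")

    def get_client():
        # Callers receive one consistent object either way, as with the
        # commit's genai / _ActualDummyGenAI aliasing.
        return some_sdk.Client() if _REAL_LOADED else _DummyClient()

The force_sandbox path in PandasLLM relies on a second pattern visible in the diff: redirect sys.stdout to a buffer, exec() the LLM-generated code, and return whatever was printed. A self-contained sketch follows; run_sandboxed is a hypothetical helper name, but the redirect/exec/restore sequence and the comment-out-the-failing-code error report mirror the module's code.

    import io
    import sys
    import textwrap

    def run_sandboxed(code_to_execute: str, exec_globals: dict) -> str:
        old_stdout = sys.stdout
        sys.stdout = io.StringIO()
        try:
            exec(code_to_execute, exec_globals, {})
            output = sys.stdout.getvalue()
            return output if output.strip() else "# Code executed, but nothing was printed."
        except Exception as e:
            # Echo the failing code back as comments, as the module does.
            commented = textwrap.indent(code_to_execute, '# ')
            return f"# Sandbox error: {type(e).__name__}: {e}\n{commented}"
        finally:
            sys.stdout = old_stdout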