Spaces:
Sleeping
Sleeping
Update eb_agent_module.py
Browse files- eb_agent_module.py +89 -100
eb_agent_module.py
CHANGED
@@ -11,7 +11,6 @@ import textwrap
|
|
11 |
class _DummyGenAIClientModels: # Represents the dummy model service client
|
12 |
async def generate_content_async(self, model=None, contents=None, generation_config=None, safety_settings=None, stream=False, tools=None, tool_config=None):
|
13 |
print(f"Dummy _DummyGenAI.Client.models.generate_content_async called for model: {model}")
|
14 |
-
# Simplified dummy response structure
|
15 |
class DummyPart: text = "# Dummy response from _DummyGenAI async"
|
16 |
class DummyContent: parts = [DummyPart()]
|
17 |
class DummyCandidate: content = DummyContent(); finish_reason = "_DUMMY_STOP"; safety_ratings = []; token_count = 0; index = 0
|
@@ -45,7 +44,7 @@ class _DummyGenAIGenerativeModel:
|
|
45 |
return DummyResponse()
|
46 |
|
47 |
class _ActualDummyGenAI: # type: ignore # Renamed the main dummy class
|
48 |
-
Client = _DummyGenAIClient
|
49 |
|
50 |
@staticmethod
|
51 |
def configure(api_key):
|
@@ -58,41 +57,56 @@ class _ActualDummyGenAI: # type: ignore # Renamed the main dummy class
|
|
58 |
|
59 |
@staticmethod
|
60 |
def embed_content(model, content, task_type, title=None):
|
61 |
-
# This print is crucial for debugging which embed_content is called
|
62 |
print(f"Dummy _ActualDummyGenAI.embed_content called for model: {model}, task_type: {task_type}, title: {title}")
|
63 |
return {"embedding": [0.1] * 768}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
-
class _ActualDummyGenAITypes: # type: ignore # Renamed the main dummy types class
|
66 |
-
@staticmethod
|
67 |
-
def GenerationConfig(**kwargs):
|
68 |
-
print(f"Dummy _ActualDummyGenAITypes.GenerationConfig created with: {kwargs}")
|
69 |
-
return dict(kwargs)
|
70 |
-
|
71 |
-
@staticmethod
|
72 |
-
def SafetySetting(category, threshold):
|
73 |
-
print(f"Dummy _ActualDummyGenAITypes.SafetySetting created: category={category}, threshold={threshold}")
|
74 |
-
return {"category": category, "threshold": threshold}
|
75 |
-
|
76 |
-
class HarmCategory:
|
77 |
-
HARM_CATEGORY_UNSPECIFIED = "HARM_CATEGORY_UNSPECIFIED"; HARM_CATEGORY_HARASSMENT = "HARM_CATEGORY_HARASSMENT"; HARM_CATEGORY_HATE_SPEECH = "HARM_CATEGORY_HATE_SPEECH"; HARM_CATEGORY_SEXUALLY_EXPLICIT = "HARM_CATEGORY_SEXUALLY_EXPLICIT"; HARM_CATEGORY_DANGEROUS_CONTENT = "HARM_CATEGORY_DANGEROUS_CONTENT"
|
78 |
-
class HarmBlockThreshold:
|
79 |
-
BLOCK_NONE = "BLOCK_NONE"; BLOCK_LOW_AND_ABOVE = "BLOCK_LOW_AND_ABOVE"; BLOCK_MEDIUM_AND_ABOVE = "BLOCK_MEDIUM_AND_ABOVE"; BLOCK_ONLY_HIGH = "BLOCK_ONLY_HIGH"
|
80 |
-
class FinishReason: # This should match the structure of the real FinishReason enum if possible
|
81 |
-
FINISH_REASON_UNSPECIFIED = "UNSPECIFIED"; STOP = "STOP"; MAX_TOKENS = "MAX_TOKENS"; SAFETY = "SAFETY"; RECITATION = "RECITATION"; OTHER = "OTHER"
|
82 |
-
class BlockedReason:
|
83 |
-
BLOCKED_REASON_UNSPECIFIED = "BLOCKED_REASON_UNSPECIFIED"; SAFETY = "SAFETY"; OTHER = "OTHER"
|
84 |
|
85 |
# --- Attempt to import the real library ---
|
86 |
_REAL_GENAI_LOADED = False
|
|
|
|
|
87 |
try:
|
88 |
-
from google import
|
89 |
-
from google
|
|
|
90 |
_REAL_GENAI_LOADED = True
|
91 |
-
logging.info("Successfully imported 'google.
|
92 |
except ImportError:
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
|
98 |
# --- Configuration ---
|
@@ -105,7 +119,7 @@ GENERATION_CONFIG_PARAMS = {
|
|
105 |
}
|
106 |
|
107 |
# Default safety settings list for Gemini
|
108 |
-
#
|
109 |
try:
|
110 |
DEFAULT_SAFETY_SETTINGS = [
|
111 |
genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_HATE_SPEECH, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
|
@@ -113,8 +127,8 @@ try:
|
|
113 |
genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
|
114 |
genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
|
115 |
]
|
116 |
-
except Exception as e_safety:
|
117 |
-
logging.warning(f"Could not define DEFAULT_SAFETY_SETTINGS using genai_types: {e_safety}. Using placeholder list of dicts.")
|
118 |
DEFAULT_SAFETY_SETTINGS = [
|
119 |
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
120 |
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
@@ -126,16 +140,16 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
126 |
|
127 |
if GEMINI_API_KEY and _REAL_GENAI_LOADED:
|
128 |
try:
|
129 |
-
genai.configure(api_key=GEMINI_API_KEY)
|
130 |
logging.info(f"Gemini API key configured globally (real genai active: {_REAL_GENAI_LOADED}).")
|
131 |
except Exception as e:
|
132 |
logging.error(f"Failed to configure Gemini API globally: {e}", exc_info=True)
|
133 |
elif not GEMINI_API_KEY and _REAL_GENAI_LOADED:
|
134 |
-
logging.warning("GEMINI_API_KEY environment variable not set, but real library is loaded. API calls will likely fail.")
|
135 |
elif not _REAL_GENAI_LOADED:
|
136 |
-
logging.info("Operating in DUMMY mode because 'google
|
137 |
-
if GEMINI_API_KEY:
|
138 |
-
genai.configure(api_key=GEMINI_API_KEY)
|
139 |
|
140 |
|
141 |
# --- RAG Documents Definition (Example) ---
|
@@ -164,28 +178,24 @@ class AdvancedRAGSystem:
|
|
164 |
self.embedding_model_name = embedding_model_name
|
165 |
self.documents_df = documents_df.copy()
|
166 |
self.embeddings_generated = False
|
167 |
-
# Use _REAL_GENAI_LOADED to determine if real client is available
|
168 |
self.real_client_available_for_rag = _REAL_GENAI_LOADED and bool(GEMINI_API_KEY)
|
169 |
|
170 |
if self.real_client_available_for_rag:
|
171 |
try:
|
172 |
self._precompute_embeddings()
|
173 |
self.embeddings_generated = True
|
174 |
-
# This log should only appear if real genai.embed_content was used without printing dummy message
|
175 |
logging.info(f"RAG embeddings precomputed using REAL genai.embed_content for '{self.embedding_model_name}'.")
|
176 |
except Exception as e: logging.error(f"RAG precomputation error with real client: {e}", exc_info=True)
|
177 |
else:
|
178 |
-
logging.warning(f"RAG
|
179 |
-
# If in dummy mode
|
180 |
-
|
181 |
-
self._precompute_embeddings() # This will call dummy genai.embed_content
|
182 |
|
183 |
def _embed_fn(self, title: str, text: str) -> list[float]:
|
184 |
-
# genai here is now consistently the real or the aliased dummy
|
185 |
try:
|
186 |
content_to_embed = text if text else title
|
187 |
if not content_to_embed: return [0.0] * 768
|
188 |
-
#
|
189 |
return genai.embed_content(model=self.embedding_model_name, content=content_to_embed, task_type="retrieval_document", title=title if title else None)["embedding"]
|
190 |
except Exception as e:
|
191 |
logging.error(f"Error in _embed_fn for '{title}' (real_genai_loaded: {_REAL_GENAI_LOADED}): {e}", exc_info=True)
|
@@ -195,26 +205,22 @@ class AdvancedRAGSystem:
|
|
195 |
if 'Embeddings' not in self.documents_df.columns: self.documents_df['Embeddings'] = pd.Series(dtype='object')
|
196 |
mask = (self.documents_df['Text'].notna() & (self.documents_df['Text'] != '')) | (self.documents_df['Title'].notna() & (self.documents_df['Title'] != ''))
|
197 |
if not mask.any(): logging.warning("No content for RAG embeddings."); return
|
198 |
-
# This will call _embed_fn, which calls the current 'genai.embed_content' (real or dummy)
|
199 |
self.documents_df.loc[mask, 'Embeddings'] = self.documents_df[mask].apply(lambda row: self._embed_fn(row.get('Title', ''), row.get('Text', '')), axis=1)
|
200 |
logging.info(f"Applied RAG embedding function to {mask.sum()} rows (real_genai_loaded: {_REAL_GENAI_LOADED}).")
|
201 |
|
202 |
|
203 |
def retrieve_relevant_info(self, query_text: str, top_k: int = 2) -> str:
|
204 |
-
if not
|
205 |
-
# If
|
206 |
-
|
207 |
-
if not _REAL_GENAI_LOADED: # If in dummy mode, call dummy embed_content to see log
|
208 |
-
genai.embed_content(model=self.embedding_model_name, content=query_text, task_type="retrieval_query") # Call for log
|
209 |
logging.warning(f"Skipping real RAG retrieval. Real GenAI: {_REAL_GENAI_LOADED}, API Key: {bool(GEMINI_API_KEY)}")
|
210 |
return "\n[RAG Context]\nReal RAG retrieval skipped (check logs for mode).\n"
|
211 |
|
212 |
-
# At this point, _REAL_GENAI_LOADED and GEMINI_API_KEY are true
|
213 |
-
# So, genai.embed_content should be the real one.
|
214 |
try:
|
|
|
215 |
query_embedding = np.array(genai.embed_content(model=self.embedding_model_name, content=query_text, task_type="retrieval_query")["embedding"])
|
216 |
valid_df = self.documents_df.dropna(subset=['Embeddings'])
|
217 |
-
valid_df = valid_df[valid_df['Embeddings'].apply(lambda x: isinstance(x, (list, np.ndarray)) and len(x) > 0 and np.any(x))]
|
218 |
if valid_df.empty: return "\n[RAG Context]\nNo valid document embeddings after filtering.\n"
|
219 |
|
220 |
doc_embeddings = np.stack(valid_df['Embeddings'].apply(np.array).values)
|
@@ -247,42 +253,32 @@ class PandasLLM:
|
|
247 |
|
248 |
if _REAL_GENAI_LOADED and GEMINI_API_KEY:
|
249 |
try:
|
250 |
-
self.client = genai.Client() #
|
251 |
self.model_service = self.client.models
|
252 |
logging.info(f"PandasLLM: Initialized with REAL genai.Client().models for '{self.llm_model_name}'.")
|
253 |
except Exception as e:
|
254 |
logging.error(f"Failed to initialize REAL PandasLLM with genai.Client: {e}", exc_info=True)
|
255 |
-
#
|
256 |
else:
|
257 |
-
logging.warning(f"PandasLLM: Not using REAL genai.Client. RealGenAILoaded: {_REAL_GENAI_LOADED}, APIKeySet: {bool(GEMINI_API_KEY)}.
|
258 |
if not _REAL_GENAI_LOADED: # If import failed, genai is already the dummy
|
259 |
self.client = genai.Client() # Instantiates _ActualDummyGenAI.Client
|
260 |
-
self.model_service = self.client.models
|
261 |
-
logging.info("PandasLLM: Initialized with DUMMY genai.Client().models
|
262 |
|
263 |
|
264 |
async def _call_gemini_api_async(self, prompt_text: str, history: list = None) -> str:
|
265 |
-
# Determine if we should use the real service or expect dummy behavior
|
266 |
use_real_service = _REAL_GENAI_LOADED and GEMINI_API_KEY and self.model_service is not None
|
267 |
|
268 |
-
# If not using real service, and we are in dummy mode (library not loaded),
|
269 |
-
# self.model_service should be the dummy one.
|
270 |
-
# If real library loaded but no key, self.model_service might be None or real (but calls would fail).
|
271 |
-
|
272 |
active_model_service = self.model_service
|
273 |
-
if not use_real_service and not _REAL_GENAI_LOADED:
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
logging.debug("PandasLLM._call_gemini_api_async: active_model_service is None in dummy mode, attempting to get dummy service.")
|
278 |
-
dummy_client_instance = _ActualDummyGenAI.Client() # Get a fresh dummy client models service
|
279 |
-
active_model_service = dummy_client_instance.models
|
280 |
-
|
281 |
|
282 |
if not active_model_service:
|
283 |
-
logging.error("PandasLLM: Model service not available (
|
284 |
-
return "# Error: Gemini model service not available."
|
285 |
-
|
286 |
|
287 |
gemini_history = []
|
288 |
if history:
|
@@ -300,17 +296,15 @@ class PandasLLM:
|
|
300 |
|
301 |
api_generation_config = None
|
302 |
if self.generation_config_dict:
|
303 |
-
try:
|
304 |
api_generation_config = genai_types.GenerationConfig(**self.generation_config_dict)
|
305 |
except Exception as e_cfg:
|
306 |
-
logging.error(f"Error creating GenerationConfig
|
307 |
api_generation_config = self.generation_config_dict
|
308 |
|
309 |
logging.info(f"\n--- Calling Gemini API (model: {model_id_for_api}, RealMode: {use_real_service}) ---\nConfig: {api_generation_config}\nSafety: {bool(self.safety_settings_list)}\nContent (last part text): {contents_for_api[-1]['parts'][0]['text'][:100]}...\n")
|
310 |
|
311 |
try:
|
312 |
-
# This call will use either the real model_service or the dummy one.
|
313 |
-
# The dummy service's methods have print statements.
|
314 |
response = await active_model_service.generate_content_async(
|
315 |
model=model_id_for_api,
|
316 |
contents=contents_for_api,
|
@@ -328,17 +322,15 @@ class PandasLLM:
|
|
328 |
llm_output = ""
|
329 |
if hasattr(response, 'text') and isinstance(response.text, str):
|
330 |
llm_output = response.text
|
331 |
-
elif response.candidates:
|
332 |
candidate = response.candidates[0]
|
333 |
if candidate.content and candidate.content.parts:
|
334 |
llm_output = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))
|
335 |
|
336 |
if not llm_output and candidate.finish_reason:
|
337 |
finish_reason_val = candidate.finish_reason
|
338 |
-
# Try to get enum name if available (for real API) or use string (for dummy)
|
339 |
finish_reason_str = str(finish_reason_val.name if hasattr(finish_reason_val, 'name') and not isinstance(finish_reason_val, str) else finish_reason_val)
|
340 |
|
341 |
-
|
342 |
if finish_reason_str == "SAFETY":
|
343 |
safety_messages = []
|
344 |
if hasattr(candidate, 'safety_ratings') and candidate.safety_ratings:
|
@@ -357,13 +349,20 @@ class PandasLLM:
|
|
357 |
|
358 |
return llm_output
|
359 |
|
360 |
-
#
|
361 |
-
except genai_types.BlockedPromptException as bpe:
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
except Exception as e:
|
368 |
logging.error(f"Error calling Gemini API (RealMode: {use_real_service}): {e}", exc_info=True)
|
369 |
return f"# Error during API call: {type(e).__name__} - {str(e)[:100]}."
|
@@ -386,11 +385,9 @@ class PandasLLM:
|
|
386 |
except IndexError: code_to_execute = ""
|
387 |
|
388 |
if llm_response_text.startswith("# Error:") or not code_to_execute.strip():
|
389 |
-
# Log if it's an error from LLM or if it's just non-code/comment response.
|
390 |
logging.warning(f"LLM response is an error, or no valid Python code block found for sandbox. Raw LLM response: {llm_response_text[:200]}")
|
391 |
if not code_to_execute.strip() and not llm_response_text.startswith("# Error:"):
|
392 |
-
|
393 |
-
if "```" not in llm_response_text and len(llm_response_text.strip()) > 0: # Heuristic for non-code text
|
394 |
logging.info(f"LLM produced text output instead of Python code in sandbox mode. Passing through: {llm_response_text[:200]}")
|
395 |
return llm_response_text
|
396 |
|
@@ -459,7 +456,6 @@ class EmployerBrandingAgent:
|
|
459 |
prompt += "\n--- AVAILABLE DATA AND SCHEMAS ---\n"
|
460 |
prompt += self.schemas_representation if self.schemas_representation.strip() != "No DataFrames provided." else "No DataFrames loaded.\n"
|
461 |
|
462 |
-
# RAG retrieval will use the current state of 'genai' (real or dummy)
|
463 |
rag_context = self.rag_system.retrieve_relevant_info(user_query)
|
464 |
meaningful_rag_keywords = ["Error", "No valid", "No relevant", "Cannot retrieve", "not available", "not generated", "Skipped"]
|
465 |
is_meaningful_rag = bool(rag_context.strip()) and not any(keyword in rag_context for keyword in meaningful_rag_keywords)
|
@@ -504,7 +500,6 @@ class EmployerBrandingAgent:
|
|
504 |
|
505 |
# --- Example Usage (Conceptual) ---
|
506 |
async def main_test():
|
507 |
-
# This test will reflect whether _REAL_GENAI_LOADED is true or false
|
508 |
logging.info(f"Starting main_test for EmployerBrandingAgent (Real GenAI Loaded: {_REAL_GENAI_LOADED}, API Key Set: {bool(GEMINI_API_KEY)})")
|
509 |
|
510 |
df_follower_stats = pd.DataFrame({'date': pd.to_datetime(['2023-01-01']), 'country': ['USA'], 'new_followers': [10]})
|
@@ -521,16 +516,10 @@ async def main_test():
|
|
521 |
logging.info(f"\n\n--- Query: {query} ---")
|
522 |
response = await agent.process_query(user_query=query)
|
523 |
logging.info(f"--- Response for '{query}': ---\n{response}\n---------------------------\n")
|
524 |
-
if _REAL_GENAI_LOADED and GEMINI_API_KEY: await asyncio.sleep(0.1)
|
525 |
|
526 |
if __name__ == "__main__":
|
527 |
-
# Note: To test with real API, ensure GEMINI_API_KEY is set in your environment
|
528 |
-
# and 'google-generativeai' is installed.
|
529 |
-
# Otherwise, it will run in dummy mode.
|
530 |
-
|
531 |
-
# Check mode before running test
|
532 |
print(f"Script starting... Real GenAI Library Loaded: {_REAL_GENAI_LOADED}, API Key Set: {bool(GEMINI_API_KEY)}")
|
533 |
-
|
534 |
try:
|
535 |
asyncio.run(main_test())
|
536 |
except RuntimeError as e:
|
|
|
11 |
class _DummyGenAIClientModels: # Represents the dummy model service client
|
12 |
async def generate_content_async(self, model=None, contents=None, generation_config=None, safety_settings=None, stream=False, tools=None, tool_config=None):
|
13 |
print(f"Dummy _DummyGenAI.Client.models.generate_content_async called for model: {model}")
|
|
|
14 |
class DummyPart: text = "# Dummy response from _DummyGenAI async"
|
15 |
class DummyContent: parts = [DummyPart()]
|
16 |
class DummyCandidate: content = DummyContent(); finish_reason = "_DUMMY_STOP"; safety_ratings = []; token_count = 0; index = 0
|
|
|
44 |
return DummyResponse()
|
45 |
|
46 |
class _ActualDummyGenAI: # type: ignore # Renamed the main dummy class
|
47 |
+
Client = _DummyGenAIClient
|
48 |
|
49 |
@staticmethod
|
50 |
def configure(api_key):
|
|
|
57 |
|
58 |
@staticmethod
|
59 |
def embed_content(model, content, task_type, title=None):
|
|
|
60 |
print(f"Dummy _ActualDummyGenAI.embed_content called for model: {model}, task_type: {task_type}, title: {title}")
|
61 |
return {"embedding": [0.1] * 768}
|
62 |
+
|
63 |
+
# Add a dummy 'types' attribute to the dummy genai class
|
64 |
+
class types:
|
65 |
+
@staticmethod
|
66 |
+
def GenerationConfig(**kwargs):
|
67 |
+
print(f"Dummy _ActualDummyGenAI.types.GenerationConfig created with: {kwargs}")
|
68 |
+
return dict(kwargs)
|
69 |
+
|
70 |
+
@staticmethod
|
71 |
+
def SafetySetting(category, threshold):
|
72 |
+
print(f"Dummy _ActualDummyGenAI.types.SafetySetting created: category={category}, threshold={threshold}")
|
73 |
+
return {"category": category, "threshold": threshold}
|
74 |
+
|
75 |
+
class HarmCategory:
|
76 |
+
HARM_CATEGORY_UNSPECIFIED = "HARM_CATEGORY_UNSPECIFIED"; HARM_CATEGORY_HARASSMENT = "HARM_CATEGORY_HARASSMENT"; HARM_CATEGORY_HATE_SPEECH = "HARM_CATEGORY_HATE_SPEECH"; HARM_CATEGORY_SEXUALLY_EXPLICIT = "HARM_CATEGORY_SEXUALLY_EXPLICIT"; HARM_CATEGORY_DANGEROUS_CONTENT = "HARM_CATEGORY_DANGEROUS_CONTENT"
|
77 |
+
class HarmBlockThreshold:
|
78 |
+
BLOCK_NONE = "BLOCK_NONE"; BLOCK_LOW_AND_ABOVE = "BLOCK_LOW_AND_ABOVE"; BLOCK_MEDIUM_AND_ABOVE = "BLOCK_MEDIUM_AND_ABOVE"; BLOCK_ONLY_HIGH = "BLOCK_ONLY_HIGH"
|
79 |
+
class FinishReason:
|
80 |
+
FINISH_REASON_UNSPECIFIED = "UNSPECIFIED"; STOP = "STOP"; MAX_TOKENS = "MAX_TOKENS"; SAFETY = "SAFETY"; RECITATION = "RECITATION"; OTHER = "OTHER"
|
81 |
+
class BlockedReason:
|
82 |
+
BLOCKED_REASON_UNSPECIFIED = "BLOCKED_REASON_UNSPECIFIED"; SAFETY = "SAFETY"; OTHER = "OTHER"
|
83 |
+
# Add other dummy types if needed by the script, e.g. BlockedPromptException
|
84 |
+
class BlockedPromptException(Exception): pass
|
85 |
+
class StopCandidateException(Exception): pass
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
# --- Attempt to import the real library ---
|
89 |
_REAL_GENAI_LOADED = False
|
90 |
+
genai_types = None # Initialize genai_types
|
91 |
+
|
92 |
try:
|
93 |
+
from google import genai # Use the direct import as requested
|
94 |
+
# If 'from google import genai' succeeds, 'genai.types' should be available
|
95 |
+
genai_types = genai.types # Assign the real types
|
96 |
_REAL_GENAI_LOADED = True
|
97 |
+
logging.info("Successfully imported 'google.genai' and accessed 'genai.types'.")
|
98 |
except ImportError:
|
99 |
+
# If 'from google import genai' fails, use the dummy genai and its dummy types
|
100 |
+
genai = _ActualDummyGenAI()
|
101 |
+
genai_types = genai.types # This will now point to _ActualDummyGenAI.types
|
102 |
+
logging.warning("Google AI library ('google.genai') not found. Using dummy implementations for 'genai' and 'genai_types'.")
|
103 |
+
except AttributeError:
|
104 |
+
# This handles the case where 'from google import genai' succeeds, but 'genai.types' is not found
|
105 |
+
# (which would be unusual for the official library but good for robustness)
|
106 |
+
genai = _ActualDummyGenAI() # Fallback to full dummy
|
107 |
+
genai_types = genai.types
|
108 |
+
_REAL_GENAI_LOADED = False # Mark as not fully loaded if types are missing
|
109 |
+
logging.warning("'google.genai' imported, but 'genai.types' not found. Falling back to dummy implementations.")
|
110 |
|
111 |
|
112 |
# --- Configuration ---
|
|
|
119 |
}
|
120 |
|
121 |
# Default safety settings list for Gemini
|
122 |
+
# genai_types is now consistently the real genai.types or the dummy _ActualDummyGenAI.types
|
123 |
try:
|
124 |
DEFAULT_SAFETY_SETTINGS = [
|
125 |
genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_HATE_SPEECH, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
|
|
|
127 |
genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
|
128 |
genai_types.SafetySetting(category=genai_types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold=genai_types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE),
|
129 |
]
|
130 |
+
except Exception as e_safety:
|
131 |
+
logging.warning(f"Could not define DEFAULT_SAFETY_SETTINGS using 'genai_types' (real_loaded: {_REAL_GENAI_LOADED}): {e_safety}. Using placeholder list of dicts.")
|
132 |
DEFAULT_SAFETY_SETTINGS = [
|
133 |
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
134 |
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
|
|
140 |
|
141 |
if GEMINI_API_KEY and _REAL_GENAI_LOADED:
|
142 |
try:
|
143 |
+
genai.configure(api_key=GEMINI_API_KEY)
|
144 |
logging.info(f"Gemini API key configured globally (real genai active: {_REAL_GENAI_LOADED}).")
|
145 |
except Exception as e:
|
146 |
logging.error(f"Failed to configure Gemini API globally: {e}", exc_info=True)
|
147 |
elif not GEMINI_API_KEY and _REAL_GENAI_LOADED:
|
148 |
+
logging.warning("GEMINI_API_KEY environment variable not set, but real library is loaded. API calls will likely fail or use default credentials if available.")
|
149 |
elif not _REAL_GENAI_LOADED:
|
150 |
+
logging.info("Operating in DUMMY mode because 'google.genai' library was not found or 'genai.types' was inaccessible.")
|
151 |
+
if GEMINI_API_KEY:
|
152 |
+
genai.configure(api_key=GEMINI_API_KEY) # Calls dummy configure
|
153 |
|
154 |
|
155 |
# --- RAG Documents Definition (Example) ---
|
|
|
178 |
self.embedding_model_name = embedding_model_name
|
179 |
self.documents_df = documents_df.copy()
|
180 |
self.embeddings_generated = False
|
|
|
181 |
self.real_client_available_for_rag = _REAL_GENAI_LOADED and bool(GEMINI_API_KEY)
|
182 |
|
183 |
if self.real_client_available_for_rag:
|
184 |
try:
|
185 |
self._precompute_embeddings()
|
186 |
self.embeddings_generated = True
|
|
|
187 |
logging.info(f"RAG embeddings precomputed using REAL genai.embed_content for '{self.embedding_model_name}'.")
|
188 |
except Exception as e: logging.error(f"RAG precomputation error with real client: {e}", exc_info=True)
|
189 |
else:
|
190 |
+
logging.warning(f"RAG: Not using real client. Real GenAI loaded: {_REAL_GENAI_LOADED}, API Key set: {bool(GEMINI_API_KEY)}.")
|
191 |
+
if not _REAL_GENAI_LOADED: # If in full dummy mode
|
192 |
+
self._precompute_embeddings() # This will call dummy genai.embed_content via _embed_fn
|
|
|
193 |
|
194 |
def _embed_fn(self, title: str, text: str) -> list[float]:
|
|
|
195 |
try:
|
196 |
content_to_embed = text if text else title
|
197 |
if not content_to_embed: return [0.0] * 768
|
198 |
+
# genai.embed_content will be real or dummy based on import success
|
199 |
return genai.embed_content(model=self.embedding_model_name, content=content_to_embed, task_type="retrieval_document", title=title if title else None)["embedding"]
|
200 |
except Exception as e:
|
201 |
logging.error(f"Error in _embed_fn for '{title}' (real_genai_loaded: {_REAL_GENAI_LOADED}): {e}", exc_info=True)
|
|
|
205 |
if 'Embeddings' not in self.documents_df.columns: self.documents_df['Embeddings'] = pd.Series(dtype='object')
|
206 |
mask = (self.documents_df['Text'].notna() & (self.documents_df['Text'] != '')) | (self.documents_df['Title'].notna() & (self.documents_df['Title'] != ''))
|
207 |
if not mask.any(): logging.warning("No content for RAG embeddings."); return
|
|
|
208 |
self.documents_df.loc[mask, 'Embeddings'] = self.documents_df[mask].apply(lambda row: self._embed_fn(row.get('Title', ''), row.get('Text', '')), axis=1)
|
209 |
logging.info(f"Applied RAG embedding function to {mask.sum()} rows (real_genai_loaded: {_REAL_GENAI_LOADED}).")
|
210 |
|
211 |
|
212 |
def retrieve_relevant_info(self, query_text: str, top_k: int = 2) -> str:
|
213 |
+
if not self.real_client_available_for_rag:
|
214 |
+
if not _REAL_GENAI_LOADED: # If in full dummy mode, make the dummy call for logging
|
215 |
+
genai.embed_content(model=self.embedding_model_name, content=query_text, task_type="retrieval_query")
|
|
|
|
|
216 |
logging.warning(f"Skipping real RAG retrieval. Real GenAI: {_REAL_GENAI_LOADED}, API Key: {bool(GEMINI_API_KEY)}")
|
217 |
return "\n[RAG Context]\nReal RAG retrieval skipped (check logs for mode).\n"
|
218 |
|
|
|
|
|
219 |
try:
|
220 |
+
# genai.embed_content is the real one here
|
221 |
query_embedding = np.array(genai.embed_content(model=self.embedding_model_name, content=query_text, task_type="retrieval_query")["embedding"])
|
222 |
valid_df = self.documents_df.dropna(subset=['Embeddings'])
|
223 |
+
valid_df = valid_df[valid_df['Embeddings'].apply(lambda x: isinstance(x, (list, np.ndarray)) and len(x) > 0 and np.any(x))]
|
224 |
if valid_df.empty: return "\n[RAG Context]\nNo valid document embeddings after filtering.\n"
|
225 |
|
226 |
doc_embeddings = np.stack(valid_df['Embeddings'].apply(np.array).values)
|
|
|
253 |
|
254 |
if _REAL_GENAI_LOADED and GEMINI_API_KEY:
|
255 |
try:
|
256 |
+
self.client = genai.Client() # Real genai.Client
|
257 |
self.model_service = self.client.models
|
258 |
logging.info(f"PandasLLM: Initialized with REAL genai.Client().models for '{self.llm_model_name}'.")
|
259 |
except Exception as e:
|
260 |
logging.error(f"Failed to initialize REAL PandasLLM with genai.Client: {e}", exc_info=True)
|
261 |
+
# If this fails, self.model_service remains None, _call_gemini_api_async might fallback if _REAL_GENAI_LOADED somehow becomes False
|
262 |
else:
|
263 |
+
logging.warning(f"PandasLLM: Not using REAL genai.Client. RealGenAILoaded: {_REAL_GENAI_LOADED}, APIKeySet: {bool(GEMINI_API_KEY)}.")
|
264 |
if not _REAL_GENAI_LOADED: # If import failed, genai is already the dummy
|
265 |
self.client = genai.Client() # Instantiates _ActualDummyGenAI.Client
|
266 |
+
self.model_service = self.client.models
|
267 |
+
logging.info("PandasLLM: Initialized with DUMMY genai.Client().models (real library failed to load).")
|
268 |
|
269 |
|
270 |
async def _call_gemini_api_async(self, prompt_text: str, history: list = None) -> str:
|
|
|
271 |
use_real_service = _REAL_GENAI_LOADED and GEMINI_API_KEY and self.model_service is not None
|
272 |
|
|
|
|
|
|
|
|
|
273 |
active_model_service = self.model_service
|
274 |
+
if not use_real_service and not _REAL_GENAI_LOADED: # Full dummy mode
|
275 |
+
if active_model_service is None: # Should have been set by __init__
|
276 |
+
logging.debug("PandasLLM._call_gemini_api_async: active_model_service is None in dummy mode, using global dummy genai.Client().models.")
|
277 |
+
active_model_service = genai.Client().models # genai is _ActualDummyGenAI here
|
|
|
|
|
|
|
|
|
278 |
|
279 |
if not active_model_service:
|
280 |
+
logging.error(f"PandasLLM: Model service not available (use_real_service: {use_real_service}, _REAL_GENAI_LOADED: {_REAL_GENAI_LOADED}). Cannot call API.")
|
281 |
+
return "# Error: Gemini model service not available for API call."
|
|
|
282 |
|
283 |
gemini_history = []
|
284 |
if history:
|
|
|
296 |
|
297 |
api_generation_config = None
|
298 |
if self.generation_config_dict:
|
299 |
+
try:
|
300 |
api_generation_config = genai_types.GenerationConfig(**self.generation_config_dict)
|
301 |
except Exception as e_cfg:
|
302 |
+
logging.error(f"Error creating GenerationConfig (real_loaded: {_REAL_GENAI_LOADED}): {e_cfg}. Using dict fallback.")
|
303 |
api_generation_config = self.generation_config_dict
|
304 |
|
305 |
logging.info(f"\n--- Calling Gemini API (model: {model_id_for_api}, RealMode: {use_real_service}) ---\nConfig: {api_generation_config}\nSafety: {bool(self.safety_settings_list)}\nContent (last part text): {contents_for_api[-1]['parts'][0]['text'][:100]}...\n")
|
306 |
|
307 |
try:
|
|
|
|
|
308 |
response = await active_model_service.generate_content_async(
|
309 |
model=model_id_for_api,
|
310 |
contents=contents_for_api,
|
|
|
322 |
llm_output = ""
|
323 |
if hasattr(response, 'text') and isinstance(response.text, str):
|
324 |
llm_output = response.text
|
325 |
+
elif response.candidates:
|
326 |
candidate = response.candidates[0]
|
327 |
if candidate.content and candidate.content.parts:
|
328 |
llm_output = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))
|
329 |
|
330 |
if not llm_output and candidate.finish_reason:
|
331 |
finish_reason_val = candidate.finish_reason
|
|
|
332 |
finish_reason_str = str(finish_reason_val.name if hasattr(finish_reason_val, 'name') and not isinstance(finish_reason_val, str) else finish_reason_val)
|
333 |
|
|
|
334 |
if finish_reason_str == "SAFETY":
|
335 |
safety_messages = []
|
336 |
if hasattr(candidate, 'safety_ratings') and candidate.safety_ratings:
|
|
|
349 |
|
350 |
return llm_output
|
351 |
|
352 |
+
# Use genai_types for exceptions if real library is loaded
|
353 |
+
except (genai_types.BlockedPromptException if _REAL_GENAI_LOADED and hasattr(genai_types, 'BlockedPromptException') else Exception) as bpe:
|
354 |
+
if _REAL_GENAI_LOADED and type(bpe).__name__ == 'BlockedPromptException': # Check specific type if real
|
355 |
+
logging.error(f"Prompt blocked (BlockedPromptException): {bpe}", exc_info=True)
|
356 |
+
return f"# Error: Prompt blocked. Details: {bpe}"
|
357 |
+
# Fallthrough for general exception if not the specific type or in dummy mode
|
358 |
+
pass # Let the general Exception handler catch it or re-raise if needed
|
359 |
+
|
360 |
+
except (genai_types.StopCandidateException if _REAL_GENAI_LOADED and hasattr(genai_types, 'StopCandidateException') else Exception) as sce:
|
361 |
+
if _REAL_GENAI_LOADED and type(sce).__name__ == 'StopCandidateException': # Check specific type if real
|
362 |
+
logging.error(f"Candidate stopped (StopCandidateException): {sce}", exc_info=True)
|
363 |
+
return f"# Error: Content generation stopped. Details: {sce}"
|
364 |
+
pass # Fallthrough
|
365 |
+
|
366 |
except Exception as e:
|
367 |
logging.error(f"Error calling Gemini API (RealMode: {use_real_service}): {e}", exc_info=True)
|
368 |
return f"# Error during API call: {type(e).__name__} - {str(e)[:100]}."
|
|
|
385 |
except IndexError: code_to_execute = ""
|
386 |
|
387 |
if llm_response_text.startswith("# Error:") or not code_to_execute.strip():
|
|
|
388 |
logging.warning(f"LLM response is an error, or no valid Python code block found for sandbox. Raw LLM response: {llm_response_text[:200]}")
|
389 |
if not code_to_execute.strip() and not llm_response_text.startswith("# Error:"):
|
390 |
+
if "```" not in llm_response_text and len(llm_response_text.strip()) > 0:
|
|
|
391 |
logging.info(f"LLM produced text output instead of Python code in sandbox mode. Passing through: {llm_response_text[:200]}")
|
392 |
return llm_response_text
|
393 |
|
|
|
456 |
prompt += "\n--- AVAILABLE DATA AND SCHEMAS ---\n"
|
457 |
prompt += self.schemas_representation if self.schemas_representation.strip() != "No DataFrames provided." else "No DataFrames loaded.\n"
|
458 |
|
|
|
459 |
rag_context = self.rag_system.retrieve_relevant_info(user_query)
|
460 |
meaningful_rag_keywords = ["Error", "No valid", "No relevant", "Cannot retrieve", "not available", "not generated", "Skipped"]
|
461 |
is_meaningful_rag = bool(rag_context.strip()) and not any(keyword in rag_context for keyword in meaningful_rag_keywords)
|
|
|
500 |
|
501 |
# --- Example Usage (Conceptual) ---
|
502 |
async def main_test():
|
|
|
503 |
logging.info(f"Starting main_test for EmployerBrandingAgent (Real GenAI Loaded: {_REAL_GENAI_LOADED}, API Key Set: {bool(GEMINI_API_KEY)})")
|
504 |
|
505 |
df_follower_stats = pd.DataFrame({'date': pd.to_datetime(['2023-01-01']), 'country': ['USA'], 'new_followers': [10]})
|
|
|
516 |
logging.info(f"\n\n--- Query: {query} ---")
|
517 |
response = await agent.process_query(user_query=query)
|
518 |
logging.info(f"--- Response for '{query}': ---\n{response}\n---------------------------\n")
|
519 |
+
if _REAL_GENAI_LOADED and GEMINI_API_KEY: await asyncio.sleep(0.1)
|
520 |
|
521 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
522 |
print(f"Script starting... Real GenAI Library Loaded: {_REAL_GENAI_LOADED}, API Key Set: {bool(GEMINI_API_KEY)}")
|
|
|
523 |
try:
|
524 |
asyncio.run(main_test())
|
525 |
except RuntimeError as e:
|