Update eb_agent_module.py

eb_agent_module.py  CHANGED  (+36 -48)
@@ -8,10 +8,8 @@ import textwrap
 from datetime import datetime
 from typing import Dict, List, Optional, Union, Any
 import traceback
-from pandasai import Agent, SmartDataframe
-from pandasai.llm import GoogleGemini
-from pandasai.responses.response_parser import ResponseParser
-from pandasai.middlewares.base import BaseMiddleware
+import pandasai as pai
+from pandasai_litellm import LiteLLM
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
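(Dependency note, not part of the diff: the new imports target the PandasAI v3 line and its LiteLLM wrapper. Assuming the PyPI distribution names match the import names, the Space's requirements would need the v3 pre-release of pandasai plus pandasai-litellm, e.g. pip install --pre pandasai pandasai-litellm.)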
@@ -362,56 +360,38 @@ class EnhancedEmployerBrandingAgent:
         logging.info(f"EnhancedEmployerBrandingAgent initialized. LLM: {self.llm_model_name}. RAG docs: {len(self.rag_system.documents_df)}. DataFrames: {list(self.all_dataframes.keys())}")
 
     def _initialize_pandas_agent(self):
-        """Initialize PandasAI agent"""
+        """Initialize PandasAI with enhanced configuration"""
         if not self.all_dataframes or not GEMINI_API_KEY:
             logging.warning("Cannot initialize PandasAI agent: missing dataframes or API key")
             return
-
+
         try:
-            # Create SmartDataframe objects with descriptions
-            smart_dfs = []
+            # Configure LiteLLM with Gemini
+            llm = LiteLLM(model="gemini-2.5-flash-preview-05-20")
+
+            # Set PandasAI configuration
+            pai.config.set({
+                "llm": llm,
+                "temperature": 0.7,
+                "verbose": True,
+                "enable_cache": True
+            })
+
+            # Store dataframes for chat queries (we'll use them directly)
+            self.pandas_dfs = {}
             for name, df in self.all_dataframes.items():
-                # Create a SmartDataframe with name and description
+                # Convert to PandasAI DataFrame with description
                 df_description = self._generate_dataframe_description(name, df)
-
-                smart_df = SmartDataframe(
-                    df,
-                    name=name,
-                    description=df_description
-                )
-                smart_dfs.append(smart_df)
-
-            # Configure PandasAI with Gemini
-            pandas_llm = GoogleGemini(
-                api_token=GEMINI_API_KEY,
-                model=self.llm_model_name,
-                temperature=0.7,
-                top_p=0.95,
-                top_k=40,
-                max_output_tokens=4096
-            )
-
-            # Create agent with enhanced configuration
-            self.pandas_agent = Agent(
-                dfs=smart_dfs,
-                config={
-                    "llm": pandas_llm,
-                    "verbose": True,
-                    "enable_cache": True,
-                    "save_charts": True,
-                    "save_charts_path": "charts/",
-                    "custom_whitelisted_dependencies": ["matplotlib", "seaborn", "plotly"],
-                    "middlewares": [EmployerBrandingMiddleware()],
-                    "response_parser": ResponseParser,
-                    "max_retries": 3,
-                    "conversational": True
-                }
-            )
+                pandas_df = pai.DataFrame(df, description=df_description)
+                self.pandas_dfs[name] = pandas_df
 
-
+            self.pandas_agent = True  # Flag to indicate PandasAI is ready
+            logging.info(f"PandasAI initialized successfully with {len(self.pandas_dfs)} DataFrames")
 
         except Exception as e:
-            logging.error(f"Failed to initialize PandasAI agent: {e}", exc_info=True)
+            logging.error(f"Failed to initialize PandasAI: {e}", exc_info=True)
             self.pandas_agent = None
+            self.pandas_dfs = {}
 
     def _generate_dataframe_description(self, name: str, df: pd.DataFrame) -> str:
         """Generate a descriptive summary for PandasAI to better understand the data"""
@@ -551,13 +531,21 @@ class EnhancedEmployerBrandingAgent:
 
     async def _generate_pandas_response(self, query: str) -> tuple[str, bool]:
         """Generate response using PandasAI for data queries"""
-        if not self.pandas_agent:
-            return "Data analysis not available - PandasAI agent not initialized.", False
+        if not self.pandas_agent or not hasattr(self, 'pandas_dfs'):
+            return "Data analysis not available - PandasAI not initialized.", False
 
         try:
-            # Use PandasAI to analyze the data
             logging.info(f"Processing data query with PandasAI: {query[:100]}...")
-            pandas_response = self.pandas_agent.chat(query)
+
+            # Use the first available dataframe for single-df queries
+            # For multi-df queries, you'd use pai.chat(query, df1, df2, ...)
+            if len(self.pandas_dfs) == 1:
+                df = list(self.pandas_dfs.values())[0]
+                pandas_response = df.chat(query)
+            else:
+                # For multiple dataframes, use pai.chat with all dfs
+                dfs = list(self.pandas_dfs.values())
+                pandas_response = pai.chat(query, *dfs)
 
             # Check if response is meaningful
             if pandas_response and str(pandas_response).strip():
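For reviewers unfamiliar with the PandasAI v3 API this commit moves to, here is a minimal, self-contained sketch of the same flow outside the agent class: configure the LLM once with pai.config.set, wrap plain pandas DataFrames in pai.DataFrame with a description, then ask questions via DataFrame.chat (single frame) or pai.chat (several frames). The sample data and variable names are illustrative only; the sketch assumes the pandasai v3 beta and pandasai-litellm packages are installed and that a Gemini key is available to LiteLLM through the environment.

import pandas as pd
import pandasai as pai
from pandasai_litellm import LiteLLM

# One global configuration replaces the per-Agent config dict used in v2.
pai.config.set({
    "llm": LiteLLM(model="gemini-2.5-flash-preview-05-20"),
    "verbose": True,
    "enable_cache": True,
})

# Wrap plain pandas DataFrames and give PandasAI natural-language descriptions.
followers = pai.DataFrame(
    pd.DataFrame({"month": ["Jan", "Feb", "Mar"], "followers": [1200, 1450, 1700]}),
    description="Monthly LinkedIn follower counts (illustrative sample data)",
)
posts = pai.DataFrame(
    pd.DataFrame({"month": ["Jan", "Feb", "Mar"], "posts": [8, 11, 9]}),
    description="Posts published per month (illustrative sample data)",
)

# Single-dataframe question: call .chat on the frame itself.
print(followers.chat("Which month had the largest follower growth?"))

# Multi-dataframe question: pass all frames to pai.chat, as _generate_pandas_response does.
print(pai.chat("Is follower growth related to posting volume?", followers, posts))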
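Net effect of the change: the v2-style stack (SmartDataframe objects, a GoogleGemini LLM, and an Agent carrying middleware, response-parser, chart-saving, and dependency-whitelist settings) is replaced by one global pai.config.set call plus a dict of pai.DataFrame objects kept in self.pandas_dfs. self.pandas_agent is now only a readiness flag, and _generate_pandas_response dispatches to DataFrame.chat for a single frame or pai.chat(query, *dfs) when several are loaded. Note that the EmployerBrandingMiddleware, ResponseParser, and chart-saving options from the old configuration are dropped rather than ported, and the Gemini model is now hard-coded to gemini-2.5-flash-preview-05-20 instead of following self.llm_model_name.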