|
|
|
import os |
|
import json |
|
import numpy as np |
|
import torch |
|
import torch.nn as nn |
|
from openai import AzureOpenAI |
|
from stable_baselines3 import PPO |
|
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor |
|
import gymnasium as gym |
|
from gymnasium import spaces |
|
from dataclasses import dataclass |
|
from typing import List, Dict, Any |
|
import argparse |
|
import logging |
|
|
|
|
|
# Configure root logging once at import time: timestamped records to stderr.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    handlers=[logging.StreamHandler()],
)

# Module-level logger, named after this module per stdlib convention.
logger = logging.getLogger(__name__)
|
|
|
|
|
# Pick the inference device once at import time: prefer CUDA when present.
_cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if _cuda_available else "cpu")
if _cuda_available:
    logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    logger.info("GPU not available, using CPU for inference")
|
|
|
|
|
|
|
@dataclass
class ConversationState:
    """Snapshot of a conversation at one turn, convertible to an RL observation.

    Holds the structured message history, the (already processed) text
    embedding, scalar conversation metrics, the zero-based turn index, and
    the conversion probabilities the agent predicted on earlier turns.
    """

    conversation_history: List[Dict[str, str]]
    embedding: np.ndarray
    conversation_metrics: Dict[str, float]
    turn_number: int
    conversion_probabilities: List[float]

    @property
    def state_vector(self) -> np.ndarray:
        """Build the flat float32 observation: embedding, metric values
        (in dict insertion order), turn number, then the last 10 predicted
        probabilities zero-padded to a fixed width of 10."""
        metrics_part = np.asarray(
            list(self.conversation_metrics.values()), dtype=np.float32
        )
        turn_part = np.array([self.turn_number], dtype=np.float32)

        # Keep only the most recent 10 probabilities; pad the tail with zeros.
        recent_probs = self.conversion_probabilities[-10:]
        probs_part = np.zeros(10, dtype=np.float32)
        probs_part[:len(recent_probs)] = recent_probs

        combined = np.concatenate(
            [self.embedding, metrics_part, turn_part, probs_part]
        )
        return combined.astype(np.float32)
|
|
|
class CustomLN(BaseFeaturesExtractor):
    """MLP feature extractor for flat (1-D Box) observations.

    Projects the raw observation through ReLU-activated linear layers of
    widths 512 -> 256 -> ``features_dim``.
    """

    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 128):
        super().__init__(observation_space, features_dim)
        input_dim = observation_space.shape[0]

        # Assemble the 512 -> 256 -> features_dim stack layer by layer.
        layers = []
        for in_dim, out_dim in ((input_dim, 512), (512, 256), (256, features_dim)):
            layers.append(nn.Linear(in_dim, out_dim))
            layers.append(nn.ReLU())
        # Placed on the module-level device so it matches the loaded PPO model.
        self.linear_network = nn.Sequential(*layers).to(device)

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        return self.linear_network(observations)
|
|
|
|
|
|
|
def get_azure_openai_embedding(
    text: str,
    client: AzureOpenAI,
    deployment_name: str,
    fallback_dim: int = 3072
) -> np.ndarray:
    """Gets embedding from Azure OpenAI for the given text.

    Args:
        text: Text to embed.
        client: Configured AzureOpenAI client.
        deployment_name: Azure embedding deployment (model) name.
        fallback_dim: Length of the zero vector returned when the API call
            fails. Defaults to 3072, the output size of
            text-embedding-3-large, preserving the previous hard-coded
            behavior.

    Returns:
        A float32 embedding vector on success, or a float32 zero vector of
        length ``fallback_dim`` on any API error (the error is logged).
    """
    try:
        response = client.embeddings.create(
            input=text,
            model=deployment_name
        )
        embedding_vector = np.array(response.data[0].embedding, dtype=np.float32)
        logger.debug(f"Received embedding from Azure. Shape: {embedding_vector.shape}")
        return embedding_vector
    except Exception as e:
        # Deliberately broad: auth, network, and quota failures surface as
        # different exception types, and callers rely on the zero-vector
        # fallback instead of an exception.
        logger.error(f"Error getting embedding from Azure OpenAI: {e}")

    logger.warning("Falling back to zero embedding. This will impact prediction quality.")
    return np.zeros(fallback_dim, dtype=np.float32)
|
|
|
def process_raw_embedding(
    raw_embedding: np.ndarray,
    turn: int,
    max_turns_for_scaling: int,
    target_model_embedding_dim: int,
    use_miniembeddings: bool
) -> np.ndarray:
    """
    Scales and potentially reduces/pads the raw embedding (from Azure)
    to match the model's expected input dimension and characteristics.

    Processing steps:
      1. Scale the whole vector by a factor in [0.6, 1.0] that grows with
         conversation progress (turn / max_turns_for_scaling, capped at 1.0).
      2. If ``use_miniembeddings`` and the raw vector is longer than the
         target, reduce it by average-pooling runs of ``pool_factor``
         consecutive elements; otherwise truncate or zero-pad to the target.
      3. A final safety net forces the result to exactly
         ``target_model_embedding_dim`` elements.

    Returns a float32 vector of length ``target_model_embedding_dim``,
    except the degenerate ``target_model_embedding_dim <= 0`` mini-embedding
    case, which returns a single-element zero vector.
    """
    dim_of_raw_embedding = len(raw_embedding)
    logger.debug(f"Processing raw_embedding. Dim: {dim_of_raw_embedding}, Target model dim: {target_model_embedding_dim}, Use mini: {use_miniembeddings}")

    # Scale magnitude from 60% up to 100% as the conversation progresses.
    # NOTE(review): raises ZeroDivisionError if max_turns_for_scaling == 0 —
    # callers pass the CLI default of 20; confirm 0 is never supplied.
    progress = min(1.0, turn / max_turns_for_scaling)
    scaled_embedding = raw_embedding * (0.6 + 0.4 * progress)

    if use_miniembeddings and dim_of_raw_embedding > target_model_embedding_dim:
        logger.debug(f"Applying mini-embedding reduction from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        if target_model_embedding_dim <= 0:
            logger.error("Target model embedding dimension is <=0. Cannot pool.")
            return np.zeros(1, dtype=np.float32)

        # Each output element will average `pool_factor` consecutive inputs.
        pool_factor = dim_of_raw_embedding // target_model_embedding_dim
        if pool_factor == 0: pool_factor = 1

        num_elements_to_pool = pool_factor * target_model_embedding_dim

        # Use only the prefix that divides evenly into pooling groups.
        elements_for_pooling = scaled_embedding[:num_elements_to_pool] if num_elements_to_pool <= dim_of_raw_embedding else scaled_embedding

        if len(elements_for_pooling) < target_model_embedding_dim :
            # Too few elements to pool at all: copy what we have and zero-pad.
            logger.warning(f"Not enough elements ({len(elements_for_pooling)}) to pool into target_dim ({target_model_embedding_dim}). Padding result.")
            reduced_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
            fill_len = min(len(elements_for_pooling), target_model_embedding_dim)
            reduced_embedding[:fill_len] = elements_for_pooling[:fill_len]
        else:
            try:
                # Reshape into (groups, pool_factor) and mean over each group.
                reshapable_length = (len(elements_for_pooling) // pool_factor) * pool_factor
                reshaped_for_pooling = elements_for_pooling[:reshapable_length].reshape(-1, pool_factor)

                if reshaped_for_pooling.shape[0] > target_model_embedding_dim:
                    # More groups than needed: keep only the first target_dim.
                    reshaped_for_pooling = reshaped_for_pooling[:target_model_embedding_dim, :]
                elif reshaped_for_pooling.shape[0] < target_model_embedding_dim:
                    # Fewer groups than needed: pool what exists, then zero-pad.
                    logger.warning(f"Pooling resulted in fewer dimensions ({reshaped_for_pooling.shape[0]}) than target ({target_model_embedding_dim}). Will pad.")
                    temp_reduced = np.mean(reshaped_for_pooling, axis=1)
                    reduced_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
                    reduced_embedding[:len(temp_reduced)] = temp_reduced
                else:
                    reduced_embedding = np.mean(reshaped_for_pooling, axis=1)

            except ValueError as e:
                # Reshape failed; degrade gracefully to plain truncate/pad.
                logger.error(f"Reshape for pooling failed: {e}. Lengths: elements_for_pooling={len(elements_for_pooling)}, pool_factor={pool_factor}. Falling back to simple truncation/padding.")
                if dim_of_raw_embedding > target_model_embedding_dim:
                    reduced_embedding = scaled_embedding[:target_model_embedding_dim]
                else:
                    reduced_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
                    reduced_embedding[:dim_of_raw_embedding] = scaled_embedding

        processed_embedding = reduced_embedding

    elif dim_of_raw_embedding == target_model_embedding_dim:
        # Already the right size: only the progress scaling applies.
        processed_embedding = scaled_embedding
    elif dim_of_raw_embedding > target_model_embedding_dim:
        logger.debug(f"Truncating embedding from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        processed_embedding = scaled_embedding[:target_model_embedding_dim]
    else:
        logger.debug(f"Padding embedding from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        processed_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
        processed_embedding[:dim_of_raw_embedding] = scaled_embedding

    # Safety net: guarantee the exact target dimension on return.
    if len(processed_embedding) != target_model_embedding_dim:
        logger.warning(f"Dimension mismatch after processing. Expected {target_model_embedding_dim}, got {len(processed_embedding)}. Adjusting...")
        final_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
        fill_len = min(len(processed_embedding), target_model_embedding_dim)
        final_embedding[:fill_len] = processed_embedding[:fill_len]
        return final_embedding.astype(np.float32)

    return processed_embedding.astype(np.float32)
|
|
|
|
|
|
|
|
|
def predict_conversation_trajectory(
    model: PPO,
    azure_openai_client: AzureOpenAI,
    azure_deployment_name: str,
    conversation_messages: List[Dict[str, str]],
    initial_metrics: Dict[str, float],
    model_expected_embedding_dim: int,
    use_miniembeddings_on_azure_emb: bool,
    max_conversation_turns_scaling: int = 20
) -> List[Dict[str, Any]]:
    """Run the PPO model over a conversation turn by turn.

    For each message, the cumulative conversation text is embedded via
    Azure OpenAI, processed to the model's expected embedding dimension,
    combined with metrics and prior predictions into an observation, and
    fed to ``model.predict``.

    Args:
        model: Loaded PPO model whose observation space must equal
            ``model_expected_embedding_dim + 5 metrics + 1 turn + 10 probs``.
        azure_openai_client: Client used for embedding calls.
        azure_deployment_name: Azure embedding deployment name.
        conversation_messages: List of dicts with "speaker" and "message".
        initial_metrics: Base metric values copied for every turn.
        model_expected_embedding_dim: Embedding component size the model
            was trained with.
        use_miniembeddings_on_azure_emb: Whether to pool-reduce oversized
            Azure embeddings (see ``process_raw_embedding``).
        max_conversation_turns_scaling: Denominator for the progress metric.

    Returns:
        One dict per turn: turn number (1-based), speaker, message, and
        the predicted conversion probability.

    Raises:
        ValueError: On any observation/embedding dimension mismatch.
    """
    logger.info(f"Starting prediction. Model expects embedding_dim: {model_expected_embedding_dim}. use_mini_on_azure: {use_miniembeddings_on_azure_emb}")

    current_conversation_history_text = []
    current_conversation_history_struct = []
    agent_predicted_probabilities = []
    output_predictions = []

    # Observation layout: embedding + 5 metric values + 1 turn counter +
    # 10 recent probabilities (must match ConversationState.state_vector).
    num_metrics = 5
    expected_obs_dim = model_expected_embedding_dim + num_metrics + 1 + 10
    if model.observation_space.shape[0] != expected_obs_dim:
        # Fail fast with an actionable message rather than a shape error
        # deep inside model.predict.
        logger.error(f"CRITICAL: Model observation space dimension mismatch! Model expects total obs_dim {model.observation_space.shape[0]}, "
                     f"but calculations suggest {expected_obs_dim} based on model_expected_embedding_dim={model_expected_embedding_dim}. "
                     f"Ensure --embedding_dim matches the dimension used for the embedding component during training.")
        inferred_emb_dim = model.observation_space.shape[0] - num_metrics - 1 - 10
        logger.error(f"The model might have been trained with an embedding component of dimension: {inferred_emb_dim}")
        raise ValueError("Observation space dimension mismatch. Check --embedding_dim.")

    for turn_idx, message_info in enumerate(conversation_messages):
        speaker = message_info.get("speaker", "unknown")
        message = message_info.get("message", "")

        current_conversation_history_struct.append(message_info)
        current_conversation_history_text.append(f"{speaker}: {message}")

        # Embed the FULL history so far, not just the latest message.
        text_for_embedding = "\n".join(current_conversation_history_text)
        if not text_for_embedding.strip():
            logger.warning("Empty text for embedding at turn_idx %s, using zero vector from Azure (or fallback).", turn_idx)
            # Azure rejects empty input, so embed a single space instead.
            raw_turn_embedding = get_azure_openai_embedding(" ", azure_openai_client, azure_deployment_name)
            if np.all(raw_turn_embedding == 0):
                # NOTE(review): 3072 assumes text-embedding-3-large — confirm
                # against the deployed model.
                logger.warning("Fallback zero embedding used for empty text. Assuming 3072 dim if Azure call failed internally.")
                raw_turn_embedding = np.zeros(3072, dtype=np.float32)
        else:
            raw_turn_embedding = get_azure_openai_embedding(
                text_for_embedding,
                azure_openai_client,
                azure_deployment_name
            )

        # Scale/reduce/pad the raw Azure embedding to the model's dimension.
        final_turn_embedding = process_raw_embedding(
            raw_turn_embedding,
            turn_idx,
            max_conversation_turns_scaling,
            model_expected_embedding_dim,
            use_miniembeddings_on_azure_emb
        )

        if final_turn_embedding.shape[0] != model_expected_embedding_dim:
            logger.error(f"Embedding dimension mismatch after processing. Expected {model_expected_embedding_dim}, got {final_turn_embedding.shape[0]}. Critical error.")
            raise ValueError("Embedding dimension error after processing.")

        # Per-turn metrics: copy the base values, then refresh the dynamic ones.
        metrics = initial_metrics.copy()
        metrics['conversation_length'] = len(current_conversation_history_struct)
        metrics['progress'] = min(1.0, turn_idx / max_conversation_turns_scaling)
        if 'outcome' not in metrics: metrics['outcome'] = 0.5

        state = ConversationState(
            conversation_history=current_conversation_history_struct,
            embedding=final_turn_embedding,
            conversation_metrics=metrics,
            turn_number=turn_idx,
            conversion_probabilities=agent_predicted_probabilities
        )

        observation_vector = state.state_vector

        if observation_vector.shape[0] != model.observation_space.shape[0]:
            logger.error(f"Observation vector dimension mismatch before prediction! Expected {model.observation_space.shape[0]}, Got {observation_vector.shape[0]}")
            raise ValueError("Observation vector dimension mismatch.")

        # Deterministic prediction; the first action component is treated as
        # the conversion probability for this turn.
        action_probs, _ = model.predict(observation_vector, deterministic=True)
        predicted_prob_this_turn = float(action_probs[0])

        output_predictions.append({
            "turn": turn_idx + 1,
            "speaker": speaker,
            "message": message,
            "predicted_conversion_probability": predicted_prob_this_turn
        })
        # Feed this prediction back into the next turn's observation.
        agent_predicted_probabilities.append(predicted_prob_this_turn)

    return output_predictions
|
|
|
|
|
def main() -> None:
    """CLI entry point: parse args, connect to Azure OpenAI, load the PPO
    model and the conversation, then print per-turn conversion predictions.

    Exits early (plain return) with a logged error if the Azure client,
    conversation JSON, or model fails to initialize/load.
    """
    parser = argparse.ArgumentParser(description="Run inference with Azure OpenAI embeddings.")
    parser.add_argument("--model_path", type=str, required=True, help="Path to the trained PPO model (.zip file).")
    parser.add_argument("--conversation_json", type=str, required=True,
                        help="JSON string or path to JSON file for the conversation.")

    parser.add_argument("--azure_api_key", type=str, required=True, help="Azure OpenAI API Key.")
    parser.add_argument("--azure_endpoint", type=str, required=True, help="Azure OpenAI Endpoint URL.")
    parser.add_argument("--azure_deployment_name", type=str, required=True, help="Azure OpenAI embedding deployment name (e.g., for text-embedding-3-large).")
    parser.add_argument("--azure_api_version", type=str, default="2023-12-01-preview", help="Azure OpenAI API Version (e.g., 2023-05-15 or 2023-12-01-preview for newer models).")

    parser.add_argument("--embedding_dim", type=int, required=True,
                        help="The dimension of the embedding vector component EXPECTED BY THE PPO MODEL's observation space.")
    parser.add_argument("--use_miniembeddings", action="store_true",
                        help="Flag if the Azure OpenAI embedding should be reduced (if larger than --embedding_dim) using the mini-embedding logic.")
    parser.add_argument("--max_turns_scaling", type=int, default=20,
                        help="The 'max_turns' value used for progress scaling (default: 20).")
    args = parser.parse_args()

    # Build the Azure client and verify credentials with a cheap test call
    # before doing any heavier work.
    try:
        azure_client = AzureOpenAI(
            api_key=args.azure_api_key,
            azure_endpoint=args.azure_endpoint,
            api_version=args.azure_api_version
        )
        logger.info("Testing Azure OpenAI connection by embedding a short string...")
        test_embedding = get_azure_openai_embedding("test connection", azure_client, args.azure_deployment_name)
        logger.info(f"Azure OpenAI connection successful. Received test embedding of shape: {test_embedding.shape}")

    except Exception as e:
        logger.error(f"Failed to initialize or test Azure OpenAI client: {e}")
        return

    # Accept either a path to a JSON file or an inline JSON string.
    try:
        if os.path.exists(args.conversation_json):
            with open(args.conversation_json, 'r') as f:
                sample_conversation = json.load(f)
        else:
            sample_conversation = json.loads(args.conversation_json)
        if not isinstance(sample_conversation, list):
            raise ValueError("Conversation JSON must be a list of message objects.")
    except Exception as e:
        logger.error(f"Error loading conversation JSON: {e}")
        return

    # Neutral starting metrics; dynamic fields are overwritten per turn in
    # predict_conversation_trajectory.
    initial_metrics = {
        'customer_engagement': 0.5, 'sales_effectiveness': 0.5,
        'conversation_length': 0, 'outcome': 0.5, 'progress': 0.0
    }

    try:
        # Load onto the module-level device chosen at import time.
        model = PPO.load(args.model_path, device=device)
        logger.info(f"Model loaded from {args.model_path}")
        logger.info(f"Model's observation space shape: {model.observation_space.shape}")
    except Exception as e:
        logger.error(f"Error loading PPO model: {e}")
        return

    predictions = predict_conversation_trajectory(
        model,
        azure_client,
        args.azure_deployment_name,
        sample_conversation,
        initial_metrics,
        model_expected_embedding_dim=args.embedding_dim,
        use_miniembeddings_on_azure_emb=args.use_miniembeddings,
        max_conversation_turns_scaling=args.max_turns_scaling
    )

    # Human-readable summary: one line per turn with a truncated message.
    print("\n--- Conversation Predictions (with Azure OpenAI Embeddings) ---")
    for pred_info in predictions:
        print(f"Turn {pred_info['turn']} ({pred_info['speaker']}): \"{pred_info['message'][:60]}...\" -> Probability: {pred_info['predicted_conversion_probability']:.4f}")
|
|
|
if __name__ == "__main__":
    main()