File size: 19,200 Bytes
a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 fc3428f a730190 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 |
import os
import json
import numpy as np
import torch
import torch.nn as nn
from openai import AzureOpenAI # Use the new AzureOpenAI client
from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import gymnasium as gym
from gymnasium import spaces
from dataclasses import dataclass
from typing import List, Dict, Any
import argparse
import logging
# Configure logging: INFO level, timestamped records emitted to the console
# via an explicit StreamHandler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)
# GPU Setup: prefer CUDA when available so PPO inference runs on the GPU;
# `device` is consumed later by CustomLN and PPO.load.
if torch.cuda.is_available():
    device = torch.device("cuda")
    logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    logger.info("GPU not available, using CPU for inference")
# --- Replicated/Necessary Classes from train.py ---
@dataclass
class ConversationState:
    """Snapshot of an in-progress conversation, convertible to a PPO observation."""
    conversation_history: List[Dict[str, str]]   # structured messages seen so far
    embedding: np.ndarray                        # processed transcript embedding
    conversation_metrics: Dict[str, float]       # per-turn metric values
    turn_number: int                             # zero-based turn index
    conversion_probabilities: List[float]        # model predictions from prior turns

    @property
    def state_vector(self) -> np.ndarray:
        """Flatten this state into a single float32 observation vector.

        Layout: [embedding | metric values (dict order) | turn number |
        10-slot window holding the most recent predicted probabilities,
        zero-padded on the right].
        """
        window = np.zeros(10, dtype=np.float32)
        recent = self.conversion_probabilities[-10:]
        if recent:
            window[:len(recent)] = recent
        pieces = [
            self.embedding,
            np.asarray(list(self.conversation_metrics.values()), dtype=np.float32),
            np.asarray([self.turn_number], dtype=np.float32),
            window,
        ]
        return np.concatenate(pieces).astype(np.float32)
class CustomLN(BaseFeaturesExtractor):
    """Fully-connected feature extractor used by the PPO policy.

    Maps the flat observation vector (embedding + metrics + turn + probability
    window) through a 512 -> 256 -> ``features_dim`` MLP with ReLU activations.
    """
    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 128):
        super().__init__(observation_space, features_dim)
        n_input_channels = observation_space.shape[0]
        # NOTE(review): the attribute name 'linear_network' and the exact layer
        # ordering must match the training-time definition — PPO.load restores
        # weights by state_dict key, so renaming/reshaping this breaks loading.
        self.linear_network = nn.Sequential(
            nn.Linear(n_input_channels, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, features_dim),
            nn.ReLU(),
        ).to(device)
    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # Assumes observations are float tensors of shape (batch, obs_dim),
        # the usual SB3 convention — verify against the policy wrapper.
        return self.linear_network(observations)
# --- Azure OpenAI Embedding Function ---
def get_azure_openai_embedding(
    text: str,
    client: AzureOpenAI,
    deployment_name: str,
    fallback_dim: int = 3072
) -> np.ndarray:
    """Get an embedding vector from Azure OpenAI for the given text.

    Args:
        text: Text to embed.
        client: Configured AzureOpenAI client.
        deployment_name: Azure deployment name of the embedding model.
        fallback_dim: Length of the zero vector returned when the API call
            fails. Defaults to 3072 (text-embedding-3-large); pass 1536 for
            ada-002, etc., so the fallback matches the deployed model.

    Returns:
        1-D float32 embedding on success, or a ``fallback_dim``-length zero
        vector on failure (logged as an error + warning).
    """
    try:
        response = client.embeddings.create(
            input=text,
            model=deployment_name  # For Azure, this is the deployment name
        )
        embedding_vector = np.array(response.data[0].embedding, dtype=np.float32)
        logger.debug(f"Received embedding from Azure. Shape: {embedding_vector.shape}")
        return embedding_vector
    except Exception as e:
        # A zero vector keeps the pipeline alive but degrades prediction
        # quality. Callers who know the deployed model's dimension should pass
        # it via fallback_dim so downstream shape checks stay consistent.
        logger.error(f"Error getting embedding from Azure OpenAI: {e}")
        logger.warning("Falling back to zero embedding. This will impact prediction quality.")
        return np.zeros(fallback_dim, dtype=np.float32)
def process_raw_embedding(
    raw_embedding: np.ndarray,
    turn: int,
    max_turns_for_scaling: int,
    target_model_embedding_dim: int, # The dimension model's observation space expects
    use_miniembeddings: bool
) -> np.ndarray:
    """
    Scales and potentially reduces/pads the raw embedding (from Azure)
    to match the model's expected input dimension and characteristics.

    Steps:
      1. Scale the whole vector by ``0.6 + 0.4 * progress`` where progress is
         ``min(1, turn / max_turns_for_scaling)`` — mimics training-time
         turn-based dampening.
      2. Resize to ``target_model_embedding_dim``: mean-pool adjacent groups
         when ``use_miniembeddings`` is set and the raw vector is larger than
         the target; otherwise truncate or zero-pad.

    Returns:
        float32 vector of length ``target_model_embedding_dim`` (or a
        length-1 zero vector in the degenerate ``target_model_embedding_dim
        <= 0`` case).
    """
    dim_of_raw_embedding = len(raw_embedding)
    logger.debug(f"Processing raw_embedding. Dim: {dim_of_raw_embedding}, Target model dim: {target_model_embedding_dim}, Use mini: {use_miniembeddings}")
    # 1. Apply turn-based dynamic scaling (mimicking training): early turns are
    #    dampened (factor 0.6), growing linearly to 1.0 at max_turns_for_scaling.
    progress = min(1.0, turn / max_turns_for_scaling)
    scaled_embedding = raw_embedding * (0.6 + 0.4 * progress)
    # 2. Adjust dimension to target_model_embedding_dim
    if use_miniembeddings and dim_of_raw_embedding > target_model_embedding_dim:
        logger.debug(f"Applying mini-embedding reduction from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        if target_model_embedding_dim <= 0:
            logger.error("Target model embedding dimension is <=0. Cannot pool.")
            return np.zeros(1, dtype=np.float32) # Return a minimal valid array
        # Mean-pool groups of `pool_factor` consecutive elements down to the target size.
        pool_factor = dim_of_raw_embedding // target_model_embedding_dim
        if pool_factor == 0: pool_factor = 1
        num_elements_to_pool = pool_factor * target_model_embedding_dim
        # If not enough elements for perfect pooling (e.g. raw_dim=5, target_dim=3 -> pool_factor=1, num_elements_to_pool=3)
        # or too many (e.g. raw_dim=5, target_dim=2 -> pool_factor=2, num_elements_to_pool=4)
        # We'll pool from the available part of scaled_embedding
        elements_for_pooling = scaled_embedding[:num_elements_to_pool] if num_elements_to_pool <= dim_of_raw_embedding else scaled_embedding
        if len(elements_for_pooling) < target_model_embedding_dim : # Not enough elements even to form the target dim vector
            logger.warning(f"Not enough elements ({len(elements_for_pooling)}) to pool into target_dim ({target_model_embedding_dim}). Padding result.")
            reduced_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
            fill_len = min(len(elements_for_pooling), target_model_embedding_dim)
            reduced_embedding[:fill_len] = elements_for_pooling[:fill_len] # Simplified: take first elements if pooling fails
        else:
            try:
                # Adjust elements_for_pooling to be perfectly divisible if necessary
                reshapable_length = (len(elements_for_pooling) // pool_factor) * pool_factor
                reshaped_for_pooling = elements_for_pooling[:reshapable_length].reshape(-1, pool_factor) # -1 infers target_model_embedding_dim or similar
                # Ensure the first dimension of reshaped matches target_model_embedding_dim
                if reshaped_for_pooling.shape[0] > target_model_embedding_dim:
                    # Too many pooled rows: drop the excess from the tail.
                    reshaped_for_pooling = reshaped_for_pooling[:target_model_embedding_dim, :]
                elif reshaped_for_pooling.shape[0] < target_model_embedding_dim:
                    # This case should ideally be handled by padding the result
                    logger.warning(f"Pooling resulted in fewer dimensions ({reshaped_for_pooling.shape[0]}) than target ({target_model_embedding_dim}). Will pad.")
                    temp_reduced = np.mean(reshaped_for_pooling, axis=1)
                    reduced_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
                    reduced_embedding[:len(temp_reduced)] = temp_reduced
                else:
                    # Exact fit: each output element is the mean of pool_factor inputs.
                    reduced_embedding = np.mean(reshaped_for_pooling, axis=1)
            except ValueError as e:
                # reshape() can raise when lengths don't line up; degrade to truncate/pad.
                logger.error(f"Reshape for pooling failed: {e}. Lengths: elements_for_pooling={len(elements_for_pooling)}, pool_factor={pool_factor}. Falling back to simple truncation/padding.")
                if dim_of_raw_embedding > target_model_embedding_dim:
                    reduced_embedding = scaled_embedding[:target_model_embedding_dim]
                else:
                    reduced_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
                    reduced_embedding[:dim_of_raw_embedding] = scaled_embedding
        processed_embedding = reduced_embedding
    elif dim_of_raw_embedding == target_model_embedding_dim:
        # Dimensions already match; only the turn scaling was needed.
        processed_embedding = scaled_embedding
    elif dim_of_raw_embedding > target_model_embedding_dim:
        logger.debug(f"Truncating embedding from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        processed_embedding = scaled_embedding[:target_model_embedding_dim]
    else:
        logger.debug(f"Padding embedding from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        processed_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
        processed_embedding[:dim_of_raw_embedding] = scaled_embedding
    # Final safety net: force the output to exactly the target dimension.
    if len(processed_embedding) != target_model_embedding_dim:
        logger.warning(f"Dimension mismatch after processing. Expected {target_model_embedding_dim}, got {len(processed_embedding)}. Adjusting...")
        final_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
        fill_len = min(len(processed_embedding), target_model_embedding_dim)
        final_embedding[:fill_len] = processed_embedding[:fill_len]
        return final_embedding.astype(np.float32)
    return processed_embedding.astype(np.float32)
# --- Main Prediction Logic ---
def predict_conversation_trajectory(
    model: PPO,
    azure_openai_client: AzureOpenAI,
    azure_deployment_name: str,
    conversation_messages: List[Dict[str, str]],
    initial_metrics: Dict[str, float],
    model_expected_embedding_dim: int,
    use_miniembeddings_on_azure_emb: bool,
    max_conversation_turns_scaling: int = 20
):
    """Replay a conversation turn by turn, predicting conversion probability at each turn.

    For every message, the cumulative transcript is embedded via Azure OpenAI,
    resized to the model's embedding dimension, packed into a
    ``ConversationState`` observation, and fed to the PPO policy. Each turn's
    prediction is appended to the probability window used by later turns.

    Args:
        model: Loaded PPO model.
        azure_openai_client: Configured AzureOpenAI client.
        azure_deployment_name: Azure deployment name of the embedding model.
        conversation_messages: Ordered messages; each is presumably a dict with
            "speaker" and "message" keys (missing keys default to
            "unknown"/"").
        initial_metrics: Baseline metrics; copied and refreshed per turn.
        model_expected_embedding_dim: Embedding component size of the model's
            observation space.
        use_miniembeddings_on_azure_emb: Whether oversized Azure embeddings are
            mean-pooled down (see ``process_raw_embedding``).
        max_conversation_turns_scaling: 'max_turns' used for progress scaling.

    Returns:
        List of dicts: turn (1-based), speaker, message, and
        predicted_conversion_probability.

    Raises:
        ValueError: On any embedding/observation dimension mismatch with the model.
    """
    logger.info(f"Starting prediction. Model expects embedding_dim: {model_expected_embedding_dim}. use_mini_on_azure: {use_miniembeddings_on_azure_emb}")
    current_conversation_history_text = []
    current_conversation_history_struct = []
    agent_predicted_probabilities = []
    output_predictions = []
    # Observation layout: embedding + 5 metrics + 1 turn counter + 10-slot probability window.
    num_metrics = 5
    expected_obs_dim = model_expected_embedding_dim + num_metrics + 1 + 10
    # Fail fast if --embedding_dim is inconsistent with the loaded model.
    if model.observation_space.shape[0] != expected_obs_dim:
        logger.error(f"CRITICAL: Model observation space dimension mismatch! Model expects total obs_dim {model.observation_space.shape[0]}, "
                     f"but calculations suggest {expected_obs_dim} based on model_expected_embedding_dim={model_expected_embedding_dim}. "
                     f"Ensure --embedding_dim matches the dimension used for the embedding component during training.")
        inferred_emb_dim = model.observation_space.shape[0] - num_metrics - 1 - 10
        logger.error(f"The model might have been trained with an embedding component of dimension: {inferred_emb_dim}")
        raise ValueError("Observation space dimension mismatch. Check --embedding_dim.")
    for turn_idx, message_info in enumerate(conversation_messages):
        speaker = message_info.get("speaker", "unknown")
        message = message_info.get("message", "")
        current_conversation_history_struct.append(message_info)
        current_conversation_history_text.append(f"{speaker}: {message}")
        # Embed the full transcript so far, not just the latest message.
        text_for_embedding = "\n".join(current_conversation_history_text)
        if not text_for_embedding.strip():
            logger.warning("Empty text for embedding at turn_idx %s, using zero vector from Azure (or fallback).", turn_idx)
            # Attempt to get an embedding for a neutral character to get shape, or use a known default.
            # This path should be rare if conversations always start with text.
            raw_turn_embedding = get_azure_openai_embedding(" ", azure_openai_client, azure_deployment_name)
            if np.all(raw_turn_embedding == 0): # If fallback was hit
                logger.warning("Fallback zero embedding used for empty text. Assuming 3072 dim if Azure call failed internally.")
                raw_turn_embedding = np.zeros(3072, dtype=np.float32) # Default to text-embedding-3-large dim
        else:
            raw_turn_embedding = get_azure_openai_embedding(
                text_for_embedding,
                azure_openai_client,
                azure_deployment_name
            )
        # Scale + resize the Azure embedding to the model's expected dimension.
        final_turn_embedding = process_raw_embedding(
            raw_turn_embedding,
            turn_idx,
            max_conversation_turns_scaling,
            model_expected_embedding_dim,
            use_miniembeddings_on_azure_emb
        )
        if final_turn_embedding.shape[0] != model_expected_embedding_dim:
            logger.error(f"Embedding dimension mismatch after processing. Expected {model_expected_embedding_dim}, got {final_turn_embedding.shape[0]}. Critical error.")
            raise ValueError("Embedding dimension error after processing.")
        # Per-turn metrics: refresh length/progress, keep caller-supplied values otherwise.
        metrics = initial_metrics.copy()
        metrics['conversation_length'] = len(current_conversation_history_struct)
        metrics['progress'] = min(1.0, turn_idx / max_conversation_turns_scaling)
        if 'outcome' not in metrics: metrics['outcome'] = 0.5
        state = ConversationState(
            conversation_history=current_conversation_history_struct,
            embedding=final_turn_embedding,
            conversation_metrics=metrics,
            turn_number=turn_idx,
            conversion_probabilities=agent_predicted_probabilities
        )
        observation_vector = state.state_vector
        if observation_vector.shape[0] != model.observation_space.shape[0]:
            logger.error(f"Observation vector dimension mismatch before prediction! Expected {model.observation_space.shape[0]}, Got {observation_vector.shape[0]}")
            raise ValueError("Observation vector dimension mismatch.")
        # deterministic=True: take the policy's point prediction rather than sampling.
        action_probs, _ = model.predict(observation_vector, deterministic=True)
        predicted_prob_this_turn = float(action_probs[0])
        output_predictions.append({
            "turn": turn_idx + 1,
            "speaker": speaker,
            "message": message,
            "predicted_conversion_probability": predicted_prob_this_turn
        })
        # Feed this prediction back into the next turn's probability window.
        agent_predicted_probabilities.append(predicted_prob_this_turn)
    return output_predictions
def main():
    """CLI entry point: parse arguments, wire up the Azure OpenAI client and
    the PPO model, run the trajectory prediction, and print per-turn results.

    Exits early (plain return) after logging an error if the Azure client,
    the conversation JSON, or the PPO model cannot be set up.
    """
    parser = argparse.ArgumentParser(description="Run inference with Azure OpenAI embeddings.")
    parser.add_argument("--model_path", type=str, required=True, help="Path to the trained PPO model (.zip file).")
    parser.add_argument("--conversation_json", type=str, required=True,
                        help="JSON string or path to JSON file for the conversation.")
    parser.add_argument("--azure_api_key", type=str, required=True, help="Azure OpenAI API Key.")
    parser.add_argument("--azure_endpoint", type=str, required=True, help="Azure OpenAI Endpoint URL.")
    parser.add_argument("--azure_deployment_name", type=str, required=True, help="Azure OpenAI embedding deployment name (e.g., for text-embedding-3-large).")
    parser.add_argument("--azure_api_version", type=str, default="2023-12-01-preview", help="Azure OpenAI API Version (e.g., 2023-05-15 or 2023-12-01-preview for newer models).")
    parser.add_argument("--embedding_dim", type=int, required=True,
                        help="The dimension of the embedding vector component EXPECTED BY THE PPO MODEL's observation space.")
    parser.add_argument("--use_miniembeddings", action="store_true",
                        help="Flag if the Azure OpenAI embedding should be reduced (if larger than --embedding_dim) using the mini-embedding logic.")
    parser.add_argument("--max_turns_scaling", type=int, default=20,
                        help="The 'max_turns' value used for progress scaling (default: 20).")
    args = parser.parse_args()

    # Build the Azure client and verify connectivity with a throwaway embedding.
    try:
        client = AzureOpenAI(
            api_key=args.azure_api_key,
            azure_endpoint=args.azure_endpoint,
            api_version=args.azure_api_version
        )
        logger.info("Testing Azure OpenAI connection by embedding a short string...")
        probe = get_azure_openai_embedding("test connection", client, args.azure_deployment_name)
        logger.info(f"Azure OpenAI connection successful. Received test embedding of shape: {probe.shape}")
        # The probe's shape also reveals the deployed model's native dimension.
    except Exception as e:
        logger.error(f"Failed to initialize or test Azure OpenAI client: {e}")
        return

    # --conversation_json may be a path on disk or an inline JSON string.
    try:
        if os.path.exists(args.conversation_json):
            with open(args.conversation_json, 'r') as fh:
                conversation = json.load(fh)
        else:
            conversation = json.loads(args.conversation_json)
        if not isinstance(conversation, list):
            raise ValueError("Conversation JSON must be a list of message objects.")
    except Exception as e:
        logger.error(f"Error loading conversation JSON: {e}")
        return

    # Neutral starting metrics; refreshed each turn by the prediction loop.
    base_metrics = {
        'customer_engagement': 0.5, 'sales_effectiveness': 0.5,
        'conversation_length': 0, 'outcome': 0.5, 'progress': 0.0
    }

    try:
        ppo_model = PPO.load(args.model_path, device=device)
        logger.info(f"Model loaded from {args.model_path}")
        logger.info(f"Model's observation space shape: {ppo_model.observation_space.shape}")
    except Exception as e:
        logger.error(f"Error loading PPO model: {e}")
        return

    results = predict_conversation_trajectory(
        ppo_model,
        client,
        args.azure_deployment_name,
        conversation,
        base_metrics,
        model_expected_embedding_dim=args.embedding_dim,
        use_miniembeddings_on_azure_emb=args.use_miniembeddings,
        max_conversation_turns_scaling=args.max_turns_scaling
    )

    print("\n--- Conversation Predictions (with Azure OpenAI Embeddings) ---")
    for row in results:
        print(f"Turn {row['turn']} ({row['speaker']}): \"{row['message'][:60]}...\" -> Probability: {row['predicted_conversion_probability']:.4f}")
if __name__ == "__main__":
    # Example: reduce a 3072-dim text-embedding-3-large vector to a model that
    # was trained with 1024-dim embeddings:
    #   python inference_azure_openai_v2.py \
    #     --model_path models/sales_conversion_model.zip \
    #     --conversation_json sample_conv.json \
    #     --azure_api_key "YOUR_AZURE_API_KEY" \
    #     --azure_endpoint "YOUR_AZURE_ENDPOINT" \
    #     --azure_deployment_name "your-text-embedding-3-large-deployment-name" \
    #     --azure_api_version "2023-12-01-preview" \
    #     --embedding_dim 1024 \
    #     --use_miniembeddings
    #
    # If the PPO model was trained directly on 3072-dim embeddings, pass
    # --embedding_dim 3072 and omit --use_miniembeddings (no reduction needed,
    # since 3072 (Azure) == 3072 (model)).
    main()