# Source: DeepMostInnovations — sales_inference.py (commit a730190, verified upload)
import os
import json
import numpy as np
import torch
import torch.nn as nn
from openai import AzureOpenAI # Use the new AzureOpenAI client
from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import gymnasium as gym
from gymnasium import spaces
from dataclasses import dataclass
from typing import List, Dict, Any
import argparse
import logging
# Configure logging: timestamped records to stderr via a stream handler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)

# GPU Setup: prefer CUDA when available, otherwise fall back to CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
if use_cuda:
    logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    logger.info("GPU not available, using CPU for inference")
# --- Replicated/Necessary Classes from train.py ---
@dataclass
class ConversationState:
    """Snapshot of a conversation at one turn, convertible to a flat observation.

    Attributes:
        conversation_history: structured messages seen so far.
        embedding: float32 embedding of the conversation text (model's expected dim).
        conversation_metrics: named scalar metrics; their dict insertion order
            determines their position in the observation vector.
        turn_number: zero-based index of the current turn.
        conversion_probabilities: probabilities predicted on earlier turns.
    """
    conversation_history: List[Dict[str, str]]
    embedding: np.ndarray
    conversation_metrics: Dict[str, float]
    turn_number: int
    conversion_probabilities: List[float]

    @property
    def state_vector(self) -> np.ndarray:
        """Flat float32 observation: [embedding | metrics | turn | last-10 probs]."""
        # Last ten predicted probabilities, left-aligned into a fixed slot of 10.
        recent = self.conversion_probabilities[-10:]
        prob_slots = np.zeros(10, dtype=np.float32)
        prob_slots[:len(recent)] = recent
        pieces = [
            self.embedding,
            np.asarray(list(self.conversation_metrics.values()), dtype=np.float32),
            np.asarray([self.turn_number], dtype=np.float32),
            prob_slots,
        ]
        return np.concatenate(pieces).astype(np.float32)
class CustomLN(BaseFeaturesExtractor):
    """Fully-connected feature extractor for the PPO policy.

    Maps the flat observation vector through three Linear+ReLU layers
    (input -> 512 -> 256 -> features_dim).

    NOTE(review): the attribute name ``linear_network`` presumably appears in
    the saved checkpoint's state_dict keys, so renaming it would likely break
    ``PPO.load`` — keep it as-is.
    """

    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 128):
        super().__init__(observation_space, features_dim)
        # Observation space is a flat Box, so shape[0] is the full input width.
        n_input_channels = observation_space.shape[0]
        self.linear_network = nn.Sequential(
            nn.Linear(n_input_channels, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, features_dim),
            nn.ReLU(),
        ).to(device)  # module-level `device` (CUDA if available, else CPU)

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        """Return extracted features for a batch of observations."""
        return self.linear_network(observations)
# --- Azure OpenAI Embedding Function ---
def get_azure_openai_embedding(
    text: str,
    client: "AzureOpenAI",
    deployment_name: str,
    fallback_dim: int = 3072,
) -> np.ndarray:
    """Gets embedding from Azure OpenAI for the given text.

    Args:
        text: Text to embed.
        client: Configured AzureOpenAI client.
        deployment_name: Azure deployment name (passed as ``model``).
        fallback_dim: Dimension of the zero vector returned when the API call
            fails. Defaults to 3072 (text-embedding-3-large); pass 1536 for
            ada-002 deployments so the fallback matches the real model.

    Returns:
        1-D float32 embedding vector, or ``np.zeros(fallback_dim)`` on failure.
    """
    try:
        response = client.embeddings.create(
            input=text,
            model=deployment_name  # For Azure, this is the deployment name
        )
        embedding_vector = np.array(response.data[0].embedding, dtype=np.float32)
        logger.debug(f"Received embedding from Azure. Shape: {embedding_vector.shape}")
        return embedding_vector
    except Exception as e:
        logger.error(f"Error getting embedding from Azure OpenAI: {e}")
        # A zero vector keeps the pipeline alive but degrades predictions;
        # callers should size fallback_dim to their actual deployment model.
        logger.warning("Falling back to zero embedding. This will impact prediction quality.")
        return np.zeros(fallback_dim, dtype=np.float32)
def process_raw_embedding(
    raw_embedding: np.ndarray,
    turn: int,
    max_turns_for_scaling: int,
    target_model_embedding_dim: int,  # The dimension model's observation space expects
    use_miniembeddings: bool
) -> np.ndarray:
    """
    Scales and potentially reduces/pads the raw embedding (from Azure)
    to match the model's expected input dimension and characteristics.

    Steps:
      1. Turn-based scaling: multiply by 0.6..1.0 as the conversation progresses
         (mimicking training-time dynamic scaling).
      2. Dimension adjustment:
         * mini-embeddings and raw dim > target: mean-pool consecutive groups of
           ``raw_dim // target`` values down to exactly ``target`` values;
         * raw dim > target (no mini): truncate;
         * raw dim < target: zero-pad;
         * equal: pass through.

    Note: the previous revision carried fallback branches for impossible pooling
    shapes. When ``raw_dim > target > 0``, ``pool_factor = raw_dim // target >= 1``
    and ``pool_factor * target <= raw_dim``, so the reshape below always yields
    exactly (target, pool_factor) — those branches were unreachable and have
    been removed.
    """
    dim_of_raw_embedding = len(raw_embedding)
    logger.debug(f"Processing raw_embedding. Dim: {dim_of_raw_embedding}, Target model dim: {target_model_embedding_dim}, Use mini: {use_miniembeddings}")

    # 1. Apply turn-based dynamic scaling (mimicking training): 0.6 -> 1.0.
    progress = min(1.0, turn / max_turns_for_scaling)
    scaled_embedding = raw_embedding * (0.6 + 0.4 * progress)

    # 2. Adjust dimension to target_model_embedding_dim.
    if use_miniembeddings and dim_of_raw_embedding > target_model_embedding_dim:
        logger.debug(f"Applying mini-embedding reduction from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        if target_model_embedding_dim <= 0:
            logger.error("Target model embedding dimension is <=0. Cannot pool.")
            return np.zeros(1, dtype=np.float32)  # Return a minimal valid array
        # Mean-pool pool_factor consecutive values into each output element.
        pool_factor = dim_of_raw_embedding // target_model_embedding_dim
        num_elements_to_pool = pool_factor * target_model_embedding_dim
        processed_embedding = (
            scaled_embedding[:num_elements_to_pool]
            .reshape(target_model_embedding_dim, pool_factor)
            .mean(axis=1)
        )
    elif dim_of_raw_embedding == target_model_embedding_dim:
        processed_embedding = scaled_embedding
    elif dim_of_raw_embedding > target_model_embedding_dim:
        logger.debug(f"Truncating embedding from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        processed_embedding = scaled_embedding[:target_model_embedding_dim]
    else:
        logger.debug(f"Padding embedding from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        processed_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
        processed_embedding[:dim_of_raw_embedding] = scaled_embedding

    return processed_embedding.astype(np.float32)
# --- Main Prediction Logic ---
def predict_conversation_trajectory(
    model: PPO,
    azure_openai_client: AzureOpenAI,
    azure_deployment_name: str,
    conversation_messages: List[Dict[str, str]],
    initial_metrics: Dict[str, float],
    model_expected_embedding_dim: int,
    use_miniembeddings_on_azure_emb: bool,
    max_conversation_turns_scaling: int = 20
) -> List[Dict[str, Any]]:
    """Run the PPO model turn-by-turn over a conversation and collect
    a conversion-probability prediction per turn.

    For each message: embed the full history-so-far via Azure, adapt the
    embedding to the model's expected dimension, build an observation
    vector, and query the model.

    Args:
        model: Loaded PPO model whose observation space must equal
            model_expected_embedding_dim + 5 metrics + 1 turn + 10 prob slots.
        azure_openai_client: Configured AzureOpenAI client.
        azure_deployment_name: Azure embedding deployment name.
        conversation_messages: List of {"speaker": ..., "message": ...} dicts.
        initial_metrics: Base metrics dict; must contain exactly 5 keys whose
            insertion order is what the model was trained with (it feeds the
            observation vector positionally).
        model_expected_embedding_dim: Embedding dimension of the model's
            observation space (NOT necessarily Azure's native dimension).
        use_miniembeddings_on_azure_emb: Whether to mean-pool oversized Azure
            embeddings down to the model dimension.
        max_conversation_turns_scaling: 'max_turns' used for progress scaling.

    Returns:
        One dict per turn: turn number (1-based), speaker, message, and
        predicted_conversion_probability.

    Raises:
        ValueError: On any observation/embedding dimension mismatch.
    """
    logger.info(f"Starting prediction. Model expects embedding_dim: {model_expected_embedding_dim}. use_mini_on_azure: {use_miniembeddings_on_azure_emb}")
    current_conversation_history_text = []
    current_conversation_history_struct = []
    agent_predicted_probabilities = []
    output_predictions = []
    # Observation layout: embedding + 5 metrics + 1 turn counter + 10 prob slots.
    num_metrics = 5
    expected_obs_dim = model_expected_embedding_dim + num_metrics + 1 + 10
    # Fail fast if --embedding_dim does not match what the model was trained with.
    if model.observation_space.shape[0] != expected_obs_dim:
        logger.error(f"CRITICAL: Model observation space dimension mismatch! Model expects total obs_dim {model.observation_space.shape[0]}, "
                     f"but calculations suggest {expected_obs_dim} based on model_expected_embedding_dim={model_expected_embedding_dim}. "
                     f"Ensure --embedding_dim matches the dimension used for the embedding component during training.")
        inferred_emb_dim = model.observation_space.shape[0] - num_metrics - 1 - 10
        logger.error(f"The model might have been trained with an embedding component of dimension: {inferred_emb_dim}")
        raise ValueError("Observation space dimension mismatch. Check --embedding_dim.")
    for turn_idx, message_info in enumerate(conversation_messages):
        speaker = message_info.get("speaker", "unknown")
        message = message_info.get("message", "")
        current_conversation_history_struct.append(message_info)
        current_conversation_history_text.append(f"{speaker}: {message}")
        # Embed the ENTIRE history so far, not just the latest message.
        text_for_embedding = "\n".join(current_conversation_history_text)
        if not text_for_embedding.strip():
            logger.warning("Empty text for embedding at turn_idx %s, using zero vector from Azure (or fallback).", turn_idx)
            # Attempt to get an embedding for a neutral character to get shape, or use a known default.
            # This path should be rare if conversations always start with text.
            raw_turn_embedding = get_azure_openai_embedding(" ", azure_openai_client, azure_deployment_name)
            if np.all(raw_turn_embedding == 0):  # If fallback was hit
                logger.warning("Fallback zero embedding used for empty text. Assuming 3072 dim if Azure call failed internally.")
                raw_turn_embedding = np.zeros(3072, dtype=np.float32)  # Default to text-embedding-3-large dim
        else:
            raw_turn_embedding = get_azure_openai_embedding(
                text_for_embedding,
                azure_openai_client,
                azure_deployment_name
            )
        # Scale and resize the raw Azure embedding to the model's expected dim.
        final_turn_embedding = process_raw_embedding(
            raw_turn_embedding,
            turn_idx,
            max_conversation_turns_scaling,
            model_expected_embedding_dim,
            use_miniembeddings_on_azure_emb
        )
        if final_turn_embedding.shape[0] != model_expected_embedding_dim:
            logger.error(f"Embedding dimension mismatch after processing. Expected {model_expected_embedding_dim}, got {final_turn_embedding.shape[0]}. Critical error.")
            raise ValueError("Embedding dimension error after processing.")
        # Refresh per-turn metrics; dict insertion order must stay stable since
        # ConversationState.state_vector consumes metrics.values() positionally.
        metrics = initial_metrics.copy()
        metrics['conversation_length'] = len(current_conversation_history_struct)
        metrics['progress'] = min(1.0, turn_idx / max_conversation_turns_scaling)
        if 'outcome' not in metrics: metrics['outcome'] = 0.5
        # NOTE: agent_predicted_probabilities is passed by reference, so the
        # state sees all probabilities appended on previous iterations.
        state = ConversationState(
            conversation_history=current_conversation_history_struct,
            embedding=final_turn_embedding,
            conversation_metrics=metrics,
            turn_number=turn_idx,
            conversion_probabilities=agent_predicted_probabilities
        )
        observation_vector = state.state_vector
        if observation_vector.shape[0] != model.observation_space.shape[0]:
            logger.error(f"Observation vector dimension mismatch before prediction! Expected {model.observation_space.shape[0]}, Got {observation_vector.shape[0]}")
            raise ValueError("Observation vector dimension mismatch.")
        # The model's "action" is interpreted directly as a conversion probability.
        action_probs, _ = model.predict(observation_vector, deterministic=True)
        predicted_prob_this_turn = float(action_probs[0])
        output_predictions.append({
            "turn": turn_idx + 1,
            "speaker": speaker,
            "message": message,
            "predicted_conversion_probability": predicted_prob_this_turn
        })
        agent_predicted_probabilities.append(predicted_prob_this_turn)
    return output_predictions
def main():
    """CLI entry point: parse arguments, verify the Azure connection, load the
    conversation and the PPO model, then print per-turn predictions."""
    cli = argparse.ArgumentParser(description="Run inference with Azure OpenAI embeddings.")
    cli.add_argument("--model_path", type=str, required=True, help="Path to the trained PPO model (.zip file).")
    cli.add_argument("--conversation_json", type=str, required=True,
                     help="JSON string or path to JSON file for the conversation.")
    cli.add_argument("--azure_api_key", type=str, required=True, help="Azure OpenAI API Key.")
    cli.add_argument("--azure_endpoint", type=str, required=True, help="Azure OpenAI Endpoint URL.")
    cli.add_argument("--azure_deployment_name", type=str, required=True, help="Azure OpenAI embedding deployment name (e.g., for text-embedding-3-large).")
    cli.add_argument("--azure_api_version", type=str, default="2023-12-01-preview", help="Azure OpenAI API Version (e.g., 2023-05-15 or 2023-12-01-preview for newer models).")
    cli.add_argument("--embedding_dim", type=int, required=True,
                     help="The dimension of the embedding vector component EXPECTED BY THE PPO MODEL's observation space.")
    cli.add_argument("--use_miniembeddings", action="store_true",
                     help="Flag if the Azure OpenAI embedding should be reduced (if larger than --embedding_dim) using the mini-embedding logic.")
    cli.add_argument("--max_turns_scaling", type=int, default=20,
                     help="The 'max_turns' value used for progress scaling (default: 20).")
    opts = cli.parse_args()

    # Build the Azure client and verify credentials with a tiny probe request.
    try:
        aoai_client = AzureOpenAI(
            api_key=opts.azure_api_key,
            azure_endpoint=opts.azure_endpoint,
            api_version=opts.azure_api_version
        )
        logger.info("Testing Azure OpenAI connection by embedding a short string...")
        probe_embedding = get_azure_openai_embedding("test connection", aoai_client, opts.azure_deployment_name)
        logger.info(f"Azure OpenAI connection successful. Received test embedding of shape: {probe_embedding.shape}")
    except Exception as e:
        logger.error(f"Failed to initialize or test Azure OpenAI client: {e}")
        return

    # The conversation may be given inline as a JSON string or as a file path.
    try:
        if os.path.exists(opts.conversation_json):
            with open(opts.conversation_json, 'r') as f:
                conversation = json.load(f)
        else:
            conversation = json.loads(opts.conversation_json)
        if not isinstance(conversation, list):
            raise ValueError("Conversation JSON must be a list of message objects.")
    except Exception as e:
        logger.error(f"Error loading conversation JSON: {e}")
        return

    # Key order matters: it must match the metric order used during training.
    starting_metrics = {
        'customer_engagement': 0.5, 'sales_effectiveness': 0.5,
        'conversation_length': 0, 'outcome': 0.5, 'progress': 0.0
    }

    try:
        ppo_model = PPO.load(opts.model_path, device=device)
        logger.info(f"Model loaded from {opts.model_path}")
        logger.info(f"Model's observation space shape: {ppo_model.observation_space.shape}")
    except Exception as e:
        logger.error(f"Error loading PPO model: {e}")
        return

    trajectory = predict_conversation_trajectory(
        ppo_model,
        aoai_client,
        opts.azure_deployment_name,
        conversation,
        starting_metrics,
        model_expected_embedding_dim=opts.embedding_dim,
        use_miniembeddings_on_azure_emb=opts.use_miniembeddings,
        max_conversation_turns_scaling=opts.max_turns_scaling
    )

    print("\n--- Conversation Predictions (with Azure OpenAI Embeddings) ---")
    for entry in trajectory:
        print(f"Turn {entry['turn']} ({entry['speaker']}): \"{entry['message'][:60]}...\" -> Probability: {entry['predicted_conversion_probability']:.4f}")
if __name__ == "__main__":
    # Example 1 — PPO model trained on 1024-dim embeddings; Azure's
    # text-embedding-3-large output (3072-dim) is reduced to 1024 via
    # the mini-embedding (mean-pooling) logic:
    #   python sales_inference.py \
    #     --model_path models/sales_conversion_model.zip \
    #     --conversation_json sample_conv.json \
    #     --azure_api_key "YOUR_AZURE_API_KEY" \
    #     --azure_endpoint "YOUR_AZURE_ENDPOINT" \
    #     --azure_deployment_name "your-text-embedding-3-large-deployment-name" \
    #     --azure_api_version "2023-12-01-preview" \
    #     --embedding_dim 1024 \
    #     --use_miniembeddings
    #
    # Example 2 — PPO model trained directly on 3072-dim embeddings
    # (omit --use_miniembeddings since Azure dim == model dim):
    #   python sales_inference.py \
    #     --model_path models/sales_conversion_model.zip \
    #     --conversation_json sample_conv.json \
    #     --azure_api_key "YOUR_AZURE_API_KEY" \
    #     --azure_endpoint "YOUR_AZURE_ENDPOINT" \
    #     --azure_deployment_name "your-text-embedding-3-large-deployment-name" \
    #     --azure_api_version "2023-12-01-preview" \
    #     --embedding_dim 3072
    main()