File size: 19,200 Bytes
a730190
fc3428f
a730190
fc3428f
 
a730190
 
fc3428f
 
a730190
 
fc3428f
a730190
 
 
fc3428f
a730190
 
 
 
 
fc3428f
a730190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc3428f
a730190
 
 
 
 
 
 
 
 
 
 
 
 
 
fc3428f
a730190
 
fc3428f
a730190
 
 
 
 
 
 
 
fc3428f
a730190
 
 
fc3428f
a730190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc3428f
a730190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc3428f
a730190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc3428f
a730190
 
 
 
 
 
 
 
 
 
fc3428f
a730190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc3428f
a730190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc3428f
a730190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385

import os
import json
import numpy as np
import torch
import torch.nn as nn
from openai import AzureOpenAI # Use the new AzureOpenAI client
from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import gymnasium as gym
from gymnasium import spaces
from dataclasses import dataclass
from typing import List, Dict, Any
import argparse
import logging

# Configure logging: INFO level, timestamped records to stderr via StreamHandler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)

# GPU Setup: pick CUDA when available, otherwise fall back to CPU.
# `device` is read later by CustomLN and passed to PPO.load().
if torch.cuda.is_available():
    device = torch.device("cuda")
    logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    logger.info("GPU not available, using CPU for inference")

# --- Replicated/Necessary Classes from train.py ---

@dataclass
class ConversationState:
    """Per-turn snapshot of a conversation, flattened into the PPO observation.

    Attributes:
        conversation_history: Structured messages seen so far.
        embedding: Processed turn embedding (already sized for the model).
        conversation_metrics: Named scalar metrics (insertion order matters).
        turn_number: Zero-based index of the current turn.
        conversion_probabilities: Probabilities predicted on earlier turns.
    """
    conversation_history: List[Dict[str, str]]
    embedding: np.ndarray
    conversation_metrics: Dict[str, float]
    turn_number: int
    conversion_probabilities: List[float]

    @property
    def state_vector(self) -> np.ndarray:
        """Concatenate embedding, metric values, turn index and the last 10
        predicted probabilities (zero-padded on the right) as float32."""
        # Keep at most the 10 most recent probabilities, padded with zeros.
        recent = self.conversion_probabilities[-10:]
        prob_block = np.zeros(10, dtype=np.float32)
        if recent:
            prob_block[:len(recent)] = recent

        pieces = [
            np.asarray(self.embedding, dtype=np.float32),
            np.array(list(self.conversation_metrics.values()), dtype=np.float32),
            np.array([self.turn_number], dtype=np.float32),
            prob_block,
        ]
        return np.concatenate(pieces).astype(np.float32)

class CustomLN(BaseFeaturesExtractor):
    """MLP feature extractor: obs_dim -> 512 -> 256 -> features_dim, ReLU
    after every linear layer. Mirrors the extractor used during training."""

    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 128):
        super().__init__(observation_space, features_dim)
        in_dim = observation_space.shape[0]
        # Build Linear+ReLU pairs from consecutive layer widths.
        widths = [in_dim, 512, 256, features_dim]
        layers: List[nn.Module] = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # Move to the module-level `device` (GPU when available).
        self.linear_network = nn.Sequential(*layers).to(device)

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        """Map a batch of observations to extracted features."""
        return self.linear_network(observations)

# --- Azure OpenAI Embedding Function ---

def get_azure_openai_embedding(
    text: str,
    client: AzureOpenAI,
    deployment_name: str,
    fallback_dim: int = 3072
) -> np.ndarray:
    """Gets embedding from Azure OpenAI for the given text.

    Args:
        text: Text to embed.
        client: Configured AzureOpenAI client.
        deployment_name: Azure deployment name (passed as ``model``).
        fallback_dim: Dimension of the zero vector returned when the API call
            fails. Defaults to 3072 (text-embedding-3-large); callers using a
            different deployment (e.g. ada-002 at 1536) should pass their
            deployment's true dimension so the fallback matches.

    Returns:
        float32 embedding vector, or a zero vector of ``fallback_dim`` on error.
    """
    try:
        response = client.embeddings.create(
            input=text,
            model=deployment_name # For Azure, this is the deployment name
        )
        embedding_vector = np.array(response.data[0].embedding, dtype=np.float32)
        logger.debug(f"Received embedding from Azure. Shape: {embedding_vector.shape}")
        return embedding_vector
    except Exception as e:
        logger.error(f"Error getting embedding from Azure OpenAI: {e}")
        logger.warning("Falling back to zero embedding. This will impact prediction quality.")
        # A zero vector degrades prediction quality but keeps the pipeline
        # running; its size is now caller-configurable instead of hard-coded.
        return np.zeros(fallback_dim, dtype=np.float32)

def process_raw_embedding(
    raw_embedding: np.ndarray,
    turn: int,
    max_turns_for_scaling: int,
    target_model_embedding_dim: int, # The dimension model's observation space expects
    use_miniembeddings: bool
) -> np.ndarray:
    """
    Scales and potentially reduces/pads the raw embedding (from Azure)
    to match the model's expected input dimension and characteristics.

    Steps:
      1. Multiply the embedding by a turn-dependent factor in [0.6, 1.0]
         (mimicking the dynamic scaling applied during training).
      2. Bring the vector to ``target_model_embedding_dim`` by one of:
         mean-pooling (when ``use_miniembeddings`` and the raw vector is
         larger), plain truncation, or zero-padding.
      3. As a last resort, force the output length to the target via a
         final truncate/pad pass.

    Returns a float32 vector of length ``target_model_embedding_dim``
    (except the degenerate ``target_model_embedding_dim <= 0`` error path,
    which returns a single-element zero vector).
    """
    dim_of_raw_embedding = len(raw_embedding)
    logger.debug(f"Processing raw_embedding. Dim: {dim_of_raw_embedding}, Target model dim: {target_model_embedding_dim}, Use mini: {use_miniembeddings}")


    # 1. Apply turn-based dynamic scaling (mimicking training)
    # progress goes 0 -> 1 over max_turns_for_scaling turns, so the factor
    # ramps linearly from 0.6 to 1.0.
    progress = min(1.0, turn / max_turns_for_scaling)
    scaled_embedding = raw_embedding * (0.6 + 0.4 * progress)

    # 2. Adjust dimension to target_model_embedding_dim
    if use_miniembeddings and dim_of_raw_embedding > target_model_embedding_dim:
        logger.debug(f"Applying mini-embedding reduction from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        if target_model_embedding_dim <= 0:
            logger.error("Target model embedding dimension is <=0. Cannot pool.")
            return np.zeros(1, dtype=np.float32) # Return a minimal valid array

        # Each output element averages `pool_factor` consecutive inputs.
        pool_factor = dim_of_raw_embedding // target_model_embedding_dim
        if pool_factor == 0: pool_factor = 1 

        num_elements_to_pool = pool_factor * target_model_embedding_dim
        
        # If not enough elements for perfect pooling (e.g. raw_dim=5, target_dim=3 -> pool_factor=1, num_elements_to_pool=3)
        # or too many (e.g. raw_dim=5, target_dim=2 -> pool_factor=2, num_elements_to_pool=4)
        # We'll pool from the available part of scaled_embedding
        elements_for_pooling = scaled_embedding[:num_elements_to_pool] if num_elements_to_pool <= dim_of_raw_embedding else scaled_embedding
        
        if len(elements_for_pooling) < target_model_embedding_dim : # Not enough elements even to form the target dim vector
            logger.warning(f"Not enough elements ({len(elements_for_pooling)}) to pool into target_dim ({target_model_embedding_dim}). Padding result.")
            reduced_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
            fill_len = min(len(elements_for_pooling), target_model_embedding_dim)
            reduced_embedding[:fill_len] = elements_for_pooling[:fill_len] # Simplified: take first elements if pooling fails
        else:
            try:
                # Adjust elements_for_pooling to be perfectly divisible if necessary
                reshapable_length = (len(elements_for_pooling) // pool_factor) * pool_factor
                reshaped_for_pooling = elements_for_pooling[:reshapable_length].reshape(-1, pool_factor) # -1 infers target_model_embedding_dim or similar
                
                # Ensure the first dimension of reshaped matches target_model_embedding_dim
                if reshaped_for_pooling.shape[0] > target_model_embedding_dim:
                    reshaped_for_pooling = reshaped_for_pooling[:target_model_embedding_dim, :]
                elif reshaped_for_pooling.shape[0] < target_model_embedding_dim:
                     # This case should ideally be handled by padding the result
                    logger.warning(f"Pooling resulted in fewer dimensions ({reshaped_for_pooling.shape[0]}) than target ({target_model_embedding_dim}). Will pad.")
                    temp_reduced = np.mean(reshaped_for_pooling, axis=1)
                    reduced_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
                    reduced_embedding[:len(temp_reduced)] = temp_reduced
                else:
                    # Happy path: mean over each group of pool_factor elements.
                    reduced_embedding = np.mean(reshaped_for_pooling, axis=1)

            except ValueError as e: 
                logger.error(f"Reshape for pooling failed: {e}. Lengths: elements_for_pooling={len(elements_for_pooling)}, pool_factor={pool_factor}. Falling back to simple truncation/padding.")
                if dim_of_raw_embedding > target_model_embedding_dim:
                    reduced_embedding = scaled_embedding[:target_model_embedding_dim]
                else: 
                    reduced_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
                    reduced_embedding[:dim_of_raw_embedding] = scaled_embedding
        
        processed_embedding = reduced_embedding

    elif dim_of_raw_embedding == target_model_embedding_dim:
        # Already the right size; only the turn scaling applies.
        processed_embedding = scaled_embedding
    elif dim_of_raw_embedding > target_model_embedding_dim: 
        # Mini-embeddings disabled: plain truncation instead of pooling.
        logger.debug(f"Truncating embedding from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        processed_embedding = scaled_embedding[:target_model_embedding_dim]
    else: 
        logger.debug(f"Padding embedding from {dim_of_raw_embedding} to {target_model_embedding_dim}")
        processed_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
        processed_embedding[:dim_of_raw_embedding] = scaled_embedding
    
    # Final safety net: guarantee the exact target length no matter which
    # branch produced processed_embedding.
    if len(processed_embedding) != target_model_embedding_dim:
        logger.warning(f"Dimension mismatch after processing. Expected {target_model_embedding_dim}, got {len(processed_embedding)}. Adjusting...")
        final_embedding = np.zeros(target_model_embedding_dim, dtype=np.float32)
        fill_len = min(len(processed_embedding), target_model_embedding_dim)
        final_embedding[:fill_len] = processed_embedding[:fill_len]
        return final_embedding.astype(np.float32)

    return processed_embedding.astype(np.float32)


# --- Main Prediction Logic ---

def predict_conversation_trajectory(
    model: PPO,
    azure_openai_client: AzureOpenAI,
    azure_deployment_name: str,
    conversation_messages: List[Dict[str, str]],
    initial_metrics: Dict[str, float],
    model_expected_embedding_dim: int,
    use_miniembeddings_on_azure_emb: bool,
    max_conversation_turns_scaling: int = 20
):
    """Predict a conversion probability after each turn of a conversation.

    For every message, the full conversation so far is embedded via Azure
    OpenAI, resized to the model's expected embedding dimension, packed into
    a ConversationState observation, and fed to the PPO model. Earlier
    predictions are fed back as part of the next observation.

    Args:
        model: Loaded PPO model whose observation space must equal
            model_expected_embedding_dim + 5 metrics + 1 turn + 10 probs.
        azure_openai_client: Configured AzureOpenAI client.
        azure_deployment_name: Embedding deployment name.
        conversation_messages: List of {"speaker": ..., "message": ...} dicts.
        initial_metrics: Base metric dict copied each turn; expected to hold
            5 keys (see num_metrics below).
        model_expected_embedding_dim: Embedding component size the model
            was trained with.
        use_miniembeddings_on_azure_emb: Whether to mean-pool oversized
            Azure embeddings down to the target dimension.
        max_conversation_turns_scaling: 'max_turns' used for progress scaling.

    Returns:
        List of per-turn dicts with turn number, speaker, message, and
        predicted_conversion_probability.

    Raises:
        ValueError: On any observation/embedding dimension mismatch.
    """
    logger.info(f"Starting prediction. Model expects embedding_dim: {model_expected_embedding_dim}. use_mini_on_azure: {use_miniembeddings_on_azure_emb}")

    current_conversation_history_text = []
    current_conversation_history_struct = []
    agent_predicted_probabilities = []
    output_predictions = []

    # 5 metrics matches the initial_metrics dict built in main()
    # (customer_engagement, sales_effectiveness, conversation_length,
    # outcome, progress).
    num_metrics = 5 
    # Observation layout: embedding + metrics + turn number + last-10 probs.
    expected_obs_dim = model_expected_embedding_dim + num_metrics + 1 + 10 
    # Fail fast with a helpful message if --embedding_dim disagrees with the
    # model's actual observation space.
    if model.observation_space.shape[0] != expected_obs_dim:
        logger.error(f"CRITICAL: Model observation space dimension mismatch! Model expects total obs_dim {model.observation_space.shape[0]}, "
                     f"but calculations suggest {expected_obs_dim} based on model_expected_embedding_dim={model_expected_embedding_dim}. "
                     f"Ensure --embedding_dim matches the dimension used for the embedding component during training.")
        inferred_emb_dim = model.observation_space.shape[0] - num_metrics - 1 - 10
        logger.error(f"The model might have been trained with an embedding component of dimension: {inferred_emb_dim}")
        raise ValueError("Observation space dimension mismatch. Check --embedding_dim.")

    for turn_idx, message_info in enumerate(conversation_messages):
        speaker = message_info.get("speaker", "unknown")
        message = message_info.get("message", "")
        
        current_conversation_history_struct.append(message_info)
        current_conversation_history_text.append(f"{speaker}: {message}")
        
        # Embed the WHOLE conversation so far, not just the latest message.
        text_for_embedding = "\n".join(current_conversation_history_text)
        if not text_for_embedding.strip(): 
            logger.warning("Empty text for embedding at turn_idx %s, using zero vector from Azure (or fallback).", turn_idx)
            # Attempt to get an embedding for a neutral character to get shape, or use a known default.
            # This path should be rare if conversations always start with text.
            raw_turn_embedding = get_azure_openai_embedding(" ", azure_openai_client, azure_deployment_name)
            if np.all(raw_turn_embedding == 0): # If fallback was hit
                logger.warning("Fallback zero embedding used for empty text. Assuming 3072 dim if Azure call failed internally.")
                raw_turn_embedding = np.zeros(3072, dtype=np.float32) # Default to text-embedding-3-large dim
        else:
            raw_turn_embedding = get_azure_openai_embedding(
                text_for_embedding,
                azure_openai_client,
                azure_deployment_name
            )
        
        # Scale by turn progress and resize to the model's embedding dim.
        final_turn_embedding = process_raw_embedding(
            raw_turn_embedding,
            turn_idx,
            max_conversation_turns_scaling,
            model_expected_embedding_dim,
            use_miniembeddings_on_azure_emb
        )
        
        if final_turn_embedding.shape[0] != model_expected_embedding_dim:
            logger.error(f"Embedding dimension mismatch after processing. Expected {model_expected_embedding_dim}, got {final_turn_embedding.shape[0]}. Critical error.")
            raise ValueError("Embedding dimension error after processing.")

        # Refresh the per-turn metrics from the caller-supplied baseline.
        metrics = initial_metrics.copy()
        metrics['conversation_length'] = len(current_conversation_history_struct)
        metrics['progress'] = min(1.0, turn_idx / max_conversation_turns_scaling)
        if 'outcome' not in metrics: metrics['outcome'] = 0.5 

        state = ConversationState(
            conversation_history=current_conversation_history_struct,
            embedding=final_turn_embedding,
            conversation_metrics=metrics,
            turn_number=turn_idx,
            conversion_probabilities=agent_predicted_probabilities
        )
        
        observation_vector = state.state_vector
        
        if observation_vector.shape[0] != model.observation_space.shape[0]:
            logger.error(f"Observation vector dimension mismatch before prediction! Expected {model.observation_space.shape[0]}, Got {observation_vector.shape[0]}")
            raise ValueError("Observation vector dimension mismatch.")

        action_probs, _ = model.predict(observation_vector, deterministic=True)
        predicted_prob_this_turn = float(action_probs[0])
        
        output_predictions.append({
            "turn": turn_idx + 1,
            "speaker": speaker,
            "message": message,
            "predicted_conversion_probability": predicted_prob_this_turn
        })
        # Feed this prediction back into the next turn's observation.
        agent_predicted_probabilities.append(predicted_prob_this_turn)

    return output_predictions


def _parse_cli_args():
    """Build the CLI and return parsed arguments."""
    parser = argparse.ArgumentParser(description="Run inference with Azure OpenAI embeddings.")
    parser.add_argument("--model_path", type=str, required=True, help="Path to the trained PPO model (.zip file).")
    parser.add_argument("--conversation_json", type=str, required=True,
                        help="JSON string or path to JSON file for the conversation.")
    parser.add_argument("--azure_api_key", type=str, required=True, help="Azure OpenAI API Key.")
    parser.add_argument("--azure_endpoint", type=str, required=True, help="Azure OpenAI Endpoint URL.")
    parser.add_argument("--azure_deployment_name", type=str, required=True, help="Azure OpenAI embedding deployment name (e.g., for text-embedding-3-large).")
    parser.add_argument("--azure_api_version", type=str, default="2023-12-01-preview", help="Azure OpenAI API Version (e.g., 2023-05-15 or 2023-12-01-preview for newer models).")
    parser.add_argument("--embedding_dim", type=int, required=True,
                        help="The dimension of the embedding vector component EXPECTED BY THE PPO MODEL's observation space.")
    parser.add_argument("--use_miniembeddings", action="store_true",
                        help="Flag if the Azure OpenAI embedding should be reduced (if larger than --embedding_dim) using the mini-embedding logic.")
    parser.add_argument("--max_turns_scaling", type=int, default=20,
                        help="The 'max_turns' value used for progress scaling (default: 20).")
    return parser.parse_args()


def _load_conversation(spec: str):
    """Load the conversation from a JSON file path or an inline JSON string."""
    if os.path.exists(spec):
        with open(spec, 'r') as f:
            conversation = json.load(f)
    else:
        conversation = json.loads(spec)
    if not isinstance(conversation, list):
        raise ValueError("Conversation JSON must be a list of message objects.")
    return conversation


def main():
    """CLI entry point: connect to Azure, load inputs and model, predict."""
    args = _parse_cli_args()

    # Initialize the Azure client and verify connectivity with a tiny call.
    try:
        azure_client = AzureOpenAI(
            api_key=args.azure_api_key,
            azure_endpoint=args.azure_endpoint,
            api_version=args.azure_api_version
        )
        logger.info("Testing Azure OpenAI connection by embedding a short string...")
        test_embedding = get_azure_openai_embedding("test connection", azure_client, args.azure_deployment_name)
        logger.info(f"Azure OpenAI connection successful. Received test embedding of shape: {test_embedding.shape}")
        # The test embedding's shape also reveals the deployed model's true
        # dimension; process_raw_embedding re-derives it per call anyway.
    except Exception as e:
        logger.error(f"Failed to initialize or test Azure OpenAI client: {e}")
        return

    try:
        sample_conversation = _load_conversation(args.conversation_json)
    except Exception as e:
        logger.error(f"Error loading conversation JSON: {e}")
        return

    # Baseline metrics; conversation_length/progress are refreshed per turn.
    initial_metrics = {
        'customer_engagement': 0.5, 'sales_effectiveness': 0.5,
        'conversation_length': 0, 'outcome': 0.5, 'progress': 0.0
    }

    try:
        model = PPO.load(args.model_path, device=device)
        logger.info(f"Model loaded from {args.model_path}")
        logger.info(f"Model's observation space shape: {model.observation_space.shape}")
    except Exception as e:
        logger.error(f"Error loading PPO model: {e}")
        return

    predictions = predict_conversation_trajectory(
        model,
        azure_client,
        args.azure_deployment_name,
        sample_conversation,
        initial_metrics,
        model_expected_embedding_dim=args.embedding_dim,
        use_miniembeddings_on_azure_emb=args.use_miniembeddings,
        max_conversation_turns_scaling=args.max_turns_scaling
    )

    print("\n--- Conversation Predictions (with Azure OpenAI Embeddings) ---")
    for pred_info in predictions:
        print(f"Turn {pred_info['turn']} ({pred_info['speaker']}): \"{pred_info['message'][:60]}...\" -> Probability: {pred_info['predicted_conversion_probability']:.4f}")

if __name__ == "__main__":
    # python inference_azure_openai_v2.py \
    #   --model_path models/sales_conversion_model.zip \
    #   --conversation_json sample_conv.json \
    #   --azure_api_key "YOUR_AZURE_API_KEY" \
    #   --azure_endpoint "YOUR_AZURE_ENDPOINT" \
    #   --azure_deployment_name "your-text-embedding-3-large-deployment-name" \
    #   --azure_api_version "2023-12-01-preview" \
    #   --embedding_dim 1024 \
    #   --use_miniembeddings 
    #
    # (The above example assumes your PPO model was trained expecting 1024-dim embeddings,
    # and text-embedding-3-large (3072-dim) will be reduced to 1024)
    #
    # If your PPO model was trained directly with 3072-dim embeddings:
    # python inference_azure_openai_v2.py \
    #   --model_path models/sales_conversion_model.zip \
    #   --conversation_json sample_conv.json \
    #   --azure_api_key "YOUR_AZURE_API_KEY" \
    #   --azure_endpoint "YOUR_AZURE_ENDPOINT" \
    #   --azure_deployment_name "your-text-embedding-3-large-deployment-name" \
    #   --azure_api_version "2023-12-01-preview" \
    #   --embedding_dim 3072 
    #   (Do NOT specify --use_miniembeddings in this case, as 3072 (Azure) == 3072 (model))

    main()