"""Gradio demo: generate text continuations from a prompt with GPT-2 top-k sampling.

The original script contained two complete, duplicated setups pasted together;
the first (a custom ``Assign12_Model`` GPT loaded from ``trained_model.pth``)
was entirely shadowed by the second, yet still cost a weights load at startup.
Only the surviving code path — pretrained GPT-2 with top-k(50) sampling — is kept.
"""
print("Step 1: Importing libraries...")
import torch
import torch.nn.functional as F
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import gradio as gr

print("Step 2: Loading the model...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = 'gpt2'  # You can replace this with your specific model
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()
model.to(device)

print("Step 4: Loading tokenizer...")
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

print("Step 5: Defining the inference function...")


def generate_text(prompt, max_len=50, num_outputs=1):
    """Generate ``num_outputs`` continuations of ``prompt`` via top-k sampling.

    Args:
        prompt: Input text to continue.
        max_len: Total token budget per sequence (prompt included). Gradio
            sliders deliver floats, so it is coerced to int.
        num_outputs: How many independent samples to draw. ``gr.Number``
            also yields a float, so it is coerced to int (min 1) — the
            original crashed in ``range()`` on the float here.

    Returns:
        One string with each sample labelled ``Generated Text i:``.
    """
    max_len = int(max_len)
    num_outputs = max(1, int(num_outputs))
    print(f"Received input prompt: {prompt}")

    # Encode the input text once; each sample restarts from a fresh copy.
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    generated_sequences = []  # Labelled output pieces, joined at the end.
    with torch.no_grad():
        for i in range(num_outputs):
            x = input_ids.clone()
            # Append one sampled token at a time until the budget is used.
            while x.size(1) < max_len:
                logits = model(x).logits  # (B, T, vocab_size)
                logits = logits[:, -1, :]  # (B, vocab_size) — last position only
                probs = F.softmax(logits, dim=-1)
                # Restrict sampling to the 50 most likely tokens.
                topk_probs, topk_indices = torch.topk(probs, 50, dim=-1)
                ix = torch.multinomial(topk_probs, 1)          # (B, 1) index into top-k
                xcol = torch.gather(topk_indices, -1, ix)      # (B, 1) vocab id
                x = torch.cat((x, xcol), dim=1)

            # Decode the generated tokens and append to the labelled output.
            tokens = x[0, :max_len].tolist()
            decoded = tokenizer.decode(tokens, skip_special_tokens=True)
            generated_sequences.append(f"Generated Text {i+1}:")
            generated_sequences.append(f"> {decoded}\n")

    return "\n".join(generated_sequences)


print("Step 6: Setting up the Gradio interface...")
interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Input Prompt"),
        gr.Slider(10, 200, step=10, label="Max Length", value=50),
        # Default of 1 matches the function default; the original's value=10
        # silently generated ten sequences per click.
        gr.Number(label="Number of Outputs", value=1, precision=0),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Transformer Text Generator",
    description="Enter a prompt and generate text using the trained transformer model.",
)

if __name__ == "__main__":
    # Guarded so importing this module no longer launches a public server.
    print("Step 7: Launching the Gradio interface...")
    interface.launch(share=True)