"""Gradio demo: generate text continuations from a prompt with GPT-2 top-k sampling.

The original script contained two complete, duplicated setups pasted together;
the first (a custom ``Assign12_Model`` GPT loaded from ``trained_model.pth``)
was entirely shadowed by the second, yet still cost a weights load at startup.
Only the surviving code path — pretrained GPT-2 with top-k(50) sampling — is kept.
"""
print("Step 1: Importing libraries...")
import torch
import torch.nn.functional as F
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import gradio as gr

print("Step 2: Loading the model...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = 'gpt2'  # You can replace this with your specific model
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()
model.to(device)

print("Step 4: Loading tokenizer...")
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

print("Step 5: Defining the inference function...")


def generate_text(prompt, max_len=50, num_outputs=1):
    """Generate ``num_outputs`` continuations of ``prompt`` via top-k sampling.

    Args:
        prompt: Input text to continue.
        max_len: Total token budget per sequence (prompt included). Gradio
            sliders deliver floats, so it is coerced to int.
        num_outputs: How many independent samples to draw. ``gr.Number``
            also yields a float, so it is coerced to int (min 1) — the
            original crashed in ``range()`` on the float here.

    Returns:
        One string with each sample labelled ``Generated Text i:``.
    """
    max_len = int(max_len)
    num_outputs = max(1, int(num_outputs))
    print(f"Received input prompt: {prompt}")

    # Encode the input text once; each sample restarts from a fresh copy.
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    generated_sequences = []  # Labelled output pieces, joined at the end.
    with torch.no_grad():
        for i in range(num_outputs):
            x = input_ids.clone()
            # Append one sampled token at a time until the budget is used.
            while x.size(1) < max_len:
                logits = model(x).logits  # (B, T, vocab_size)
                logits = logits[:, -1, :]  # (B, vocab_size) — last position only
                probs = F.softmax(logits, dim=-1)
                # Restrict sampling to the 50 most likely tokens.
                topk_probs, topk_indices = torch.topk(probs, 50, dim=-1)
                ix = torch.multinomial(topk_probs, 1)          # (B, 1) index into top-k
                xcol = torch.gather(topk_indices, -1, ix)      # (B, 1) vocab id
                x = torch.cat((x, xcol), dim=1)

            # Decode the generated tokens and append to the labelled output.
            tokens = x[0, :max_len].tolist()
            decoded = tokenizer.decode(tokens, skip_special_tokens=True)
            generated_sequences.append(f"Generated Text {i+1}:")
            generated_sequences.append(f"> {decoded}\n")

    return "\n".join(generated_sequences)


print("Step 6: Setting up the Gradio interface...")
interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Input Prompt"),
        gr.Slider(10, 200, step=10, label="Max Length", value=50),
        # Default of 1 matches the function default; the original's value=10
        # silently generated ten sequences per click.
        gr.Number(label="Number of Outputs", value=1, precision=0),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Transformer Text Generator",
    description="Enter a prompt and generate text using the trained transformer model.",
)

if __name__ == "__main__":
    # Guarded so importing this module no longer launches a public server.
    print("Step 7: Launching the Gradio interface...")
    interface.launch(share=True)