# Note: the following lines are Hugging Face file-viewer page residue captured
# with the source; kept as comments so the file remains valid Python.
# Shriti09's picture
# Upload 4 files
# 7970870 verified
# raw
# history blame
# 2.06 kB
import torch
import gradio as gr
from model import CustomLLM
from transformers import GPT2Tokenizer
class ModelLoader:
    """Load a custom GPT-2-style LLM plus its tokenizer and expose text generation.

    The checkpoint and tokenizer locations default to the original hard-coded
    paths but can now be overridden, e.g. for testing or alternative deployments.
    """

    def __init__(self, weights_path: str = 'pytorch_model.bin',
                 tokenizer_path: str = 'tokenizer/'):
        """Instantiate the model, load trained weights, and prepare the tokenizer.

        Args:
            weights_path: Path to the serialized state dict (torch.save format).
            tokenizer_path: Directory containing the GPT-2 tokenizer files.
        """
        # Model hyperparameters — these must match the checkpoint being loaded.
        self.config = {
            "vocab_size": 50257,   # GPT-2 vocabulary size
            "hidden_size": 768,
            "num_hidden_layers": 12,
            "rms_norm_eps": 1e-6
        }
        self.model = CustomLLM(self.config)
        # map_location='cpu' so loading works on machines without a GPU.
        # NOTE(security): torch.load unpickles arbitrary objects — only load
        # checkpoints from a trusted source (consider weights_only=True on
        # torch >= 2.0).
        state_dict = torch.load(weights_path, map_location='cpu')
        self.model.load_state_dict(state_dict)
        self.model.eval()  # inference mode: disables dropout etc.
        self.tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_path)
        # GPT-2 ships without a pad token; reuse EOS so padding is well-defined.
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def generate(self, prompt, max_new_tokens=100, temperature=0.9,
                 top_k=50, top_p=0.95):
        """Generate a continuation of *prompt* and return the decoded text.

        Args:
            prompt: Input text to continue.
            max_new_tokens: Maximum number of tokens to append (coerced to int —
                Gradio sliders deliver floats).
            temperature: Softmax temperature for sampling.
            top_k: Top-k sampling cutoff (coerced to int).
            top_p: Nucleus-sampling probability mass.

        Returns:
            The generated text including the prompt, with special tokens removed.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt")
        input_ids = inputs.input_ids
        with torch.no_grad():  # no gradients needed for inference
            generated = self.model.generate(
                input_ids=input_ids,
                # Cast: Gradio Slider values arrive as floats, but token counts
                # and top-k must be integers for generate().
                max_new_tokens=int(max_new_tokens),
                temperature=temperature,
                top_k=int(top_k),
                top_p=top_p,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.pad_token_id
            )
        return self.tokenizer.decode(generated[0], skip_special_tokens=True)
def main():
    """Load the model and serve the Gradio demo UI."""
    loader = ModelLoader()

    interface = gr.Interface(
        fn=loader.generate,
        inputs=[
            gr.Textbox(lines=4, label="Input Prompt"),
            # step=1 keeps integer-valued controls from emitting floats.
            gr.Slider(1, 500, value=100, step=1, label="Max New Tokens"),
            gr.Slider(0.1, 2.0, value=0.9, label="Temperature"),
            gr.Slider(1, 100, value=50, step=1, label="Top K"),
            gr.Slider(0.1, 1.0, value=0.95, label="Top P")
        ],
        outputs=gr.Textbox(label="Generated Output"),
        title="Custom LLM Demo",
        description="Generate text using your custom-trained LLM"
    )
    interface.launch()


# Guard so importing this module (e.g. for testing) does not load weights
# or start a server; Hugging Face Spaces runs the file as __main__.
if __name__ == "__main__":
    main()