# nanoGPT / app.py
# Author: AkashDataScience — "Updated encoding" (commit 324c700), 1.94 kB
import torch
import gradio as gr
from model import BigramLanguageModel
# Pick the GPU when one is available; generation tensors are created on this
# same device in inference(), so the model must live there too.
cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'

# Instantiate the model and restore the trained weights.
# NOTE(review): strict=False silently ignores missing/unexpected checkpoint
# keys — confirm the checkpoint really matches this architecture.
model = BigramLanguageModel()
model.load_state_dict(torch.load("nanogpt.pth", map_location=torch.device(device)), strict=False)
# Fix: move the model onto the selected device. Previously the parameters
# stayed on CPU while inference() built its context tensor on `device`,
# which crashes generation on CUDA machines.
model.to(device)
# Fix: switch to eval mode so dropout/batch-norm (if any) behave
# deterministically during generation.
model.eval()
# Rebuild the character-level vocabulary from the training corpus so the
# tokenizer here matches the one used when the model was trained.
with open('input.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# All unique characters that occur in this text, in sorted order.
chars = sorted(list(set(text)))
vocab_size = len(chars)

# Create a mapping between characters and integer token ids.
stoi = { ch:i for i,ch in enumerate(chars) }
itos = { i:ch for i,ch in enumerate(chars) }

def encode(s):
    """Encoder: take a string, return the list of integer token ids."""
    return [stoi[c] for c in s]

def decode(l):
    """Decoder: take a list of integer token ids, return the string."""
    return ''.join([itos[i] for i in l])
def inference(input_text, max_new_tokens=500):
    """Generate text continuing from `input_text` with the loaded model.

    Args:
        input_text: Seed string. Characters absent from the training
            vocabulary are dropped (previously they raised KeyError).
        max_new_tokens: Number of tokens to generate; coerced to int
            because Gradio sliders may deliver floats.

    Returns:
        The decoded string: the (filtered) prompt plus the generated
        continuation.
    """
    # Keep only characters the model's vocabulary knows about; fall back to
    # a single known character so the context tensor is never empty.
    known_chars = [c for c in input_text if c in stoi]
    if not known_chars:
        known_chars = [' '] if ' ' in stoi else [chars[0]]
    context = torch.tensor(encode(''.join(known_chars)), dtype=torch.long, device=device).view(1, -1)
    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        generated = model.generate(context, max_new_tokens=int(max_new_tokens))
    return decode(generated[0].tolist())
# UI copy shown in the Gradio interface header.
title = "NanoGPT trained on Shakespeare Plays dataset"
description = "A simple Gradio interface to generate text from gpt model trained on Shakespeare Plays"

# Example prompts, each paired with the default generation budget of 500.
_example_words = [
    "Shape", "Answer", "Ideology", "Absorb", "Triangle",
    "Listen", "Census", "Balance", "Representative", "Cinema",
]
examples = [[word, 500] for word in _example_words]
# Wire the inference function into a Gradio UI and start the app.
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Textbox(label="Enter any word", type="text"),
        gr.Slider(
            minimum=100,
            maximum=10000,
            step=100,
            value=500,
            label="Max character to generate",
        ),
    ],
    outputs=[gr.Textbox(label="Output", type="text")],
    title=title,
    description=description,
    examples=examples,
)
demo.launch()