Spaces:

randomblock1
/

phi-2

Sleeping

phi-2 / app.py

Benjamin Gonzalez

fix token length

4d07925 almost 2 years ago

1.77 kB

	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import gradio as gr

	# Make CUDA the default torch device whenever a GPU is available.
	if torch.cuda.is_available():
	    torch.set_default_device("cuda")

	# The tokenizer and the weights both come from the "microsoft/phi-2" checkpoint.
	# NOTE(review): trust_remote_code=True presumably allows code shipped with the
	# checkpoint to run locally -- confirm this is still required and acceptable.
	tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
	model = AutoModelForCausalLM.from_pretrained(
	    "microsoft/phi-2",
	    trust_remote_code=True,
	    # Half precision when CUDA is available, full precision otherwise.
	    torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
	)


	def generate(prompt, length):
	    """Generate text from ``prompt`` with the module-level model.

	    Args:
	        prompt: Text that is tokenized and passed to ``model.generate``.
	        length: Requested ``max_length`` for generation. It is coerced to
	            ``int`` (a numeric UI field may deliver a float) and raised to
	            the prompt's token count when it is smaller than that.

	    Returns:
	        The decoded string for the first sequence returned by
	        ``model.generate``.

	    Raises:
	        TypeError: If ``length`` is ``None`` or otherwise not convertible
	            to an integer.
	    """
	    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
	    input_token_len = len(inputs.tokens())
	    # Never request a max_length shorter than the prompt itself.
	    max_length = max(int(length), input_token_len)
	    outputs = model.generate(**inputs, max_length=max_length)
	    return tokenizer.batch_decode(outputs)[0]


	# Gradio interface: a prompt text box and a "max length" number are passed to
	# `generate`, and its return value is displayed as text.
	demo = gr.Interface(
	    title="Microsoft Phi-2",
	    description="Unofficial demo of Microsoft Phi-2, a high performing model with only 2.7B parameters.",
	    fn=generate,
	    inputs=[
	        gr.Text(
	            value="Write a detailed analogy between mathematics and a lighthouse.",
	            label="prompt",
	        ),
	        gr.Number(label="max length", value=100, maximum=500),
	    ],
	    outputs="text",
	    # Each example is a [prompt, max length] pair, in the same order as `inputs`.
	    examples=[
	        ["Write a detailed analogy between mathematics and a lighthouse.", 75],
	        [
	            "Instruct: Write a detailed analogy between mathematics and a lighthouse.\nOutput:",
	            75,
	        ],
	        [
	            "Alice: I don't know why, I'm struggling to maintain focus while studying. Any suggestions?\n\nBob: ",
	            150,
	        ],
	        # Code prompt: a function signature followed by its docstring.
	        [
	            '''def print_prime(n):
	"""
	Print all primes between 1 and n
	"""\n''',
	            100,
	        ],
	    ],
	)


	if __name__ == "__main__":
	    # Launch the interface only when this file is executed as a script;
	    # show_api=False is forwarded to launch().
	    demo.launch(show_api=False)