phi-2 / app.py
Benjamin Gonzalez
fix token length
4d07925
raw
history blame
1.77 kB
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# Checkpoint identifier shared by the tokenizer and the model loader.
_MODEL_ID = "microsoft/phi-2"

# Query CUDA availability once; it decides both the default device and the
# floating-point precision used for the weights.
_HAS_CUDA = torch.cuda.is_available()

if _HAS_CUDA:
    torch.set_default_device("cuda")
    _WEIGHT_DTYPE = torch.float16
else:
    _WEIGHT_DTYPE = torch.float32

# NOTE(review): trust_remote_code=True is passed to both loaders; per the
# transformers docs this permits running code shipped with the checkpoint --
# confirm it is still required for this model.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    _MODEL_ID,
    torch_dtype=_WEIGHT_DTYPE,
    trust_remote_code=True,
)
def generate(prompt, length):
    """Run the module-level model on *prompt* and return the decoded text.

    Args:
        prompt: Text that is tokenized and passed to ``model.generate``.
        length: Requested ``max_length`` for generation. The UI number field
            may deliver a float, so the value is truncated to an ``int``.

    Returns:
        The first entry of ``tokenizer.batch_decode`` applied to the
        generated sequences.

    Raises:
        TypeError: If ``length`` is ``None`` (e.g. the number field was
            cleared) and therefore cannot be converted to an integer.
    """
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
    input_token_len = len(inputs.tokens())
    # Never request a max_length shorter than the prompt itself; fall back to
    # the prompt's own token count in that case.
    max_length = max(int(length), input_token_len)
    outputs = model.generate(**inputs, max_length=max_length)
    return tokenizer.batch_decode(outputs)[0]
# Prompt pre-filled in the textbox and reused verbatim as the first example.
_DEFAULT_PROMPT = "Write a detailed analogy between mathematics and a lighthouse."

# Each row is [prompt, max length], in the same order as the interface inputs.
_EXAMPLES = [
    [_DEFAULT_PROMPT, 75],
    [
        "Instruct: Write a detailed analogy between mathematics and a lighthouse.\nOutput:",
        75,
    ],
    [
        "Alice: I don't know why, I'm struggling to maintain focus while studying. Any suggestions?\n\nBob: ",
        150,
    ],
    [
        # NOTE(review): the docstring lines in this code prompt are not
        # indented under the ``def`` -- confirm that is intentional.
        'def print_prime(n):\n"""\nPrint all primes between 1 and n\n"""\n',
        100,
    ],
]

# Input widgets: free-text prompt plus a numeric length capped at 500.
_prompt_input = gr.Text(label="prompt", value=_DEFAULT_PROMPT)
_length_input = gr.Number(value=100, label="max length", maximum=500)

# Gradio interface wiring the two inputs to ``generate`` with a text output.
demo = gr.Interface(
    fn=generate,
    inputs=[_prompt_input, _length_input],
    outputs="text",
    examples=_EXAMPLES,
    title="Microsoft Phi-2",
    description="Unofficial demo of Microsoft Phi-2, a high performing model with only 2.7B parameters.",
)
# Launch the interface only when this file is run as a script, not on import.
# ``show_api=False`` is forwarded to ``launch`` unchanged.
if __name__ == "__main__":
    demo.launch(show_api=False)