# app.py for a Hugging Face Space: an unofficial streaming Gradio demo of microsoft/phi-2.
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
TextIteratorStreamer,
StoppingCriteriaList,
)
from threading import Thread
import gradio as gr
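
# App flow: pick a device, load microsoft/phi-2, define custom stopping criteria,
# stream tokens from a background generation thread, and serve a Gradio UI.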
# Create new tensors on the GPU by default when one is available.
if torch.cuda.is_available():
    torch.set_default_device("cuda")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
# fp16 halves memory on GPU; fall back to fp32 on CPU, where fp16 is much slower.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True,
)
# Phi-2 tends to run past the answer into "Exercise"/"Exercises" sections, so those
# strings (plus the EOS token) end generation early.
stop_list = ["Exercise", "Exercises", "<|endoftext|>"]
# Tokenize the stop strings once, rather than on every generation step.
stop_token_ids = [
    tokenizer(stop, add_special_tokens=False).input_ids for stop in stop_list
]


def Phi2StoppingCriteria(
    input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
) -> bool:
    # Stop once the generated sequence ends with any of the stop token sequences.
    # (Checking only the single last token would miss multi-token stops like "Exercises".)
    generated = input_ids[0].tolist()
    return any(generated[-len(ids):] == ids for ids in stop_token_ids)


stopping_criteria = StoppingCriteriaList([Phi2StoppingCriteria])
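
# Note: StoppingCriteriaList simply calls each entry with (input_ids, scores), so a
# plain callable with that signature works here, though subclassing
# transformers.StoppingCriteria is the documented route.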
def generate(prompt, max_new_tokens):
    inputs = tokenizer(prompt, return_tensors="pt")
    # Streaming setup adapted from
    # https://huggingface.co/spaces/joaogante/transformers_streaming/blob/main/app.py
    # The streamer wraps the tokenizer; we skip echoing the prompt and special tokens.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # BatchEncoding is dict-like, so dict(inputs, ...) merges input_ids and
    # attention_mask with the remaining generate() kwargs.
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=int(max_new_tokens),  # gr.Number may deliver a float
        do_sample=True,
        stopping_criteria=stopping_criteria,
    )
    # generate() blocks until done, so run it in a background thread while the
    # streamer yields decoded chunks in this one for incremental UI updates.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    model_output = ""
    for new_text in streamer:
        model_output += new_text
        yield model_output
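
# Quick sanity check outside Gradio (hypothetical prompt, kept commented out):
#   last = ""
#   for last in generate("Who was Ada Lovelace?", 32):
#       pass
#   print(last)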
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Text(
            label="prompt",
            value="Write a detailed analogy between mathematics and a lighthouse.",
        ),
        gr.Number(value=100, label="max new tokens", maximum=500),
    ],
    outputs="text",
    examples=[
        [
            "Write a detailed analogy between mathematics and a lighthouse.",
            75,
        ],
        [
            "Instruct: Write a detailed analogy between mathematics and a lighthouse.\nOutput:",
            75,
        ],
        [
            "Alice: I don't know why, I'm struggling to maintain focus while studying. Any suggestions?\n\nBob: ",
            150,
        ],
        [
            '''def print_prime(n):
    """
    Print all primes between 1 and n
    """\n''',
            100,
        ],
        ["User: How does sleep affect mood?\nAI:", 125],
        ["Who was Ada Lovelace?", 100],
        ["Explain the concept of skip lists.", 125],
    ],
    title="Microsoft Phi-2",
    description="Unofficial demo of Microsoft Phi-2, a high-performing model with only 2.7B parameters.",
)

if __name__ == "__main__":
    demo.launch(show_api=False)
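
# On a Gradio Space this file is executed as `python app.py`, so the __main__ guard
# fires there too; show_api=False hides the "Use via API" link in the app footer.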