# Hugging Face Space app (scraped page header "Spaces: Sleeping" removed).
import gradio as gr
import spaces  # HF ZeroGPU helper; NOTE(review): never used — confirm whether chat() should carry @spaces.GPU
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): tokenizer repo differs from the model repo below — confirm
# "rishiraj/meow" shares the SOLAR tokenizer/vocabulary.
tokenizer = AutoTokenizer.from_pretrained("Upstage/SOLAR-10.7B-Instruct-v1.0")
model = AutoModelForCausalLM.from_pretrained(
    "rishiraj/meow",
    device_map="auto",  # let accelerate place the weights
    torch_dtype=torch.float16,
)

# Anchor tensor whose .device tells us where to move generation inputs.
zero = torch.Tensor([0]).cuda()
def chat(prompt):
    """Generate a single-turn chat completion for *prompt*.

    The user message is wrapped with the tokenizer's chat template, moved to
    the same device as the module-level ``zero`` tensor, and run through
    ``model.generate``. Returns the decoded output (special tokens included,
    matching the original behavior), which is also printed for logging.
    """
    conversation = [{'role': 'user', 'content': prompt}]
    # Use a separate name instead of shadowing the `prompt` parameter.
    templated = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(templated, return_tensors="pt").to(zero.device)
    # max_length bounds prompt + generated tokens combined.
    outputs = model.generate(**inputs, use_cache=True, max_length=4096)
    output_text = tokenizer.decode(outputs[0])
    print(output_text)
    return output_text
# Minimal Gradio UI: one text box in, one text box out, backed by chat().
gr.Interface(
    fn=chat,
    inputs=gr.Text(),
    outputs=gr.Text(),
).launch()