ddosxd committed
Commit a111203 · verified · 1 Parent(s): 7f6b5fc

Update app.py

Files changed (1):
  1. app.py +23 -3
app.py CHANGED
@@ -1,15 +1,35 @@
  import gradio as gr
  import spaces
  import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer

- zero = torch.Tensor([0]).cuda()
- print(zero.device) # <-- 'cpu' 🤔
+ tokenizer = AutoTokenizer.from_pretrained("Upstage/SOLAR-10.7B-Instruct-v1.0")
+ model = AutoModelForCausalLM.from_pretrained(
+     "rishiraj/meow",
+     device_map="auto",
+     torch_dtype=torch.float16,
+ )
+
+ #zero = torch.Tensor([0]).cuda()
+ #print(zero.device) # <-- 'cpu' 🤔

  @spaces.GPU
  def chat(prompt):
-     print(zero.device) # <-- 'cuda:0' 🤗
+     conversation = [ {'role': 'user', 'content': 'Hello?'} ]
+
+     prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
+
+     inputs = tokenizer(prompt, return_tensors="pt").to(zero.device)
+     outputs = model.generate(**inputs, use_cache=True, max_length=4096)
+     output_text = tokenizer.decode(outputs[0])
+     print(output_text)
+     return output_text
+
+     #print() # <-- 'cuda:0' 🤗
      return f"Hello {zero + n} Tensor"

+
+
  gr.Interface(
      fn=chat,
      inputs=gr.Text(),
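Note: as committed, chat() would crash at runtime. `.to(zero.device)` references `zero`, which this same commit comments out (NameError), the hard-coded conversation ignores the `prompt` argument, and the leftover `return f"Hello {zero + n} Tensor"` is unreachable dead code (`n` was never defined anyway). A minimal corrected sketch of the resulting app.py follows; it assumes `model.device` as the target device, wires the user's `prompt` into the conversation, and an `outputs=gr.Text()` / `.launch()` tail for the `gr.Interface` call, whose closing falls outside this hunk:

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Upstage/SOLAR-10.7B-Instruct-v1.0")
model = AutoModelForCausalLM.from_pretrained(
    "rishiraj/meow",
    device_map="auto",
    torch_dtype=torch.float16,
)

@spaces.GPU
def chat(prompt):
    # Use the caller's prompt instead of the commit's hard-coded 'Hello?'.
    conversation = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    # `zero` is commented out in this commit, so `zero.device` would raise
    # NameError; model.device targets the GPU that @spaces.GPU attaches (assumed).
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, use_cache=True, max_length=4096)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

gr.Interface(
    fn=chat,
    inputs=gr.Text(),
    outputs=gr.Text(),  # assumed: the rest of the call sits outside the hunk
).launch()

One caveat on the sketch: `outputs[0]` contains the prompt tokens as well as the generated ones, so the decoded string echoes the prompt; a production app would slice off the input length before decoding.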