dasomaru committed on
Commit
e7303ec
·
verified ·
1 Parent(s): e180d51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -8
app.py CHANGED
@@ -1,14 +1,64 @@
1
  import gradio as gr
2
  import spaces
3
  import torch
 
4
 
5
- zero = torch.Tensor([0]).cuda()
6
- print(zero.device) # <-- 'cpu' ๐Ÿค”
7
 
8
- @spaces.GPU
9
- def greet(n):
10
- print(zero.device) # <-- 'cuda:0' ๐Ÿค—
11
- return f"Hello {zero + n} Tensor"
 
 
 
 
12
 
13
- demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
14
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub repository to serve; presumably a 4-bit Gemma-3 checkpoint — confirm on the Hub.
MODEL_ID = "dasomaru/gemma-3-4bit-it-demo"
model_name = MODEL_ID

# The tokenizer needs no accelerator, so it is loaded eagerly on CPU.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Load the model weights on CPU only for now; under ZeroGPU no GPU exists at
# import time — the handler moves the model to CUDA once a GPU is allocated.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
@spaces.GPU  # A GPU is allocated only for the duration of this call (ZeroGPU).
def chat(user_input):
    """Generate a single-turn chat reply for *user_input*.

    Builds a chat prompt via the model's chat template, samples up to 256 new
    tokens on the GPU, and returns only the newly generated text.

    Args:
        user_input: The user's message as plain text.

    Returns:
        The model's reply with the prompt and special tokens stripped.
    """
    model.to("cuda")  # Move weights onto the just-allocated GPU inside the call.

    messages = [{
        "role": "user",
        "content": [{"type": "text", "text": user_input}],
    }]
    prompt = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=False
    )

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=1.0,
            top_p=0.95,
            top_k=64,
            do_sample=True,
        )

    # Decode only the tokens generated AFTER the prompt. The previous
    # `full_text.split(user_input)[-1]` broke whenever the chat template or
    # tokenizer re-normalized the input text, and silently dropped output if
    # the reply itself contained `user_input`.
    prompt_len = inputs["input_ids"].shape[-1]
    generated = outputs[0][prompt_len:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()
# Gradio front-end: a simple text-in/text-out interface over the ZeroGPU
# `chat` handler. (Removed the trailing commented-out ZeroGPU demo snippet —
# dead code that only duplicated the docs example.)
demo = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=2, placeholder="Type your message..."),
    outputs=gr.Textbox(lines=10),
    title="๐Ÿง  Gemma-3 4bit (ZeroGPU)",
    description="This Space uses the ZeroGPU feature. First request might take a few seconds!"
)

demo.launch()