dasomaru committed on
Commit
0a05b9e
·
verified ·
1 Parent(s): 473f119
Files changed (1) hide show
  1. app.py +9 -31
app.py CHANGED
@@ -14,40 +14,18 @@ model = AutoModelForCausalLM.from_pretrained(
14
  trust_remote_code=True,
15
  )
16
 
17
@spaces.GPU  # GPU is allocated only while this function runs (ZeroGPU)
def chat(user_input):
    """Generate a chat completion for *user_input* with the preloaded model.

    The model is moved onto the freshly allocated GPU inside the call,
    the user message is wrapped in the model's chat template, and only
    the newly generated text (prompt excluded) is returned.
    """
    model.to("cuda")  # move the CPU-loaded weights onto the allocated GPU

    messages = [{
        "role": "user",
        "content": [{"type": "text", "text": user_input}],
    }]
    prompt = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=False
    )

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=1.0,
            top_p=0.95,
            top_k=64,
            do_sample=True,
        )

    # Fix: slice off the prompt tokens instead of string-splitting on the
    # raw user input — `split(user_input)[-1]` silently truncates the reply
    # whenever the model echoes the input text inside its continuation.
    generated_ids = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
41
-
42
# Wire the chat function into a simple text-in / text-out Gradio UI.
_input_box = gr.Textbox(lines=2, placeholder="Type your message...")
_output_box = gr.Textbox(lines=10)

demo = gr.Interface(
    fn=chat,
    inputs=_input_box,
    outputs=_output_box,
    title="🧠 Gemma-3 4bit (ZeroGPU)",
    description="This Space uses the ZeroGPU feature. First request might take a few seconds!",
)

demo.launch()
52
 
53
 
 
14
  trust_remote_code=True,
15
  )
16
 
17
# Lazily-populated cache so the model and tokenizer are downloaded and
# deserialized only once, not on every request.
_PIPELINE_CACHE = {}


@spaces.GPU(duration=300)  # allocate a ZeroGPU device for up to 300 s per call
def generate_response(prompt):
    """Return the model's completion for *prompt* (prompt echo included).

    Model/tokenizer loading stays inside the function so no GPU work
    happens at import time (required by ZeroGPU), but the loaded objects
    are cached — the original re-loaded the full weights on every call.
    """
    if "model" not in _PIPELINE_CACHE:
        _PIPELINE_CACHE["tokenizer"] = AutoTokenizer.from_pretrained(
            "dasomaru/gemma-3-4bit-it-demo"
        )
        _PIPELINE_CACHE["model"] = AutoModelForCausalLM.from_pretrained(
            "dasomaru/gemma-3-4bit-it-demo"
        )
    tokenizer = _PIPELINE_CACHE["tokenizer"]
    model = _PIPELINE_CACHE["model"]
    model.to("cuda")

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    # Inference only — skip autograd bookkeeping to save GPU memory.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
27
 
28
# Minimal text-to-text UI around the generator.
demo = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
)

demo.launch()
30
 
31