File size: 955 Bytes
b4d301c
cb95a71
 
b4d301c
cb95a71
 
 
 
 
 
 
 
 
b4d301c
cb95a71
 
 
 
b4d301c
cb95a71
e3ac475
cb95a71
 
 
 
e3ac475
cb95a71
e3ac475
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the Qwen3-0.6B model and its tokenizer from the Hugging Face hub.
# Both objects are module-level so the Gradio handler below can reuse them
# across requests without reloading.
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half precision to cut GPU memory usage
    device_map="auto",          # auto-place weights (GPU when available)
    trust_remote_code=True      # run the repo's custom model code (Qwen requires it)
)

def generate_text(prompt):
    """Generate a completion for *prompt* using the loaded Qwen model.

    Parameters
    ----------
    prompt : str
        User-supplied text from the Gradio textbox.

    Returns
    -------
    str
        The decoded model output (prompt + up to 100 new tokens),
        with special tokens stripped.
    """
    # Use model.device instead of a hard-coded "cuda": with
    # device_map="auto" the model may be placed on CPU (no GPU) or a
    # specific GPU, and sending inputs to "cuda" would then crash or
    # cause a device mismatch.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Build the Gradio UI: one prompt textbox in, one answer textbox out.
prompt_box = gr.Textbox(lines=3, placeholder="输入你的问题...")
answer_box = gr.Textbox(label="Qwen3-0.6B 的回答")

demo = gr.Interface(
    title="Qwen3-0.6B 演示 (Free GPU)",
    fn=generate_text,
    inputs=prompt_box,
    outputs=answer_box,
)

# Start the web server (blocks until shut down).
demo.launch()