import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the Qwen3-0.6B model and tokenizer
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half precision to reduce GPU memory use
    device_map="auto",          # place the model on a GPU automatically
    trust_remote_code=True      # trust custom code from the Hub (used by Qwen releases)
)

def generate_text(prompt):
    # Use model.device instead of hard-coding "cuda" so the demo
    # also runs on CPU-only Spaces
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Build the Gradio interface
demo = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=3, placeholder="Type your question..."),
    outputs=gr.Textbox(label="Qwen3-0.6B's answer"),
    title="Qwen3-0.6B Demo (Free GPU)",
)

demo.launch()
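Since Qwen3-0.6B is an instruction-tuned chat model, it usually answers better when the prompt is wrapped in the tokenizer's chat template rather than passed as raw text. The variant below is a minimal sketch of that approach; `generate_chat` is a hypothetical helper name, and it reuses the `model` and `tokenizer` objects loaded above.

# Chat-template variant: format the prompt as a user turn before generating
def generate_chat(prompt):
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,  # append the assistant-turn marker
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100)
    # Slice off the prompt tokens so only the new completion is decoded
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

To use it, swap `fn=generate_text` for `fn=generate_chat` in the `gr.Interface` call; everything else stays the same.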