test-zerogpu-2

Sleeping

test-zerogpu-2 / app.py

Update app.py

2ee9d24 verified 5 months ago

1.62 kB

	import spaces
	import gradio as gr
	from transformers import AutoTokenizer, pipeline
	import torch

	# Hugging Face Hub checkpoint served by this app.
	# Alternative checkpoint kept for reference: "tiiuae/falcon-7b-instruct"
	model_name = "meta-llama/Llama-2-7b-hf"

	# Build the tokenizer first, then hand it to the text-generation pipeline so
	# both come from the same checkpoint.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	generator = pipeline(
	    task="text-generation",
	    model=model_name,
	    tokenizer=tokenizer,
	    # Place the model automatically on the available GPU.
	    # NOTE(review): the original author states ZeroGPU provides an A100 here
	    # -- confirm against the current hardware.
	    device_map="auto",
	    # Load the weights in bfloat16 to reduce memory use.
	    torch_dtype=torch.bfloat16,
	    # Trust and load additional code shipped in the model repository.
	    # NOTE(review): confirm this is still required for the checkpoint above.
	    trust_remote_code=True,
	)

	# Inference entry point. It is wrapped in the spaces.GPU decorator because the
	# app targets ZeroGPU (duration=120 -- presumably seconds; confirm in the docs).
	@spaces.GPU(duration=120)
	def generate_text(prompt):
	    """Generate text for *prompt* using the module-level ``generator`` pipeline.

	    Args:
	        prompt: Text handed to the pipeline as the generation prompt.

	    Returns:
	        The ``"generated_text"`` entry of the first pipeline result.
	    """
	    # Sampling is enabled and generation is capped at 100 new tokens.
	    outputs = generator(prompt, do_sample=True, max_new_tokens=100)
	    return outputs[0]["generated_text"]

	# Build the Gradio interface: one multi-line text input mapped to one text
	# output, both wired to generate_text.
	demo = gr.Interface(
	    fn=generate_text,
	    inputs=gr.Textbox(lines=3, label="入力プロンプト"),
	    outputs=gr.Textbox(label="生成されたテキスト"),
	    # The title is derived from model_name so it always names the checkpoint
	    # that is actually loaded; it was previously hard-coded to
	    # "Falcon-7B-Instruct" while the app serves a different model.
	    title=f"{model_name} テキスト生成デモ",
	    description="プロンプトを入力すると、大規模言語モデルが続きのテキストを生成します。",
	)

	# Start the app (per the original author's note, on Spaces this is what
	# publishes the service).
	demo.launch()