Spaces:

Staticaliza
/

Zero-5

Paused

Zero-5 / app.py

Update app.py

eb2d7fc verified 5 months ago

1.42 kB

	import spaces
	@spaces.GPU(duration=15)
	def gpu() -> None:
	    """Print a fixed status line to stdout.

	    The function is wrapped by ``spaces.GPU(duration=15)``.
	    """
	    # The backslash is escaped explicitly: "\|" is not a recognised Python
	    # escape sequence and produces a warning at compile time. The text
	    # written to stdout is unchanged.
	    # NOTE(review): the backslash looks like a markdown-escaping artifact --
	    # confirm whether a plain "|" was intended.
	    print("[GPU] \\| GPU maintained.")

	import os
	import sys
	import subprocess
	import urllib.request
	import gradio as gr

	# Download run_inference.py at startup if it is missing.
	# NOTE(review): the script comes from the repository's moving "main" branch
	# and is later executed by generate(); consider pinning a commit.
	SCRIPT_PATH = os.path.join(os.getcwd(), "run_inference.py")
	if not os.path.isfile(SCRIPT_PATH):
	    # Fetch into a sibling ".part" file and rename it into place only after
	    # the download has finished, so an interrupted transfer never leaves a
	    # truncated script that the isfile() check above would accept next time.
	    _partial_path = SCRIPT_PATH + ".part"
	    try:
	        urllib.request.urlretrieve(
	            "https://raw.githubusercontent.com/microsoft/BitNet/main/run_inference.py",
	            _partial_path,
	        )
	        os.replace(_partial_path, SCRIPT_PATH)
	    finally:
	        # After a successful rename there is nothing left to remove; on
	        # failure this discards the incomplete file before the error propagates.
	        if os.path.exists(_partial_path):
	            os.remove(_partial_path)

	# Model file passed to the inference script; the MODEL_PATH environment
	# variable overrides the default relative path.
	MODEL_PATH = os.environ.get("MODEL_PATH", "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf")

	def generate(prompt: str, max_tokens: int = 128, temperature: float = 0.7) -> str:
	    """Run the inference script on *prompt* and return its text output.

	    Launches ``SCRIPT_PATH`` with the current Python interpreter as a child
	    process, passing the model path, prompt, token count and temperature as
	    command-line arguments.

	    Args:
	        prompt: Text passed to the script via ``-p``.
	        max_tokens: Value for ``-n``; converted to an integer first.
	        temperature: Value for ``-temp``.

	    Returns:
	        The child's stripped stdout when it exits with status 0. Otherwise
	        its stripped stderr, or a short exit-status message when stderr is
	        empty.
	    """
	    cmd = [
	        sys.executable,
	        SCRIPT_PATH,
	        "-m", MODEL_PATH,
	        "-p", prompt,
	        # A whole-number float such as 128.0 would otherwise be rendered as
	        # "128.0"; presumably the script expects an integer here -- confirm
	        # against run_inference.py.
	        "-n", str(int(max_tokens)),
	        "-temp", str(temperature),
	    ]
	    proc = subprocess.run(cmd, capture_output=True, text=True)
	    if proc.returncode == 0:
	        return proc.stdout.strip()
	    # Never hand back an empty string for a failed run: it would be
	    # indistinguishable from an empty completion.
	    return proc.stderr.strip() or f"inference failed with exit status {proc.returncode}"

	# Gradio UI: one prompt box and two sliders are the inputs to generate();
	# its return value is shown in a single output text box.
	_prompt_box = gr.Textbox(lines=2, placeholder="enter your prompt here", label="prompt")
	_max_tokens_slider = gr.Slider(minimum=1, maximum=512, value=128, step=1, label="max tokens")
	_temperature_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.01, label="temperature")
	_completion_box = gr.Textbox(label="completion")

	iface = gr.Interface(
	    fn=generate,
	    inputs=[_prompt_box, _max_tokens_slider, _temperature_slider],
	    outputs=_completion_box,
	    title="bitnet.cpp completion demo",
	    description="downloads inference script via python so no bash needed",
	)

	# Launch the interface only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()