Zero-5 / app.py
Staticaliza's picture
Update app.py
eb2d7fc verified
raw
history blame
1.42 kB
import spaces
@spaces.GPU(duration=15)
def gpu():
    """Print a log line stating that the GPU is maintained.

    The function takes no arguments and returns ``None``; its only effect is
    the message written to standard output.

    NOTE(review): presumably the ``spaces.GPU`` decorator is what ties this
    function to the platform's GPU allocation -- confirm against the
    ``spaces`` package documentation.
    """
    message = "[GPU] | GPU maintained."
    print(message)
import os
import sys
import subprocess
import urllib.request
import gradio as gr
# Fetch run_inference.py into the working directory at startup when it is
# not already present.
# NOTE(review): the URL tracks the `main` branch, so the downloaded (and later
# executed) script can change between starts; consider pinning a commit hash.
SCRIPT_PATH = os.path.join(os.getcwd(), "run_inference.py")
if not os.path.isfile(SCRIPT_PATH):
    # Download to a sibling temporary name and rename only on success, so an
    # interrupted or short transfer never leaves a truncated file at
    # SCRIPT_PATH that the isfile() check above would accept on the next start.
    _partial_path = SCRIPT_PATH + ".part"
    try:
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/microsoft/BitNet/main/run_inference.py",
            _partial_path,
        )
        os.replace(_partial_path, SCRIPT_PATH)
    finally:
        # After a successful rename the temporary file no longer exists; on
        # any failure, drop the incomplete download and let the error
        # propagate unchanged.
        if os.path.exists(_partial_path):
            os.remove(_partial_path)
# Path of the model file handed to the inference script; the MODEL_PATH
# environment variable, when set, overrides the default relative path.
MODEL_PATH = os.getenv(
    "MODEL_PATH",
    "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf",
)
def generate(prompt, max_tokens=128, temperature=0.7):
    """Run the inference script on *prompt* and return its text output.

    The script at ``SCRIPT_PATH`` is executed with the current Python
    interpreter and the model at ``MODEL_PATH``. The call blocks until the
    child process exits.

    Args:
        prompt: Text passed to the script's ``-p`` option.
        max_tokens: Value for the ``-n`` option. It is truncated to an
            integer first, so a float such as ``128.0`` is forwarded as
            ``"128"`` (the UI slider feeding this value uses a step of 1).
        temperature: Value for the ``-temp`` option.

    Returns:
        The child's stripped stdout when it exits with status 0. Otherwise
        the child's stripped stderr; if that is empty, its stripped stdout;
        if that is empty too, a short message naming the exit status, so a
        failure is never reported as an empty string.

    Raises:
        TypeError, ValueError: If ``max_tokens`` cannot be converted to int.
    """
    cmd = [
        sys.executable,
        SCRIPT_PATH,
        "-m", MODEL_PATH,
        "-p", prompt,
        "-n", str(int(max_tokens)),
        "-temp", str(temperature),
    ]
    # An argument list (no shell) keeps the prompt from being shell-interpreted.
    proc = subprocess.run(cmd, capture_output=True, text=True)
    stdout = proc.stdout.strip()
    if proc.returncode == 0:
        return stdout
    # Prefer the error stream, but never hand back an empty string on failure:
    # that would be indistinguishable from an empty successful completion.
    return (
        proc.stderr.strip()
        or stdout
        or f"inference script exited with status {proc.returncode}"
    )
# Input widgets, created in the same order as generate()'s parameters.
_prompt_box = gr.Textbox(lines=2, placeholder="enter your prompt here", label="prompt")
_token_slider = gr.Slider(1, 512, value=128, step=1, label="max tokens")
_temperature_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.01, label="temperature")

# Single output widget that shows the string returned by generate().
_completion_box = gr.Textbox(label="completion")

# Wire the widgets to generate().
iface = gr.Interface(
    fn=generate,
    inputs=[_prompt_box, _token_slider, _temperature_slider],
    outputs=_completion_box,
    title="bitnet.cpp completion demo",
    description="downloads inference script via python so no bash needed",
)

# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()