# Spaces: Paused
import spaces | |
def gpu():
    """Print a fixed GPU status line to standard output.

    Takes no arguments and returns ``None``.
    """
    # NOTE(review): the file imports `spaces`, but no decorator is visible on
    # this function in the source as captured -- confirm whether one was
    # intended (e.g. a GPU-allocation decorator from that package).
    status_line = "[GPU] | GPU maintained."
    print(status_line)
import os | |
import sys | |
import subprocess | |
import urllib.request | |
import gradio as gr | |
# Download run_inference.py at startup if it is missing, so the app needs no
# shell-based setup step.
SCRIPT_PATH = os.path.join(os.getcwd(), "run_inference.py")
# NOTE(review): this fetches from the moving `main` branch and the result is
# later executed as a subprocess -- consider pinning to a commit hash.
_SCRIPT_URL = "https://raw.githubusercontent.com/microsoft/BitNet/main/run_inference.py"

if not os.path.isfile(SCRIPT_PATH):
    # Download to a sibling temporary name and rename into place only after
    # the transfer has completed. Writing straight to SCRIPT_PATH would leave
    # a truncated file behind on an interrupted download, and the isfile()
    # check above would then skip the download on every later start.
    _partial_path = SCRIPT_PATH + ".part"
    try:
        urllib.request.urlretrieve(_SCRIPT_URL, _partial_path)
        os.replace(_partial_path, SCRIPT_PATH)
    finally:
        # After a successful rename the partial file no longer exists; this
        # only removes leftovers from a failed or interrupted transfer.
        if os.path.exists(_partial_path):
            os.remove(_partial_path)

# Model file handed to the inference script; override with the MODEL_PATH
# environment variable.
MODEL_PATH = os.environ.get("MODEL_PATH", "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf")
def generate(prompt, max_tokens=128, temperature=0.7):
    """Run the inference script on *prompt* and return its text output.

    The script at ``SCRIPT_PATH`` is executed in a child process with the
    current Python interpreter, using the model file at ``MODEL_PATH``.

    Args:
        prompt: Text passed to the script's ``-p`` option.
        max_tokens: Value for the ``-n`` option. Coerced to ``int`` so that a
            float such as ``128.0`` is passed as ``"128"``.
        temperature: Value for the ``-temp`` option.

    Returns:
        The stripped stdout of the child process when it exits with status 0.
        On a non-zero exit: the stripped stderr, or the stripped stdout if
        stderr is empty, or a short message naming the exit status if both
        are empty.
    """
    cmd = [
        sys.executable,
        SCRIPT_PATH,
        "-m", MODEL_PATH,
        "-p", prompt,
        # UI sliders may deliver floats; "128.0" is not a valid integer
        # argument, so normalise before stringifying.
        "-n", str(int(max_tokens)),
        "-temp", str(temperature),
    ]
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode == 0:
        return proc.stdout.strip()

    # Never report a failure as an empty completion: prefer stderr, then
    # whatever the script printed, then at least the exit status.
    return (
        proc.stderr.strip()
        or proc.stdout.strip()
        or f"inference script exited with status {proc.returncode}"
    )
# Web UI: a prompt box and two sliders feed `generate`, in the same order as
# its parameters (prompt, max_tokens, temperature); the result is shown in a
# single text box.
iface = gr.Interface(
    title="bitnet.cpp completion demo",
    description="downloads inference script via python so no bash needed",
    fn=generate,
    inputs=[
        gr.Textbox(label="prompt", placeholder="enter your prompt here", lines=2),
        gr.Slider(minimum=1, maximum=512, step=1, value=128, label="max tokens"),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.7, label="temperature"),
    ],
    outputs=gr.Textbox(label="completion"),
)

# Launch the interface only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()