# Spaces: Paused  (page-capture residue, not part of the program)
import spaces
def gpu():
    """Print a fixed GPU status line to stdout.

    Takes no arguments and returns ``None``.

    NOTE(review): presumably a placeholder tied to the ``spaces`` import;
    no decorator is applied in the visible source -- confirm intent.
    """
    print("[GPU] | GPU maintained.")
import os
import subprocess
import sys

import gradio as gr
# Model file handed to run_inference.py via "-m"; override with the
# MODEL_PATH environment variable.
MODEL_PATH = os.environ.get(
    "MODEL_PATH", "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf"
)


def generate(prompt, max_tokens=128, temperature=0.7):
    """Run ``run_inference.py`` on *prompt* and return its stripped stdout.

    Args:
        prompt: Text passed to the script via ``-p``.
        max_tokens: Value passed via ``-n``. Coerced to ``int`` so a float
            such as ``128.0`` is not rendered as ``"128.0"``.
        temperature: Value passed via ``-temp``.

    Returns:
        The script's standard output with surrounding whitespace removed.

    Raises:
        RuntimeError: If the script exits with a non-zero status. The
            message carries the exit status and the captured stderr, so a
            failed run is not mistaken for an empty completion.
    """
    cmd = [
        # Reuse the interpreter running this app instead of whichever
        # "python" happens to be first on PATH.
        sys.executable or "python",
        "run_inference.py",
        "-m", MODEL_PATH,
        "-p", prompt,
        "-n", str(int(max_tokens)),
        "-temp", str(temperature),
    ]
    # Argument-list form (no shell): the prompt is never shell-parsed.
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(
            f"run_inference.py exited with status {result.returncode}: "
            f"{result.stderr.strip()}"
        )
    return result.stdout.strip()
# Gradio UI: a prompt textbox and two sliders are passed, in order, to
# generate(prompt, max_tokens, temperature); the returned text is shown in
# the "Completion" textbox.
iface = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your prompt here", label="Prompt"),
        gr.Slider(1, 512, value=128, step=1, label="Max Tokens"),
        gr.Slider(0.0, 1.0, value=0.7, step=0.01, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Completion"),
    title="BitNet.cpp Completion Demo",
    description="demo of bitnet.cpp inference for 1-bit llms",
)
if __name__ == "__main__":
    # Start the Gradio app only when run as a script, not on import.
    iface.launch()