Zero-5 / app.py
Staticaliza's picture
Update app.py
5ad9406 verified
raw
history blame
1.03 kB
import spaces
@spaces.GPU(duration=15)
def gpu():
    """Print a fixed GPU status line to stdout.

    Nothing in this file calls the function directly; it exists as the
    target of the ``spaces.GPU`` decorator.
    NOTE(review): presumably the decorator is what makes the Space request
    GPU time (15 s per call) -- confirm against the `spaces` package docs.
    """
    status_line = "[GPU] | GPU maintained."
    print(status_line)
import os
import subprocess
import gradio as gr
# Model file handed to run_inference.py via ``-m``; the MODEL_PATH
# environment variable overrides the bundled default.
MODEL_PATH = os.environ.get("MODEL_PATH", "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf")


def generate(prompt: str, max_tokens: int = 128, temperature: float = 0.7) -> str:
    """Run ``run_inference.py`` on *prompt* and return its stripped stdout.

    Args:
        prompt: Text passed to the script's ``-p`` option.
        max_tokens: Value for the ``-n`` option. Coerced to ``int`` so that a
            float such as ``128.0`` is rendered as ``"128"`` rather than
            ``"128.0"``.
        temperature: Value for the ``-temp`` option.

    Returns:
        The child's standard output with surrounding whitespace removed.

    Raises:
        RuntimeError: If the child process exits with a non-zero status; the
            message carries the exit code and whatever the child wrote to
            stderr.
    """
    # Local import: the file's top-level import block does not provide sys.
    import sys

    cmd = [
        # Use the interpreter running this app instead of whichever
        # "python" happens to be first on PATH.
        sys.executable or "python", "run_inference.py",
        "-m", MODEL_PATH,
        "-p", prompt,
        "-n", str(int(max_tokens)),
        "-temp", str(temperature),
    ]
    # Argument list with the default shell=False: the prompt is never
    # interpreted by a shell.
    result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    if result.returncode != 0:
        # Surface the failure instead of silently returning an empty string.
        detail = result.stderr.strip() or "no stderr output"
        raise RuntimeError(
            f"run_inference.py failed (exit code {result.returncode}): {detail}"
        )
    return result.stdout.strip()
# Web UI definition. The input components are listed in the same order as
# generate()'s parameters: prompt, max_tokens, temperature.
iface = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Enter your prompt here", lines=2),
        gr.Slider(minimum=1, maximum=512, step=1, value=128, label="Max Tokens"),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.7, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Completion"),
    title="BitNet.cpp Completion Demo",
    description="demo of bitnet.cpp inference for 1-bit llms",
)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()