Staticaliza committed on
Commit
814fd8f
·
verified ·
1 Parent(s): 66fddc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -17
app.py CHANGED
@@ -4,42 +4,46 @@ def gpu():
4
  print("[GPU] | GPU maintained.")
5
 
6
  import os
7
- import subprocess
8
  import sys
 
9
  import gradio as gr
10
 
 
 
 
 
 
 
 
 
 
 
11
  MODEL_PATH = os.environ.get("MODEL_PATH", "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf")
12
 
13
def generate(prompt, max_tokens=128, temperature=0.7):
    """Run run_inference.py (located next to this file) and return its text output.

    Args:
        prompt: the text prompt forwarded to the inference script.
        max_tokens: maximum number of tokens to generate.
        temperature: sampling temperature forwarded to the script.

    Returns:
        The stripped stdout of the subprocess on success, or an
        "error: ..." string built from its stderr on a non-zero exit.
    """
    script_dir = os.path.dirname(__file__)
    cmd = [
        sys.executable,
        os.path.join(script_dir, "run_inference.py"),
        "-m", MODEL_PATH,
        "-p", prompt,
        "-n", str(max_tokens),
        "-temp", str(temperature),
    ]
    # Run with cwd set to the script's directory so relative paths inside
    # run_inference.py resolve the same way regardless of the caller's cwd.
    proc = subprocess.run(cmd, capture_output=True, text=True, cwd=script_dir)
    if proc.returncode != 0:
        return f"error: {proc.stderr.strip()}"
    return proc.stdout.strip()
32
 
33
  iface = gr.Interface(
34
  fn=generate,
35
  inputs=[
36
- gr.Textbox(lines=2, placeholder="Enter your prompt here", label="Prompt"),
37
- gr.Slider(1, 512, value=128, step=1, label="Max Tokens"),
38
- gr.Slider(0.0, 1.0, value=0.7, step=0.01, label="Temperature")
39
  ],
40
- outputs=gr.Textbox(label="Completion"),
41
- title="BitNet.cpp Completion Demo",
42
- description="demo of bitnet.cpp inference for 1-bit llms"
43
  )
44
 
45
  if __name__ == "__main__":
 
4
  print("[GPU] | GPU maintained.")
5
 
6
  import os
 
7
  import sys
8
+ import subprocess
9
  import gradio as gr
10
 
11
def find_run_script():
    """Walk the current working directory and return the first
    run_inference.py found, or None when no copy exists."""
    target = "run_inference.py"
    for dirpath, _dirnames, filenames in os.walk(os.getcwd()):
        if target in filenames:
            return os.path.join(dirpath, target)
    return None
16
+
17
# Resolve the inference script once at import time; failing fast here is
# deliberate so a broken deployment surfaces at startup, not on first request.
SCRIPT_PATH = find_run_script()
if SCRIPT_PATH is None:
    raise FileNotFoundError("run_inference.py not found in repo")

# Model file path; overridable via the MODEL_PATH environment variable.
MODEL_PATH = os.environ.get("MODEL_PATH", "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf")
22
 
23
def generate(prompt, max_tokens=128, temperature=0.7):
    """Invoke the located inference script in a subprocess.

    Args:
        prompt: the text prompt forwarded to the inference script.
        max_tokens: maximum number of tokens to generate.
        temperature: sampling temperature forwarded to the script.

    Returns:
        Stripped stdout on success; stripped stderr when the
        subprocess exits with a non-zero status.
    """
    args = [
        sys.executable, SCRIPT_PATH,
        "-m", MODEL_PATH,
        "-p", prompt,
        "-n", str(max_tokens),
        "-temp", str(temperature),
    ]
    # argv list (shell=False) — no shell-injection risk from the prompt text.
    result = subprocess.run(args, capture_output=True, text=True)
    output = result.stdout if result.returncode == 0 else result.stderr
    return output.strip()
36
 
37
# Build the UI components first, then wire them into the Interface.
_prompt_box = gr.Textbox(lines=2, placeholder="enter your prompt here", label="prompt")
_tokens_slider = gr.Slider(1, 512, value=128, step=1, label="max tokens")
_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.01, label="temperature")

iface = gr.Interface(
    fn=generate,
    inputs=[_prompt_box, _tokens_slider, _temp_slider],
    outputs=gr.Textbox(label="completion"),
    title="bitnet.cpp completion demo",
    description="auto-detects run_inference.py so it won’t 404",
)
48
 
49
  if __name__ == "__main__":