Staticaliza committed on
Commit
814fd8f
·
verified ·
1 Parent(s): 66fddc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -17
app.py CHANGED
@@ -4,42 +4,46 @@ def gpu():
4
  print("[GPU] | GPU maintained.")
5
 
6
  import os
7
- import subprocess
8
  import sys
 
9
  import gradio as gr
10
 
 
 
 
 
 
 
 
 
 
 
11
  MODEL_PATH = os.environ.get("MODEL_PATH", "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf")
12
 
13
def generate(prompt, max_tokens=128, temperature=0.7):
    """Run run_inference.py (located next to this file) and return its text output.

    Args:
        prompt: the text prompt forwarded to the inference script.
        max_tokens: maximum number of tokens to generate.
        temperature: sampling temperature forwarded to the script.

    Returns:
        The stripped stdout of the subprocess on success, or an
        "error: ..." string built from its stderr on a non-zero exit.
    """
    script_dir = os.path.dirname(__file__)
    cmd = [
        sys.executable,
        os.path.join(script_dir, "run_inference.py"),
        "-m", MODEL_PATH,
        "-p", prompt,
        "-n", str(max_tokens),
        "-temp", str(temperature),
    ]
    # Run with cwd set to the script's directory so relative paths inside
    # run_inference.py resolve the same way regardless of the caller's cwd.
    proc = subprocess.run(cmd, capture_output=True, text=True, cwd=script_dir)
    if proc.returncode != 0:
        return f"error: {proc.stderr.strip()}"
    return proc.stdout.strip()
32
 
33
  iface = gr.Interface(
34
  fn=generate,
35
  inputs=[
36
- gr.Textbox(lines=2, placeholder="Enter your prompt here", label="Prompt"),
37
- gr.Slider(1, 512, value=128, step=1, label="Max Tokens"),
38
- gr.Slider(0.0, 1.0, value=0.7, step=0.01, label="Temperature")
39
  ],
40
- outputs=gr.Textbox(label="Completion"),
41
- title="BitNet.cpp Completion Demo",
42
- description="demo of bitnet.cpp inference for 1-bit llms"
43
  )
44
 
45
  if __name__ == "__main__":
 
4
  print("[GPU] | GPU maintained.")
5
 
6
  import os
 
7
  import sys
8
+ import subprocess
9
  import gradio as gr
10
 
11
def find_run_script():
    """Walk the current working directory and return the first
    run_inference.py found, or None when no copy exists."""
    target = "run_inference.py"
    for dirpath, _dirnames, filenames in os.walk(os.getcwd()):
        if target in filenames:
            return os.path.join(dirpath, target)
    return None
16
+
17
# Resolve the inference script once at import time; failing fast here is
# deliberate so a broken deployment surfaces at startup, not on first request.
SCRIPT_PATH = find_run_script()
if SCRIPT_PATH is None:
    raise FileNotFoundError("run_inference.py not found in repo")

# Model file path; overridable via the MODEL_PATH environment variable.
MODEL_PATH = os.environ.get("MODEL_PATH", "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf")
22
 
23
def generate(prompt, max_tokens=128, temperature=0.7):
    """Invoke the located inference script in a subprocess.

    Args:
        prompt: the text prompt forwarded to the inference script.
        max_tokens: maximum number of tokens to generate.
        temperature: sampling temperature forwarded to the script.

    Returns:
        Stripped stdout on success; stripped stderr when the
        subprocess exits with a non-zero status.
    """
    args = [
        sys.executable, SCRIPT_PATH,
        "-m", MODEL_PATH,
        "-p", prompt,
        "-n", str(max_tokens),
        "-temp", str(temperature),
    ]
    # argv list (shell=False) — no shell-injection risk from the prompt text.
    result = subprocess.run(args, capture_output=True, text=True)
    output = result.stdout if result.returncode == 0 else result.stderr
    return output.strip()
36
 
37
# Build the UI components first, then wire them into the Interface.
_prompt_box = gr.Textbox(lines=2, placeholder="enter your prompt here", label="prompt")
_tokens_slider = gr.Slider(1, 512, value=128, step=1, label="max tokens")
_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.01, label="temperature")

iface = gr.Interface(
    fn=generate,
    inputs=[_prompt_box, _tokens_slider, _temp_slider],
    outputs=gr.Textbox(label="completion"),
    title="bitnet.cpp completion demo",
    description="auto-detects run_inference.py so it won’t 404",
)
48
 
49
  if __name__ == "__main__":