Staticaliza committed on
Commit
7438c3c
·
verified ·
1 Parent(s): eb2d7fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -22
app.py CHANGED
@@ -6,42 +6,43 @@ def gpu():
6
  import os
7
  import sys
8
  import subprocess
9
- import urllib.request
10
  import gradio as gr
11
 
12
- # download run_inference.py at startup if it’s missing
13
- SCRIPT_PATH = os.path.join(os.getcwd(), "run_inference.py")
14
- if not os.path.isfile(SCRIPT_PATH):
15
- urllib.request.urlretrieve(
16
- "https://raw.githubusercontent.com/microsoft/BitNet/main/run_inference.py",
17
- SCRIPT_PATH
18
- )
 
19
 
20
- MODEL_PATH = os.environ.get("MODEL_PATH", "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf")
 
21
 
22
  def generate(prompt, max_tokens=128, temperature=0.7):
23
  cmd = [
24
  sys.executable,
25
- SCRIPT_PATH,
26
- "-m", MODEL_PATH,
27
- "-p", prompt,
28
- "-n", str(max_tokens),
29
- "-temp", str(temperature)
30
  ]
31
- proc = subprocess.run(cmd, capture_output=True, text=True)
32
  return proc.stdout.strip() if proc.returncode == 0 else proc.stderr.strip()
33
 
34
  iface = gr.Interface(
35
  fn=generate,
36
  inputs=[
37
- gr.Textbox(lines=2, placeholder="enter your prompt here", label="prompt"),
38
- gr.Slider(1, 512, value=128, step=1, label="max tokens"),
39
- gr.Slider(0.0, 1.0, value=0.7, step=0.01, label="temperature")
40
  ],
41
- outputs=gr.Textbox(label="completion"),
42
- title="bitnet.cpp completion demo",
43
- description="downloads inference script via python so no bash needed"
44
  )
45
 
46
- if __name__ == "__main__":
47
  iface.launch()
 
6
  import os
7
  import sys
8
  import subprocess
9
+ import multiprocessing
10
  import gradio as gr
11
 
12
+ bitnet_dir = os.path.join(os.getcwd(), 'bitnet')
13
+ if not os.path.isdir(bitnet_dir):
14
+ subprocess.run(['git','clone','--depth','1','https://github.com/microsoft/BitNet.git','bitnet'], check=True)
15
+ build_dir = os.path.join(bitnet_dir, 'build')
16
+ if not os.path.isdir(build_dir):
17
+ os.makedirs(build_dir, exist_ok=True)
18
+ subprocess.run(['cmake','..'], cwd=build_dir, check=True)
19
+ subprocess.run(['cmake','--build','.','--config','Release','--parallel', str(multiprocessing.cpu_count())], cwd=build_dir, check=True)
20
 
21
+ script_path = os.path.join(bitnet_dir, 'run_inference.py')
22
+ model_path = os.environ.get('MODEL_PATH', 'models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf')
23
 
24
  def generate(prompt, max_tokens=128, temperature=0.7):
25
  cmd = [
26
  sys.executable,
27
+ script_path,
28
+ '-m', model_path,
29
+ '-p', prompt,
30
+ '-n', str(max_tokens),
31
+ '-temp', str(temperature)
32
  ]
33
+ proc = subprocess.run(cmd, cwd=bitnet_dir, capture_output=True, text=True)
34
  return proc.stdout.strip() if proc.returncode == 0 else proc.stderr.strip()
35
 
36
  iface = gr.Interface(
37
  fn=generate,
38
  inputs=[
39
+ gr.Textbox(lines=2, placeholder='enter your prompt here', label='prompt'),
40
+ gr.Slider(1, 512, value=128, step=1, label='max tokens'),
41
+ gr.Slider(0.0, 1.0, value=0.7, step=0.01, label='temperature')
42
  ],
43
+ outputs=gr.Textbox(label='completion'),
44
+ title='bitnet.cpp completion demo'
 
45
  )
46
 
47
+ if __name__ == '__main__':
48
  iface.launch()