ruslanmv committed on
Commit
9b12079
·
verified ·
1 Parent(s): 053de85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -5,22 +5,20 @@ is_gpu=False
5
 
6
  #download for mecab
7
  os.system('python -m unidic download')
8
-
9
- # we need to compile a CUBLAS version
10
- # Or get it from https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/
11
- os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.11')
12
-
13
  # By using XTTS you agree to CPML license https://coqui.ai/cpml
14
  os.environ["COQUI_TOS_AGREED"] = "1"
15
-
16
  # NOTE: for streaming will require gradio audio streaming fix
17
  # pip install --upgrade -y gradio==0.50.2 git+https://github.com/gorkemgoknar/gradio.git@patch-1
18
-
19
  #Now you’re ready to install 🤗 Transformers with the following command:
20
  if not is_gpu:
21
  #For CPU-support only, Transformers and PyTorch with:
22
  os.system('pip install transformers[torch]')
23
  #pip install 'transformers[tf-cpu]' #Transformers and TensorFlow 2.0:
 
 
 
 
 
24
  import textwrap
25
  from scipy.io.wavfile import write
26
  from pydub import AudioSegment
@@ -155,14 +153,21 @@ from llama_cpp import Llama
155
  # set GPU_LAYERS to 15 if you have a 8GB GPU so both models can fit in
156
  # else 35 full layers + XTTS works fine on T4 16GB
157
  # 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
158
- GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 35))
159
-
 
 
160
  LLM_STOP_WORDS= ["</s>","<|user|>","/s>"]
161
-
162
  LLAMA_VERBOSE=False
163
 
164
  print("Running LLM Zephyr")
165
- llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS-10,max_new_tokens=512, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 
 
 
 
 
 
166
 
167
  def split_sentences(text, max_len):
168
  # Apply custom rules to enforce sentence breaks with double punctuation
 
5
 
6
  #download for mecab
7
  os.system('python -m unidic download')
 
 
 
 
 
8
  # By using XTTS you agree to CPML license https://coqui.ai/cpml
9
  os.environ["COQUI_TOS_AGREED"] = "1"
 
10
  # NOTE: for streaming will require gradio audio streaming fix
11
  # pip install --upgrade -y gradio==0.50.2 git+https://github.com/gorkemgoknar/gradio.git@patch-1
 
12
  #Now you’re ready to install 🤗 Transformers with the following command:
13
  if not is_gpu:
14
  #For CPU-support only, Transformers and PyTorch with:
15
  os.system('pip install transformers[torch]')
16
  #pip install 'transformers[tf-cpu]' #Transformers and TensorFlow 2.0:
17
+ os.system('pip install llama-cpp-python==0.2.11')
18
+ else:
19
+ # we need to compile a CUBLAS version
20
+ # Or get it from https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/
21
+ os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.11')
22
  import textwrap
23
  from scipy.io.wavfile import write
24
  from pydub import AudioSegment
 
153
  # set GPU_LAYERS to 15 if you have a 8GB GPU so both models can fit in
154
  # else 35 full layers + XTTS works fine on T4 16GB
155
  # 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
156
+ if is_gpu:
157
+ GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 35))-10
158
+ else:
159
+ GPU_LAYERS=-1
160
  LLM_STOP_WORDS= ["</s>","<|user|>","/s>"]
 
161
  LLAMA_VERBOSE=False
162
 
163
  print("Running LLM Zephyr")
164
+ llm_zephyr = Llama(model_path=zephyr_model_path,
165
+ n_gpu_layers=GPU_LAYERS,
166
+ max_new_tokens=512,
167
+ context_window=4096,
168
+ n_ctx=4096,
169
+ n_batch=128,
170
+ verbose=LLAMA_VERBOSE)
171
 
172
  def split_sentences(text, max_len):
173
  # Apply custom rules to enforce sentence breaks with double punctuation