Oleg Shulyakov committed
Commit e395b9b · 1 Parent(s): 55ecc95

Move quantization to separate method

Files changed (1)
  1. app.py +43 -39
app.py CHANGED
@@ -189,7 +189,45 @@ def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDirectory):
 
     return fp16_model
 
-def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, repo_name, gguf_name, oauth_token: gr.OAuthToken | None):
+def quantize_model(outdir: tempfile.TemporaryDirectory, gguf_name: str, fp16, q_method: str, use_imatrix: bool, imatrix_q_method: str, imatrix_path: str, train_data_file):
+    if use_imatrix:
+        if train_data_file:
+            train_data_path = train_data_file.name
+        else:
+            train_data_path = "train_data.txt"  # fallback calibration dataset
+
+        print(f"Training data file path: {train_data_path}")
+
+        if not os.path.isfile(train_data_path):
+            raise Exception(f"Training data file not found: {train_data_path}")
+
+        generate_importance_matrix(fp16, train_data_path, imatrix_path)
+    else:
+        print("Not using imatrix quantization.")
+
+    # Quantize the model
+    quantized_gguf = str(Path(outdir)/gguf_name)
+    if use_imatrix:
+        quantize_cmd = [
+            "llama-quantize",
+            "--imatrix", imatrix_path, fp16, quantized_gguf, imatrix_q_method
+        ]
+    else:
+        quantize_cmd = [
+            "llama-quantize",
+            fp16, quantized_gguf, q_method
+        ]
+
+    result = subprocess.run(quantize_cmd, shell=False, capture_output=True)
+    if result.returncode != 0:
+        stderr_str = result.stderr.decode("utf-8")
+        raise Exception(f"Error quantizing: {stderr_str}")
+
+    print(f"Quantized successfully with {imatrix_q_method if use_imatrix else q_method} option!")
+    print(f"Quantized model path: {os.path.abspath(quantized_gguf)}")
+    return quantized_gguf
+
+def process_model(model_id: str, q_method: str, use_imatrix: bool, imatrix_q_method: str, private_repo: bool, train_data_file, split_model: bool, split_max_tensors, split_max_size: str | None, repo_name: str, gguf_name: str, oauth_token: gr.OAuthToken | None):
     # validate the oauth token
     if is_valid_token(oauth_token) is False:
         raise gr.Error("You must be logged in to use GGUF-my-repo")
@@ -201,42 +239,8 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, repo_name, gguf_name, oauth_token: gr.OAuthToken | None):
     try:
         with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
             fp16 = download_base_model(oauth_token.token, model_id, outdir)
-
             imatrix_path = Path(outdir)/"imatrix.dat"
-
-            if use_imatrix:
-                if train_data_file:
-                    train_data_path = train_data_file.name
-                else:
-                    train_data_path = "train_data.txt"  # fallback calibration dataset
-
-                print(f"Training data file path: {train_data_path}")
-
-                if not os.path.isfile(train_data_path):
-                    raise Exception(f"Training data file not found: {train_data_path}")
-
-                generate_importance_matrix(fp16, train_data_path, imatrix_path)
-            else:
-                print("Not using imatrix quantization.")
-
-            # Quantize the model
-            quantized_gguf_path = str(Path(outdir)/gguf_name)
-            if use_imatrix:
-                quantise_ggml = [
-                    "llama-quantize",
-                    "--imatrix", imatrix_path, fp16, quantized_gguf_path, imatrix_q_method
-                ]
-            else:
-                quantise_ggml = [
-                    "llama-quantize",
-                    fp16, quantized_gguf_path, q_method
-                ]
-            result = subprocess.run(quantise_ggml, shell=False, capture_output=True)
-            if result.returncode != 0:
-                stderr_str = result.stderr.decode("utf-8")
-                raise Exception(f"Error quantizing: {stderr_str}")
-            print(f"Quantized successfully with {imatrix_q_method if use_imatrix else q_method} option!")
-            print(f"Quantized model path: {os.path.abspath(quantized_gguf_path)}")
+            quantized_gguf = quantize_model(outdir, gguf_name, fp16, q_method, use_imatrix, imatrix_q_method, imatrix_path, train_data_file)
 
             # Create empty repo
             api = HfApi(token=oauth_token.token)
@@ -312,12 +316,12 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, repo_name, gguf_name, oauth_token: gr.OAuthToken | None):
             card.save(readme_path)
 
             if split_model:
-                split_upload_model(str(quantized_gguf_path), outdir, new_repo_id, oauth_token, split_max_tensors, split_max_size)
+                split_upload_model(str(quantized_gguf), outdir, new_repo_id, oauth_token, split_max_tensors, split_max_size)
             else:
                 try:
-                    print(f"Uploading quantized model: {quantized_gguf_path}")
+                    print(f"Uploading quantized model: {quantized_gguf}")
                     api.upload_file(
-                        path_or_fileobj=quantized_gguf_path,
+                        path_or_fileobj=quantized_gguf,
                         path_in_repo=gguf_name,
                         repo_id=new_repo_id,
                     )
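
Note on the refactor: quantization now lives in a standalone quantize_model helper that returns the path of the quantized GGUF, so process_model no longer shares local state with the quantization step. A minimal sketch of calling the helper directly, outside the Gradio flow, assuming llama.cpp's llama-quantize binary is on the PATH; the file names and the Q4_K_M quant type below are illustrative placeholders, not taken from the commit:

import tempfile
from pathlib import Path

# Sketch only: exercises quantize_model outside the Gradio app.
# "model.fp16.gguf" and "model.Q4_K_M.gguf" are hypothetical names.
with tempfile.TemporaryDirectory() as outdir:
    quantized_gguf = quantize_model(
        outdir=outdir,
        gguf_name="model.Q4_K_M.gguf",        # hypothetical output file name
        fp16="model.fp16.gguf",               # hypothetical fp16 GGUF input
        q_method="Q4_K_M",                    # llama.cpp quantization type
        use_imatrix=False,                    # plain quantization, no imatrix
        imatrix_q_method="IQ4_NL",            # ignored when use_imatrix=False
        imatrix_path=Path(outdir)/"imatrix.dat",
        train_data_file=None,                 # only read on the imatrix path
    )
    print(f"Wrote {quantized_gguf}")

With use_imatrix=True the helper would first build imatrix.dat from the calibration file via generate_importance_matrix and pass it to llama-quantize with --imatrix, exactly as the extracted code above does.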