Oleg Shulyakov committed on
Commit
55ecc95
·
1 Parent(s): c96815e

Replace model name with UI values

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -220,9 +220,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
220
  print("Not using imatrix quantization.")
221
 
222
  # Quantize the model
223
- model_name = get_model_name(model_id)
224
- quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
225
- quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
226
  if use_imatrix:
227
  quantise_ggml = [
228
  "llama-quantize",
@@ -241,9 +239,6 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
241
  print(f"Quantized model path: {os.path.abspath(quantized_gguf_path)}")
242
 
243
  # Create empty repo
244
- username = whoami(oauth_token.token)["name"]
245
-
246
- repo_name = f"{username}/{model_name}-GGUF"
247
  api = HfApi(token=oauth_token.token)
248
  new_repo_url = api.create_repo(repo_id=repo_name, exist_ok=True, private=private_repo)
249
  new_repo_id = new_repo_url.repo_id
@@ -283,12 +278,12 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
283
 
284
  ### CLI:
285
  ```bash
286
- llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
287
  ```
288
 
289
  ### Server:
290
  ```bash
291
- llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
292
  ```
293
 
294
  Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
@@ -305,11 +300,11 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
305
 
306
  Step 3: Run inference through the main binary.
307
  ```
308
- ./llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
309
  ```
310
  or
311
  ```
312
- ./llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
313
  ```
314
  """
315
  )
@@ -323,7 +318,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
323
  print(f"Uploading quantized model: {quantized_gguf_path}")
324
  api.upload_file(
325
  path_or_fileobj=quantized_gguf_path,
326
- path_in_repo=quantized_gguf_name,
327
  repo_id=new_repo_id,
328
  )
329
  except Exception as e:
@@ -455,18 +450,23 @@ gguf_name = gr.Textbox(
455
  def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
456
  if oauth_token is None or oauth_token.token is None:
457
  return ""
458
- username = whoami(oauth_token.token)["name"]
459
  if model_id is None:
460
  return ""
 
 
461
  model_name = model_id.split('/')[-1]
462
  return f"{username}/{model_name}-GGUF"
463
 
464
  def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
465
  if model_id is None:
466
  return ""
 
467
  model_name = model_id.split('/')[-1]
 
468
  if use_imatrix:
469
  return f"{model_name.lower()}-{imatrix_q_method.upper()}-imat.gguf"
 
470
  return f"{model_name.lower()}-{q_method.upper()}.gguf"
471
 
472
  #####
 
220
  print("Not using imatrix quantization.")
221
 
222
  # Quantize the model
223
+ quantized_gguf_path = str(Path(outdir)/gguf_name)
 
 
224
  if use_imatrix:
225
  quantise_ggml = [
226
  "llama-quantize",
 
239
  print(f"Quantized model path: {os.path.abspath(quantized_gguf_path)}")
240
 
241
  # Create empty repo
 
 
 
242
  api = HfApi(token=oauth_token.token)
243
  new_repo_url = api.create_repo(repo_id=repo_name, exist_ok=True, private=private_repo)
244
  new_repo_id = new_repo_url.repo_id
 
278
 
279
  ### CLI:
280
  ```bash
281
+ llama-cli --hf-repo {new_repo_id} --hf-file {gguf_name} -p "The meaning to life and the universe is"
282
  ```
283
 
284
  ### Server:
285
  ```bash
286
+ llama-server --hf-repo {new_repo_id} --hf-file {gguf_name} -c 2048
287
  ```
288
 
289
  Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
 
300
 
301
  Step 3: Run inference through the main binary.
302
  ```
303
+ ./llama-cli --hf-repo {new_repo_id} --hf-file {gguf_name} -p "The meaning to life and the universe is"
304
  ```
305
  or
306
  ```
307
+ ./llama-server --hf-repo {new_repo_id} --hf-file {gguf_name} -c 2048
308
  ```
309
  """
310
  )
 
318
  print(f"Uploading quantized model: {quantized_gguf_path}")
319
  api.upload_file(
320
  path_or_fileobj=quantized_gguf_path,
321
+ path_in_repo=gguf_name,
322
  repo_id=new_repo_id,
323
  )
324
  except Exception as e:
 
450
  def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
451
  if oauth_token is None or oauth_token.token is None:
452
  return ""
453
+
454
  if model_id is None:
455
  return ""
456
+
457
+ username = whoami(oauth_token.token)["name"]
458
  model_name = model_id.split('/')[-1]
459
  return f"{username}/{model_name}-GGUF"
460
 
461
  def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
462
  if model_id is None:
463
  return ""
464
+
465
  model_name = model_id.split('/')[-1]
466
+
467
  if use_imatrix:
468
  return f"{model_name.lower()}-{imatrix_q_method.upper()}-imat.gguf"
469
+
470
  return f"{model_name.lower()}-{q_method.upper()}.gguf"
471
 
472
  #####