Oleg Shulyakov committed
Commit · 55ecc95 · 1 Parent(s): c96815e

Replace model name with UI values
app.py CHANGED
@@ -220,9 +220,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
             print("Not using imatrix quantization.")
 
         # Quantize the model
-
-        quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
-        quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
+        quantized_gguf_path = str(Path(outdir)/gguf_name)
         if use_imatrix:
             quantise_ggml = [
                 "llama-quantize",
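This hunk is the heart of the commit: process_model no longer rebuilds the output filename from model_name; it joins the UI-supplied gguf_name onto the working directory. A minimal sketch of the added line, with placeholder values standing in for the Space's real temp directory and textbox contents:

```python
from pathlib import Path

# Stand-in values; in the Space these come from a temp directory and the UI textbox.
outdir = "/tmp/outdir"
gguf_name = "mymodel-7b-Q4_K_M.gguf"

# Same construction as the `+` line in the hunk above.
quantized_gguf_path = str(Path(outdir) / gguf_name)
print(quantized_gguf_path)  # /tmp/outdir/mymodel-7b-Q4_K_M.gguf
```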
@@ -241,9 +239,6 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         print(f"Quantized model path: {os.path.abspath(quantized_gguf_path)}")
 
         # Create empty repo
-        username = whoami(oauth_token.token)["name"]
-
-        repo_name = f"{username}/{model_name}-GGUF"
         api = HfApi(token=oauth_token.token)
         new_repo_url = api.create_repo(repo_id=repo_name, exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
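With the username/repo-name derivation removed here, repo_name now arrives from the UI, and the repo-creation call is unchanged. As an isolated sketch of that call (token and repo id below are placeholders, not values from this commit): create_repo with exist_ok=True is idempotent and returns a RepoUrl whose .repo_id is what the rest of the function uses.

```python
from huggingface_hub import HfApi

# Placeholder token and repo id; in the Space both come from the OAuth login
# and the repo-name textbox.
api = HfApi(token="hf_xxx")
new_repo_url = api.create_repo(
    repo_id="username/MyModel-7B-GGUF",
    exist_ok=True,   # reuse the repo if it already exists
    private=False,
)
print(new_repo_url.repo_id)  # "username/MyModel-7B-GGUF"
```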
@@ -283,12 +278,12 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 
 ### CLI:
 ```bash
-llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
+llama-cli --hf-repo {new_repo_id} --hf-file {gguf_name} -p "The meaning to life and the universe is"
 ```
 
 ### Server:
 ```bash
-llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
+llama-server --hf-repo {new_repo_id} --hf-file {gguf_name} -c 2048
 ```
 
 Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
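These lines live inside the model-card f-string, so {new_repo_id} and {gguf_name} are interpolated before the README is written. A quick sketch of what one rendered command looks like, using hypothetical values for the two variables:

```python
# Hypothetical values standing in for the interpolated variables.
new_repo_id = "username/MyModel-7B-GGUF"
gguf_name = "mymodel-7b-Q4_K_M.gguf"

print(f'llama-cli --hf-repo {new_repo_id} --hf-file {gguf_name} -p "The meaning to life and the universe is"')
# llama-cli --hf-repo username/MyModel-7B-GGUF --hf-file mymodel-7b-Q4_K_M.gguf -p "The meaning to life and the universe is"
```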
@@ -305,11 +300,11 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 
 Step 3: Run inference through the main binary.
 ```
-./llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
+./llama-cli --hf-repo {new_repo_id} --hf-file {gguf_name} -p "The meaning to life and the universe is"
 ```
 or
 ```
-./llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
+./llama-server --hf-repo {new_repo_id} --hf-file {gguf_name} -c 2048
 ```
 """
 )
@@ -323,7 +318,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         print(f"Uploading quantized model: {quantized_gguf_path}")
         api.upload_file(
             path_or_fileobj=quantized_gguf_path,
-            path_in_repo=quantized_gguf_name,
+            path_in_repo=gguf_name,
             repo_id=new_repo_id,
         )
     except Exception as e:
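The upload now stores the file under the UI-supplied name as well, so the name shown in the textbox is exactly the name that lands in the repo. In isolation, with placeholder paths and ids, the call looks like this:

```python
from huggingface_hub import HfApi

# Placeholder paths and ids; the Space fills these in from process_model's state.
api = HfApi(token="hf_xxx")
api.upload_file(
    path_or_fileobj="/tmp/outdir/mymodel-7b-Q4_K_M.gguf",  # local quantized file
    path_in_repo="mymodel-7b-Q4_K_M.gguf",                 # uploaded under the UI-supplied name
    repo_id="username/MyModel-7B-GGUF",
)
```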
@@ -455,18 +450,23 @@ gguf_name = gr.Textbox(
 def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
     if oauth_token is None or oauth_token.token is None:
         return ""
-
+
     if model_id is None:
         return ""
+
+    username = whoami(oauth_token.token)["name"]
     model_name = model_id.split('/')[-1]
     return f"{username}/{model_name}-GGUF"
 
 def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
     if model_id is None:
         return ""
+
     model_name = model_id.split('/')[-1]
+
     if use_imatrix:
         return f"{model_name.lower()}-{imatrix_q_method.upper()}-imat.gguf"
+
     return f"{model_name.lower()}-{q_method.upper()}.gguf"
 
 #####
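These two helpers are pure functions of the UI state, which is what lets the commit move naming out of process_model: the textboxes hold the derived defaults, and the user can override them before quantizing. A minimal, self-contained sketch of how such helpers might be wired to Gradio events — the component labels, dropdown choices, and .change() hookup below are assumptions for illustration, not code from this commit:

```python
import gradio as gr

def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
    if model_id is None:
        return ""
    model_name = model_id.split('/')[-1]
    if use_imatrix:
        return f"{model_name.lower()}-{imatrix_q_method.upper()}-imat.gguf"
    return f"{model_name.lower()}-{q_method.upper()}.gguf"

with gr.Blocks() as demo:
    model_id = gr.Textbox(label="Hub model ID")
    use_imatrix = gr.Checkbox(label="Use imatrix quantization", value=False)
    q_method = gr.Dropdown(["Q4_K_M", "Q5_K_M", "Q8_0"], value="Q4_K_M",
                           label="Quantization method")
    imatrix_q_method = gr.Dropdown(["IQ4_NL", "IQ4_XS"], value="IQ4_NL",
                                   label="Imatrix quantization method")
    gguf_name = gr.Textbox(label="Output file name")

    # Keep the filename textbox in sync with the selections above.
    for comp in (model_id, use_imatrix, q_method, imatrix_q_method):
        comp.change(
            update_output_filename,
            inputs=[model_id, use_imatrix, q_method, imatrix_q_method],
            outputs=gguf_name,
        )

demo.launch()
```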