Oleg Shulyakov committed
Commit: c0d1d96
Parent(s): 17f9e2b

Add RUN_LOCALLY flag

Files changed:
- app.py (+46 -19)
- docker-compose.yml (+1 -0)
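In short: the app now reads a RUN_LOCALLY environment variable, routes every Hub upload through a small upload_file() wrapper that returns early when the flag is "1", and keeps its working files in a persistent per-model folder instead of a throwaway temporary directory. A minimal sketch of the gating pattern, lifted in simplified form from the helper added in the diff below:

```python
import os

from huggingface_hub import HfApi

RUN_LOCALLY = os.environ.get("RUN_LOCALLY")  # "1" means: do everything except upload


def upload_file(token, path_or_fileobj, path_in_repo, repo_id):
    # When running locally (e.g. via docker-compose), skip the network call and
    # leave the generated files on disk for inspection.
    if RUN_LOCALLY == "1":
        print("Skipping upload...")
        return

    api = HfApi(token=token)
    api.upload_file(
        path_or_fileobj=path_or_fileobj,
        path_in_repo=path_in_repo,
        repo_id=repo_id,
    )
```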
app.py
CHANGED
@@ -16,6 +16,8 @@ SPACE_ID = os.environ.get("SPACE_ID") if os.environ.get("SPACE_ID") else ""
 SPACE_URL = "https://" + SPACE_ID.replace("/", "-") + ".hf.space/" if SPACE_ID else "http://localhost:7860/"
 HF_TOKEN = os.environ.get("HF_TOKEN")

+RUN_LOCALLY = os.environ.get("RUN_LOCALLY")
+
 # Folder
 DOWNLOAD_FOLDER = "./downloads"
 OUTPUT_FOLDER = "./outputs"
@@ -27,6 +29,8 @@ def create_folder(folder_name: str):
     print(f"Creating folder: {folder_name}")
     os.makedirs(folder_name)

+    return folder_name
+
 def validate_token(oauth_token):
     if oauth_token is None or oauth_token.token is None:
         raise gr.Error(ERROR_LOGIN)
@@ -51,6 +55,18 @@ def get_model_creator(model_id: str):
 def get_model_name(model_id: str):
     return model_id.split('/')[-1]

+def upload_file(token, path_or_fileobj, path_in_repo, repo_id):
+    if RUN_LOCALLY == "1":
+        print("Skipping upload...")
+        return
+
+    api = HfApi(token=token)
+    api.upload_file(
+        path_or_fileobj=path_or_fileobj,
+        path_in_repo=path_in_repo,
+        repo_id=repo_id,
+    )
+
 def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
     if not os.path.isfile(model_path):
         raise Exception(f"Model file not found: {model_path}")
@@ -120,12 +136,12 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, token: str, s
     sharded_model_files = [f for f in os.listdir(outdir) if f.startswith(model_file_prefix) and f.endswith(".gguf")]
     if sharded_model_files:
         print(f"Sharded model files: {sharded_model_files}")
-        api = HfApi(token=token)
         for file in sharded_model_files:
             file_path = os.path.join(outdir, file)
             try:
                 print(f"Uploading file: {file_path}")
-                api.upload_file(
+                upload_file(
+                    token=token,
                     path_or_fileobj=file_path,
                     path_in_repo=file,
                     repo_id=repo_id,
@@ -137,15 +153,21 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, token: str, s

     print("Sharded model has been uploaded successfully!")

-def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDirectory):
+def download_base_model(token: str, model_id: str, outdir: str):
     model_name = get_model_name(model_id)
+    print(f"Downloading model {model_name}")
+
+    fp16_model = f"{outdir}/{model_name}-fp16.gguf"
+    if os.path.exists(fp16_model):
+        print("Skipping fp16 convertion...")
+        print(f"Converted model path: {os.path.abspath(fp16_model)}")

     with tempfile.TemporaryDirectory(dir=DOWNLOAD_FOLDER) as tmpdir:
-        # …
-        …
-        local_dir = Path(tmpdir)/model_name # Keep the model name as the dirname so the model name metadata is populated correctly
+        # Keep the model name as the dirname so the model name metadata is populated correctly
+        local_dir = f"{Path(tmpdir)}/{model_name}"
         print(f"Local directory: {os.path.abspath(local_dir)}")

+        # Download model
         api = HfApi(token=token)
         pattern = (
             "*.safetensors"
@@ -172,7 +194,6 @@ def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDir
         raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')

         # Convert HF to GGUF
-        fp16_model = str(Path(outdir)/f"{model_name}-fp16.gguf")
         print(f"Converting to GGUF FP16: {os.path.abspath(fp16_model)}")
         result = subprocess.run(
             [
@@ -192,7 +213,7 @@ def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDir
     return fp16_model

 def quantize_model(
-    outdir: …,
+    outdir: str,
     gguf_name: str,
     fp16: str,
     q_method: str,
@@ -235,7 +256,7 @@ def quantize_model(
     else:
         print("Not using imatrix quantization.")

-    quantized_gguf = …
+    quantized_gguf = f"{outdir}/{gguf_name}"
     quantize_cmd.append(fp16)
     quantize_cmd.append(quantized_gguf)

@@ -254,7 +275,7 @@ def quantize_model(
     print(f"Quantized model path: {os.path.abspath(quantized_gguf)}")
     return quantized_gguf

-def generate_readme(outdir: …):
+def generate_readme(outdir: str, token: str, model_id: str, new_repo_id: str, gguf_name: str):
     creator = get_model_creator(model_id)
     model_name = get_model_name(model_id)
     username = whoami(token)["name"]
@@ -307,7 +328,7 @@ llama-server --hf-repo "{new_repo_id}" --hf-file "{gguf_name}" -c 4096
 ```
 """
     )
-    readme_path = …
+    readme_path = f"{outdir}/README.md"
    card.save(readme_path)
    return readme_path

@@ -331,16 +352,19 @@ def process_model(
     oauth_token: gr.OAuthToken | None,
 ):
     validate_token(oauth_token)
-    …
     token = oauth_token.token
+
     print(f"Current working directory: {os.path.abspath(os.getcwd())}")
     create_folder(DOWNLOAD_FOLDER)
     create_folder(OUTPUT_FOLDER)

+    model_name = get_model_name(model_id)
+
     try:
-        with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
+        with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outDirObj:
+            outdir = create_folder(os.path.join(OUTPUT_FOLDER, model_name)) if RUN_LOCALLY == "1" else Path(outDirObj)
             fp16 = download_base_model(token, model_id, outdir)
-            imatrix_file = …
+            imatrix_file = f"{outdir}/{model_name}-imatrix.dat"
             quantized_gguf = quantize_model(outdir, gguf_name, fp16, q_method, use_imatrix, imatrix_q_method, imatrix_file, quant_embedding, embedding_tensor_method, leave_output, quant_output, output_tensor_method)

             # Create empty repo
@@ -352,11 +376,12 @@ def process_model(
             # Upload model
             if split_model:
                 print(f"Splitting quantized model: {os.path.abspath(quantized_gguf)}")
-                split_upload_model(…)
+                split_upload_model(quantized_gguf, outdir, new_repo_id, token, split_max_tensors, split_max_size)
             else:
                 try:
                     print(f"Uploading quantized model: {os.path.abspath(quantized_gguf)}")
-                    api.upload_file(
+                    upload_file(
+                        token=token,
                         path_or_fileobj=quantized_gguf,
                         path_in_repo=gguf_name,
                         repo_id=new_repo_id,
@@ -367,7 +392,8 @@ def process_model(
             if os.path.isfile(imatrix_file):
                 try:
                     print(f"Uploading imatrix.dat: {os.path.abspath(output_path)}")
-                    api.upload_file(
+                    upload_file(
+                        token=token,
                         path_or_fileobj=imatrix_file,
                         path_in_repo="imatrix.dat",
                         repo_id=new_repo_id,
@@ -378,7 +404,8 @@ def process_model(
             # Upload README.md
             readme_path = generate_readme(outdir, token, model_id, new_repo_id, gguf_name)

-            api.upload_file(
+            upload_file(
+                token=token,
                 path_or_fileobj=readme_path,
                 path_in_repo="README.md",
                 repo_id=new_repo_id,
@@ -629,8 +656,8 @@ with gr.Blocks(css=css) as demo:
             submit_btn.render()

         with gr.Column() as outputs:
-            output_label.render()
             output_image.render()
+            output_label.render()

     #####
     # Button Click handlers
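A side effect of the new outdir handling in process_model(): with RUN_LOCALLY=1 the converted fp16 GGUF, the quantized GGUF, the imatrix and the generated README all land in ./outputs/<model_name> and survive the run, while the default path still uses a TemporaryDirectory that is cleaned up automatically. A small standalone sketch of that selection logic (model_name is a placeholder here; the app derives it with get_model_name(), and its create_folder() helper uses plain os.makedirs()):

```python
import os
import tempfile
from pathlib import Path

OUTPUT_FOLDER = "./outputs"
RUN_LOCALLY = os.environ.get("RUN_LOCALLY")

os.makedirs(OUTPUT_FOLDER, exist_ok=True)
model_name = "example-model"  # placeholder; derived from model_id in the app

with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outDirObj:
    if RUN_LOCALLY == "1":
        # Persistent folder: artifacts remain under ./outputs/<model_name> after the run
        outdir = os.path.join(OUTPUT_FOLDER, model_name)
        os.makedirs(outdir, exist_ok=True)
    else:
        # Temporary folder: removed as soon as the with-block exits
        outdir = Path(outDirObj)

    print(f"GGUF artifacts will be written to: {outdir}")
```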
docker-compose.yml
CHANGED
@@ -11,5 +11,6 @@ services:
       - .:/home/user/app
     environment:
       - RUN_CUDA=0
+      - RUN_LOCALLY=0
       - HF_TOKEN=${HF_TOKEN}
       - HF_HUB_CACHE=/home/user/app/downloads
|