Oleg Shulyakov committed · Commit c96815e · 1 Parent(s): 67040c8

Move base model creation into separate method
app.py
CHANGED
@@ -15,8 +15,25 @@ from apscheduler.schedulers.background import BackgroundScheduler
 SPACE_ID = os.environ.get("SPACE_ID")
 HF_TOKEN = os.environ.get("HF_TOKEN")

-#
-
+# Folder
+DOWNLOAD_FOLDER = "./downloads"
+OUTPUT_FOLDER = "./outputs"
+
+def create_folder(folder_name: str):
+    if not os.path.exists(folder_name):
+        print(f"Creating folder: {folder_name}")
+        os.makedirs(folder_name)
+
+def is_valid_token(oauth_token):
+    if oauth_token is None or oauth_token.token is None:
+        return False
+
+    try:
+        whoami(oauth_token.token)
+    except Exception as e:
+        return False
+
+    return True

 # escape HTML for logging
 def escape(s: str) -> str:
@@ -27,6 +44,9 @@ def escape(s: str) -> str:
     s = s.replace("\n", "<br/>")
     return s

+def get_model_name(model_id: str):
+    return model_id.split('/')[-1]
+
 def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
     if not os.path.isfile(model_path):
         raise Exception(f"Model file not found: {model_path}")
@@ -59,8 +79,8 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     print(f"Model path: {model_path}")
     print(f"Output dir: {outdir}")

-    if oauth_token is None or oauth_token.token is None:
-        raise ValueError("You have to be logged in.")
+    if is_valid_token(oauth_token) is False:
+        raise gr.Error("You have to be logged in.")

     split_cmd = [
         "llama-gguf-split",
@@ -101,8 +121,8 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     api = HfApi(token=oauth_token.token)
     for file in sharded_model_files:
         file_path = os.path.join(outdir, file)
-        print(f"Uploading file: {file_path}")
         try:
+            print(f"Uploading file: {file_path}")
             api.upload_file(
                 path_or_fileobj=file_path,
                 path_in_repo=file,
@@ -115,23 +135,16 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:

     print("Sharded model has been uploaded successfully!")

-def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, ...):
-    if oauth_token is None or oauth_token.token is None:
-        raise gr.Error("You must be logged in to use GGUF-my-repo")
-
-    # validate the oauth token
-    try:
-        whoami(oauth_token.token)
-    except Exception as e:
-        raise gr.Error("You must be logged in to use GGUF-my-repo")
-
-    model_name = model_id.split('/')[-1]
+def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDirectory):
+    model_name = get_model_name(model_id)

-    ...
+    with tempfile.TemporaryDirectory(dir=DOWNLOAD_FOLDER) as tmpdir:
+        # Download model
+        print(f"Downloading model {model_name}")
+        local_dir = Path(tmpdir)/model_name  # Keep the model name as the dirname so the model name metadata is populated correctly
+        print(f"Local directory: {os.path.abspath(local_dir)}")

+        api = HfApi(token=token)
         pattern = (
             "*.safetensors"
             if any(
@@ -144,40 +157,50 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
             else "*.bin"
         )

+        dl_pattern = ["*.md", "*.json", "*.model"]
         dl_pattern += [pattern]

-        ...
+        api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+        print("Model downloaded successfully!")
+
+        print(f"Model directory contents: {os.listdir(local_dir)}")
+        config_dir = local_dir/"config.json"
+        adapter_config_dir = local_dir/"adapter_config.json"
+        if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
+            raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')
+
+        # Convert HF to GGUF
+        fp16_model = str(Path(outdir)/f"{model_name}_fp16.gguf")
+        print(f"Converting to GGUF FP16: {os.path.abspath(fp16_model)}")
+        result = subprocess.run(
+            [
+                "python3", "/app/convert_hf_to_gguf.py", local_dir, "--outtype", "f16", "--outfile", fp16_model
+            ],
+            shell=False,
+            capture_output=True
+        )
+        print(f"Model directory contents: {result}")
+        if result.returncode != 0:
+            stderr_str = result.stderr.decode("utf-8")
+            raise Exception(f"Error converting to fp16: {stderr_str}")
+
+        print("Model converted to fp16 successfully!")
+        print(f"Converted model path: {os.path.abspath(fp16_model)}")
+
+        return fp16_model
+
+def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, repo_name, gguf_name, oauth_token: gr.OAuthToken | None):
+    # validate the oauth token
+    if is_valid_token(oauth_token) is False:
+        raise gr.Error("You must be logged in to use GGUF-my-repo")
+
+    print(f"Current working directory: {os.path.abspath(os.getcwd())}")
+    create_folder(DOWNLOAD_FOLDER)
+    create_folder(OUTPUT_FOLDER)
+
+    try:
+        with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
+            fp16 = download_base_model(oauth_token.token, model_id, outdir)

             imatrix_path = Path(outdir)/"imatrix.dat"

@@ -197,6 +220,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 print("Not using imatrix quantization.")

             # Quantize the model
+            model_name = get_model_name(model_id)
             quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
             quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
             if use_imatrix:
@@ -214,12 +238,13 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 stderr_str = result.stderr.decode("utf-8")
                 raise Exception(f"Error quantizing: {stderr_str}")
             print(f"Quantized successfully with {imatrix_q_method if use_imatrix else q_method} option!")
-            print(f"Quantized model path: {quantized_gguf_path}")
+            print(f"Quantized model path: {os.path.abspath(quantized_gguf_path)}")

             # Create empty repo
             username = whoami(oauth_token.token)["name"]

             repo_name = f"{username}/{model_name}-GGUF"
+            api = HfApi(token=oauth_token.token)
             new_repo_url = api.create_repo(repo_id=repo_name, exist_ok=True, private=private_repo)
             new_repo_id = new_repo_url.repo_id
             print("Repo created successfully!", new_repo_url)
@@ -329,6 +354,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 "llama.png",
             )
     except Exception as e:
+        print((f"Error processing model: {e}"))
         return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>', "error.png")

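For orientation (not part of the commit), a minimal sketch of how the helpers introduced here compose outside the Gradio UI. The "from app import ..." line, the SimpleNamespace token stand-in, and the example model_id are assumptions for illustration only; quantization, splitting, and upload would continue exactly as process_model does above.

# Hypothetical driver script; assumes app.py's helpers can be imported
# without launching the Space UI.
import tempfile
from types import SimpleNamespace

from app import (DOWNLOAD_FOLDER, OUTPUT_FOLDER, create_folder,
                 download_base_model, get_model_name, is_valid_token)

oauth_token = SimpleNamespace(token="hf_xxx")   # stand-in for gr.OAuthToken

if not is_valid_token(oauth_token):             # None check + whoami() probe
    raise SystemExit("You have to be logged in.")

create_folder(DOWNLOAD_FOLDER)                  # ./downloads
create_folder(OUTPUT_FOLDER)                    # ./outputs

model_id = "some-org/some-model"                # example repo id
model_name = get_model_name(model_id)           # -> "some-model"

with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
    # download + HF-to-GGUF fp16 conversion now live in one helper
    fp16 = download_base_model(oauth_token.token, model_id, outdir)
    print(f"fp16 GGUF ready for quantization: {fp16}")
    # llama-quantize / imatrix / upload steps follow, as in process_model()

Because download_base_model takes a plain token string rather than a Gradio OAuth object, this kind of standalone reuse is possible without the web UI.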