Oleg Shulyakov committed
Commit · 2d3ee83
Parent(s): ad94fc8
OOP draft
app.py
CHANGED
@@ -5,7 +5,7 @@ import tempfile
 from pathlib import Path
 from textwrap import dedent
 from typing import Optional, Tuple, List, Union
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 
@@ -26,7 +26,10 @@ class QuantizationConfig:
     leave_output: bool = False
     quant_output: bool = False
     output_tensor_method: str = "Q8_0"
-
+    # Generated values - These will be set during processing
+    fp16_model: str = field(default="", init=False)
+    quantized_gguf: str = field(default="", init=False)
+    imatrix_file: str = field(default="", init=False)
 
 @dataclass
 class SplitConfig:
@@ -43,6 +46,16 @@ class OutputConfig:
     repo_name: str = ""
     filename: str = ""
 
+@dataclass
+class ModelProcessingConfig:
+    """Configuration for the entire model processing pipeline."""
+    token: str
+    model_id: str
+    model_name: str
+    outdir: str
+    quant_config: QuantizationConfig
+    split_config: SplitConfig
+    output_config: OutputConfig
 
 class GGUFConverterError(Exception):
     """Custom exception for GGUF conversion errors."""
@@ -202,29 +215,27 @@ class HuggingFaceModelProcessor:
 
         print("Sharded model has been uploaded successfully!")
 
-    def _download_base_model(self,
+    def _download_base_model(self, processing_config: ModelProcessingConfig) -> str:
         """Download and convert Hugging Face model to GGUF FP16 format."""
-
-        print(f"Downloading model {model_name}")
-        fp16_model = f"{outdir}/{model_name}-fp16.gguf"
+        print(f"Downloading model {processing_config.model_name}")
 
-        if os.path.exists(fp16_model):
+        if os.path.exists(processing_config.quant_config.fp16_model):
             print("Skipping fp16 conversion...")
-            print(f"Converted model path: {os.path.abspath(fp16_model)}")
-            return fp16_model
+            print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
+            return processing_config.quant_config.fp16_model
 
         with tempfile.TemporaryDirectory(dir=self.DOWNLOAD_FOLDER) as tmpdir:
-            local_dir = f"{Path(tmpdir)}/{model_name}"
+            local_dir = f"{Path(tmpdir)}/{processing_config.model_name}"
             print(f"Local directory: {os.path.abspath(local_dir)}")
 
             # Download model
-            api = HfApi(token=token)
+            api = HfApi(token=processing_config.token)
             pattern = (
                 "*.safetensors"
                 if any(
                     file.path.endswith(".safetensors")
                     for file in api.list_repo_tree(
-                        repo_id=model_id,
+                        repo_id=processing_config.model_id,
                         recursive=True,
                     )
                 )
@@ -232,12 +243,12 @@ class HuggingFaceModelProcessor:
             )
             dl_pattern = ["*.md", "*.json", "*.model"]
             dl_pattern += [pattern]
-            api.snapshot_download(repo_id=model_id, local_dir=local_dir, allow_patterns=dl_pattern)
+            api.snapshot_download(repo_id=processing_config.model_id, local_dir=local_dir, allow_patterns=dl_pattern)
             print("Model downloaded successfully!")
             print(f"Model directory contents: {os.listdir(local_dir)}")
 
-            config_dir = local_dir
-            adapter_config_dir = local_dir
+            config_dir = os.path.join(local_dir, "config.json")
+            adapter_config_dir = os.path.join(local_dir, "adapter_config.json")
             if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
                 raise GGUFConverterError(
                     'adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, '
@@ -246,11 +257,11 @@ class HuggingFaceModelProcessor:
                 )
 
             # Convert HF to GGUF
-            print(f"Converting to GGUF FP16: {os.path.abspath(fp16_model)}")
+            print(f"Converting to GGUF FP16: {os.path.abspath(processing_config.quant_config.fp16_model)}")
             result = subprocess.run(
                 [
                     "python3", "/app/convert_hf_to_gguf.py", local_dir,
-                    "--outtype", "f16", "--outfile", fp16_model
+                    "--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
                 ],
                 shell=False,
                 capture_output=True
@@ -262,11 +273,10 @@ class HuggingFaceModelProcessor:
                 raise GGUFConverterError(f"Error converting to fp16: {stderr_str}")
 
             print("Model converted to fp16 successfully!")
-            print(f"Converted model path: {os.path.abspath(fp16_model)}")
+            print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
             return fp16_model
 
-    def _quantize_model(self,
-                        quant_config: QuantizationConfig) -> str:
+    def _quantize_model(self, quant_config: QuantizationConfig) -> str:
         """Quantize the GGUF model."""
         quantize_cmd = ["llama-quantize"]
 
@@ -279,8 +289,7 @@ class HuggingFaceModelProcessor:
         if quant_config.quant_output:
             quantize_cmd.extend(["--output-tensor-type", quant_config.output_tensor_method])
 
-
-
+        # Set imatrix file path if needed
         if quant_config.use_imatrix:
             train_data_path = "calibration_data_v5_rc.txt"
             print(f"Training data file path: {train_data_path}")
@@ -288,14 +297,13 @@ class HuggingFaceModelProcessor:
             if not os.path.isfile(train_data_path):
                 raise GGUFConverterError(f"Training data file not found: {train_data_path}")
 
-            self._generate_importance_matrix(
-            quantize_cmd.extend(["--imatrix", imatrix_file])
+            self._generate_importance_matrix(quant_config.fp16_model, train_data_path, quant_config.imatrix_file)
+            quantize_cmd.extend(["--imatrix", quant_config.imatrix_file])
         else:
             print("Not using imatrix quantization.")
 
-
-        quantize_cmd.append(
-        quantize_cmd.append(quantized_gguf)
+        quantize_cmd.append(quant_config.fp16_model)
+        quantize_cmd.append(quant_config.quantized_gguf)
 
         if quant_config.use_imatrix:
             quantize_cmd.append(quant_config.imatrix_method)
@@ -310,8 +318,8 @@ class HuggingFaceModelProcessor:
             raise GGUFConverterError(f"Error quantizing: {stderr_str}")
 
         print(f"Quantized successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
-        print(f"Quantized model path: {os.path.abspath(quantized_gguf)}")
-        return quantized_gguf
+        print(f"Quantized model path: {os.path.abspath(quant_config.quantized_gguf)}")
+        return quant_config.quantized_gguf
 
     def _generate_readme(self, outdir: str, token: str, model_id: str,
                          new_repo_id: str, gguf_name: str) -> str:
@@ -361,61 +369,55 @@ llama-server --hf-repo "{new_repo_id}" --hf-file "{gguf_name}" -c 4096
         card.save(readme_path)
         return readme_path
 
-    def process_model(self,
-                      split_config: SplitConfig, output_config: OutputConfig,
-                      oauth_token: Optional[gr.OAuthToken]) -> Tuple[str, str]:
+    def process_model(self, processing_config: ModelProcessingConfig) -> Tuple[str, str]:
         """Main method to process a model through the entire pipeline."""
         try:
-            self._create_folder(os.path.join(self.OUTPUT_FOLDER, model_name))
-            if self.RUN_LOCALLY == "1"
-            else Path(outDirObj)
-            )
+            quant_config = processing_config.quant_config
+            split_config = processing_config.split_config
+            output_config = processing_config.output_config
+
+            print(f"Current working directory: {os.path.abspath(os.getcwd())}")
+
+            # Download and convert base model
+            self._download_base_model(processing_config)
+
+            # Quantize the model
+            self._quantize_model(quant_config)
+
+            # Create empty repo
+            api = HfApi(token=processing_config.token)
+            new_repo_url = api.create_repo(
+                repo_id=output_config.repo_name,
+                exist_ok=True,
+                private=output_config.private_repo
+            )
+            new_repo_id = new_repo_url.repo_id
+            print("Repo created successfully!", new_repo_url)
+
+            # Upload model
+            if split_config.enabled:
+                print(f"Splitting quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
+                self._split_and_upload_model(quant_config.quantized_gguf, processing_config.outdir, new_repo_id, processing_config.token, split_config)
+            else:
+                try:
+                    print(f"Uploading quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
+                    self._upload_file(processing_config.token, quant_config.quantized_gguf, output_config.filename, new_repo_id)
+                except Exception as e:
+                    raise GGUFConverterError(f"Error uploading quantized model: {e}")
+
+            # Upload imatrix if it exists
+            if quant_config.use_imatrix and os.path.isfile(quant_config.imatrix_file):
+                try:
+                    print(f"Uploading imatrix.dat: {os.path.abspath(quant_config.imatrix_file)}")
+                    self._upload_file(processing_config.token, quant_config.imatrix_file, "imatrix.dat", new_repo_id)
+                except Exception as e:
+                    raise GGUFConverterError(f"Error uploading imatrix.dat: {e}")
+
+            # Upload README.md
+            readme_path = self._generate_readme(processing_config.outdir, processing_config.token, processing_config.model_id, new_repo_id, output_config.filename)
+            self._upload_file(processing_config.token, readme_path, "README.md", new_repo_id)
+
-            print(f"Uploaded successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
+            print(f"Uploaded successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
 
             return (
                 f'<h1>✅ DONE</h1><br/>Find your repo here: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{new_repo_id}</a>',
@@ -743,32 +745,68 @@ class GGUFConverterUI:
                                embedding_tensor_method: str, leave_output: bool,
                                quant_output: bool, output_tensor_method: str,
                                split_model: bool, split_max_tensors, split_max_size: str) -> Tuple[str, str]:
-        """Wrapper for the process_model method to handle the conversion."""
+        """Wrapper for the process_model method to handle the conversion using ModelProcessingConfig."""
+        try:
+            # Validate token and get token string
+            token = self.processor._validate_token(oauth_token)
+
+            # Create configuration objects
+            quant_config = QuantizationConfig(
+                method=q_method,
+                use_imatrix=use_imatrix,
+                imatrix_method=imatrix_q_method,
+                quant_embedding=quant_embedding,
+                embedding_tensor_method=embedding_tensor_method,
+                leave_output=leave_output,
+                quant_output=quant_output,
+                output_tensor_method=output_tensor_method
+            )
 
+            split_config = SplitConfig(
+                enabled=split_model,
+                max_tensors=split_max_tensors if isinstance(split_max_tensors, int) else 256,
+                max_size=split_max_size
+            )
 
+            output_config = OutputConfig(
+                private_repo=private_repo,
+                repo_name=repo_name,
+                filename=gguf_name
+            )
+
+            model_name = self.processor._get_model_name(model_id)
+
+            with tempfile.TemporaryDirectory(dir=self.OUTPUT_FOLDER) as outDirObj:
+                outdir = (
+                    self._create_folder(os.path.join(self.OUTPUT_FOLDER, model_name))
+                    if self.RUN_LOCALLY == "1"
+                    else Path(outDirObj)
+                )
+
+                quant_config.fp16_model = f"{outdir}/{model_name}-fp16.gguf"
+                quant_config.imatrix_file = f"{outdir}/{model_name}-imatrix.dat"
+                quant_config.quantized_gguf = f"{outdir}/{gguf_name}"
+
+                processing_config = ModelProcessingConfig(
+                    token=token,
+                    model_id=model_id,
+                    model_name=model_name,
+                    outdir=outdir,
+                    quant_config=quant_config,
+                    split_config=split_config,
+                    output_config=output_config
+                )
+
+                # Call the processor's main method with the config object
+                return self.processor.process_model(processing_config)
+
-        return self.processor.process_model(model_id, quant_config, split_config, output_config, gr.OAuthToken)
+        except GGUFConverterError as e:
+            print(f"Error in wrapper: {e}")
+            return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{self.processor._escape_html(str(e))}</pre>', "error.png")
+        except Exception as e:
+            print(f"Unexpected error in wrapper: {e}")
+            return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{self.processor._escape_html(str(e))}</pre>', "error.png")
 
 
     def launch(self):
         """Launch the Gradio interface."""
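Taken together, the commit moves per-run state into the new dataclasses and has the UI wrapper hand a single ModelProcessingConfig to the processor. Below is a minimal usage sketch of that call pattern, assuming the module is importable as app and that HuggingFaceModelProcessor needs no constructor arguments; every concrete value (model IDs, paths, quantization settings) is a placeholder, and only the class and field names come from this commit.

# Usage sketch only - values are placeholders, not part of the commit.
from app import (HuggingFaceModelProcessor, ModelProcessingConfig,
                 QuantizationConfig, SplitConfig, OutputConfig)  # assumed import path

processor = HuggingFaceModelProcessor()

quant_config = QuantizationConfig(
    method="Q4_K_M",                  # placeholder quantization type
    use_imatrix=False,
    imatrix_method="IQ4_NL",
    quant_embedding=False,
    embedding_tensor_method="Q8_0",
    leave_output=False,
    quant_output=False,
    output_tensor_method="Q8_0",
)
split_config = SplitConfig(enabled=False, max_tensors=256, max_size="")
output_config = OutputConfig(private_repo=False, repo_name="user/model-GGUF",
                             filename="model-Q4_K_M.gguf")

# The generated fields are declared with init=False, so they are assigned after
# construction, the same way the wrapper in this commit does it.
outdir = "/tmp/outputs/model"
quant_config.fp16_model = f"{outdir}/model-fp16.gguf"
quant_config.imatrix_file = f"{outdir}/model-imatrix.dat"
quant_config.quantized_gguf = f"{outdir}/{output_config.filename}"

processing_config = ModelProcessingConfig(
    token="hf_...",                   # a user access token string
    model_id="user/model",
    model_name="model",
    outdir=outdir,
    quant_config=quant_config,
    split_config=split_config,
    output_config=output_config,
)

# process_model returns the (HTML status, image path) tuple shown in the UI.
html_status, image = processor.process_model(processing_config)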