Oleg Shulyakov committed
Commit 2d3ee83 · 1 Parent(s): ad94fc8
Files changed (1)
  1. app.py +139 -101
app.py CHANGED
@@ -5,7 +5,7 @@ import tempfile
 from pathlib import Path
 from textwrap import dedent
 from typing import Optional, Tuple, List, Union
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 
@@ -26,7 +26,10 @@ class QuantizationConfig:
     leave_output: bool = False
     quant_output: bool = False
     output_tensor_method: str = "Q8_0"
-
+    # Generated values - These will be set during processing
+    fp16_model: str = field(default="", init=False)
+    quantized_gguf: str = field(default="", init=False)
+    imatrix_file: str = field(default="", init=False)
 
 @dataclass
 class SplitConfig:
@@ -43,6 +46,16 @@ class OutputConfig:
     repo_name: str = ""
     filename: str = ""
 
+@dataclass
+class ModelProcessingConfig:
+    """Configuration for the entire model processing pipeline."""
+    token: str
+    model_id: str
+    model_name: str
+    outdir: str
+    quant_config: QuantizationConfig
+    split_config: SplitConfig
+    output_config: OutputConfig
 
 class GGUFConverterError(Exception):
     """Custom exception for GGUF conversion errors."""
@@ -202,29 +215,27 @@ class HuggingFaceModelProcessor:
 
         print("Sharded model has been uploaded successfully!")
 
-    def _download_base_model(self, token: str, model_id: str, outdir: str) -> str:
+    def _download_base_model(self, processing_config: ModelProcessingConfig) -> str:
         """Download and convert Hugging Face model to GGUF FP16 format."""
-        model_name = self._get_model_name(model_id)
-        print(f"Downloading model {model_name}")
-        fp16_model = f"{outdir}/{model_name}-fp16.gguf"
+        print(f"Downloading model {processing_config.model_name}")
 
-        if os.path.exists(fp16_model):
+        if os.path.exists(processing_config.quant_config.fp16_model):
             print("Skipping fp16 conversion...")
-            print(f"Converted model path: {os.path.abspath(fp16_model)}")
-            return fp16_model
+            print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
+            return processing_config.quant_config.fp16_model
 
         with tempfile.TemporaryDirectory(dir=self.DOWNLOAD_FOLDER) as tmpdir:
-            local_dir = f"{Path(tmpdir)}/{model_name}"
+            local_dir = f"{Path(tmpdir)}/{processing_config.model_name}"
            print(f"Local directory: {os.path.abspath(local_dir)}")
 
            # Download model
-            api = HfApi(token=token)
+            api = HfApi(token=processing_config.token)
            pattern = (
                "*.safetensors"
                if any(
                    file.path.endswith(".safetensors")
                    for file in api.list_repo_tree(
-                        repo_id=model_id,
+                        repo_id=processing_config.model_id,
                        recursive=True,
                    )
                )
@@ -232,12 +243,12 @@
            )
            dl_pattern = ["*.md", "*.json", "*.model"]
            dl_pattern += [pattern]
-            api.snapshot_download(repo_id=model_id, local_dir=local_dir, allow_patterns=dl_pattern)
+            api.snapshot_download(repo_id=processing_config.model_id, local_dir=local_dir, allow_patterns=dl_pattern)
            print("Model downloaded successfully!")
            print(f"Model directory contents: {os.listdir(local_dir)}")
 
-            config_dir = local_dir/"config.json"
-            adapter_config_dir = local_dir/"adapter_config.json"
+            config_dir = os.path.join(local_dir, "config.json")
+            adapter_config_dir = os.path.join(local_dir, "adapter_config.json")
            if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
                raise GGUFConverterError(
                    'adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, '
@@ -246,11 +257,11 @@
            )
 
            # Convert HF to GGUF
-            print(f"Converting to GGUF FP16: {os.path.abspath(fp16_model)}")
+            print(f"Converting to GGUF FP16: {os.path.abspath(processing_config.quant_config.fp16_model)}")
            result = subprocess.run(
                [
                    "python3", "/app/convert_hf_to_gguf.py", local_dir,
-                    "--outtype", "f16", "--outfile", fp16_model
+                    "--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
                ],
                shell=False,
                capture_output=True
@@ -262,11 +273,10 @@
                raise GGUFConverterError(f"Error converting to fp16: {stderr_str}")
 
            print("Model converted to fp16 successfully!")
-            print(f"Converted model path: {os.path.abspath(fp16_model)}")
+            print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
            return fp16_model
 
-    def _quantize_model(self, outdir: str, gguf_name: str, fp16: str,
-                        quant_config: QuantizationConfig) -> str:
+    def _quantize_model(self, quant_config: QuantizationConfig) -> str:
        """Quantize the GGUF model."""
        quantize_cmd = ["llama-quantize"]
 
@@ -279,8 +289,7 @@
        if quant_config.quant_output:
            quantize_cmd.extend(["--output-tensor-type", quant_config.output_tensor_method])
 
-        imatrix_file = f"{outdir}/{self._get_model_name(gguf_name.split('-')[0])}-imatrix.dat"
-
+        # Set imatrix file path if needed
        if quant_config.use_imatrix:
            train_data_path = "calibration_data_v5_rc.txt"
            print(f"Training data file path: {train_data_path}")
@@ -288,14 +297,13 @@
            if not os.path.isfile(train_data_path):
                raise GGUFConverterError(f"Training data file not found: {train_data_path}")
 
-            self._generate_importance_matrix(fp16, train_data_path, imatrix_file)
-            quantize_cmd.extend(["--imatrix", imatrix_file])
+            self._generate_importance_matrix(quant_config.fp16_model, train_data_path, quant_config.imatrix_file)
+            quantize_cmd.extend(["--imatrix", quant_config.imatrix_file])
        else:
            print("Not using imatrix quantization.")
 
-        quantized_gguf = f"{outdir}/{gguf_name}"
-        quantize_cmd.append(fp16)
-        quantize_cmd.append(quantized_gguf)
+        quantize_cmd.append(quant_config.fp16_model)
+        quantize_cmd.append(quant_config.quantized_gguf)
 
        if quant_config.use_imatrix:
            quantize_cmd.append(quant_config.imatrix_method)
@@ -310,8 +318,8 @@
            raise GGUFConverterError(f"Error quantizing: {stderr_str}")
 
        print(f"Quantized successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
-        print(f"Quantized model path: {os.path.abspath(quantized_gguf)}")
-        return quantized_gguf
+        print(f"Quantized model path: {os.path.abspath(quant_config.quantized_gguf)}")
+        return quant_config.quantized_gguf
 
    def _generate_readme(self, outdir: str, token: str, model_id: str,
                         new_repo_id: str, gguf_name: str) -> str:
@@ -361,61 +369,55 @@ llama-server --hf-repo "{new_repo_id}" --hf-file "{gguf_name}" -c 4096
        card.save(readme_path)
        return readme_path
 
-    def process_model(self, model_id: str, quant_config: QuantizationConfig,
-                      split_config: SplitConfig, output_config: OutputConfig,
-                      oauth_token: Optional[gr.OAuthToken]) -> Tuple[str, str]:
+    def process_model(self, processing_config: ModelProcessingConfig) -> Tuple[str, str]:
        """Main method to process a model through the entire pipeline."""
        try:
-            token = self._validate_token(oauth_token)
-            print(f"Current working directory: {os.path.abspath(os.getcwd())}")
+            quant_config = processing_config.quant_config
+            split_config = processing_config.split_config
+            output_config = processing_config.output_config
 
-            model_name = self._get_model_name(model_id)
+            print(f"Current working directory: {os.path.abspath(os.getcwd())}")
 
-            with tempfile.TemporaryDirectory(dir=self.OUTPUT_FOLDER) as outDirObj:
-                outdir = (
-                    self._create_folder(os.path.join(self.OUTPUT_FOLDER, model_name))
-                    if self.RUN_LOCALLY == "1"
-                    else Path(outDirObj)
-                )
+            # Download and convert base model
+            self._download_base_model(processing_config)
 
-                fp16 = self._download_base_model(token, model_id, outdir)
-                quantized_gguf = self._quantize_model(outdir, output_config.filename, fp16, quant_config)
+            # Quantize the model
+            self._quantize_model(quant_config)
 
-                # Create empty repo
-                api = HfApi(token=token)
-                new_repo_url = api.create_repo(
-                    repo_id=output_config.repo_name,
-                    exist_ok=True,
-                    private=output_config.private_repo
-                )
-                new_repo_id = new_repo_url.repo_id
-                print("Repo created successfully!", new_repo_url)
-
-                # Upload model
-                if split_config.enabled:
-                    print(f"Splitting quantized model: {os.path.abspath(quantized_gguf)}")
-                    self._split_and_upload_model(quantized_gguf, outdir, new_repo_id, token, split_config)
-                else:
-                    try:
-                        print(f"Uploading quantized model: {os.path.abspath(quantized_gguf)}")
-                        self._upload_file(token, quantized_gguf, output_config.filename, new_repo_id)
-                    except Exception as e:
-                        raise GGUFConverterError(f"Error uploading quantized model: {e}")
-
-                # Upload imatrix if it exists
-                imatrix_file = f"{outdir}/{model_name}-imatrix.dat"
-                if os.path.isfile(imatrix_file):
-                    try:
-                        print(f"Uploading imatrix.dat: {os.path.abspath(imatrix_file)}")
-                        self._upload_file(token, imatrix_file, "imatrix.dat", new_repo_id)
-                    except Exception as e:
-                        raise GGUFConverterError(f"Error uploading imatrix.dat: {e}")
-
-                # Upload README.md
-                readme_path = self._generate_readme(outdir, token, model_id, new_repo_id, output_config.filename)
-                self._upload_file(token, readme_path, "README.md", new_repo_id)
-
-                print(f"Uploaded successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
+            # Create empty repo
+            api = HfApi(token=processing_config.token)
+            new_repo_url = api.create_repo(
+                repo_id=output_config.repo_name,
+                exist_ok=True,
+                private=output_config.private_repo
+            )
+            new_repo_id = new_repo_url.repo_id
+            print("Repo created successfully!", new_repo_url)
+
+            # Upload model
+            if split_config.enabled:
+                print(f"Splitting quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
+                self._split_and_upload_model(quant_config.quantized_gguf, processing_config.outdir, new_repo_id, processing_config.token, split_config)
+            else:
+                try:
+                    print(f"Uploading quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
+                    self._upload_file(processing_config.token, quant_config.quantized_gguf, output_config.filename, new_repo_id)
+                except Exception as e:
+                    raise GGUFConverterError(f"Error uploading quantized model: {e}")
+
+            # Upload imatrix if it exists
+            if quant_config.use_imatrix and os.path.isfile(quant_config.imatrix_file):
+                try:
+                    print(f"Uploading imatrix.dat: {os.path.abspath(quant_config.imatrix_file)}")
+                    self._upload_file(processing_config.token, quant_config.imatrix_file, "imatrix.dat", new_repo_id)
+                except Exception as e:
+                    raise GGUFConverterError(f"Error uploading imatrix.dat: {e}")
+
+            # Upload README.md
+            readme_path = self._generate_readme(processing_config.outdir, processing_config.token, processing_config.model_id, new_repo_id, output_config.filename)
+            self._upload_file(processing_config.token, readme_path, "README.md", new_repo_id)
+
+            print(f"Uploaded successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
 
            return (
                f'<h1>✅ DONE</h1><br/>Find your repo here: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{new_repo_id}</a>',
@@ -743,32 +745,68 @@ class GGUFConverterUI:
 
                        embedding_tensor_method: str, leave_output: bool,
                        quant_output: bool, output_tensor_method: str,
                        split_model: bool, split_max_tensors, split_max_size: str) -> Tuple[str, str]:
-        """Wrapper for the process_model method to handle the conversion."""
-        # Create configuration objects
-        quant_config = QuantizationConfig(
-            method=q_method,
-            use_imatrix=use_imatrix,
-            imatrix_method=imatrix_q_method,
-            quant_embedding=quant_embedding,
-            embedding_tensor_method=embedding_tensor_method,
-            leave_output=leave_output,
-            quant_output=quant_output,
-            output_tensor_method=output_tensor_method
-        )
+        """Wrapper for the process_model method to handle the conversion using ModelProcessingConfig."""
+        try:
+            # Validate token and get token string
+            token = self.processor._validate_token(oauth_token)
+
+            # Create configuration objects
+            quant_config = QuantizationConfig(
+                method=q_method,
+                use_imatrix=use_imatrix,
+                imatrix_method=imatrix_q_method,
+                quant_embedding=quant_embedding,
+                embedding_tensor_method=embedding_tensor_method,
+                leave_output=leave_output,
+                quant_output=quant_output,
+                output_tensor_method=output_tensor_method
+            )
 
-        split_config = SplitConfig(
-            enabled=split_model,
-            max_tensors=split_max_tensors,
-            max_size=split_max_size
-        )
+            split_config = SplitConfig(
+                enabled=split_model,
+                max_tensors=split_max_tensors if isinstance(split_max_tensors, int) else 256,
+                max_size=split_max_size
+            )
 
-        output_config = OutputConfig(
-            private_repo=private_repo,
-            repo_name=repo_name,
-            filename=gguf_name
-        )
+            output_config = OutputConfig(
+                private_repo=private_repo,
+                repo_name=repo_name,
+                filename=gguf_name
+            )
+
+            model_name = self.processor._get_model_name(model_id)
+
+            with tempfile.TemporaryDirectory(dir=self.OUTPUT_FOLDER) as outDirObj:
+                outdir = (
+                    self._create_folder(os.path.join(self.OUTPUT_FOLDER, model_name))
+                    if self.RUN_LOCALLY == "1"
+                    else Path(outDirObj)
+                )
+
+                quant_config.fp16_model = f"{outdir}/{model_name}-fp16.gguf"
+                quant_config.imatrix_file = f"{outdir}/{model_name}-imatrix.dat"
+                quant_config.quantized_gguf = f"{outdir}/{gguf_name}"
+
+                processing_config = ModelProcessingConfig(
+                    token=token,
+                    model_id=model_id,
+                    model_name=model_name,
+                    outdir=outdir,
+                    quant_config=quant_config,
+                    split_config=split_config,
+                    output_config=output_config
+                )
 
-        return self.processor.process_model(model_id, quant_config, split_config, output_config, gr.OAuthToken)
+                # Call the processor's main method with the config object
+                return self.processor.process_model(processing_config)
+
+        except GGUFConverterError as e:
+            print(f"Error in wrapper: {e}")
+            return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{self.processor._escape_html(str(e))}</pre>', "error.png")
+        except Exception as e:
+            print(f"Unexpected error in wrapper: {e}")
+            return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{self.processor._escape_html(str(e))}</pre>', "error.png")
 
    def launch(self):
        """Launch the Gradio interface."""