Oleg Shulyakov committed on
Commit 1147115 · 1 Parent(s): e7b8f47

Update _split_and_upload_model

Files changed (1): app.py +14 -12
app.py CHANGED
@@ -169,11 +169,13 @@ class HuggingFaceModelProcessor:
 
         print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
 
-    def _split_and_upload_model(self, model_path: str, outdir: str, repo_id: str, token: str,
-                                split_config: SplitConfig) -> None:
+    def _split_and_upload_model(self, processing_config: ModelProcessingConfig) -> None:
         """Split large model files and upload shards."""
-        print(f"Model path: {model_path}")
-        print(f"Output dir: {outdir}")
+        quant_config = processing_config.quant_config
+        split_config = processing_config.split_config
+
+        print(f"Model path: {quant_config.quantized_gguf}")
+        print(f"Output dir: {processing_config.outdir}")
 
         split_cmd = ["llama-gguf-split", "--split"]
 
@@ -182,8 +184,8 @@ class HuggingFaceModelProcessor:
         else:
             split_cmd.extend(["--split-max-tensors", str(split_config.max_tensors)])
 
-        model_path_prefix = '.'.join(model_path.split('.')[:-1])
-        split_cmd.extend([model_path, model_path_prefix])
+        model_path_prefix = '.'.join(quant_config.quantized_gguf.split('.')[:-1])
+        split_cmd.extend([quant_config.quantized_gguf, model_path_prefix])
 
         print(f"Split command: {split_cmd}")
         result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
@@ -198,14 +200,14 @@ class HuggingFaceModelProcessor:
         print("Model split successfully!")
 
         # Remove original model file
-        if os.path.exists(model_path):
-            os.remove(model_path)
+        if os.path.exists(quant_config.quantized_gguf):
+            os.remove(quant_config.quantized_gguf)
 
         model_file_prefix = model_path_prefix.split('/')[-1]
         print(f"Model file name prefix: {model_file_prefix}")
 
         sharded_model_files = [
-            f for f in os.listdir(outdir)
+            f for f in os.listdir(processing_config.outdir)
             if f.startswith(model_file_prefix) and f.endswith(".gguf")
         ]
 
@@ -214,10 +216,10 @@ class HuggingFaceModelProcessor:
 
         print(f"Sharded model files: {sharded_model_files}")
         for file in sharded_model_files:
-            file_path = os.path.join(outdir, file)
+            file_path = os.path.join(processing_config.outdir, file)
             try:
                 print(f"Uploading file: {file_path}")
-                self._upload_file(token, file_path, file, repo_id)
+                self._upload_file(processing_config, file_path, file)
             except Exception as e:
                 raise GGUFConverterError(f"Error uploading file {file_path}: {e}")
 
@@ -402,7 +404,7 @@ llama-server --hf-repo "{processing_config.new_repo_id}" --hf-file "{processing_
         # Upload model
         if split_config.enabled:
             print(f"Splitting quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
-            self._split_and_upload_model(quant_config.quantized_gguf, processing_config.outdir, processing_config.new_repo_id, processing_config.token, split_config)
+            self._split_and_upload_model(processing_config)
         else:
             try:
                 print(f"Uploading quantized model: {os.path.abspath(quant_config.quantized_gguf)}")