Oleg Shulyakov committed
Commit 1147115 · Parent(s): e7b8f47
Update _split_and_upload_model
app.py CHANGED
@@ -169,11 +169,13 @@ class HuggingFaceModelProcessor:
 
         print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
 
-    def _split_and_upload_model(self,
-                                split_config: SplitConfig) -> None:
+    def _split_and_upload_model(self, processing_config: ModelProcessingConfig) -> None:
         """Split large model files and upload shards."""
-        …
-        …
+        quant_config = processing_config.quant_config
+        split_config = processing_config.split_config
+
+        print(f"Model path: {quant_config.quantized_gguf}")
+        print(f"Output dir: {processing_config.outdir}")
 
         split_cmd = ["llama-gguf-split", "--split"]
 
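The signature change above replaces a bare SplitConfig parameter with a single ModelProcessingConfig that the method unpacks itself. The attribute accesses in this diff imply roughly the following shapes; this is a reconstruction, not the actual classes in app.py, and any field not referenced in the commit is an assumption:

    from dataclasses import dataclass

    @dataclass
    class SplitConfig:
        enabled: bool = False
        max_tensors: int = 256      # forwarded to --split-max-tensors below
        # the branch not shown in this hunk presumably forwards a size
        # limit via --split-max-size

    @dataclass
    class QuantizationConfig:
        quantized_gguf: str = ""    # path to the single-file quantized model
        imatrix_file: str = ""      # importance matrix printed above

    @dataclass
    class ModelProcessingConfig:
        outdir: str                 # where llama-gguf-split writes the shards
        new_repo_id: str            # target repo, referenced in the last hunk
        quant_config: QuantizationConfig
        split_config: SplitConfig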
@@ -182,8 +184,8 @@ class HuggingFaceModelProcessor:
         else:
             split_cmd.extend(["--split-max-tensors", str(split_config.max_tensors)])
 
-        model_path_prefix = '.'.join(…
-        split_cmd.extend([…
+        model_path_prefix = '.'.join(quant_config.quantized_gguf.split('.')[:-1])
+        split_cmd.extend([quant_config.quantized_gguf, model_path_prefix])
 
         print(f"Split command: {split_cmd}")
         result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
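To make the command concrete: with max_tensors = 256 and a quantized file at /data/out/model.Q4_K_M.gguf (illustrative values, not from this commit), the list assembled above comes out as shown below, and llama-gguf-split writes shards such as model.Q4_K_M-00001-of-00003.gguf alongside the prefix. The commit also omits the error check between subprocess.run and the success message in the next hunk; presumably it resembles the returncode test sketched here:

    import subprocess

    class GGUFConverterError(Exception):
        """Stand-in for the exception app.py raises on conversion failures."""

    # What split_cmd ends up containing for the assumed inputs:
    split_cmd = [
        "llama-gguf-split", "--split",
        "--split-max-tensors", "256",
        "/data/out/model.Q4_K_M.gguf",   # quant_config.quantized_gguf
        "/data/out/model.Q4_K_M",        # model_path_prefix
    ]

    result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
    if result.returncode != 0:
        # assumed shape of the check elided between the hunks
        raise GGUFConverterError(f"Error splitting the model: {result.stderr}")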
@@ -198,14 +200,14 @@ class HuggingFaceModelProcessor:
         print("Model split successfully!")
 
         # Remove original model file
-        if os.path.exists(…
-            os.remove(…
+        if os.path.exists(quant_config.quantized_gguf):
+            os.remove(quant_config.quantized_gguf)
 
         model_file_prefix = model_path_prefix.split('/')[-1]
         print(f"Model file name prefix: {model_file_prefix}")
 
         sharded_model_files = [
-            f for f in os.listdir(outdir)
+            f for f in os.listdir(processing_config.outdir)
             if f.startswith(model_file_prefix) and f.endswith(".gguf")
         ]
 
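The two prefixes are easy to conflate, so here is the derivation traced with an illustrative path. model_path_prefix keeps the directory (it is the output prefix handed to llama-gguf-split), while model_file_prefix is its basename, which the listdir filter above matches shard names against:

    quantized_gguf = "/data/out/model.Q4_K_M.gguf"   # illustrative path

    model_path_prefix = '.'.join(quantized_gguf.split('.')[:-1])
    print(model_path_prefix)    # -> /data/out/model.Q4_K_M

    model_file_prefix = model_path_prefix.split('/')[-1]
    print(model_file_prefix)    # -> model.Q4_K_M

    # llama-gguf-split names shards "<prefix>-NNNNN-of-NNNNN.gguf", so the
    # startswith/endswith filter above selects exactly these files:
    shards = ["model.Q4_K_M-00001-of-00002.gguf",
              "model.Q4_K_M-00002-of-00002.gguf"]
    assert all(f.startswith(model_file_prefix) and f.endswith(".gguf")
               for f in shards)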
@@ -214,10 +216,10 @@ class HuggingFaceModelProcessor:
 
         print(f"Sharded model files: {sharded_model_files}")
         for file in sharded_model_files:
-            file_path = os.path.join(outdir, file)
+            file_path = os.path.join(processing_config.outdir, file)
             try:
                 print(f"Uploading file: {file_path}")
-                self._upload_file(…
+                self._upload_file(processing_config, file_path, file)
             except Exception as e:
                 raise GGUFConverterError(f"Error uploading file {file_path}: {e}")
 
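_upload_file itself is outside this diff; only its new call shape _upload_file(processing_config, file_path, file) is visible. Assuming the Space uploads through huggingface_hub (plausible for a Space, but an assumption), the helper could reduce to a sketch like this:

    from huggingface_hub import HfApi

    def _upload_file(processing_config, file_path: str, filename: str) -> None:
        """Hypothetical body; only the signature is taken from the diff."""
        api = HfApi()  # on Spaces the token is usually picked up from the env
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=filename,
            repo_id=processing_config.new_repo_id,
            repo_type="model",
        )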
@@ -402,7 +404,7 @@ llama-server --hf-repo "{processing_config.new_repo_id}" --hf-file "{processing_
         # Upload model
         if split_config.enabled:
             print(f"Splitting quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
-            self._split_and_upload_model(…
+            self._split_and_upload_model(processing_config)
         else:
             try:
                 print(f"Uploading quantized model: {os.path.abspath(quant_config.quantized_gguf)}")