Spaces:
Running
Running
Oleg Shulyakov
committed on
Commit
·
2ae55e9
1
Parent(s):
239afdd
Process timeouts
Browse files
app.py
CHANGED
@@ -159,9 +159,9 @@ class HuggingFaceModelProcessor:
|
|
159 |
"-o", quant_config.imatrix_file,
|
160 |
]
|
161 |
|
162 |
-
process = subprocess.Popen(imatrix_command, shell=False)
|
163 |
try:
|
164 |
-
process.wait(timeout=
|
165 |
except subprocess.TimeoutExpired:
|
166 |
print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
|
167 |
process.send_signal(signal.SIGINT)
|
@@ -170,9 +170,10 @@ class HuggingFaceModelProcessor:
|
|
170 |
except subprocess.TimeoutExpired:
|
171 |
print("Imatrix proc still didn't term. Forcefully terminating process...")
|
172 |
process.kill()
|
|
|
173 |
|
174 |
if process.returncode != 0:
|
175 |
-
|
176 |
|
177 |
print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
|
178 |
|
@@ -195,16 +196,21 @@ class HuggingFaceModelProcessor:
|
|
195 |
split_cmd.extend([quant_config.quantized_gguf, model_path_prefix])
|
196 |
|
197 |
print(f"Split command: {split_cmd}")
|
198 |
-
process = subprocess.Popen(split_cmd, shell=False)
|
199 |
try:
|
200 |
process.wait(timeout=300)
|
201 |
except subprocess.TimeoutExpired:
|
202 |
-
print("Splitting timed out.
|
203 |
-
process.
|
|
|
|
|
|
|
|
|
|
|
204 |
raise GGUFConverterError("Error splitting the model: Operation timed out.")
|
205 |
|
206 |
if process.returncode != 0:
|
207 |
-
raise GGUFConverterError(f"Error splitting the model")
|
208 |
|
209 |
print("Model split successfully!")
|
210 |
|
@@ -282,16 +288,21 @@ class HuggingFaceModelProcessor:
|
|
282 |
"python3", "/app/convert_hf_to_gguf.py", local_dir,
|
283 |
"--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
|
284 |
]
|
285 |
-
process = subprocess.Popen(convert_command, shell=False)
|
286 |
try:
|
287 |
process.wait(timeout=600)
|
288 |
except subprocess.TimeoutExpired:
|
289 |
-
print("Conversion timed out.
|
290 |
-
process.
|
|
|
|
|
|
|
|
|
|
|
291 |
raise GGUFConverterError("Error converting to fp16: Operation timed out.")
|
292 |
|
293 |
if process.returncode != 0:
|
294 |
-
raise GGUFConverterError(f"Error converting to fp16")
|
295 |
|
296 |
print("Model converted to fp16 successfully!")
|
297 |
print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
|
@@ -328,16 +339,21 @@ class HuggingFaceModelProcessor:
|
|
328 |
print(f"Quantizing model with {quantize_cmd}")
|
329 |
|
330 |
# Use Popen for quantization
|
331 |
-
process = subprocess.Popen(quantize_cmd, shell=False)
|
332 |
try:
|
333 |
process.wait(timeout=3600)
|
334 |
except subprocess.TimeoutExpired:
|
335 |
-
print("Quantization timed out.
|
336 |
-
process.
|
|
|
|
|
|
|
|
|
|
|
337 |
raise GGUFConverterError("Error quantizing: Operation timed out.")
|
338 |
|
339 |
if process.returncode != 0:
|
340 |
-
raise GGUFConverterError(f"Error quantizing")
|
341 |
|
342 |
print(f"Quantized successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
|
343 |
print(f"Quantized model path: {os.path.abspath(quant_config.quantized_gguf)}")
|
|
|
159 |
"-o", quant_config.imatrix_file,
|
160 |
]
|
161 |
|
162 |
+
process = subprocess.Popen(imatrix_command, shell=False, stderr=subprocess.STDOUT)
|
163 |
try:
|
164 |
+
process.wait(timeout=600)
|
165 |
except subprocess.TimeoutExpired:
|
166 |
print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
|
167 |
process.send_signal(signal.SIGINT)
|
|
|
170 |
except subprocess.TimeoutExpired:
|
171 |
print("Imatrix proc still didn't term. Forcefully terminating process...")
|
172 |
process.kill()
|
173 |
+
raise GGUFConverterError("Error generating imatrix: Operation timed out.")
|
174 |
|
175 |
if process.returncode != 0:
|
176 |
+
raise GGUFConverterError(f"Error generating imatrix: code={process.returncode}.")
|
177 |
|
178 |
print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
|
179 |
|
|
|
196 |
split_cmd.extend([quant_config.quantized_gguf, model_path_prefix])
|
197 |
|
198 |
print(f"Split command: {split_cmd}")
|
199 |
+
process = subprocess.Popen(split_cmd, shell=False, stderr=subprocess.STDOUT)
|
200 |
try:
|
201 |
process.wait(timeout=300)
|
202 |
except subprocess.TimeoutExpired:
|
203 |
+
print("Splitting timed out. Sending SIGINT to allow graceful termination...")
|
204 |
+
process.send_signal(signal.SIGINT)
|
205 |
+
try:
|
206 |
+
process.wait(timeout=5)
|
207 |
+
except subprocess.TimeoutExpired:
|
208 |
+
print("Splitting timed out. Killing process...")
|
209 |
+
process.kill()
|
210 |
raise GGUFConverterError("Error splitting the model: Operation timed out.")
|
211 |
|
212 |
if process.returncode != 0:
|
213 |
+
raise GGUFConverterError(f"Error splitting the model: code={process.returncode}")
|
214 |
|
215 |
print("Model split successfully!")
|
216 |
|
|
|
288 |
"python3", "/app/convert_hf_to_gguf.py", local_dir,
|
289 |
"--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
|
290 |
]
|
291 |
+
process = subprocess.Popen(convert_command, shell=False, stderr=subprocess.STDOUT)
|
292 |
try:
|
293 |
process.wait(timeout=600)
|
294 |
except subprocess.TimeoutExpired:
|
295 |
+
print("Conversion timed out. Sending SIGINT to allow graceful termination...")
|
296 |
+
process.send_signal(signal.SIGINT)
|
297 |
+
try:
|
298 |
+
process.wait(timeout=5)
|
299 |
+
except subprocess.TimeoutExpired:
|
300 |
+
print("Conversion timed out. Killing process...")
|
301 |
+
process.kill()
|
302 |
raise GGUFConverterError("Error converting to fp16: Operation timed out.")
|
303 |
|
304 |
if process.returncode != 0:
|
305 |
+
raise GGUFConverterError(f"Error converting to fp16: code={process.returncode}")
|
306 |
|
307 |
print("Model converted to fp16 successfully!")
|
308 |
print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
|
|
|
339 |
print(f"Quantizing model with {quantize_cmd}")
|
340 |
|
341 |
# Use Popen for quantization
|
342 |
+
process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
|
343 |
try:
|
344 |
process.wait(timeout=3600)
|
345 |
except subprocess.TimeoutExpired:
|
346 |
+
print("Quantization timed out. Sending SIGINT to allow graceful termination...")
|
347 |
+
process.send_signal(signal.SIGINT)
|
348 |
+
try:
|
349 |
+
process.wait(timeout=5)
|
350 |
+
except subprocess.TimeoutExpired:
|
351 |
+
print("Quantization timed out. Killing process...")
|
352 |
+
process.kill()
|
353 |
raise GGUFConverterError("Error quantizing: Operation timed out.")
|
354 |
|
355 |
if process.returncode != 0:
|
356 |
+
raise GGUFConverterError(f"Error quantizing: code={process.returncode}")
|
357 |
|
358 |
print(f"Quantized successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
|
359 |
print(f"Quantized model path: {os.path.abspath(quant_config.quantized_gguf)}")
|