Oleg Shulyakov committed on
Commit
239afdd
·
1 Parent(s): dd433e4

subprocess.Run

Browse files
Files changed (1) hide show
  1. app.py +45 -27
app.py CHANGED
@@ -31,6 +31,7 @@ class QuantizationConfig:
31
  quantized_gguf: str = field(default="", init=False)
32
  imatrix_file: str = field(default="", init=False)
33
 
 
34
  @dataclass
35
  class SplitConfig:
36
  """Configuration for model splitting."""
@@ -46,6 +47,7 @@ class OutputConfig:
46
  repo_name: str = ""
47
  filename: str = ""
48
 
 
49
  @dataclass
50
  class ModelProcessingConfig:
51
  """Configuration for the entire model processing pipeline."""
@@ -60,6 +62,7 @@ class ModelProcessingConfig:
60
  new_repo_url: str = field(default="", init=False)
61
  new_repo_id: str = field(default="", init=False)
62
 
 
63
  class GGUFConverterError(Exception):
64
  """Custom exception for GGUF conversion errors."""
65
  pass
@@ -143,9 +146,10 @@ class HuggingFaceModelProcessor:
143
  train_data_path = self.CALIBRATION_FILE
144
  if not os.path.isfile(train_data_path):
145
  raise GGUFConverterError(f"Training data file not found: {train_data_path}")
146
- print(f"Training data file path: {train_data_path}")
147
 
 
148
  print("Running imatrix command...")
 
149
  imatrix_command = [
150
  "llama-imatrix",
151
  "-m", quant_config.fp16_model,
@@ -157,16 +161,19 @@ class HuggingFaceModelProcessor:
157
 
158
  process = subprocess.Popen(imatrix_command, shell=False)
159
  try:
160
- process.wait(timeout=60)
161
  except subprocess.TimeoutExpired:
162
  print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
163
  process.send_signal(signal.SIGINT)
164
  try:
165
  process.wait(timeout=5)
166
  except subprocess.TimeoutExpired:
167
- print("Imatrix proc still didn't term. Forecfully terming process...")
168
  process.kill()
169
 
 
 
 
170
  print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
171
 
172
  def _split_and_upload_model(self, processing_config: ModelProcessingConfig) -> None:
@@ -188,14 +195,16 @@ class HuggingFaceModelProcessor:
188
  split_cmd.extend([quant_config.quantized_gguf, model_path_prefix])
189
 
190
  print(f"Split command: {split_cmd}")
191
- result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
192
-
193
- print(f"Split command stdout: {result.stdout}")
194
- print(f"Split command stderr: {result.stderr}")
 
 
 
195
 
196
- if result.returncode != 0:
197
- stderr_str = result.stderr.decode("utf-8")
198
- raise GGUFConverterError(f"Error splitting the model: {stderr_str}")
199
 
200
  print("Model split successfully!")
201
 
@@ -215,6 +224,7 @@ class HuggingFaceModelProcessor:
215
  raise GGUFConverterError("No sharded files found.")
216
 
217
  print(f"Sharded model files: {sharded_model_files}")
 
218
  for file in sharded_model_files:
219
  file_path = os.path.join(processing_config.outdir, file)
220
  try:
@@ -268,19 +278,20 @@ class HuggingFaceModelProcessor:
268
 
269
  # Convert HF to GGUF
270
  print(f"Converting to GGUF FP16: {os.path.abspath(processing_config.quant_config.fp16_model)}")
271
- result = subprocess.run(
272
- [
273
- "python3", "/app/convert_hf_to_gguf.py", local_dir,
274
- "--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
275
- ],
276
- shell=False,
277
- capture_output=True
278
- )
 
 
 
279
 
280
- print(f"Model directory contents: {result}")
281
- if result.returncode != 0:
282
- stderr_str = result.stderr.decode("utf-8")
283
- raise GGUFConverterError(f"Error converting to fp16: {stderr_str}")
284
 
285
  print("Model converted to fp16 successfully!")
286
  print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
@@ -315,11 +326,18 @@ class HuggingFaceModelProcessor:
315
  quantize_cmd.append(quant_config.method)
316
 
317
  print(f"Quantizing model with {quantize_cmd}")
318
- result = subprocess.run(quantize_cmd, shell=False, capture_output=True)
319
 
320
- if result.returncode != 0:
321
- stderr_str = result.stderr.decode("utf-8")
322
- raise GGUFConverterError(f"Error quantizing: {stderr_str}")
 
 
 
 
 
 
 
 
323
 
324
  print(f"Quantized successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
325
  print(f"Quantized model path: {os.path.abspath(quant_config.quantized_gguf)}")
@@ -338,7 +356,7 @@ class HuggingFaceModelProcessor:
338
 
339
  return new_repo_url
340
 
341
- def _generate_readme(self, processing_config :ModelProcessingConfig) -> str:
342
  """Generate README.md for the quantized model."""
343
  creator = self._get_model_creator(processing_config.model_id)
344
  username = whoami(processing_config.token)["name"]
 
31
  quantized_gguf: str = field(default="", init=False)
32
  imatrix_file: str = field(default="", init=False)
33
 
34
+
35
  @dataclass
36
  class SplitConfig:
37
  """Configuration for model splitting."""
 
47
  repo_name: str = ""
48
  filename: str = ""
49
 
50
+
51
  @dataclass
52
  class ModelProcessingConfig:
53
  """Configuration for the entire model processing pipeline."""
 
62
  new_repo_url: str = field(default="", init=False)
63
  new_repo_id: str = field(default="", init=False)
64
 
65
+
66
  class GGUFConverterError(Exception):
67
  """Custom exception for GGUF conversion errors."""
68
  pass
 
146
  train_data_path = self.CALIBRATION_FILE
147
  if not os.path.isfile(train_data_path):
148
  raise GGUFConverterError(f"Training data file not found: {train_data_path}")
 
149
 
150
+ print(f"Training data file path: {train_data_path}")
151
  print("Running imatrix command...")
152
+
153
  imatrix_command = [
154
  "llama-imatrix",
155
  "-m", quant_config.fp16_model,
 
161
 
162
  process = subprocess.Popen(imatrix_command, shell=False)
163
  try:
164
+ process.wait(timeout=300)
165
  except subprocess.TimeoutExpired:
166
  print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
167
  process.send_signal(signal.SIGINT)
168
  try:
169
  process.wait(timeout=5)
170
  except subprocess.TimeoutExpired:
171
+ print("Imatrix proc still didn't term. Forcefully terminating process...")
172
  process.kill()
173
 
174
+ if process.returncode != 0:
175
+ raise GGUFConverterError(f"Error generating imatrix")
176
+
177
  print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
178
 
179
  def _split_and_upload_model(self, processing_config: ModelProcessingConfig) -> None:
 
195
  split_cmd.extend([quant_config.quantized_gguf, model_path_prefix])
196
 
197
  print(f"Split command: {split_cmd}")
198
+ process = subprocess.Popen(split_cmd, shell=False)
199
+ try:
200
+ process.wait(timeout=300)
201
+ except subprocess.TimeoutExpired:
202
+ print("Splitting timed out. Killing process...")
203
+ process.kill()
204
+ raise GGUFConverterError("Error splitting the model: Operation timed out.")
205
 
206
+ if process.returncode != 0:
207
+ raise GGUFConverterError(f"Error splitting the model")
 
208
 
209
  print("Model split successfully!")
210
 
 
224
  raise GGUFConverterError("No sharded files found.")
225
 
226
  print(f"Sharded model files: {sharded_model_files}")
227
+
228
  for file in sharded_model_files:
229
  file_path = os.path.join(processing_config.outdir, file)
230
  try:
 
278
 
279
  # Convert HF to GGUF
280
  print(f"Converting to GGUF FP16: {os.path.abspath(processing_config.quant_config.fp16_model)}")
281
+ convert_command = [
282
+ "python3", "/app/convert_hf_to_gguf.py", local_dir,
283
+ "--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
284
+ ]
285
+ process = subprocess.Popen(convert_command, shell=False)
286
+ try:
287
+ process.wait(timeout=600)
288
+ except subprocess.TimeoutExpired:
289
+ print("Conversion timed out. Killing process...")
290
+ process.kill()
291
+ raise GGUFConverterError("Error converting to fp16: Operation timed out.")
292
 
293
+ if process.returncode != 0:
294
+ raise GGUFConverterError(f"Error converting to fp16")
 
 
295
 
296
  print("Model converted to fp16 successfully!")
297
  print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
 
326
  quantize_cmd.append(quant_config.method)
327
 
328
  print(f"Quantizing model with {quantize_cmd}")
 
329
 
330
+ # Use Popen for quantization
331
+ process = subprocess.Popen(quantize_cmd, shell=False)
332
+ try:
333
+ process.wait(timeout=3600)
334
+ except subprocess.TimeoutExpired:
335
+ print("Quantization timed out. Killing process...")
336
+ process.kill()
337
+ raise GGUFConverterError("Error quantizing: Operation timed out.")
338
+
339
+ if process.returncode != 0:
340
+ raise GGUFConverterError(f"Error quantizing")
341
 
342
  print(f"Quantized successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
343
  print(f"Quantized model path: {os.path.abspath(quant_config.quantized_gguf)}")
 
356
 
357
  return new_repo_url
358
 
359
+ def _generate_readme(self, processing_config: ModelProcessingConfig) -> str:
360
  """Generate README.md for the quantized model."""
361
  creator = self._get_model_creator(processing_config.model_id)
362
  username = whoami(processing_config.token)["name"]