da03 committed on
Commit
9e3f8c4
·
1 Parent(s): 94fd315
Files changed (1) hide show
  1. sync_train_dataset.py +45 -1
sync_train_dataset.py CHANGED
@@ -336,6 +336,45 @@ def transfer_csv_file(sftp):
336
  return False
337
 
338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  def run_transfer_cycle():
340
  """Run a complete transfer cycle."""
341
  client = None
@@ -344,6 +383,11 @@ def run_transfer_cycle():
344
  client = create_ssh_client()
345
  sftp = client.open_sftp()
346
 
 
 
 
 
 
347
  # Step 1: Transfer TAR files
348
  tar_count = transfer_tar_files(sftp)
349
 
@@ -360,7 +404,7 @@ def run_transfer_cycle():
360
  logger.warning("Skipping CSV transfer because PKL transfer failed")
361
  csv_success = False
362
 
363
- return tar_count > 0 or pkl_success or csv_success
364
  except Exception as e:
365
  logger.error(f"Error in transfer cycle: {str(e)}")
366
  return False
 
336
  return False
337
 
338
 
339
+ def transfer_padding_file(sftp):
340
+ """Transfer the padding.npy file if it hasn't been transferred yet or has changed."""
341
+ padding_file = "padding.npy"
342
+ remote_path = os.path.join(REMOTE_DATA_DIR, padding_file)
343
+ local_path = os.path.join(LOCAL_DATA_DIR, padding_file)
344
+
345
+ try:
346
+ # Check if file exists
347
+ try:
348
+ stat = sftp.stat(remote_path)
349
+ except FileNotFoundError:
350
+ logger.warning(f"Padding file {remote_path} not found")
351
+ return False
352
+
353
+ # Skip if already transferred with same size and mtime
354
+ if is_file_transferred(padding_file, stat.st_size, stat.st_mtime):
355
+ logger.debug(f"Skipping already transferred padding file (unchanged)")
356
+ return True
357
+
358
+ # Check if file is stable
359
+ is_stable, updated_stat = is_file_stable(sftp, remote_path)
360
+ if not is_stable:
361
+ logger.info(f"Padding file is still being written to, skipping")
362
+ return False
363
+
364
+ # Transfer the file
365
+ checksum = safe_transfer_file(sftp, remote_path, local_path)
366
+ mark_file_transferred(padding_file, updated_stat.st_size, updated_stat.st_mtime, checksum)
367
+
368
+ # Update state
369
+ update_transfer_state("last_padding_transfer", datetime.now().isoformat())
370
+
371
+ logger.info(f"Successfully transferred padding.npy file")
372
+ return True
373
+ except Exception as e:
374
+ logger.error(f"Error transferring padding file: {str(e)}")
375
+ return False
376
+
377
+
378
  def run_transfer_cycle():
379
  """Run a complete transfer cycle."""
380
  client = None
 
383
  client = create_ssh_client()
384
  sftp = client.open_sftp()
385
 
386
+ # Step 0: Transfer padding.npy file (critical for model operation)
387
+ padding_success = transfer_padding_file(sftp)
388
+ if not padding_success:
389
+ logger.warning("Failed to transfer padding.npy file, but continuing with other transfers")
390
+
391
  # Step 1: Transfer TAR files
392
  tar_count = transfer_tar_files(sftp)
393
 
 
404
  logger.warning("Skipping CSV transfer because PKL transfer failed")
405
  csv_success = False
406
 
407
+ return padding_success or tar_count > 0 or pkl_success or csv_success
408
  except Exception as e:
409
  logger.error(f"Error in transfer cycle: {str(e)}")
410
  return False