nbugs committed on
Commit
5e83f82
·
verified ·
1 Parent(s): ce6710a

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +18 -21
sync_data.sh CHANGED
@@ -1,4 +1,5 @@
1
  #!/bin/sh
 
2
  # 检查环境变量
3
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
4
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
@@ -9,35 +10,30 @@ fi
9
  # 激活虚拟环境
10
  . /opt/venv/bin/activate
11
 
12
- # 上传备份(含自动清理旧备份)
13
  upload_backup() {
14
  file_path="$1"
15
  file_name="$2"
 
16
  python3 -c "
17
  from huggingface_hub import HfApi
18
  import sys
19
  import os
20
-
21
  def manage_backups(api, repo_id, max_files=50):
22
  files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
23
  backup_files = [f for f in files if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]
24
  backup_files.sort()
 
25
  if len(backup_files) >= max_files:
26
- files_to_delete = backup_files[:len(backup_files) - max_files + 1]
27
  for file_to_delete in files_to_delete:
28
  try:
29
- api.delete_file(
30
- path_in_repo=file_to_delete,
31
- repo_id=repo_id,
32
- repo_type='dataset'
33
- )
34
  print(f'Deleted old backup: {file_to_delete}')
35
  except Exception as e:
36
  print(f'Error deleting {file_to_delete}: {str(e)}')
37
-
38
  api = HfApi(token='$HF_TOKEN')
39
  try:
40
- # 上传新备份
41
  api.upload_file(
42
  path_or_fileobj='$file_path',
43
  path_in_repo='$file_name',
@@ -46,14 +42,13 @@ try:
46
  )
47
  print(f'Successfully uploaded $file_name')
48
 
49
- # 清理旧备份
50
  manage_backups(api, '$DATASET_ID')
51
  except Exception as e:
52
  print(f'Error uploading file: {str(e)}')
53
  "
54
  }
55
 
56
- # 下载最新备份(补充完整)
57
  download_latest_backup() {
58
  python3 -c "
59
  from huggingface_hub import HfApi
@@ -61,15 +56,17 @@ import sys
61
  import os
62
  import tarfile
63
  import tempfile
64
-
65
  api = HfApi(token='$HF_TOKEN')
66
  try:
67
  files = api.list_repo_files(repo_id='$DATASET_ID', repo_type='dataset')
68
  backup_files = [f for f in files if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]
 
69
  if not backup_files:
70
  print('No backup files found')
71
  sys.exit()
 
72
  latest_backup = sorted(backup_files)[-1]
 
73
  with tempfile.TemporaryDirectory() as temp_dir:
74
  filepath = api.hf_hub_download(
75
  repo_id='$DATASET_ID',
@@ -77,17 +74,12 @@ try:
77
  repo_type='dataset',
78
  local_dir=temp_dir
79
  )
 
80
  if filepath and os.path.exists(filepath):
81
- # 确保目标目录存在
82
- os.makedirs('/app/electerm-web/data', exist_ok=True)
83
- # 清空旧数据(防止残留文件干扰)
84
- if os.listdir('/app/electerm-web/data'):
85
- for f in os.listdir('/app/electerm-web/data'):
86
- os.remove(os.path.join('/app/electerm-web/data', f))
87
- # 解压备份
88
  with tarfile.open(filepath, 'r:gz') as tar:
89
  tar.extractall('/app/electerm-web/data')
90
  print(f'Successfully restored backup from {latest_backup}')
 
91
  except Exception as e:
92
  print(f'Error downloading backup: {str(e)}')
93
  "
@@ -101,17 +93,22 @@ download_latest_backup
101
  sync_data() {
102
  while true; do
103
  echo "Starting sync process at $(date)"
 
104
  if [ -d /app/electerm-web/data ]; then
105
  timestamp=$(date +%Y%m%d_%H%M%S)
106
  backup_file="electerm_backup_${timestamp}.tar.gz"
107
- # 压缩数据目录(排除临时文件)
 
108
  tar -czf "/tmp/${backup_file}" -C /app/electerm-web/data .
 
109
  echo "Uploading backup to HuggingFace..."
110
  upload_backup "/tmp/${backup_file}" "${backup_file}"
 
111
  rm -f "/tmp/${backup_file}"
112
  else
113
  echo "Data directory does not exist yet, waiting for next sync..."
114
  fi
 
115
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
116
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
117
  sleep $SYNC_INTERVAL
 
1
  #!/bin/sh
2
+
3
  # 检查环境变量
4
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
5
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
 
10
  # 激活虚拟环境
11
  . /opt/venv/bin/activate
12
 
13
+ # 上传备份
14
  upload_backup() {
15
  file_path="$1"
16
  file_name="$2"
17
+
18
  python3 -c "
19
  from huggingface_hub import HfApi
20
  import sys
21
  import os
 
22
  def manage_backups(api, repo_id, max_files=50):
23
  files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
24
  backup_files = [f for f in files if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]
25
  backup_files.sort()
26
+
27
  if len(backup_files) >= max_files:
28
+ files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
29
  for file_to_delete in files_to_delete:
30
  try:
31
+ api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
 
 
 
 
32
  print(f'Deleted old backup: {file_to_delete}')
33
  except Exception as e:
34
  print(f'Error deleting {file_to_delete}: {str(e)}')
 
35
  api = HfApi(token='$HF_TOKEN')
36
  try:
 
37
  api.upload_file(
38
  path_or_fileobj='$file_path',
39
  path_in_repo='$file_name',
 
42
  )
43
  print(f'Successfully uploaded $file_name')
44
 
 
45
  manage_backups(api, '$DATASET_ID')
46
  except Exception as e:
47
  print(f'Error uploading file: {str(e)}')
48
  "
49
  }
50
 
51
+ # 下载最新备份
52
  download_latest_backup() {
53
  python3 -c "
54
  from huggingface_hub import HfApi
 
56
  import os
57
  import tarfile
58
  import tempfile
 
59
  api = HfApi(token='$HF_TOKEN')
60
  try:
61
  files = api.list_repo_files(repo_id='$DATASET_ID', repo_type='dataset')
62
  backup_files = [f for f in files if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]
63
+
64
  if not backup_files:
65
  print('No backup files found')
66
  sys.exit()
67
+
68
  latest_backup = sorted(backup_files)[-1]
69
+
70
  with tempfile.TemporaryDirectory() as temp_dir:
71
  filepath = api.hf_hub_download(
72
  repo_id='$DATASET_ID',
 
74
  repo_type='dataset',
75
  local_dir=temp_dir
76
  )
77
+
78
  if filepath and os.path.exists(filepath):
 
 
 
 
 
 
 
79
  with tarfile.open(filepath, 'r:gz') as tar:
80
  tar.extractall('/app/electerm-web/data')
81
  print(f'Successfully restored backup from {latest_backup}')
82
+
83
  except Exception as e:
84
  print(f'Error downloading backup: {str(e)}')
85
  "
 
93
  sync_data() {
94
  while true; do
95
  echo "Starting sync process at $(date)"
96
+
97
  if [ -d /app/electerm-web/data ]; then
98
  timestamp=$(date +%Y%m%d_%H%M%S)
99
  backup_file="electerm_backup_${timestamp}.tar.gz"
100
+
101
+ # 压缩数据目录
102
  tar -czf "/tmp/${backup_file}" -C /app/electerm-web/data .
103
+
104
  echo "Uploading backup to HuggingFace..."
105
  upload_backup "/tmp/${backup_file}" "${backup_file}"
106
+
107
  rm -f "/tmp/${backup_file}"
108
  else
109
  echo "Data directory does not exist yet, waiting for next sync..."
110
  fi
111
+
112
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
113
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
114
  sleep $SYNC_INTERVAL