nbugs committed on
Commit
ce6710a
·
verified ·
1 Parent(s): 983ba9f

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +54 -8
sync_data.sh CHANGED
@@ -5,10 +5,11 @@ if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
5
  exec node ./src/app/app.js
6
  exit 0
7
  fi
 
8
  # 激活虚拟环境
9
  . /opt/venv/bin/activate
10
 
11
- # 上传备份(新增备份数量管理)
12
  upload_backup() {
13
  file_path="$1"
14
  file_name="$2"
@@ -19,21 +20,24 @@ import os
19
 
20
  def manage_backups(api, repo_id, max_files=50):
21
  files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
22
- # 注意前缀改为electerm_backup_
23
  backup_files = [f for f in files if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]
24
  backup_files.sort()
25
  if len(backup_files) >= max_files:
26
- files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
27
  for file_to_delete in files_to_delete:
28
  try:
29
- api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
 
 
 
 
30
  print(f'Deleted old backup: {file_to_delete}')
31
  except Exception as e:
32
  print(f'Error deleting {file_to_delete}: {str(e)}')
33
 
34
  api = HfApi(token='$HF_TOKEN')
35
  try:
36
- # 先上传新备份
37
  api.upload_file(
38
  path_or_fileobj='$file_path',
39
  path_in_repo='$file_name',
@@ -42,14 +46,56 @@ try:
42
  )
43
  print(f'Successfully uploaded $file_name')
44
 
45
- # 上传完成后执行备份数量管理
46
  manage_backups(api, '$DATASET_ID')
47
  except Exception as e:
48
  print(f'Error uploading file: {str(e)}')
49
  "
50
  }
51
 
52
- # [...] 后续的download_latest_backup和sync_data函数保持不变
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  # 同步函数
55
  sync_data() {
@@ -58,7 +104,7 @@ sync_data() {
58
  if [ -d /app/electerm-web/data ]; then
59
  timestamp=$(date +%Y%m%d_%H%M%S)
60
  backup_file="electerm_backup_${timestamp}.tar.gz"
61
- # 压缩数据目录
62
  tar -czf "/tmp/${backup_file}" -C /app/electerm-web/data .
63
  echo "Uploading backup to HuggingFace..."
64
  upload_backup "/tmp/${backup_file}" "${backup_file}"
 
5
  exec node ./src/app/app.js
6
  exit 0
7
  fi
8
+
9
  # 激活虚拟环境
10
  . /opt/venv/bin/activate
11
 
12
+ # 上传备份(含自动清理旧备份)
13
  upload_backup() {
14
  file_path="$1"
15
  file_name="$2"
 
20
 
21
  def manage_backups(api, repo_id, max_files=50):
22
  files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
 
23
  backup_files = [f for f in files if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]
24
  backup_files.sort()
25
  if len(backup_files) >= max_files:
26
+ files_to_delete = backup_files[:len(backup_files) - max_files + 1]
27
  for file_to_delete in files_to_delete:
28
  try:
29
+ api.delete_file(
30
+ path_in_repo=file_to_delete,
31
+ repo_id=repo_id,
32
+ repo_type='dataset'
33
+ )
34
  print(f'Deleted old backup: {file_to_delete}')
35
  except Exception as e:
36
  print(f'Error deleting {file_to_delete}: {str(e)}')
37
 
38
  api = HfApi(token='$HF_TOKEN')
39
  try:
40
+ # 上传新备份
41
  api.upload_file(
42
  path_or_fileobj='$file_path',
43
  path_in_repo='$file_name',
 
46
  )
47
  print(f'Successfully uploaded $file_name')
48
 
49
+ # 清理旧备份
50
  manage_backups(api, '$DATASET_ID')
51
  except Exception as e:
52
  print(f'Error uploading file: {str(e)}')
53
  "
54
  }
55
 
56
# Restore the newest electerm backup from the HuggingFace dataset into
# /app/electerm-web/data.
#
# Globals:
#   HF_TOKEN   - HuggingFace access token (read)
#   DATASET_ID - id of the dataset repo that stores the backups (read)
# Outputs:
#   Progress and error messages on stdout.
# Returns:
#   The exit status of python3. Restore errors are caught and printed rather
#   than raised, so a missing or broken backup does not fail the caller.
download_latest_backup() {
  # The credentials travel through the environment and the script through a
  # quoted here-document, so neither value is ever spliced into Python source
  # (a quote character in a value can no longer break or inject code).
  HF_TOKEN="$HF_TOKEN" DATASET_ID="$DATASET_ID" python3 - <<'PYTHON'
from huggingface_hub import HfApi
import os
import shutil
import sys
import tarfile
import tempfile

DATA_DIR = '/app/electerm-web/data'
repo_id = os.environ['DATASET_ID']

api = HfApi(token=os.environ['HF_TOKEN'])
try:
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = [f for f in files if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]
    if not backup_files:
        print('No backup files found')
        sys.exit()
    # Backup names embed a YYYYmmdd_HHMMSS timestamp, so the lexicographically
    # greatest name is the most recent backup.
    latest_backup = max(backup_files)
    with tempfile.TemporaryDirectory() as temp_dir, \
            tempfile.TemporaryDirectory() as staging_dir:
        filepath = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir
        )
        if filepath and os.path.exists(filepath):
            # Unpack into a staging directory first: a truncated or corrupt
            # archive then fails here, before any live data has been touched.
            with tarfile.open(filepath, 'r:gz') as tar:
                if hasattr(tarfile, 'data_filter'):
                    # Refuse members that would escape the destination.
                    tar.extractall(staging_dir, filter='data')
                else:
                    tar.extractall(staging_dir)
            # Make sure the target directory exists.
            os.makedirs(DATA_DIR, exist_ok=True)
            # Clear the old data so stale files cannot linger. Directories
            # need rmtree; os.remove alone raises on them.
            for entry in os.listdir(DATA_DIR):
                target = os.path.join(DATA_DIR, entry)
                if os.path.isdir(target) and not os.path.islink(target):
                    shutil.rmtree(target)
                else:
                    os.remove(target)
            # Move the verified contents into place.
            for entry in os.listdir(staging_dir):
                shutil.move(os.path.join(staging_dir, entry),
                            os.path.join(DATA_DIR, entry))
            print(f'Successfully restored backup from {latest_backup}')
except Exception as e:
    print(f'Error downloading backup: {str(e)}')
PYTHON
}
95
+
96
+ # 首次启动时下载最新备份
97
+ echo "Downloading latest backup from HuggingFace..."
98
+ download_latest_backup
99
 
100
  # 同步函数
101
  sync_data() {
 
104
  if [ -d /app/electerm-web/data ]; then
105
  timestamp=$(date +%Y%m%d_%H%M%S)
106
  backup_file="electerm_backup_${timestamp}.tar.gz"
107
+ # 压缩数据目录(注意:下面的 tar 命令并未排除任何临时文件)
108
  tar -czf "/tmp/${backup_file}" -C /app/electerm-web/data .
109
  echo "Uploading backup to HuggingFace..."
110
  upload_backup "/tmp/${backup_file}" "${backup_file}"