nbugs committed on
Commit
feffb8b
·
verified ·
1 Parent(s): 20446e0

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +21 -60
sync_data.sh CHANGED
@@ -1,5 +1,4 @@
1
  #!/bin/bash
2
-
3
  # 检查环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
@@ -7,7 +6,6 @@ if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
7
  exit 0
8
  fi
9
 
10
-
11
  # 检查Tunnel
12
  check_tunnel() {
13
  while ! curl -s http://localhost:7860 >/dev/null; do
@@ -16,30 +14,37 @@ check_tunnel() {
16
  done
17
  }
18
 
19
- # 检查环境变量
20
- if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
21
- echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
22
- exec /start.sh
23
- exit 0
24
- fi
25
-
26
-
27
  # 激活虚拟环境
28
  source /opt/venv/bin/activate
29
 
30
- # 上传备份
31
  upload_backup() {
32
  file_path="$1"
33
  file_name="$2"
34
  token="$HF_TOKEN"
35
  repo_id="$DATASET_ID"
36
-
37
  python3 -c "
38
  from huggingface_hub import HfApi
39
  import sys
40
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  api = HfApi(token='$token')
42
  try:
 
43
  api.upload_file(
44
  path_or_fileobj='$file_path',
45
  path_in_repo='$file_name',
@@ -47,75 +52,31 @@ try:
47
  repo_type='dataset'
48
  )
49
  print(f'Successfully uploaded $file_name')
50
- except Exception as e:
51
- print(f'Error uploading file: {str(e)}')
52
- "
53
- }
54
-
55
- # 下载最新备份
56
- download_latest_backup() {
57
- token="$HF_TOKEN"
58
- repo_id="$DATASET_ID"
59
-
60
- python3 -c "
61
- from huggingface_hub import HfApi
62
- import sys
63
- import os
64
- import tarfile
65
- import tempfile
66
- api = HfApi(token='$token')
67
- try:
68
- files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
69
- backup_files = [f for f in files if f.startswith('vaultwarden_backup_') and f.endswith('.tar.gz')]
70
-
71
- if not backup_files:
72
- print('No backup files found')
73
- sys.exit()
74
-
75
- latest_backup = sorted(backup_files)[-1]
76
 
77
- with tempfile.TemporaryDirectory() as temp_dir:
78
- filepath = api.hf_hub_download(
79
- repo_id='$repo_id',
80
- filename=latest_backup,
81
- repo_type='dataset',
82
- local_dir=temp_dir
83
- )
84
-
85
- if filepath and os.path.exists(filepath):
86
- with tarfile.open(filepath, 'r:gz') as tar:
87
- tar.extractall('/data')
88
- print(f'Successfully restored backup from {latest_backup}')
89
-
90
  except Exception as e:
91
- print(f'Error downloading backup: {str(e)}')
92
  "
93
  }
94
 
95
- # 首次启动时下载最新备份
96
- echo "Downloading latest backup from HuggingFace..."
97
- download_latest_backup
98
 
99
  # 同步函数
100
  sync_data() {
101
  while true; do
102
  echo "Starting sync process at $(date)"
103
-
104
  if [ -d /data ]; then
105
  timestamp=$(date +%Y%m%d_%H%M%S)
106
  backup_file="vaultwarden_backup_${timestamp}.tar.gz"
107
-
108
  # 压缩数据目录
109
  tar -czf "/tmp/${backup_file}" -C /data .
110
-
111
  echo "Uploading backup to HuggingFace..."
112
  upload_backup "/tmp/${backup_file}" "${backup_file}"
113
-
114
  rm -f "/tmp/${backup_file}"
115
  else
116
  echo "Data directory does not exist yet, waiting for next sync..."
117
  fi
118
-
119
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
120
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
121
  sleep $SYNC_INTERVAL
 
1
  #!/bin/bash
 
2
  # 检查环境变量
3
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
4
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
 
6
  exit 0
7
  fi
8
 
 
9
  # 检查Tunnel
10
  check_tunnel() {
11
  while ! curl -s http://localhost:7860 >/dev/null; do
 
14
  done
15
  }
16
 
 
 
 
 
 
 
 
 
17
  # 激活虚拟环境
18
  source /opt/venv/bin/activate
19
 
20
+ # 上传备份(新增备份数量管理)
21
  upload_backup() {
22
  file_path="$1"
23
  file_name="$2"
24
  token="$HF_TOKEN"
25
  repo_id="$DATASET_ID"
 
26
  python3 -c "
27
  from huggingface_hub import HfApi
28
  import sys
29
  import os
30
+
31
+ def manage_backups(api, repo_id, max_files=50):
32
+ files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
33
+ # 注意前缀改为vaultwarden_backup_
34
+ backup_files = [f for f in files if f.startswith('vaultwarden_backup_') and f.endswith('.tar.gz')]
35
+ backup_files.sort()
36
+ if len(backup_files) >= max_files:
37
+ files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
38
+ for file_to_delete in files_to_delete:
39
+ try:
40
+ api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
41
+ print(f'Deleted old backup: {file_to_delete}')
42
+ except Exception as e:
43
+ print(f'Error deleting {file_to_delete}: {str(e)}')
44
+
45
  api = HfApi(token='$token')
46
  try:
47
+ # 先上传新备份
48
  api.upload_file(
49
  path_or_fileobj='$file_path',
50
  path_in_repo='$file_name',
 
52
  repo_type='dataset'
53
  )
54
  print(f'Successfully uploaded $file_name')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
+ # 上传完成后执行备份数量管理
57
+ manage_backups(api, '$repo_id')
 
 
 
 
 
 
 
 
 
 
 
58
  except Exception as e:
59
+ print(f'Error uploading file: {str(e)}')
60
  "
61
  }
62
 
63
+ # [...] 后续的download_latest_backup和sync_data函数保持不变
 
 
64
 
65
  # 同步函数
66
  sync_data() {
67
  while true; do
68
  echo "Starting sync process at $(date)"
 
69
  if [ -d /data ]; then
70
  timestamp=$(date +%Y%m%d_%H%M%S)
71
  backup_file="vaultwarden_backup_${timestamp}.tar.gz"
 
72
  # 压缩数据目录
73
  tar -czf "/tmp/${backup_file}" -C /data .
 
74
  echo "Uploading backup to HuggingFace..."
75
  upload_backup "/tmp/${backup_file}" "${backup_file}"
 
76
  rm -f "/tmp/${backup_file}"
77
  else
78
  echo "Data directory does not exist yet, waiting for next sync..."
79
  fi
 
80
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
81
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
82
  sleep $SYNC_INTERVAL