nbugs committed on
Commit
d905c44
·
verified ·
1 Parent(s): 96c26ef

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +25 -26
sync_data.sh CHANGED
@@ -1,7 +1,7 @@
1
  #!/bin/bash
2
 
3
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
4
- echo "Starting without backup - missing HF_TOKEN or DATASET_ID"
5
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
6
  exit 0
7
  fi
@@ -9,7 +9,7 @@ fi
9
  export HUGGING_FACE_HUB_TOKEN=$HF_TOKEN
10
 
11
  restore_latest() {
12
- echo "正在检查是否存在备份..."
13
  python3 -c "
14
  from huggingface_hub import HfApi
15
  import os
@@ -20,55 +20,54 @@ backup_files = sorted([f for f in files if f.startswith('backup_') and f.endswit
20
 
21
  if backup_files:
22
  latest = backup_files[-1]
23
- print(f'找到备份: {latest},正在下载...')
24
- api.hf_hub_download(repo_id='${DATASET_ID}', filename=latest, repo_type='dataset', local_dir='/tmp')
25
 
26
  backup_path = f'/tmp/{latest}'
27
  if os.path.exists(backup_path):
28
- print(f'备份文件已下载到: {backup_path}, 文件大小: {os.path.getsize(backup_path)} bytes')
 
 
 
 
 
 
 
 
29
  else:
30
- print(f'备份文件 {backup_path} 不存在!')
31
-
32
- # 明确禁止tar恢复所有元数据(权限、用户、组、时间戳)
33
- result = os.system(f'tar --no-same-owner --no-same-permissions --touch -xzvf {backup_path} -C /app || true')
34
- exit_code = result >> 8
35
- if exit_code == 0:
36
- print(f'成功从 {latest} 恢复数据!')
37
- else:
38
- print(f'解压过程中出现警告,但数据已恢复,请检查目录确认。tar返回码: {result}, 退出码: {exit_code}')
39
- os.remove(backup_path)
40
  else:
41
- print('未找到备份文件,已跳过数据恢复步骤')
42
  "
43
  }
44
 
45
  sync_data() {
46
- echo "Waiting ${SYNC_INTERVAL:-7200} seconds before first backup..."
47
  sleep ${SYNC_INTERVAL:-7200}
48
 
49
  while true; do
50
- echo "Starting backup at $(date)"
51
- cd /app
52
  timestamp=$(date +%Y%m%d_%H%M%S)
53
  backup_file="backup_${timestamp}.tar.gz"
54
 
55
- if [ -d "app" ] && [ "$(ls -A app 2>/dev/null)" ]; then
56
- tar -czf "/tmp/${backup_file}" app/
57
  python3 -c "
58
  from huggingface_hub import HfApi
59
  api = HfApi()
60
  api.upload_file('/tmp/${backup_file}', '${backup_file}', '${DATASET_ID}', repo_type='dataset')
61
- print('Backup uploaded successfully')
62
  backup_files = sorted([f for f in api.list_repo_files('${DATASET_ID}', repo_type='dataset') if f.startswith('backup_')])
63
  for old_backup in backup_files[:-50]:
64
- api.delete_file(path_in_repo=old_backup, repo_id='${DATASET_ID}', repo_type='dataset')
65
- print(f'Deleted old backup: {old_backup}')
66
  "
67
  rm -f "/tmp/${backup_file}"
68
  else
69
- echo "No data to backup"
70
  fi
71
- echo "Next backup in ${SYNC_INTERVAL:-7200} seconds..."
72
  sleep ${SYNC_INTERVAL:-7200}
73
  done
74
  }
 
1
  #!/bin/bash
2
 
3
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
4
+ echo "缺少环境变量HF_TOKENDATASET_ID,启动服务但不启用备份功能"
5
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
6
  exit 0
7
  fi
 
9
  export HUGGING_FACE_HUB_TOKEN=$HF_TOKEN
10
 
11
  restore_latest() {
12
+ echo "正在检查备份..."
13
  python3 -c "
14
  from huggingface_hub import HfApi
15
  import os
 
20
 
21
  if backup_files:
22
  latest = backup_files[-1]
23
+ print(f'找到备份文件: {latest}, 开始下载...')
24
+ api.hf_hub_download('${DATASET_ID}', latest, repo_type='dataset', local_dir='/tmp')
25
 
26
  backup_path = f'/tmp/{latest}'
27
  if os.path.exists(backup_path):
28
+ print(f'备份文件已下载: {backup_path}, 大小: {os.path.getsize(backup_path)} bytes')
29
+ # 解压到/app/app目录,避免路径嵌套和权限问题
30
+ result = os.system(f'tar --no-same-owner --no-same-permissions --touch --warning=no-timestamp -xzf {backup_path} -C /app/app || true')
31
+ exit_code = result >> 8
32
+ if exit_code == 0:
33
+ print(f'成功恢复数据!')
34
+ else:
35
+ print(f'解压时出现次要警告或错误,请检查数据完整性,tar返回码: {result}')
36
+ os.remove(backup_path)
37
  else:
38
+ print('下载备份文件失败!')
 
 
 
 
 
 
 
 
 
39
  else:
40
+ print('未发现任何备份文件,跳过恢复步骤')
41
  "
42
  }
43
 
44
  sync_data() {
45
+ echo "启动后首次备份将在${SYNC_INTERVAL:-7200}秒后执行"
46
  sleep ${SYNC_INTERVAL:-7200}
47
 
48
  while true; do
49
+ echo "开始备份: $(date)"
50
+ cd /app/app
51
  timestamp=$(date +%Y%m%d_%H%M%S)
52
  backup_file="backup_${timestamp}.tar.gz"
53
 
54
+ if [ "$(ls -A . 2>/dev/null)" ]; then
55
+ tar -czf "/tmp/${backup_file}" ./
56
  python3 -c "
57
  from huggingface_hub import HfApi
58
  api = HfApi()
59
  api.upload_file('/tmp/${backup_file}', '${backup_file}', '${DATASET_ID}', repo_type='dataset')
60
+ print('备份上传成功')
61
  backup_files = sorted([f for f in api.list_repo_files('${DATASET_ID}', repo_type='dataset') if f.startswith('backup_')])
62
  for old_backup in backup_files[:-50]:
63
+ api.delete_file(old_backup, '${DATASET_ID}', repo_type='dataset')
64
+ print(f'删除旧备份: {old_backup}')
65
  "
66
  rm -f "/tmp/${backup_file}"
67
  else
68
+ echo "无数据需要备份"
69
  fi
70
+ echo "下次备份将在${SYNC_INTERVAL:-7200}秒后执行"
71
  sleep ${SYNC_INTERVAL:-7200}
72
  done
73
  }