nbugs committed on
Commit
9506a5d
·
verified ·
1 Parent(s): 7ceeeed

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +37 -94
sync_data.sh CHANGED
@@ -1,118 +1,61 @@
1
  #!/bin/bash
2
 
3
- # 检查环境变量
4
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
5
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
6
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
7
  exit 0
8
  fi
9
 
10
- # 登录HuggingFace (使用环境变量方式避免交互问题)
11
  export HUGGING_FACE_HUB_TOKEN=$HF_TOKEN
12
 
13
- # 确保data目录存在
14
- mkdir -p /app/data
15
-
16
- # 同步函数
17
- sync_data() {
18
- while true; do
19
- echo "Starting sync process at $(date)"
20
-
21
- # 创建临时压缩文件
22
- cd /app
23
- timestamp=$(date +%Y%m%d_%H%M%S)
24
- backup_file="backup_${timestamp}.tar.gz"
25
-
26
- # 检查data目录是否存在且不为空
27
- if [ -d "data" ] && [ "$(ls -A data 2>/dev/null)" ]; then
28
- tar -czf "/tmp/${backup_file}" data/
29
-
30
- # 上传备份并管理历史备份
31
- python3 -c "
32
- from huggingface_hub import HfApi
33
- import os
34
- def manage_backups(api, repo_id, max_files=50):
35
- files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
36
- backup_files = [f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')]
37
- backup_files.sort()
38
-
39
- if len(backup_files) >= max_files:
40
- files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
41
- for file_to_delete in files_to_delete:
42
- try:
43
- api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
44
- print(f'Deleted old backup: {file_to_delete}')
45
- except Exception as e:
46
- print(f'Error deleting {file_to_delete}: {str(e)}')
47
- try:
48
- api = HfApi()
49
- api.upload_file(
50
- path_or_fileobj='/tmp/${backup_file}',
51
- path_in_repo='${backup_file}',
52
- repo_id='${DATASET_ID}',
53
- repo_type='dataset'
54
- )
55
- print('Backup uploaded successfully')
56
-
57
- manage_backups(api, '${DATASET_ID}')
58
- except Exception as e:
59
- print(f'Backup failed: {str(e)}')
60
- "
61
- # 清理临时文件
62
- rm -f "/tmp/${backup_file}"
63
- echo "Backup completed"
64
- else
65
- echo "No data to backup or data directory not found"
66
- fi
67
-
68
- # 设置同步间隔
69
- SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
70
- echo "Next sync in ${SYNC_INTERVAL} seconds..."
71
- sleep $SYNC_INTERVAL
72
- done
73
- }
74
-
75
  restore_latest() {
76
- echo "Attempting to restore latest backup..."
77
  python3 -c "
78
  from huggingface_hub import HfApi
79
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- try:
82
- api = HfApi()
83
- files = api.list_repo_files('${DATASET_ID}', repo_type='dataset')
84
- backup_files = [f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')]
 
 
85
 
86
- if backup_files:
87
- latest = sorted(backup_files)[-1]
88
- api.hf_hub_download(
89
- repo_id='${DATASET_ID}',
90
- filename=latest,
91
- repo_type='dataset',
92
- local_dir='/tmp'
93
- )
94
- os.system(f'tar -xzf /tmp/{latest} -C /app')
95
- os.remove(f'/tmp/{latest}')
96
- print(f'Restored from {latest}')
97
- else:
98
- print('No backup found')
99
- except Exception as e:
100
- print(f'Restore failed: {str(e)}')
101
  "
 
 
 
 
 
 
102
  }
103
 
104
-
105
- # 主程序
106
  (
107
- # 安装huggingface_hub库
108
- pip install --quiet huggingface_hub
109
-
110
- # 尝试恢复
111
  restore_latest
112
-
113
- # 启动同步进程
114
  sync_data &
115
-
116
- # 启动主应用(适配DeepClaude的启动命令)
117
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
118
  ) 2>&1 | tee -a /app/data/backup.log
 
1
  #!/bin/bash
2
 
 
3
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
4
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
5
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
6
  exit 0
7
  fi
8
 
 
9
  export HUGGING_FACE_HUB_TOKEN=$HF_TOKEN
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  restore_latest() {
12
+ echo "Restoring latest backup..."
13
  python3 -c "
14
  from huggingface_hub import HfApi
15
  import os
16
+ api = HfApi()
17
+ files = api.list_repo_files(repo_id='${DATASET_ID}', repo_type='dataset')
18
+ backup_files = sorted([f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')])
19
+ if backup_files:
20
+ latest = backup_files[-1]
21
+ api.hf_hub_download(repo_id='${DATASET_ID}', filename=latest, repo_type='dataset', local_dir='/tmp')
22
+ os.system(f'tar -xzf /tmp/{latest} -C /app/app')
23
+ os.remove(f'/tmp/{latest}')
24
+ print(f'Restored from {latest}')
25
+ else:
26
+ print('No backup found')
27
+ "
28
+ }
29
 
30
# Archive /app/app and upload it to the Hugging Face dataset repo, forever.
#
# Each round uploads backup_<timestamp>.tar.gz, then prunes the repo down to
# the 50 newest backup archives. Intended to be started as a background job.
#
# Globals:  DATASET_ID    (read) - id of the dataset repo receiving backups
#           SYNC_INTERVAL (read) - seconds between rounds, default 7200
# Outputs:  progress on stdout; failure details on stderr
# Returns:  never returns (infinite loop)
sync_data() {
  local timestamp backup_file archive_path

  while true; do
    echo "Starting backup at $(date)"
    timestamp=$(date +%Y%m%d_%H%M%S)
    backup_file="backup_${timestamp}.tar.gz"
    archive_path="/tmp/${backup_file}"

    if [[ -d /app/app && -n "$(ls -A /app/app 2>/dev/null)" ]]; then
      # -C keeps member paths as app/... without depending on a successful cd.
      if tar -czf "$archive_path" -C /app app; then
        # Values are passed via environment/argv rather than interpolated
        # into the Python source.
        DATASET_ID="$DATASET_ID" python3 -c '
import os
import sys

from huggingface_hub import HfApi

KEEP = 50
repo_id = os.environ["DATASET_ID"]
archive_path, name_in_repo = sys.argv[1:3]
try:
    api = HfApi()
    api.upload_file(path_or_fileobj=archive_path, path_in_repo=name_in_repo, repo_id=repo_id, repo_type="dataset")
    print("Backup uploaded successfully")
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    # Same name filter as the restore step, so unrelated files are never pruned.
    backups = sorted(f for f in files if f.startswith("backup_") and f.endswith(".tar.gz"))
    for old_backup in backups[:-KEEP]:
        api.delete_file(path_in_repo=old_backup, repo_id=repo_id, repo_type="dataset")
        print(f"Deleted old backup: {old_backup}")
except Exception as exc:
    print(f"Backup failed: {exc}", file=sys.stderr)
    sys.exit(1)
' "$archive_path" "$backup_file"
      else
        echo "Failed to create ${archive_path}; skipping upload" >&2
      fi
      # Remove the archive whether or not tar/upload succeeded.
      rm -f -- "$archive_path"
    else
      echo "No data to backup"
    fi
    sleep "${SYNC_INTERVAL:-7200}"
  done
}
56
 
 
 
57
  (
 
 
 
 
58
  restore_latest
 
 
59
  sync_data &
 
 
60
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
61
  ) 2>&1 | tee -a /app/data/backup.log