nbugs committed on
Commit
4ba79ba
·
verified ·
1 Parent(s): 006016c

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +210 -134
sync_data.sh CHANGED
@@ -1,160 +1,236 @@
1
  #!/bin/bash
2
- # sync_data.sh - 负责数据恢复和启动后台同步任务
3
 
4
- # 遇到错误时立即退出(在恢复阶段)
5
- set -e
6
-
7
- # --- 配置 ---
8
- # WebDAV 相关环境变量由 Hugging Face Secrets 提供:
9
- # WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD
10
- # Hugging Face 相关环境变量由 Hugging Face Secrets 提供:
11
- # HF_TOKEN, DATASET_ID
12
- WEBDAV_BASE_PATH="openwebui" # WebDAV上的基础目录名
13
- LOCAL_DATA_DIR="./data" # 本地数据存储目录
14
- DB_FILENAME="webui.db" # 数据库文件名
15
- WEBDAV_SYNC_INTERVAL=${WEBDAV_SYNC_INTERVAL:-7200} # WebDAV同步间隔(秒),默认2小时
16
- HF_SYNC_INTERVAL=${HF_SYNC_INTERVAL:-7200} # Hugging Face同步间隔(秒),默认2小时
17
- WEBDAV_CLEANUP_DAYS=7 # WebDAV保留最近多少天的备份
18
- HF_MAX_BACKUPS=50 # Hugging Face保留的最大备份数量 (通过hf_sync.py控制)
19
-
20
- # --- 检查环境变量 ---
21
  if [ -z "$WEBDAV_URL" ] || [ -z "$WEBDAV_USERNAME" ] || [ -z "$WEBDAV_PASSWORD" ]; then
22
- echo "错误:缺少必要的WebDAV环境变量 (WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD)。"
23
  exit 1
24
  fi
 
25
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
26
- echo "错误:缺少必要的Hugging Face环境变量 (HF_TOKEN, DATASET_ID)。"
27
  exit 1
28
  fi
29
 
30
- # 确保WebDAV URL末尾没有斜杠,方便拼接
31
- WEBDAV_URL=$(echo "$WEBDAV_URL" | sed 's:/*$::')
32
- WEBDAV_FULL_PATH="$WEBDAV_URL/$WEBDAV_BASE_PATH"
33
 
34
- # --- 函数定义 ---
35
- # 计算文件MD5哈希
36
  get_file_hash() {
37
  local file_path="$1"
38
  if [ -f "$file_path" ]; then
39
  md5sum "$file_path" | awk '{print $1}'
40
  else
41
- echo "not_found" # 返回特殊值表示文件不存在
42
  fi
43
  }
44
 
45
- # --- 初始化和恢复 ---
46
- echo "[恢复] 开始初始化数据恢复流程..."
47
- mkdir -p "$LOCAL_DATA_DIR"
48
- LOCAL_DB_PATH="$LOCAL_DATA_DIR/$DB_FILENAME"
49
- recovered=false
50
-
51
- # 1. 尝试从 WebDAV 恢复最新的日期文件
52
- echo "[恢复] 尝试从 WebDAV 恢复 ($WEBDAV_FULL_PATH)..."
53
- # 使用 curl 获取目录列表 (PROPFIND)。注意:解析 XML 可能不稳定
54
- # curl -s -f: 静默模式,遇到HTTP错误时失败退出
55
- webdav_list_output=$(curl -s -f --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -X PROPFIND -H "Depth: 1" "$WEBDAV_FULL_PATH/" || echo "WebDAV PROPFIND failed")
56
-
57
- if [[ "$webdav_list_output" != "WebDAV PROPFIND failed" ]]; then
58
- # 提取日期格式的文件名 (webui_YYYYMMDD.db)
59
- webdav_files=$(echo "$webdav_list_output" | grep '<d:href>' | sed 's#<d:href>[^<]*/$[^<]*$</d:href>#\1#' | grep -E '^webui_[0-9]{8}\.db$')
60
- if [ -n "$webdav_files" ]; then
61
- latest_webdav_file=$(echo "$webdav_files" | sort -r | head -n 1)
62
- echo "[恢复] 在WebDAV找到最新的日期文件: $latest_webdav_file"
63
- download_url="$WEBDAV_FULL_PATH/$latest_webdav_file"
64
- temp_db_path="${LOCAL_DB_PATH}.webdav.tmp"
65
- echo "[恢复] 尝试下载: $download_url"
66
- if curl -L -f -s -o "$temp_db_path" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$download_url"; then
67
- # 验证下载的文件是否有效(至少非空)
68
- if [ -s "$temp_db_path" ]; then
69
- mv "$temp_db_path" "$LOCAL_DB_PATH"
70
- echo "[恢复] 成功从 WebDAV ($latest_webdav_file) 恢复数据库。"
71
- recovered=true
72
- else
73
- echo "[恢复] WebDAV下载的文件为空或无效,删除临时文件。"
74
- rm -f "$temp_db_path"
75
- fi
76
- else
77
- echo "[恢复] 从 WebDAV 下载 $latest_webdav_file 失败。"
78
- rm -f "$temp_db_path"
79
- fi
80
- else
81
- echo "[恢复] 在WebDAV路径 ($WEBDAV_FULL_PATH/) 未找到符合 'webui_YYYYMMDD.db' 格式的文件。"
82
- # 可选:尝试恢复主文件 webui.db (如果存在)
83
- main_db_url="$WEBDAV_FULL_PATH/$DB_FILENAME"
84
- echo "[恢复] 尝试从WebDAV恢复主文件: $main_db_url"
85
- temp_db_path="${LOCAL_DB_PATH}.webdav_main.tmp"
86
- if curl -L -f -s -o "$temp_db_path" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_db_url"; then
87
- if [ -s "$temp_db_path" ]; then
88
- mv "$temp_db_path" "$LOCAL_DB_PATH"
89
- echo "[恢复] 成功从 WebDAV (主文件 $DB_FILENAME) 恢复数据库。"
90
- recovered=true
91
- else
92
- echo "[恢复] WebDAV下载的主文件为空或无效,删除临时文件。"
93
- rm -f "$temp_db_path"
94
- fi
95
- else
96
- echo "[恢复] WebDAV下载主文件 $DB_FILENAME 失败。"
97
- rm -f "$temp_db_path"
98
- fi
99
- fi
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  else
101
- echo "[恢复] 无法访问WebDAV目录 ($WEBDAV_FULL_PATH/)。"
102
- fi
103
-
104
- # 2. 如果WebDAV恢复失败,尝试从 Hugging Face 恢复
105
- if [ "$recovered" != true ]; then
106
- echo "[恢复] WebDAV恢复失败或未找到文件,尝试从 Hugging Face ($DATASET_ID) 恢复..."
107
- # 调用 Python 脚本进行下载,传递目标目录
108
- if python3 /app/hf_sync.py download "$HF_TOKEN" "$DATASET_ID" "$LOCAL_DATA_DIR"; then
109
- echo "[恢复] 成功从 Hugging Face 恢复。"
110
- recovered=true
111
- else
112
- echo "[恢复] 从 Hugging Face 恢复也失败了。"
113
- fi
114
- fi
115
-
116
- # 3. 如果所有恢复都失败,并且本地数据库文件不存在,则创建空文件
117
- if [ "$recovered" != true ] && [ ! -f "$LOCAL_DB_PATH" ]; then
118
- echo "[恢复] 所有恢复方式均失败,创建空的数据库文件: $LOCAL_DB_PATH"
119
- touch "$LOCAL_DB_PATH"
120
  fi
121
 
122
- echo "[恢复] 数据恢复流程结束。"
123
-
124
- # --- 后台同步任务 ---
125
-
126
- # WebDAV 同步函数
127
  webdav_sync() {
128
- echo "[WebDAV Sync] 后台任务启动,间隔: ${WEBDAV_SYNC_INTERVAL} 秒"
129
- local last_uploaded_hash=""
130
-
131
  while true; do
132
- timestamp=$(date '+%Y-%m-%d %H:%M:%S')
133
- echo "[WebDAV Sync @ $timestamp] 开始检查..."
134
-
135
- if [ -f "$LOCAL_DB_PATH" ]; then
136
- current_hash=$(get_file_hash "$LOCAL_DB_PATH")
137
- echo "[WebDAV Sync @ $timestamp] 本地文件哈希: $current_hash"
138
-
139
- if [ "$current_hash" == "$last_uploaded_hash" ]; then
140
- echo "[WebDAV Sync @ $timestamp] 文件未变化,跳过上传。"
 
 
 
 
 
 
 
 
 
 
141
  else
142
- echo "[WebDAV Sync @ $timestamp] 检测到文件变化或首次运行,准备上传到 WebDAV..."
143
- current_date=$(date +'%Y%m%d')
144
- dated_filename="webui_${current_date}.db"
145
- upload_dated_url="$WEBDAV_FULL_PATH/$dated_filename"
146
- upload_main_url="$WEBDAV_FULL_PATH/$DB_FILENAME"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- # 上传带日期的文件
149
- echo "[WebDAV Sync @ $timestamp] 上传日期文件: $upload_dated_url"
150
- if curl -L -f -T "$LOCAL_DB_PATH" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_dated_url"; then
151
- echo "[WebDAV Sync @ $timestamp] 日期文件上传成功: $dated_filename"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
- # 上传主文件 (覆盖)
154
- echo "[WebDAV Sync @ $timestamp] 更新主文件: $upload_main_url"
155
- if curl -L -f -T "$LOCAL_DB_PATH" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_main_url"; then
156
- echo "[WebDAV Sync @ $timestamp] 主文件更新成功。"
157
- last_uploaded_hash=$current_hash # 只有主文件也成功才更新哈希
158
- else
159
- echo "[WebDAV Sync @ $timestamp] 错误:主文件更新失败!"
160
- # 主文件失败,不更新哈希,下次会
 
1
  #!/bin/bash
 
2
 
3
# Preflight: abort early unless every credential needed for backup and
# restore is present in the environment.
#   WebDAV:       WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD
#   Hugging Face: HF_TOKEN, DATASET_ID
# Diagnostics go to stderr; the script exits with status 1 on a missing value.
if [ -z "$WEBDAV_URL" ] || [ -z "$WEBDAV_USERNAME" ] || [ -z "$WEBDAV_PASSWORD" ]; then
  # The variable names are space-separated so the message is readable
  # (the first two names were previously run together).
  echo "缺少必要的环境变量: WEBDAV_URL WEBDAV_USERNAME WEBDAV_PASSWORD" >&2
  exit 1
fi

if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
  echo "缺少必要的环境变量: HF_TOKEN DATASET_ID" >&2
  exit 1
fi

# Local directory that holds the database file (./data/webui.db).
mkdir -p ./data
 
16
 
17
# Print the MD5 digest of a file.
# Arguments: $1 - path of the file to hash
# Outputs:   the hex digest on stdout, or the sentinel text "文件不存在"
#            when $1 is not a regular file
get_file_hash() {
  local target="$1"

  # Guard clause: nothing to hash, emit the sentinel and stop.
  if [ ! -f "$target" ]; then
    echo "文件不存在"
    return
  fi

  # md5sum prints "<digest>  <name>"; keep only the digest column.
  md5sum "$target" | awk '{print $1}'
}
26
 
27
+ # 创建 Hugging Face 同步脚本
28
+ cat > /tmp/hf_sync.py << 'EOL'
29
+ from huggingface_hub import HfApi
30
+ import sys
31
+ import os
32
+
33
+ def manage_backups(api, repo_id, max_files=50):
34
+ """管理备份文件,保留最新的max_files个文件"""
35
+ files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
36
+ backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
37
+
38
+ # 按日期分组文件(从文件名中提取日期)
39
+ backup_by_date = {}
40
+ for file in backup_files:
41
+ try:
42
+ date_part = file.split('_')[2].split('.')[0]
43
+ backup_by_date[date_part] = file
44
+ except:
45
+ continue
46
+
47
+ # 保留最新的max_files个文件
48
+ sorted_dates = sorted(backup_by_date.keys(), reverse=True)
49
+ if len(sorted_dates) > max_files:
50
+ files_to_delete = [backup_by_date[date] for date in sorted_dates[max_files:]]
51
+ for file in files_to_delete:
52
+ api.delete_file(path_in_repo=file, repo_id=repo_id, repo_type="dataset")
53
+ print(f"已删除旧备份: {file}")
54
+
55
+ def upload_backup(file_path, file_name, token, repo_id):
56
+ """上传备份文件到Hugging Face"""
57
+ api = HfApi(token=token)
58
+ try:
59
+ # 删除同名文件(如有)
60
+ files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
61
+ if file_name in files:
62
+ api.delete_file(path_in_repo=file_name, repo_id=repo_id, repo_type="dataset")
63
+ print(f"已删除同名文件: {file_name}")
64
+
65
+ # 上传新文件
66
+ api.upload_file(
67
+ path_or_fileobj=file_path,
68
+ path_in_repo=file_name,
69
+ repo_id=repo_id,
70
+ repo_type="dataset"
71
+ )
72
+ print(f"成功上传: {file_name}")
73
+ manage_backups(api, repo_id)
74
+ except Exception as e:
75
+ print(f"上传失败: {str(e)}")
76
+
77
+ def download_latest_backup(token, repo_id):
78
+ """Hugging Face下载最新备份"""
79
+ api = HfApi(token=token)
80
+ try:
81
+ files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
82
+ backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
83
+ if not backup_files:
84
+ return False
85
+
86
+ # 找到最新的文件(按日期排序)
87
+ latest_file = max(backup_files, key=lambda x: x.split('_')[2].split('.')[0])
88
+ file_path = api.hf_hub_download(
89
+ repo_id=repo_id,
90
+ filename=latest_file,
91
+ repo_type="dataset"
92
+ )
93
+
94
+ if file_path and os.path.exists(file_path):
95
+ os.makedirs('./data', exist_ok=True)
96
+ os.system(f'cp "{file_path}" ./data/webui.db')
97
+ print(f"成功从Hugging Face恢复: {latest_file}")
98
+ return True
99
+ else:
100
+ return False
101
+ except Exception as e:
102
+ print(f"下载失败: {str(e)}")
103
+ return False
104
+
105
+ if __name__ == "__main__":
106
+ action = sys.argv[1]
107
+ token = sys.argv[2]
108
+ repo_id = sys.argv[3]
109
+
110
+ if action == "upload":
111
+ file_path = sys.argv[4]
112
+ file_name = sys.argv[5]
113
+ upload_backup(file_path, file_name, token, repo_id)
114
+ elif action == "download":
115
+ download_latest_backup(token, repo_id)
116
+ EOL
117
+
118
# Initial restore of ./data/webui.db, tried in this order:
#   1. newest dated backup (webui_YYYYMMDD.db) on WebDAV
#   2. newest backup in the Hugging Face dataset
#   3. an empty database file, only when no database exists afterwards
# Globals written: webdav_files (newline-separated backup file names),
#                  latest_file, download_url, restore_tmp, restored
echo "初始化数据恢复..."
echo "WebDAV URL: $WEBDAV_URL"
echo "WebDAV 用户名: $WEBDAV_USERNAME"
# Fixed-width mask: the log must not reveal the password or its length.
echo "WebDAV 密码: ********"

mkdir -p ./data

echo "尝试从 WebDAV 获取文件列表..."
# Extract bare file names straight from the PROPFIND reply. Matching the name
# itself (instead of stripping <d:href> tags) works whether the XML is on one
# line or many, and regardless of the namespace prefix the server uses.
# -f makes curl fail on HTTP errors so an error page is never parsed.
webdav_files=$(
  curl -s -f -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" \
    -H "Depth: 1" "$WEBDAV_URL/openwebui/" \
    | grep -oE 'webui_[0-9]{8}\.db' \
    | sort -u
)

restored=false

if [ -n "$webdav_files" ]; then
  # YYYYMMDD sorts lexically, so the reverse sort puts the newest first.
  latest_file=$(printf '%s\n' "$webdav_files" | sort -r | head -n 1)
  download_url="$WEBDAV_URL/openwebui/$latest_file"
  # Download to a temp file so a failed or empty transfer never replaces
  # an existing database.
  restore_tmp="./data/webui.db.restore.tmp"
  if curl -L -f -sS -o "$restore_tmp" \
       --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$download_url" \
     && [ -s "$restore_tmp" ]; then
    mv -f "$restore_tmp" "./data/webui.db"
    echo "成功从 WebDAV 下载最新数据库: $latest_file"
    restored=true
  else
    rm -f "$restore_tmp"
    echo "WebDAV 下载失败,尝试从 Hugging Face 恢复..."
  fi
else
  echo "WebDAV 无有效备份,尝试从 Hugging Face 恢复..."
fi

if [ "$restored" != true ]; then
  if python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID"; then
    restored=true
  fi
fi

# Last resort. This checks the file itself rather than the helper's exit
# status, so it also covers a helper that reports success without writing.
if [ ! -f ./data/webui.db ]; then
  echo "所有恢复失败,创建空数据库..."
  touch ./data/webui.db
fi
144
 
145
# Background loop that mirrors ./data/webui.db to WebDAV.
# Each round:
#   1. compares the local MD5 with today's remote copy (webui_YYYYMMDD.db)
#   2. uploads the dated file when they differ (one retry after 10 s), then
#      overwrites the main copy (webui.db)
#   3. deletes dated backups older than 7 days, using a fresh remote listing
# Globals:   WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD (read),
#            SYNC_INTERVAL (read; set to 7200 seconds when unset)
# Requires:  get_file_hash
# Returns:   never returns; run it as a background job
webdav_sync() {
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200} # default: 2 hours
  echo "WebDAV 同步启动,间隔: ${SYNC_INTERVAL} 秒"

  local auth="$WEBDAV_USERNAME:$WEBDAV_PASSWORD"
  local base_url="$WEBDAV_URL/openwebui"
  local cleanup_days=7
  local current_date file_name upload_url main_url
  local local_hash remote_hash remote_temp uploaded
  local cutoff_date remote_files file file_date

  while true; do
    echo "开始 WebDAV 同步: $(date)"

    if [ -f "./data/webui.db" ]; then
      # Dated file name (year, month, day).
      current_date=$(date +'%Y%m%d')
      file_name="webui_${current_date}.db"
      upload_url="$base_url/${file_name}"
      main_url="$base_url/webui.db"

      local_hash=$(get_file_hash "./data/webui.db")

      # Hash today's remote copy via a temporary download. -f keeps an HTTP
      # error page from being hashed; an empty remote_hash means "no usable
      # remote copy" and always triggers an upload.
      remote_hash=""
      if remote_temp=$(mktemp); then
        if curl -s -f -L -o "$remote_temp" --user "$auth" "$upload_url"; then
          remote_hash=$(get_file_hash "$remote_temp")
        fi
        rm -f -- "$remote_temp"
      fi

      if [ "$local_hash" == "$remote_hash" ]; then
        echo "文件未变化,跳过 WebDAV 上传"
      else
        echo "检测到文件变化,开始上传到 WebDAV..."
        uploaded=false
        if curl -sS -f -L -T "./data/webui.db" --user "$auth" "$upload_url"; then
          uploaded=true
        else
          echo "WebDAV 上传失败,等待重试..."
          sleep 10
          if curl -sS -f -L -T "./data/webui.db" --user "$auth" "$upload_url"; then
            uploaded=true
          else
            echo "重试失败,放弃本次上传"
          fi
        fi

        # The main copy is refreshed after any successful dated upload,
        # including one that only succeeded on the retry.
        if [ "$uploaded" = true ]; then
          echo "WebDAV 上传成功: $file_name"
          if curl -sS -f -L -T "./data/webui.db" --user "$auth" "$main_url"; then
            echo "主文件更新成功"
          else
            echo "主文件更新失败"
          fi
        fi
      fi

      # Remove dated backups older than cleanup_days. The listing is fetched
      # every round so files uploaded after startup are eventually cleaned.
      # `date -d` is GNU date syntax; if it fails, cleanup is skipped.
      cutoff_date=$(date -d "-${cleanup_days} days" +%Y%m%d)
      if [ -n "$cutoff_date" ]; then
        remote_files=$(
          curl -s -f -X PROPFIND --user "$auth" -H "Depth: 1" "$base_url/" \
            | grep -oE 'webui_[0-9]{8}\.db' \
            | sort -u
        )
        while IFS= read -r file; do
          [ -n "$file" ] || continue
          # webui_YYYYMMDD.db -> YYYYMMDD
          file_date=${file#webui_}
          file_date=${file_date%.db}
          if [ "$file_date" -lt "$cutoff_date" ]; then
            curl -s -f -X DELETE --user "$auth" "$base_url/$file" \
              && echo "删除过期文件: $file"
          fi
        done <<< "$remote_files"
      fi
    else
      echo "未找到 webui.db,跳过同步"
    fi

    sleep "$SYNC_INTERVAL"
  done
}
208
 
209
# Background loop that uploads a snapshot of ./data/webui.db to the Hugging
# Face dataset as webui_backup_YYYYMMDD.db.
# Globals:   HF_TOKEN, DATASET_ID (read),
#            SYNC_INTERVAL (read; set to 7200 seconds when unset)
# Requires:  /tmp/hf_sync.py
# Returns:   never returns; run it as a background job
# NOTE(review): the token is passed on the helper's command line, where it is
# visible in the process list; changing that needs a change in the helper too.
hf_sync() {
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
  echo "Hugging Face 同步启动,间隔: ${SYNC_INTERVAL} 秒"

  local current_date backup_file temp_dir temp_path

  while true; do
    echo "开始 Hugging Face 同步: $(date)"

    if [ -f "./data/webui.db" ]; then
      current_date=$(date +'%Y%m%d')
      backup_file="webui_backup_${current_date}.db"

      # Snapshot into a private temp directory (not a predictable /tmp name)
      # and upload only when the copy actually succeeded.
      temp_dir=""
      if temp_dir=$(mktemp -d) && cp "./data/webui.db" "$temp_dir/$backup_file"; then
        temp_path="$temp_dir/$backup_file"
        echo "正在上传到 Hugging Face..."
        python /tmp/hf_sync.py upload "$HF_TOKEN" "$DATASET_ID" "$temp_path" "$backup_file"
      else
        echo "无法创建数据库快照,跳过本次 Hugging Face 同步" >&2
      fi
      if [ -n "$temp_dir" ]; then
        rm -rf -- "${temp_dir:?}"
      fi
    else
      echo "未找到数据库文件,跳过 Hugging Face 同步"
    fi

    sleep "$SYNC_INTERVAL"
  done
}
233
 
234
+ # Start both sync loops as background jobs
235
+ webdav_sync &
236
+ hf_sync &