nbugs committed on
Commit
7d26d69
·
verified ·
1 Parent(s): bba1654

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +247 -148
sync_data.sh CHANGED
@@ -1,15 +1,6 @@
1
  #!/bin/bash
2
 
3
- # 检查必要的环境变量
4
- if [ -z "$WEBDAV_URL" ] || [ -z "$WEBDAV_USERNAME" ] || [ -z "$WEBDAV_PASSWORD" ]; then
5
- echo "缺少必要的环境变量: WEBDAV_URL、WEBDAV_USERNAME 或 WEBDAV_PASSWORD"
6
- exit 1
7
- fi
8
-
9
- if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
10
- echo "缺少必要的环境变量: HF_TOKEN 或 DATASET_ID"
11
- exit 1
12
- fi
13
 
14
  # 创建数据目录
15
  mkdir -p ./data
@@ -24,43 +15,65 @@ get_file_hash() {
24
  fi
25
  }
26
 
27
- # 创建 Hugging Face 同步脚本
28
  cat > /tmp/hf_sync.py << 'EOL'
29
  from huggingface_hub import HfApi
30
  import sys
31
  import os
 
 
32
 
33
- def manage_backups(api, repo_id, max_files=50):
34
- """管理备份文件,保留最新的max_files个文件"""
35
- files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
36
- backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
37
-
38
- # 按日期分组文件(从文件名中提取日期)
39
- backup_by_date = {}
40
- for file in backup_files:
41
- try:
42
- date_part = file.split('_')[2].split('.')[0]
43
- backup_by_date[date_part] = file
44
- except:
45
- continue
46
-
47
- # 保留最新的max_files个文件
48
- sorted_dates = sorted(backup_by_date.keys(), reverse=True)
49
- if len(sorted_dates) > max_files:
50
- files_to_delete = [backup_by_date[date] for date in sorted_dates[max_files:]]
51
- for file in files_to_delete:
52
- api.delete_file(path_in_repo=file, repo_id=repo_id, repo_type="dataset")
53
- print(f"已删除旧备份: {file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  def upload_backup(file_path, file_name, token, repo_id):
56
- """上传备份文件到Hugging Face"""
57
  api = HfApi(token=token)
58
  try:
59
- # 删除同名文件(如有)
60
- files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
61
- if file_name in files:
62
- api.delete_file(path_in_repo=file_name, repo_id=repo_id, repo_type="dataset")
63
- print(f"已删除同名文件: {file_name}")
 
 
 
 
64
 
65
  # 上传新文件
66
  api.upload_file(
@@ -70,16 +83,22 @@ def upload_backup(file_path, file_name, token, repo_id):
70
  repo_type="dataset"
71
  )
72
  print(f"成功上传: {file_name}")
73
- manage_backups(api, repo_id)
 
 
 
74
  except Exception as e:
75
  print(f"上传失败: {str(e)}")
 
 
76
 
77
  def download_latest_backup(token, repo_id):
78
- """从Hugging Face下载最新备份"""
79
  api = HfApi(token=token)
80
  try:
81
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
82
  backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
 
83
  if not backup_files:
84
  return False
85
 
@@ -101,136 +120,216 @@ def download_latest_backup(token, repo_id):
101
  except Exception as e:
102
  print(f"下载失败: {str(e)}")
103
  return False
 
 
104
 
105
  if __name__ == "__main__":
106
- action = sys.argv[1]
107
- token = sys.argv[2]
108
- repo_id = sys.argv[3]
109
-
110
- if action == "upload":
111
- file_path = sys.argv[4]
112
- file_name = sys.argv[5]
113
- upload_backup(file_path, file_name, token, repo_id)
114
- elif action == "download":
115
- download_latest_backup(token, repo_id)
 
 
 
 
 
 
116
  EOL
117
 
118
- # 初始化数据恢复策略
119
- echo "初始化数据恢复..."
120
- echo "WebDAV URL: $WEBDAV_URL"
121
- echo "WebDAV 用户名: $WEBDAV_USERNAME"
122
- echo "WebDAV 密码: $(echo $WEBDAV_PASSWORD | sed 's/./*/g')"
123
-
124
- # 尝试从 WebDAV 恢复最新文件
125
- echo "尝试从 WebDAV 获取文件列表..."
126
- webdav_files=$(curl -s -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$WEBDAV_URL/openwebui/" | grep '<d:href>' | grep 'webui_[0-9]\{8\}.db' | sed 's|</?d:href>||g')
127
-
128
- if [ -n "$webdav_files" ]; then
129
- latest_file=$(echo "$webdav_files" | sort -r | head -n 1)
130
- download_url="$WEBDAV_URL/openwebui/$latest_file"
131
- curl -L -o "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$download_url" && {
132
- echo "成功从 WebDAV 下载最新数据库: $latest_file"
133
- } || {
134
- echo "WebDAV 下载失败,尝试从 Hugging Face 恢复..."
135
- python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID"
136
- }
137
  else
138
- echo "WebDAV 无有效备份,尝试从 Hugging Face 恢复..."
139
- python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID" || {
140
- echo "所有恢复失败,创建空数据库..."
141
- touch ./data/webui.db
142
- }
 
 
 
143
  fi
144
 
145
- # WebDAV 同步函数(仅上传变化文件)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  webdav_sync() {
147
- SYNC_INTERVAL=${SYNC_INTERVAL:-7200} # 默认 2 小时
148
- echo "WebDAV 同步启动,间隔: ${SYNC_INTERVAL} 秒"
 
 
 
 
149
 
150
- while true; do
151
- echo "开始 WebDAV 同步: $(date)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
- if [ -f "./data/webui.db" ]; then
154
- # 生成文件名(包含年月日)
155
- current_date=$(date +'%Y%m%d')
156
- file_name="webui_${current_date}.db"
157
- upload_url="$WEBDAV_URL/openwebui/${file_name}"
158
-
159
- # 计算本地文件哈希
160
- local_hash=$(get_file_hash "./data/webui.db")
161
-
162
- # 获取远程文件哈希(通过临时下载)
163
- remote_temp="/tmp/webui_remote.db"
164
- curl -s -o "$remote_temp" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" > /dev/null 2>&1
165
- remote_hash=$(get_file_hash "$remote_temp")
166
- rm -f "$remote_temp"
167
-
168
- if [ "$local_hash" == "$remote_hash" ]; then
169
- echo "文件未变化,跳过 WebDAV 上传"
170
- else
171
- echo "检测到文件变化,开始上传到 WebDAV..."
172
- curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" && {
173
- echo "WebDAV 上传成功: $file_name"
174
-
175
- # 更新主文件(覆盖 webui.db)
176
- main_url="$WEBDAV_URL/openwebui/webui.db"
177
- curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_url" && {
178
- echo "主文件更新成功"
179
- } || {
180
- echo "主文件更新失败"
181
- }
182
- } || {
183
- echo "WebDAV 上传失败,等待重试..."
184
- sleep 10
185
- curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" || {
186
- echo "重试失败,放弃本次上传"
187
- }
188
- }
189
- fi
190
-
191
- # 清理过期 WebDAV 文件(保留最近 7 天)
192
- cleanup_days=7
193
- cutoff_date=$(date -d "-${cleanup_days} days" +%Y%m%d)
194
- for file in $webdav_files; do
195
- file_date=$(echo "$file" | grep -oE '[0-9]{8}')
196
- if [ "$file_date" -lt "$cutoff_date" ]; then
197
- delete_url="$WEBDAV_URL/openwebui/$file"
198
- curl -X DELETE --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$delete_url" && echo "删除过期文件: $file"
199
- fi
200
- done
201
- else
202
- echo "未找到 webui.db,跳过同步"
203
  fi
204
-
205
- sleep $SYNC_INTERVAL
206
  done
207
  }
208
 
209
  # Hugging Face 同步函数
210
  hf_sync() {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
212
- echo "Hugging Face 同步启动,间隔: ${SYNC_INTERVAL} 秒"
213
 
 
214
  while true; do
215
- echo "开始 Hugging Face 同步: $(date)"
 
216
 
217
- if [ -f "./data/webui.db" ]; then
218
- current_date=$(date +'%Y%m%d')
219
- backup_file="webui_backup_${current_date}.db"
220
- temp_path="/tmp/${backup_file}"
221
- cp "./data/webui.db" "$temp_path"
222
-
223
- echo "正在上传到 Hugging Face..."
224
- python /tmp/hf_sync.py upload "$HF_TOKEN" "$DATASET_ID" "$temp_path" "$backup_file"
225
- rm -f "$temp_path"
226
- else
227
- echo "未找到数据库文件,跳过 Hugging Face 同步"
228
  fi
229
 
230
- sleep $SYNC_INTERVAL
 
 
 
 
 
 
 
 
 
 
 
231
  done
232
  }
233
 
234
- # 启动同步进程
235
- webdav_sync &
236
- hf_sync &
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #!/bin/bash
2
 
3
+ # 此脚本仅创建同步脚本,不在构建阶段运行同步进程
 
 
 
 
 
 
 
 
 
4
 
5
  # 创建数据目录
6
  mkdir -p ./data
 
15
  fi
16
  }
17
 
18
+ # 创建 Hugging Face 同步脚本,优化内存使用
19
  cat > /tmp/hf_sync.py << 'EOL'
20
  from huggingface_hub import HfApi
21
  import sys
22
  import os
23
+ import gc
24
+ import time
25
 
26
+ def manage_backups(api, repo_id, max_files=10):
27
+ """管理备份文件,保留最新的max_files个文件,内存优化版"""
28
+ try:
29
+ files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
30
+ backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
31
+
32
+ # 按日期分组文件(从文件名中提取日期)
33
+ backup_by_date = {}
34
+ for file in backup_files:
35
+ try:
36
+ date_part = file.split('_')[2].split('.')[0]
37
+ backup_by_date[date_part] = file
38
+ except:
39
+ continue
40
+
41
+ # 保留最新的max_files个文件
42
+ sorted_dates = sorted(backup_by_date.keys(), reverse=True)
43
+ if len(sorted_dates) > max_files:
44
+ files_to_delete = [backup_by_date[date] for date in sorted_dates[max_files:]]
45
+
46
+ # 分批删除文件以减少内存使用
47
+ batch_size = 3
48
+ for i in range(0, len(files_to_delete), batch_size):
49
+ batch = files_to_delete[i:i+batch_size]
50
+ for file in batch:
51
+ api.delete_file(path_in_repo=file, repo_id=repo_id, repo_type="dataset")
52
+ print(f"已删除旧备份: {file}")
53
+ # 强制垃圾回收
54
+ gc.collect()
55
+ # 批次间暂停
56
+ if i + batch_size < len(files_to_delete):
57
+ time.sleep(2)
58
+ except Exception as e:
59
+ print(f"管理备份错误: {str(e)}")
60
+ finally:
61
+ # 确保垃圾回收
62
+ gc.collect()
63
 
64
  def upload_backup(file_path, file_name, token, repo_id):
65
+ """上传备份文件到Hugging Face,内存优化版"""
66
  api = HfApi(token=token)
67
  try:
68
+ # 检查文件是否存在
69
+ try:
70
+ files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
71
+ if file_name in files:
72
+ api.delete_file(path_in_repo=file_name, repo_id=repo_id, repo_type="dataset")
73
+ print(f"已删除同名文件: {file_name}")
74
+ except Exception as e:
75
+ print(f"检查文件存在错误: {str(e)}")
76
+ gc.collect()
77
 
78
  # 上传新文件
79
  api.upload_file(
 
83
  repo_type="dataset"
84
  )
85
  print(f"成功上传: {file_name}")
86
+
87
+ # 管理备份,可选执行
88
+ if os.environ.get("MANAGE_BACKUPS", "true").lower() == "true":
89
+ manage_backups(api, repo_id)
90
  except Exception as e:
91
  print(f"上传失败: {str(e)}")
92
+ finally:
93
+ gc.collect()
94
 
95
  def download_latest_backup(token, repo_id):
96
+ """从Hugging Face下载最新备份,内存优化版"""
97
  api = HfApi(token=token)
98
  try:
99
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
100
  backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
101
+
102
  if not backup_files:
103
  return False
104
 
 
120
  except Exception as e:
121
  print(f"下载失败: {str(e)}")
122
  return False
123
+ finally:
124
+ gc.collect()
125
 
126
  if __name__ == "__main__":
127
+ try:
128
+ action = sys.argv[1]
129
+ token = sys.argv[2]
130
+ repo_id = sys.argv[3]
131
+
132
+ if action == "upload":
133
+ file_path = sys.argv[4]
134
+ file_name = sys.argv[5]
135
+ upload_backup(file_path, file_name, token, repo_id)
136
+ elif action == "download":
137
+ download_latest_backup(token, repo_id)
138
+ except Exception as e:
139
+ print(f"脚本执行错误: {str(e)}")
140
+ finally:
141
+ # 最终垃圾回收
142
+ gc.collect()
143
  EOL
144
 
145
+ # 创建同步服务启动脚本(不在构建时执行)
146
+ cat > /tmp/start_sync.sh << 'EOL'
147
+ #!/bin/bash
148
+
149
+ # 检查必要的环境变量
150
+ if [ -z "$WEBDAV_URL" ] || [ -z "$WEBDAV_USERNAME" ] || [ -z "$WEBDAV_PASSWORD" ]; then
151
+ echo "缺少必要的环境变量: WEBDAV_URL、WEBDAV_USERNAME 或 WEBDAV_PASSWORD"
152
+ export WEBDAV_ENABLED="false"
 
 
 
 
 
 
 
 
 
 
 
153
  else
154
+ export WEBDAV_ENABLED="true"
155
+ fi
156
+
157
+ if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
158
+ echo "缺少必要的环境变量: HF_TOKEN 或 DATASET_ID"
159
+ export HF_ENABLED="false"
160
+ else
161
+ export HF_ENABLED="true"
162
  fi
163
 
164
+ # 初始化数据恢复策略
165
+ echo "初始化数据恢复..."
166
+
167
+ # 尝试恢复数据
168
+ restore_data() {
169
+ # 首先尝试从 WebDAV 恢复
170
+ if [ "$WEBDAV_ENABLED" = "true" ]; then
171
+ echo "尝试从 WebDAV 获取文件列表..."
172
+ webdav_files=$(curl -s -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$WEBDAV_URL/openwebui/" | grep '<d:href>' | grep 'webui_[0-9]\{8\}.db' | sed 's|</?d:href>||g')
173
+
174
+ if [ -n "$webdav_files" ]; then
175
+ latest_file=$(echo "$webdav_files" | sort -r | head -n 1)
176
+ download_url="$WEBDAV_URL/openwebui/$latest_file"
177
+ curl -L -o "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$download_url" && {
178
+ echo "成功从 WebDAV 下载最新数据库: $latest_file"
179
+ return 0
180
+ }
181
+ fi
182
+ echo "WebDAV 恢复失败"
183
+ fi
184
+
185
+ # 如果 WebDAV 失败,尝试从 Hugging Face 恢复
186
+ if [ "$HF_ENABLED" = "true" ]; then
187
+ echo "尝试从 Hugging Face 恢复..."
188
+ python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID" && {
189
+ echo "成功从 Hugging Face 恢复"
190
+ return 0
191
+ }
192
+ fi
193
+
194
+ # 所有恢复方法都失败
195
+ echo "所有恢复失败,创建空数据库..."
196
+ touch ./data/webui.db
197
+ return 1
198
+ }
199
+
200
+ # WebDAV 同步函数(使用 cron 风格的调度)
201
  webdav_sync() {
202
+ if [ "$WEBDAV_ENABLED" != "true" ]; then
203
+ echo "WebDAV 同步已禁用"
204
+ return
205
+ fi
206
+
207
+ echo "执行 WebDAV 同步: $(date)"
208
 
209
+ if [ ! -f "./data/webui.db" ]; then
210
+ echo "未找到 webui.db,跳过同步"
211
+ return
212
+ fi
213
+
214
+ # 生成文件名(包含年月日)
215
+ current_date=$(date +'%Y%m%d')
216
+ file_name="webui_${current_date}.db"
217
+ upload_url="$WEBDAV_URL/openwebui/${file_name}"
218
+
219
+ # 计算本地文件哈希
220
+ local_hash=$(get_file_hash "./data/webui.db")
221
+
222
+ # 获取远程文件哈希(通过临时下载)
223
+ remote_temp="/tmp/webui_remote.db"
224
+ curl -s -o "$remote_temp" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" > /dev/null 2>&1
225
+ remote_hash=$(get_file_hash "$remote_temp")
226
+ rm -f "$remote_temp"
227
+
228
+ if [ "$local_hash" = "$remote_hash" ]; then
229
+ echo "文件未变化,跳过 WebDAV 上传"
230
+ return
231
+ fi
232
+
233
+ echo "检测到文件变化,开始上传到 WebDAV..."
234
+ curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" && {
235
+ echo "WebDAV 上传成功: $file_name"
236
 
237
+ # 更新主文件(覆盖 webui.db
238
+ main_url="$WEBDAV_URL/openwebui/webui.db"
239
+ curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_url" && {
240
+ echo "主文件更新成功"
241
+ } || {
242
+ echo "主文件更新失败"
243
+ }
244
+ } || {
245
+ echo "WebDAV 上传失败"
246
+ }
247
+
248
+ # 清理过期 WebDAV 文件(保留最近 7 天)
249
+ echo "清理过期 WebDAV 文件..."
250
+ webdav_files=$(curl -s -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$WEBDAV_URL/openwebui/" | grep '<d:href>' | grep 'webui_[0-9]\{8\}.db' | sed 's|</?d:href>||g')
251
+ cleanup_days=7
252
+ cutoff_date=$(date -d "-${cleanup_days} days" +%Y%m%d)
253
+
254
+ for file in $webdav_files; do
255
+ file_date=$(echo "$file" | grep -oE '[0-9]{8}')
256
+ if [ -n "$file_date" ] && [ "$file_date" -lt "$cutoff_date" ]; then
257
+ delete_url="$WEBDAV_URL/openwebui/$file"
258
+ curl -X DELETE --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$delete_url" && echo "删除过期文件: $file"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  fi
 
 
260
  done
261
  }
262
 
263
  # Hugging Face 同步函数
264
  hf_sync() {
265
+ if [ "$HF_ENABLED" != "true" ]; then
266
+ echo "Hugging Face 同步已禁用"
267
+ return
268
+ fi
269
+
270
+ echo "执行 Hugging Face 同步: $(date)"
271
+
272
+ if [ ! -f "./data/webui.db" ]; then
273
+ echo "未找到数据库文件,跳过 Hugging Face 同步"
274
+ return
275
+ fi
276
+
277
+ current_date=$(date +'%Y%m%d')
278
+ backup_file="webui_backup_${current_date}.db"
279
+ temp_path="/tmp/${backup_file}"
280
+ cp "./data/webui.db" "$temp_path"
281
+
282
+ echo "正在上传到 Hugging Face..."
283
+ python /tmp/hf_sync.py upload "$HF_TOKEN" "$DATASET_ID" "$temp_path" "$backup_file"
284
+ rm -f "$temp_path"
285
+ }
286
+
287
+ # 主函数
288
+ main() {
289
+ # 恢复数据
290
+ restore_data
291
+
292
+ # 设置同步间隔(默认2小时)
293
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
294
+ echo "同步间隔设置为: ${SYNC_INTERVAL} 秒"
295
 
296
+ # 循环执行同步,但使用更高效的方式
297
  while true; do
298
+ # 每次同步前先休眠,避免启动时立即同步
299
+ sleep $SYNC_INTERVAL
300
 
301
+ # 执行WebDAV同步
302
+ if [ "$WEBDAV_ENABLED" = "true" ]; then
303
+ webdav_sync
 
 
 
 
 
 
 
 
304
  fi
305
 
306
+ # 清理内存
307
+ sync
308
+ echo 3 > /proc/sys/vm/drop_caches 2>/dev/null || true
309
+
310
+ # 执行Hugging Face同步
311
+ if [ "$HF_ENABLED" = "true" ]; then
312
+ hf_sync
313
+ fi
314
+
315
+ # 清理内存
316
+ sync
317
+ echo 3 > /proc/sys/vm/drop_caches 2>/dev/null || true
318
  done
319
  }
320
 
321
+ # 以非阻塞方式启动主函数
322
+ main &
323
+ EOL
324
+
325
+ # 确保脚本可执行
326
+ chmod +x /tmp/start_sync.sh
327
+
328
+ # 修改启动脚本以包含同步功能,但在容器启动时启动而不是构建时
329
+ cat > /tmp/sync_starter.sh << 'EOL'
330
+ # 在容器启动后启动同步服务
331
+ /bin/bash /tmp/start_sync.sh &
332
+ EOL
333
+
334
+ # 注意:此处只是准备脚本,不执行它们
335
+ echo "同步脚本已准备就绪,将在容器启动时执行"