nbugs committed
Commit 85c4208 · verified · 1 Parent(s): dce165e

Update sync_data.sh

Files changed (1)
  1. sync_data.sh +170 -188
sync_data.sh CHANGED
@@ -2,18 +2,28 @@
 
  # Check required environment variables
  if [ -z "$WEBDAV_URL" ] || [ -z "$WEBDAV_USERNAME" ] || [ -z "$WEBDAV_PASSWORD" ]; then
-     echo "Missing required environment variables WEBDAV_URL, WEBDAV_USERNAME or WEBDAV_PASSWORD"
      exit 1
  fi
 
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
-     echo "Missing required environment variables HF_TOKEN or DATASET_ID"
      exit 1
  fi
 
  # Create the data directory
  mkdir -p ./data
 
  # Create the Hugging Face sync script
  cat > /tmp/hf_sync.py << 'EOL'
  from huggingface_hub import HfApi
@@ -21,234 +31,206 @@ import sys
  import os
 
  def manage_backups(api, repo_id, max_files=50):
-     """Manage backup files, keeping only the newest max_files files"""
-     files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
      backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
-
      # Group files by date (extracted from the file name)
      backup_by_date = {}
      for file in backup_files:
-         try:
-             # Extract the date part from the file name format webui_backup_YYYYMMDD.db
-             date_part = file.split('_')[2].split('.')[0]  # the YYYYMMDD part
-             backup_by_date[date_part] = file
-         except:
-             # Skip files whose names do not match the expected format
-             continue
-
-     # Sort by date and keep only the newest max_files files
-     sorted_dates = sorted(backup_by_date.keys())
      if len(sorted_dates) > max_files:
-         dates_to_delete = sorted_dates[:(len(sorted_dates) - max_files)]
-         for date in dates_to_delete:
-             try:
-                 api.delete_file(path_in_repo=backup_by_date[date], repo_id=repo_id, repo_type="dataset")
-                 print(f'Deleted old backup: {backup_by_date[date]}')
-             except Exception as e:
-                 print(f'Error deleting {backup_by_date[date]}: {str(e)}')
 
  def upload_backup(file_path, file_name, token, repo_id):
      """Upload a backup file to Hugging Face"""
      api = HfApi(token=token)
      try:
-         # Check whether a file with the same name already exists
-         files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
-         if file_name in files:
-             # Delete the existing file before uploading
-             api.delete_file(path_in_repo=file_name, repo_id=repo_id, repo_type="dataset")
-             print(f"Deleted existing file: {file_name}")
-
-         api.upload_file(
-             path_or_fileobj=file_path,
-             path_in_repo=file_name,
-             repo_id=repo_id,
-             repo_type="dataset"
-         )
-         print(f"Successfully uploaded {file_name}")
-         manage_backups(api, repo_id)
      except Exception as e:
-         print(f"Error uploading file: {str(e)}")
 
- # Download the latest backup
  def download_latest_backup(token, repo_id):
      try:
-         api = HfApi(token=token)
-         files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
-         backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
-         if not backup_files:
-             print("No backup files found")
-             return False
-
-         latest_backup = sorted(backup_files)[-1]
-         file = api.hf_hub_download(
-             repo_id=repo_id,
-             filename=latest_backup,
-             repo_type="dataset"
-         )
-
-         if file and os.path.exists(file):
-             os.makedirs('./data', exist_ok=True)
-             os.system(f'cp "{file}" ./data/webui.db')
-             print(f"Successfully restored backup from {latest_backup}")
-             return True
-
-         return False
      except Exception as e:
-         print(f"Error downloading backup: {str(e)}")
          return False
 
  if __name__ == "__main__":
      action = sys.argv[1]
      token = sys.argv[2]
      repo_id = sys.argv[3]
-
      if action == "upload":
-         file_path = sys.argv[4]
-         file_name = sys.argv[5]
-         upload_backup(file_path, file_name, token, repo_id)
      elif action == "download":
-         download_latest_backup(token, repo_id)
  EOL
 
- # Data recovery strategy on first startup
- echo "Starting initial data recovery..."
-
- # Print WebDAV info (with the password masked)
  echo "WebDAV URL: $WEBDAV_URL"
  echo "WebDAV username: $WEBDAV_USERNAME"
- masked_password=$(echo $WEBDAV_PASSWORD | sed 's/./*/g')
- echo "WebDAV password: $masked_password"
-
- # First try to restore the latest file from WebDAV
- echo "Fetching file list from WebDAV..."
- echo "PROPFIND request: $WEBDAV_URL/openwebui/"
-
- # Fetch and print the WebDAV directory listing
- webdav_list_output=$(curl -v -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$WEBDAV_URL/openwebui/" 2>&1)
- echo "WebDAV directory listing response:"
- echo "$webdav_list_output"
 
- # Parse the listing and find the latest backup file
- webdav_files=$(echo "$webdav_list_output" | grep -o '<d:href>[^<]*webui_[0-9]\{4\}[0-9]\{2\}[0-9]\{2\}\.db</d:href>' | sed 's/<d:href>//g' | sed 's/<\/d:href>//g')
 
  if [ -n "$webdav_files" ]; then
-     # Find the latest file (sort by file name and take the last one)
-     latest_file=$(echo "$webdav_files" | sort | tail -n 1)
-     echo "Found latest WebDAV backup file: $latest_file"
-
-     # Build and print the full download URL
-     download_url="$WEBDAV_URL$latest_file"
-     echo "Download URL: $download_url"
-
-     # Download the latest file
-     curl -v -L --fail --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$download_url" -o "./data/webui.db" && {
-         echo "Successfully downloaded the latest database file from WebDAV: $latest_file"
-
-         # Upload to Hugging Face
-         DATE_FORMAT=$(date +%Y%m%d)
-         backup_file="webui_backup_${DATE_FORMAT}.db"
-         cp ./data/webui.db "/tmp/${backup_file}"
-         echo "Uploading initial backup to Hugging Face..."
-         python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file}" "${backup_file}"
-         rm -f "/tmp/${backup_file}"
      } || {
-         echo "Failed to download the latest file from WebDAV, trying to restore from Hugging Face..."
-         python /tmp/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" || {
-             echo "All recovery methods failed, starting with an empty database"
-         }
      }
  else
-     echo "No backup files in the expected format were found on WebDAV, trying to restore from Hugging Face..."
-     python /tmp/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" || {
-         echo "Restore from Hugging Face failed, starting with an empty database"
      }
  fi
 
- # Define the WebDAV sync function
- webdav_sync_() {
-     SYNC_INTERVAL=${SYNC_INTERVAL:-7200} # default interval: 7200
-     echo "WebDAV sync process started, waiting ${SYNC_INTERVAL} seconds before the first sync..."
-     sleep $SYNC_INTERVAL
-
-     while true; do
-         echo "Starting WebDAV sync $(date)"
-
-         # Check whether the database file exists
-         if [ -f "./data/webui.db" ]; then
-             # Generate today's date-based file name (YYYYMMDD)
-             DATE_FORMAT=$(date +%Y%m%d)
-             FILE="webui_${DATE_FORMAT}.db"
-             echo "Syncing to WebDAV..."
-             upload_url="$WEBDAV_URL/openwebui/$FILE"
-             echo "Upload URL: $upload_url"
 
-             # Upload the date-named database file
-             curl -v -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" && {
-                 echo "WebDAV upload succeeded: $FILE"
-
-                 # Delete older files to manage space
-                 echo "Fetching the WebDAV directory to clean up old files..."
-                 webdav_files_list=$(curl -s -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$WEBDAV_URL/openwebui/" | grep -o '<d:href>[^<]*webui_[0-9]\{8\}\.db</d:href>' | sed 's/<d:href>//g' | sed 's/<\/d:href>//g')
-
-                 # Keep the newest 10 files
-                 if [ -n "$webdav_files_list" ]; then
-                     files_to_keep=$(echo "$webdav_files_list" | sort | tail -n 10)
-
-                     # Delete the older files
-                     for old_file in $webdav_files_list; do
-                         if ! echo "$files_to_keep" | grep -q "$old_file"; then
-                             echo "Deleting old file: $old_file"
-                             curl -X DELETE --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$WEBDAV_URL$old_file"
-                         fi
-                     done
-                 fi
-
-             } || {
-                 echo "WebDAV upload failed, waiting to retry..."
-                 sleep 10
-                 echo "Retrying upload URL: $upload_url"
-                 curl -v -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" || {
-                     echo "Retry failed, giving up on this upload."
-                 }
-             }
-         else
-             echo "webui.db not found, skipping WebDAV sync"
-         fi
-
-         # Wait for the next sync interval
-         echo "WebDAV sync complete, next sync in ${SYNC_INTERVAL} seconds..."
-         sleep $SYNC_INTERVAL
      done
  }
 
- # Define the Hugging Face sync function
- hf_sync_() {
      SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
-     echo "Hugging Face sync process started, waiting ${SYNC_INTERVAL} seconds before the first sync..."
-     sleep $(($SYNC_INTERVAL / 2)) # stagger relative to the WebDAV sync
-
-     while true; do
-         echo "Starting Hugging Face sync $(date)"
-
-         if [ -f "./data/webui.db" ]; then
-             # Use a date-only file name format (year, month, day)
-             DATE_FORMAT=$(date +%Y%m%d)
-             backup_file="webui_backup_${DATE_FORMAT}.db"
 
-             # Copy the database file
-             cp ./data/webui.db "/tmp/${backup_file}"
-             echo "Uploading backup to Hugging Face..."
-             python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file}" "${backup_file}"
-             rm -f "/tmp/${backup_file}"
-         else
-             echo "Database file does not exist, skipping Hugging Face sync"
-         fi
-
-         echo "Hugging Face sync complete, next sync in ${SYNC_INTERVAL} seconds..."
-         sleep $SYNC_INTERVAL
      done
  }
 
- # Start the sync processes in the background
- webdav_sync_ &
- hf_sync_ &
 
 
  # Check required environment variables
  if [ -z "$WEBDAV_URL" ] || [ -z "$WEBDAV_USERNAME" ] || [ -z "$WEBDAV_PASSWORD" ]; then
+     echo "Missing required environment variables: WEBDAV_URL, WEBDAV_USERNAME or WEBDAV_PASSWORD"
      exit 1
  fi
 
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
+     echo "Missing required environment variables: HF_TOKEN or DATASET_ID"
      exit 1
  fi
 
  # Create the data directory
  mkdir -p ./data
 
+ # Define a file hash helper
+ get_file_hash() {
+     local file_path="$1"
+     if [ -f "$file_path" ]; then
+         md5sum "$file_path" | awk '{print $1}'
+     else
+         echo "file does not exist"
+     fi
+ }
+
  # Create the Hugging Face sync script
  cat > /tmp/hf_sync.py << 'EOL'
  from huggingface_hub import HfApi
  import os
 
  def manage_backups(api, repo_id, max_files=50):
+     """Manage backup files, keeping only the newest max_files files"""
+     files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
      backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
+
      # Group files by date (extracted from the file name)
      backup_by_date = {}
      for file in backup_files:
+         try:
+             date_part = file.split('_')[2].split('.')[0]
+             backup_by_date[date_part] = file
+         except:
+             continue
+
+     # Keep only the newest max_files files
+     sorted_dates = sorted(backup_by_date.keys(), reverse=True)
      if len(sorted_dates) > max_files:
+         files_to_delete = [backup_by_date[date] for date in sorted_dates[max_files:]]
+         for file in files_to_delete:
+             api.delete_file(path_in_repo=file, repo_id=repo_id, repo_type="dataset")
+             print(f"Deleted old backup: {file}")
 
  def upload_backup(file_path, file_name, token, repo_id):
      """Upload a backup file to Hugging Face"""
      api = HfApi(token=token)
      try:
+         # Delete any existing file with the same name
+         files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
+         if file_name in files:
+             api.delete_file(path_in_repo=file_name, repo_id=repo_id, repo_type="dataset")
+             print(f"Deleted existing file: {file_name}")
+
+         # Upload the new file
+         api.upload_file(
+             path_or_fileobj=file_path,
+             path_in_repo=file_name,
+             repo_id=repo_id,
+             repo_type="dataset"
+         )
+         print(f"Successfully uploaded: {file_name}")
+         manage_backups(api, repo_id)
      except Exception as e:
+         print(f"Upload failed: {str(e)}")
 
  def download_latest_backup(token, repo_id):
+     """Download the latest backup from Hugging Face"""
+     api = HfApi(token=token)
      try:
+         files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
+         backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
+         if not backup_files:
+             return False
+
+         # Find the latest file (sorted by date)
+         latest_file = max(backup_files, key=lambda x: x.split('_')[2].split('.')[0])
+         file_path = api.hf_hub_download(
+             repo_id=repo_id,
+             filename=latest_file,
+             repo_type="dataset"
+         )
+
+         if file_path and os.path.exists(file_path):
+             os.makedirs('./data', exist_ok=True)
+             os.system(f'cp "{file_path}" ./data/webui.db')
+             print(f"Successfully restored from Hugging Face: {latest_file}")
+             return True
+         else:
+             return False
      except Exception as e:
+         print(f"Download failed: {str(e)}")
          return False
 
  if __name__ == "__main__":
      action = sys.argv[1]
      token = sys.argv[2]
      repo_id = sys.argv[3]
+
      if action == "upload":
+         file_path = sys.argv[4]
+         file_name = sys.argv[5]
+         upload_backup(file_path, file_name, token, repo_id)
      elif action == "download":
+         download_latest_backup(token, repo_id)
  EOL
 
+ # Initial data recovery strategy
+ echo "Initializing data recovery..."
  echo "WebDAV URL: $WEBDAV_URL"
  echo "WebDAV username: $WEBDAV_USERNAME"
+ echo "WebDAV password: $(echo $WEBDAV_PASSWORD | sed 's/./*/g')"
 
+ # Try to restore the latest file from WebDAV
+ echo "Fetching file list from WebDAV..."
+ webdav_files=$(curl -s -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$WEBDAV_URL/openwebui/" | grep '<d:href>' | grep 'webui_[0-9]\{8\}.db' | sed 's|</?d:href>||g')
 
  if [ -n "$webdav_files" ]; then
+     latest_file=$(echo "$webdav_files" | sort -r | head -n 1)
+     download_url="$WEBDAV_URL/openwebui/$latest_file"
+     curl -L -o "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$download_url" && {
+         echo "Successfully downloaded the latest database from WebDAV: $latest_file"
      } || {
+         echo "WebDAV download failed, trying to restore from Hugging Face..."
+         python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID"
      }
  else
+     echo "No valid backup found on WebDAV, trying to restore from Hugging Face..."
+     python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID" || {
+         echo "All recovery attempts failed, creating an empty database..."
+         touch ./data/webui.db
      }
  fi
 
+ # WebDAV sync function (uploads only when the file has changed)
+ webdav_sync() {
+     SYNC_INTERVAL=${SYNC_INTERVAL:-7200} # default: 2 hours
+     echo "WebDAV sync started, interval: ${SYNC_INTERVAL} seconds"
 
+     while true; do
+         echo "Starting WebDAV sync: $(date)"
+
+         if [ -f "./data/webui.db" ]; then
+             # Generate the file name (includes year, month, day)
+             current_date=$(date +'%Y%m%d')
+             file_name="webui_${current_date}.db"
+             upload_url="$WEBDAV_URL/openwebui/${file_name}"
+
+             # Compute the hash of the local file
+             local_hash=$(get_file_hash "./data/webui.db")
+
+             # Get the hash of the remote file (via a temporary download)
+             remote_temp="/tmp/webui_remote.db"
+             curl -s -o "$remote_temp" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" > /dev/null 2>&1
+             remote_hash=$(get_file_hash "$remote_temp")
+             rm -f "$remote_temp"
+
+             if [ "$local_hash" == "$remote_hash" ]; then
+                 echo "File unchanged, skipping WebDAV upload"
+             else
+                 echo "File change detected, uploading to WebDAV..."
+                 curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" && {
+                     echo "WebDAV upload succeeded: $file_name"
+
+                     # Update the main file (overwrite webui.db)
+                     main_url="$WEBDAV_URL/openwebui/webui.db"
+                     curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_url" && {
+                         echo "Main file updated successfully"
+                     } || {
+                         echo "Main file update failed"
+                     }
+                 } || {
+                     echo "WebDAV upload failed, waiting to retry..."
+                     sleep 10
+                     curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" || {
+                         echo "Retry failed, giving up on this upload"
+                     }
+                 }
+             fi
+
+             # Clean up expired WebDAV files (keep the last 7 days)
+             cleanup_days=7
+             cutoff_date=$(date -d "-${cleanup_days} days" +%Y%m%d)
+             for file in $webdav_files; do
+                 file_date=$(echo "$file" | grep -oE '[0-9]{8}')
+                 if [ "$file_date" -lt "$cutoff_date" ]; then
+                     delete_url="$WEBDAV_URL/openwebui/$file"
+                     curl -X DELETE --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$delete_url" && echo "Deleted expired file: $file"
+                 fi
+             done
+         else
+             echo "webui.db not found, skipping sync"
+         fi
+
+         sleep $SYNC_INTERVAL
      done
  }
 
+ # Hugging Face sync function
+ hf_sync() {
      SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
+     echo "Hugging Face sync started, interval: ${SYNC_INTERVAL} seconds"
 
+     while true; do
+         echo "Starting Hugging Face sync: $(date)"
+
+         if [ -f "./data/webui.db" ]; then
+             current_date=$(date +'%Y%m%d')
+             backup_file="webui_backup_${current_date}.db"
+             temp_path="/tmp/${backup_file}"
+             cp "./data/webui.db" "$temp_path"
+
+             echo "Uploading to Hugging Face..."
+             python /tmp/hf_sync.py upload "$HF_TOKEN" "$DATASET_ID" "$temp_path" "$backup_file"
+             rm -f "$temp_path"
+         else
+             echo "Database file not found, skipping Hugging Face sync"
+         fi
+
+         sleep $SYNC_INTERVAL
      done
  }
 
+ # Start the sync processes
+ webdav_sync &
+ hf_sync &
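
For reference, a minimal launch sketch for the updated script, assuming it is executed directly with bash from the application root; how the script is actually invoked in deployment is not shown in this commit. The variable names are the ones checked at the top of sync_data.sh, SYNC_INTERVAL is optional and defaults to 7200 seconds, and every value below is a placeholder.

  # Placeholder values; replace with real credentials before use
  export WEBDAV_URL="https://example.com/dav"        # hypothetical WebDAV endpoint
  export WEBDAV_USERNAME="user"
  export WEBDAV_PASSWORD="secret"
  export HF_TOKEN="hf_xxx"                           # Hugging Face access token
  export DATASET_ID="username/openwebui-backup"      # hypothetical dataset repo id
  export SYNC_INTERVAL=7200                          # optional, seconds between sync runs

  # Start the sync script in the background alongside the application
  bash ./sync_data.sh &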