Update sync_data.sh
sync_data.sh  +210 −134
sync_data.sh
CHANGED
@@ -1,160 +1,236 @@
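This commit rewrites sync_data.sh. The new script validates the WebDAV and Hugging Face environment variables, writes a /tmp/hf_sync.py helper via a heredoc, restores ./data/webui.db from the newest WebDAV backup (falling back to the Hugging Face dataset, or to an empty file), and then starts two background loops that periodically push the database to WebDAV (with hash-based change detection and 7-day cleanup) and to the Hugging Face dataset (keeping up to 50 dated backups).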
Previous version (removed; only partially recoverable from this extract):

#!/bin/bash
# sync_data.sh - handles data restore and starts the background sync tasks
#
set -e

# --- Configuration ---
# WebDAV-related environment variables are provided via Hugging Face Secrets:
# WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD
# Hugging Face-related environment variables are provided via Hugging Face Secrets:
# HF_TOKEN, DATASET_ID
WEBDAV_BASE_PATH="openwebui"                        # base directory on the WebDAV server
LOCAL_DATA_DIR="./data"                             # local data directory
DB_FILENAME="webui.db"                              # database file name
WEBDAV_SYNC_INTERVAL=${WEBDAV_SYNC_INTERVAL:-7200}  # WebDAV sync interval in seconds, default 2 hours
HF_SYNC_INTERVAL=${HF_SYNC_INTERVAL:-7200}          # Hugging Face sync interval in seconds, default 2 hours
WEBDAV_CLEANUP_DAYS=7                               # how many days of WebDAV backups to keep
HF_MAX_BACKUPS=50                                   # maximum number of Hugging Face backups (enforced by hf_sync.py)

# --- Check environment variables ---
if [ -z "$WEBDAV_URL" ] || [ -z "$WEBDAV_USERNAME" ] || [ -z "$WEBDAV_PASSWORD" ]; then
    echo "…"
    exit 1
fi
if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
    echo "…"
    exit 1
fi

WEBDAV_FULL_PATH="$WEBDAV_URL/$WEBDAV_BASE_PATH"

# Compute a file's MD5 hash
get_file_hash() {
    local file_path="$1"
    if [ -f "$file_path" ]; then
        md5sum "$file_path" | awk '{print $1}'
    else
        echo "…"
    fi
}

[… WebDAV restore logic not recoverable from this extract …]

    echo "[Restore] WebDAV restore failed or found no file, trying to restore from Hugging Face ($DATASET_ID)..."
    # Call the Python script to download, passing the target directory
    if python3 /app/hf_sync.py download "$HF_TOKEN" "$DATASET_ID" "$LOCAL_DATA_DIR"; then
        echo "[Restore] Restored from Hugging Face."
        recovered=true
    else
        echo "[Restore] Restore from Hugging Face failed as well."
    fi
fi

# 3. If every restore attempt failed and the local database file does not exist, create an empty file
if [ "$recovered" != true ] && [ ! -f "$LOCAL_DB_PATH" ]; then
    echo "[Restore] All restore methods failed, creating an empty database file: $LOCAL_DB_PATH"
    touch "$LOCAL_DB_PATH"
fi

# --- Background sync tasks ---

# WebDAV sync function
webdav_sync() {
    while true; do
        [… hash check and upload steps not recoverable from this extract …]
            echo "[WebDAV Sync @ $timestamp] Main file updated."
            last_uploaded_hash=$current_hash  # only update the hash when the main file also succeeds
        else
            echo "[WebDAV Sync @ $timestamp] Error: main file update failed!"
            # Main file failed, do not update the hash, so next time it will …
New version:

#!/bin/bash

# Check the required environment variables
if [ -z "$WEBDAV_URL" ] || [ -z "$WEBDAV_USERNAME" ] || [ -z "$WEBDAV_PASSWORD" ]; then
    echo "Missing required environment variables: WEBDAV_URL, WEBDAV_USERNAME or WEBDAV_PASSWORD"
    exit 1
fi

if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
    echo "Missing required environment variables: HF_TOKEN or DATASET_ID"
    exit 1
fi

# Create the data directory
mkdir -p ./data

# Define the hash helper
get_file_hash() {
    local file_path="$1"
    if [ -f "$file_path" ]; then
        md5sum "$file_path" | awk '{print $1}'
    else
        echo "file not found"
    fi
}

# Write the Hugging Face sync helper script
cat > /tmp/hf_sync.py << 'EOL'
from huggingface_hub import HfApi
import sys
import os

def manage_backups(api, repo_id, max_files=50):
    """Keep only the newest max_files backup files."""
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]

    # Group files by date (extracted from the file name)
    backup_by_date = {}
    for file in backup_files:
        try:
            date_part = file.split('_')[2].split('.')[0]
            backup_by_date[date_part] = file
        except Exception:
            continue

    # Keep the newest max_files files
    sorted_dates = sorted(backup_by_date.keys(), reverse=True)
    if len(sorted_dates) > max_files:
        files_to_delete = [backup_by_date[date] for date in sorted_dates[max_files:]]
        for file in files_to_delete:
            api.delete_file(path_in_repo=file, repo_id=repo_id, repo_type="dataset")
            print(f"Deleted old backup: {file}")

def upload_backup(file_path, file_name, token, repo_id):
    """Upload a backup file to Hugging Face."""
    api = HfApi(token=token)
    try:
        # Delete any existing file with the same name
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
        if file_name in files:
            api.delete_file(path_in_repo=file_name, repo_id=repo_id, repo_type="dataset")
            print(f"Deleted existing file with the same name: {file_name}")

        # Upload the new file
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_name,
            repo_id=repo_id,
            repo_type="dataset"
        )
        print(f"Upload succeeded: {file_name}")
        manage_backups(api, repo_id)
    except Exception as e:
        print(f"Upload failed: {str(e)}")

def download_latest_backup(token, repo_id):
    """Download the latest backup from Hugging Face."""
    api = HfApi(token=token)
    try:
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
        backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
        if not backup_files:
            return False

        # Pick the newest file (by the date embedded in the name)
        latest_file = max(backup_files, key=lambda x: x.split('_')[2].split('.')[0])
        file_path = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest_file,
            repo_type="dataset"
        )

        if file_path and os.path.exists(file_path):
            os.makedirs('./data', exist_ok=True)
            os.system(f'cp "{file_path}" ./data/webui.db')
            print(f"Successfully restored from Hugging Face: {latest_file}")
            return True
        else:
            return False
    except Exception as e:
        print(f"Download failed: {str(e)}")
        return False

if __name__ == "__main__":
    action = sys.argv[1]
    token = sys.argv[2]
    repo_id = sys.argv[3]

    if action == "upload":
        file_path = sys.argv[4]
        file_name = sys.argv[5]
        upload_backup(file_path, file_name, token, repo_id)
    elif action == "download":
        download_latest_backup(token, repo_id)
EOL
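For reference, the embedded helper is a small command-line tool; the two invocation forms it accepts, matching the __main__ dispatch above and the calls made later in this script, are as follows (the dated file name below is just a placeholder):

    # restore: download the newest webui_backup_*.db from the dataset into ./data/webui.db
    python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID"

    # backup: upload a snapshot under a dated name, then prune older backups (the newest 50 are kept)
    python /tmp/hf_sync.py upload "$HF_TOKEN" "$DATASET_ID" "/tmp/webui_backup_20240101.db" "webui_backup_20240101.db"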

# Initialize the data recovery strategy
echo "Initializing data recovery..."
echo "WebDAV URL: $WEBDAV_URL"
echo "WebDAV username: $WEBDAV_USERNAME"
echo "WebDAV password: $(echo $WEBDAV_PASSWORD | sed 's/./*/g')"

# Try to restore the newest file from WebDAV
echo "Fetching the file list from WebDAV..."
webdav_files=$(curl -s -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$WEBDAV_URL/openwebui/" | grep '<d:href>' | grep 'webui_[0-9]\{8\}.db' | sed 's|</?d:href>||g')

if [ -n "$webdav_files" ]; then
    latest_file=$(echo "$webdav_files" | sort -r | head -n 1)
    download_url="$WEBDAV_URL/openwebui/$latest_file"
    curl -L -o "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$download_url" && {
        echo "Downloaded the latest database from WebDAV: $latest_file"
    } || {
        echo "WebDAV download failed, trying to restore from Hugging Face..."
        python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID"
    }
else
    echo "No valid backup on WebDAV, trying to restore from Hugging Face..."
    python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID" || {
        echo "All restore attempts failed, creating an empty database..."
        touch ./data/webui.db
    }
fi
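A note on the WebDAV listing used above: a PROPFIND response lists each resource as a <d:href> element, roughly along these lines (the paths are illustrative):

    # <d:href>/openwebui/webui_20240105.db</d:href>
    # <d:href>/openwebui/webui_20240106.db</d:href>

The grep keeps only entries matching webui_YYYYMMDD.db, and the sed is meant to strip the surrounding tags. Two caveats: in basic-regex sed the ? in '</?d:href>' is a literal character, so the tag stripping only takes effect with sed -E (or two separate substitutions); and depending on the server, the href value can include the collection path, which the later "$WEBDAV_URL/openwebui/$latest_file" concatenation does not remove.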

# WebDAV sync function (uploads only when the file has changed)
webdav_sync() {
    SYNC_INTERVAL=${SYNC_INTERVAL:-7200} # default: 2 hours
    echo "WebDAV sync started, interval: ${SYNC_INTERVAL} seconds"

    while true; do
        echo "Starting WebDAV sync: $(date)"

        if [ -f "./data/webui.db" ]; then
            # Build the file name (includes year, month and day)
            current_date=$(date +'%Y%m%d')
            file_name="webui_${current_date}.db"
            upload_url="$WEBDAV_URL/openwebui/${file_name}"

            # Hash of the local file
            local_hash=$(get_file_hash "./data/webui.db")

            # Hash of the remote file (via a temporary download)
            remote_temp="/tmp/webui_remote.db"
            curl -s -o "$remote_temp" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" > /dev/null 2>&1
            remote_hash=$(get_file_hash "$remote_temp")
            rm -f "$remote_temp"

            if [ "$local_hash" == "$remote_hash" ]; then
                echo "File unchanged, skipping WebDAV upload"
            else
                echo "File change detected, uploading to WebDAV..."
                curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" && {
                    echo "WebDAV upload succeeded: $file_name"

                    # Update the main file (overwrite webui.db)
                    main_url="$WEBDAV_URL/openwebui/webui.db"
                    curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_url" && {
                        echo "Main file updated"
                    } || {
                        echo "Main file update failed"
                    }
                } || {
                    echo "WebDAV upload failed, retrying shortly..."
                    sleep 10
                    curl -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" || {
                        echo "Retry failed, giving up on this upload"
                    }
                }
            fi

            # Clean up expired WebDAV files (keep the last 7 days)
            cleanup_days=7
            cutoff_date=$(date -d "-${cleanup_days} days" +%Y%m%d)
            for file in $webdav_files; do
                file_date=$(echo "$file" | grep -oE '[0-9]{8}')
                if [ "$file_date" -lt "$cutoff_date" ]; then
                    delete_url="$WEBDAV_URL/openwebui/$file"
                    curl -X DELETE --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$delete_url" && echo "Deleted expired file: $file"
                fi
            done
        else
            echo "webui.db not found, skipping sync"
        fi

        sleep $SYNC_INTERVAL
    done
}
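The cleanup step compares the 8-digit date embedded in each backup name against a cutoff produced with GNU date's -d option; a quick illustration of that comparison (the concrete dates are made up):

    cutoff_date=$(date -d "-7 days" +%Y%m%d)              # e.g. 20240101 when today is 20240108
    [ "20231230" -lt "$cutoff_date" ] && echo "expired"   # plain numeric test works because YYYYMMDD sorts numerically

Also note that $webdav_files inside the loop is the listing captured during the startup restore step, so each cycle prunes the files known at startup rather than re-querying the server.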

# Hugging Face sync function
hf_sync() {
    SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
    echo "Hugging Face sync started, interval: ${SYNC_INTERVAL} seconds"

    while true; do
        echo "Starting Hugging Face sync: $(date)"

        if [ -f "./data/webui.db" ]; then
            current_date=$(date +'%Y%m%d')
            backup_file="webui_backup_${current_date}.db"
            temp_path="/tmp/${backup_file}"
            cp "./data/webui.db" "$temp_path"

            echo "Uploading to Hugging Face..."
            python /tmp/hf_sync.py upload "$HF_TOKEN" "$DATASET_ID" "$temp_path" "$backup_file"
            rm -f "$temp_path"
        else
            echo "Database file not found, skipping Hugging Face sync"
        fi

        sleep $SYNC_INTERVAL
    done
}

# Start the sync processes
webdav_sync &
hf_sync &