Update sync_data.sh
Browse files- sync_data.sh +80 -61
sync_data.sh
CHANGED
@@ -14,29 +14,50 @@ fi
|
|
14 |
# 创建数据目录
|
15 |
mkdir -p ./data
|
16 |
|
17 |
-
# 创建
|
18 |
cat > /tmp/hf_sync.py << 'EOL'
|
19 |
from huggingface_hub import HfApi
|
20 |
import sys
|
21 |
import os
|
22 |
|
23 |
def manage_backups(api, repo_id, max_files=50):
|
|
|
24 |
files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
|
25 |
backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
|
26 |
-
backup_files.sort()
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
try:
|
32 |
-
api.delete_file(path_in_repo=
|
33 |
-
print(f'已删除旧备份: {
|
34 |
except Exception as e:
|
35 |
-
print(f'删除 {
|
36 |
|
37 |
def upload_backup(file_path, file_name, token, repo_id):
|
|
|
38 |
api = HfApi(token=token)
|
39 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
api.upload_file(
|
41 |
path_or_fileobj=file_path,
|
42 |
path_in_repo=file_name,
|
@@ -44,7 +65,6 @@ def upload_backup(file_path, file_name, token, repo_id):
|
|
44 |
repo_type="dataset"
|
45 |
)
|
46 |
print(f"成功上传 {file_name}")
|
47 |
-
|
48 |
manage_backups(api, repo_id)
|
49 |
except Exception as e:
|
50 |
print(f"文件上传出错: {str(e)}")
|
@@ -55,26 +75,24 @@ def download_latest_backup(token, repo_id):
|
|
55 |
api = HfApi(token=token)
|
56 |
files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
|
57 |
backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
|
58 |
-
|
59 |
if not backup_files:
|
60 |
print("未找到备份文件")
|
61 |
return False
|
62 |
-
|
63 |
-
latest_backup = sorted(backup_files)[-1]
|
64 |
|
65 |
-
|
|
|
66 |
repo_id=repo_id,
|
67 |
filename=latest_backup,
|
68 |
repo_type="dataset"
|
69 |
)
|
70 |
|
71 |
-
if
|
72 |
os.makedirs('./data', exist_ok=True)
|
73 |
-
os.system(f'cp "{
|
74 |
print(f"成功从 {latest_backup} 恢复备份")
|
75 |
return True
|
|
|
76 |
return False
|
77 |
-
|
78 |
except Exception as e:
|
79 |
print(f"下载备份时出错: {str(e)}")
|
80 |
return False
|
@@ -103,10 +121,10 @@ echo "WebDAV 密码: $masked_password"
|
|
103 |
|
104 |
# 首先尝试从 WebDAV 恢复最新文件
|
105 |
echo "正在尝试从 WebDAV 获取文件列表..."
|
106 |
-
echo "PROPFIND 请求: $WEBDAV_URL/
|
107 |
|
108 |
# 获取并打印 WebDAV 目录内容
|
109 |
-
webdav_list_output=$(curl -v -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$WEBDAV_URL/
|
110 |
echo "WebDAV 目录内容响应:"
|
111 |
echo "$webdav_list_output"
|
112 |
|
@@ -125,32 +143,34 @@ if [ -n "$webdav_files" ]; then
|
|
125 |
# 下载最新文件
|
126 |
curl -v -L --fail --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$download_url" -o "./data/webui.db" && {
|
127 |
echo "成功从 WebDAV 下载最新数据库文件: $latest_file"
|
|
|
128 |
# 上传到 HuggingFace
|
129 |
-
|
130 |
-
backup_file="webui_backup_${
|
131 |
cp ./data/webui.db "/tmp/${backup_file}"
|
132 |
-
echo "正在上传初始备份到
|
133 |
-
|
134 |
rm -f "/tmp/${backup_file}"
|
135 |
} || {
|
136 |
echo "从 WebDAV 下载最新文件失败,尝试下载 WebDAV 的 webui.db..."
|
137 |
|
138 |
# 打印 webui.db 下载链接
|
139 |
-
main_db_url="$WEBDAV_URL/
|
140 |
echo "尝试下载主数据库文件 URL: $main_db_url"
|
141 |
|
142 |
curl -v -L --fail --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_db_url" -o "./data/webui.db" && {
|
143 |
echo "成功从 WebDAV 下载主数据库文件"
|
|
|
144 |
# 上传到 HuggingFace
|
145 |
-
|
146 |
-
backup_file="webui_backup_${
|
147 |
cp ./data/webui.db "/tmp/${backup_file}"
|
148 |
-
echo "正在上传初始备份到
|
149 |
-
|
150 |
rm -f "/tmp/${backup_file}"
|
151 |
} || {
|
152 |
-
echo "从 WebDAV 下载失败,尝试从
|
153 |
-
|
154 |
echo "所有恢复方式均失败,将使用空数据库开始"
|
155 |
}
|
156 |
}
|
@@ -159,50 +179,50 @@ else
|
|
159 |
echo "WebDAV 中没有找到符合格式的备份文件,尝试下载 webui.db..."
|
160 |
|
161 |
# 打印 webui.db 下载链接
|
162 |
-
main_db_url="$WEBDAV_URL/
|
163 |
echo "尝试下载主数据库文件 URL: $main_db_url"
|
164 |
|
165 |
curl -v -L --fail --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_db_url" -o "./data/webui.db" && {
|
166 |
echo "成功从 WebDAV 下载主数据库文件"
|
|
|
167 |
# 上传到 HuggingFace
|
168 |
-
|
169 |
-
backup_file="webui_backup_${
|
170 |
cp ./data/webui.db "/tmp/${backup_file}"
|
171 |
-
echo "正在上传初始备份到
|
172 |
-
|
173 |
rm -f "/tmp/${backup_file}"
|
174 |
} || {
|
175 |
-
echo "从 WebDAV 下载失败,尝试从
|
176 |
-
|
177 |
echo "所有恢复方式均失败,将使用空数据库开始"
|
178 |
}
|
179 |
}
|
180 |
fi
|
181 |
|
182 |
# 定义 WebDAV 同步函数
|
183 |
-
|
184 |
SYNC_INTERVAL=${SYNC_INTERVAL:-7200} # 默认间隔时间为 7200 秒
|
185 |
echo "WebDAV 同步进程启动,等待 ${SYNC_INTERVAL} 秒后开始同步..."
|
186 |
sleep $SYNC_INTERVAL
|
187 |
-
|
188 |
while true; do
|
189 |
echo "开始 WebDAV 同步 $(date)"
|
190 |
|
191 |
# 检查数据库文件是否存在
|
192 |
if [ -f "./data/webui.db" ]; then
|
193 |
-
#
|
194 |
-
|
195 |
-
|
196 |
echo "同步到 WebDAV..."
|
197 |
-
upload_url="$WEBDAV_URL/
|
198 |
echo "上传 URL: $upload_url"
|
199 |
-
|
200 |
# 上传以日期命名的数据库文件
|
201 |
curl -v -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" && {
|
202 |
-
echo "WebDAV 上传成功: $
|
203 |
|
204 |
# 覆盖Webdav目录下默认的webui.db文件
|
205 |
-
main_file_url="$WEBDAV_URL/
|
206 |
echo "更新主文件 URL: $main_file_url"
|
207 |
curl -v -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_file_url" && {
|
208 |
echo "WebDAV 更新主文件成功"
|
@@ -220,42 +240,41 @@ webdav_sync() {
|
|
220 |
else
|
221 |
echo "未找到 webui.db 文件,跳过 WebDAV 同步"
|
222 |
fi
|
223 |
-
|
224 |
# 等待下一次同步间隔
|
225 |
echo "WebDAV 同步完成,下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
|
226 |
sleep $SYNC_INTERVAL
|
227 |
done
|
228 |
}
|
229 |
|
230 |
-
# 定义
|
231 |
-
|
232 |
SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
|
233 |
-
echo "
|
234 |
sleep $(($SYNC_INTERVAL / 2)) # 错开与 WebDAV 同步的时间
|
235 |
-
|
236 |
while true; do
|
237 |
-
echo "开始
|
238 |
|
239 |
if [ -f "./data/webui.db" ]; then
|
240 |
-
|
241 |
-
|
|
|
242 |
|
243 |
# 复制数据库文件
|
244 |
cp ./data/webui.db "/tmp/${backup_file}"
|
245 |
-
|
246 |
-
|
247 |
-
python3 /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file}" "${backup_file}"
|
248 |
-
|
249 |
rm -f "/tmp/${backup_file}"
|
250 |
else
|
251 |
-
echo "数据库文件不存在,跳过
|
252 |
fi
|
253 |
|
254 |
-
echo "
|
255 |
sleep $SYNC_INTERVAL
|
256 |
done
|
257 |
}
|
258 |
|
259 |
# 后台启动同步进程
|
260 |
-
|
261 |
-
|
|
|
14 |
# 创建数据目录
|
15 |
mkdir -p ./data
|
16 |
|
17 |
+
# 创建 Hugging Face 同步脚本
|
18 |
cat > /tmp/hf_sync.py << 'EOL'
|
19 |
from huggingface_hub import HfApi
|
20 |
import sys
|
21 |
import os
|
22 |
|
23 |
def manage_backups(api, repo_id, max_files=50):
    """Prune old backups in the dataset repo, keeping the newest dates.

    Backup files are the repo files whose names start with
    ``webui_backup_`` and end with ``.db``. They are grouped by the
    date part that follows the prefix (``YYYYMMDD``); the ``max_files``
    most recent dates are kept and every file belonging to an older
    date is deleted.

    Args:
        api: Client exposing ``list_repo_files`` and ``delete_file``
            (an ``HfApi`` instance in this script).
        repo_id: Id of the dataset repository holding the backups.
        max_files: Number of distinct backup dates to retain.

    Returns:
        None. Progress and per-file deletion errors are printed; a
        failed deletion does not stop the remaining deletions.
    """
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]

    # Group every backup under its date. A list per date is required:
    # storing a single name per date would silently drop additional
    # files that share the date, and they would never be cleaned up.
    backup_by_date = {}
    for file in backup_files:
        # The prefix check above guarantees at least three '_' fields.
        date_part = file.split('_')[2].split('.')[0]
        # Ignore names whose date part is not exactly eight ASCII
        # digits, so a stray file can neither be deleted by mistake nor
        # occupy a retention slot by sorting after the real dates.
        if len(date_part) != 8 or not (date_part.isascii() and date_part.isdigit()):
            continue
        backup_by_date.setdefault(date_part, []).append(file)

    # YYYYMMDD strings sort chronologically, oldest first.
    sorted_dates = sorted(backup_by_date)
    excess = len(sorted_dates) - max_files
    if excess <= 0:
        return

    for date in sorted_dates[:excess]:
        for name in sorted(backup_by_date[date]):
            try:
                api.delete_file(path_in_repo=name, repo_id=repo_id, repo_type="dataset")
                print(f'已删除旧备份: {name}')
            except Exception as e:
                # Best effort: report and continue with the next file.
                print(f'删除 {name} 时出错: {str(e)}')
|
49 |
|
50 |
def upload_backup(file_path, file_name, token, repo_id):
|
51 |
+
"""上传备份文件到Hugging Face"""
|
52 |
api = HfApi(token=token)
|
53 |
try:
|
54 |
+
# 检查同名文件是否已存在
|
55 |
+
files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
|
56 |
+
if file_name in files:
|
57 |
+
# 上传前删除同名文件
|
58 |
+
api.delete_file(path_in_repo=file_name, repo_id=repo_id, repo_type="dataset")
|
59 |
+
print(f"已删除同名文件: {file_name}")
|
60 |
+
|
61 |
api.upload_file(
|
62 |
path_or_fileobj=file_path,
|
63 |
path_in_repo=file_name,
|
|
|
65 |
repo_type="dataset"
|
66 |
)
|
67 |
print(f"成功上传 {file_name}")
|
|
|
68 |
manage_backups(api, repo_id)
|
69 |
except Exception as e:
|
70 |
print(f"文件上传出错: {str(e)}")
|
|
|
75 |
api = HfApi(token=token)
|
76 |
files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
|
77 |
backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
|
|
|
78 |
if not backup_files:
|
79 |
print("未找到备份文件")
|
80 |
return False
|
|
|
|
|
81 |
|
82 |
+
latest_backup = sorted(backup_files)[-1]
|
83 |
+
file = api.hf_hub_download(
|
84 |
repo_id=repo_id,
|
85 |
filename=latest_backup,
|
86 |
repo_type="dataset"
|
87 |
)
|
88 |
|
89 |
+
if file and os.path.exists(file):
|
90 |
os.makedirs('./data', exist_ok=True)
|
91 |
+
os.system(f'cp "{file}" ./data/webui.db')
|
92 |
print(f"成功从 {latest_backup} 恢复备份")
|
93 |
return True
|
94 |
+
|
95 |
return False
|
|
|
96 |
except Exception as e:
|
97 |
print(f"下载备份时出错: {str(e)}")
|
98 |
return False
|
|
|
121 |
|
122 |
# First try to restore the most recent database file from WebDAV.
echo "正在尝试从 WebDAV 获取文件列表..."
echo "PROPFIND 请求: $WEBDAV_URL/open/"

# Fetch the WebDAV directory listing and print it for troubleshooting.
# Use -sS rather than -v: verbose mode writes the outgoing request headers
# (including "Authorization: Basic ...") to stderr, which the 2>&1 below
# would capture and the following echo would print in clear text.
# -sS still reports curl errors while hiding the progress meter.
webdav_list_output=$(curl -sS -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$WEBDAV_URL/open/" 2>&1)
echo "WebDAV 目录内容响应:"
echo "$webdav_list_output"
|
130 |
|
|
|
143 |
# 下载最新文件
|
144 |
curl -v -L --fail --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$download_url" -o "./data/webui.db" && {
|
145 |
echo "成功从 WebDAV 下载最新数据库文件: $latest_file"
|
146 |
+
|
147 |
# 上传到 HuggingFace
|
148 |
+
DATE_FORMAT=$(date +%Y%m%d)
|
149 |
+
backup_file="webui_backup_${DATE_FORMAT}.db"
|
150 |
cp ./data/webui.db "/tmp/${backup_file}"
|
151 |
+
echo "正在上传初始备份到 Hugging Face..."
|
152 |
+
python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file}" "${backup_file}"
|
153 |
rm -f "/tmp/${backup_file}"
|
154 |
} || {
|
155 |
echo "从 WebDAV 下载最新文件失败,尝试下载 WebDAV 的 webui.db..."
|
156 |
|
157 |
# 打印 webui.db 下载链接
|
158 |
+
main_db_url="$WEBDAV_URL/open/webui.db"
|
159 |
echo "尝试下载主数据库文件 URL: $main_db_url"
|
160 |
|
161 |
curl -v -L --fail --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_db_url" -o "./data/webui.db" && {
|
162 |
echo "成功从 WebDAV 下载主数据库文件"
|
163 |
+
|
164 |
# 上传到 HuggingFace
|
165 |
+
DATE_FORMAT=$(date +%Y%m%d)
|
166 |
+
backup_file="webui_backup_${DATE_FORMAT}.db"
|
167 |
cp ./data/webui.db "/tmp/${backup_file}"
|
168 |
+
echo "正在上传初始备份到 Hugging Face..."
|
169 |
+
python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file}" "${backup_file}"
|
170 |
rm -f "/tmp/${backup_file}"
|
171 |
} || {
|
172 |
+
echo "从 WebDAV 下载失败,尝试从 Hugging Face 恢复..."
|
173 |
+
python /tmp/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" || {
|
174 |
echo "所有恢复方式均失败,将使用空数据库开始"
|
175 |
}
|
176 |
}
|
|
|
179 |
echo "WebDAV 中没有找到符合格式的备份文件,尝试下载 webui.db..."
|
180 |
|
181 |
# 打印 webui.db 下载链接
|
182 |
+
main_db_url="$WEBDAV_URL/open/webui.db"
|
183 |
echo "尝试下载主数据库文件 URL: $main_db_url"
|
184 |
|
185 |
curl -v -L --fail --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_db_url" -o "./data/webui.db" && {
|
186 |
echo "成功从 WebDAV 下载主数据库文件"
|
187 |
+
|
188 |
# 上传到 HuggingFace
|
189 |
+
DATE_FORMAT=$(date +%Y%m%d)
|
190 |
+
backup_file="webui_backup_${DATE_FORMAT}.db"
|
191 |
cp ./data/webui.db "/tmp/${backup_file}"
|
192 |
+
echo "正在上传初始备份到 Hugging Face..."
|
193 |
+
python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file}" "${backup_file}"
|
194 |
rm -f "/tmp/${backup_file}"
|
195 |
} || {
|
196 |
+
echo "从 WebDAV 下载失败,尝试从 Hugging Face 恢复..."
|
197 |
+
python /tmp/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" || {
|
198 |
echo "所有恢复方式均失败,将使用空数据库开始"
|
199 |
}
|
200 |
}
|
201 |
fi
|
202 |
|
203 |
# 定义 WebDAV 同步函数
|
204 |
+
webdav_sync_() {
|
205 |
SYNC_INTERVAL=${SYNC_INTERVAL:-7200} # 默认间隔时间为 7200 秒
|
206 |
echo "WebDAV 同步进程启动,等待 ${SYNC_INTERVAL} 秒后开始同步..."
|
207 |
sleep $SYNC_INTERVAL
|
208 |
+
|
209 |
while true; do
|
210 |
echo "开始 WebDAV 同步 $(date)"
|
211 |
|
212 |
# 检查数据库文件是否存在
|
213 |
if [ -f "./data/webui.db" ]; then
|
214 |
+
# 生成仅含日期的文件名(月-日)
|
215 |
+
FILE="webui_$(date +'%m-%d').db"
|
|
|
216 |
echo "同步到 WebDAV..."
|
217 |
+
upload_url="$WEBDAV_URL/open/$FILE"
|
218 |
echo "上传 URL: $upload_url"
|
219 |
+
|
220 |
# 上传以日期命名的数据库文件
|
221 |
curl -v -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" && {
|
222 |
+
echo "WebDAV 上传成功: $FILE"
|
223 |
|
224 |
# 覆盖Webdav目录下默认的webui.db文件
|
225 |
+
main_file_url="$WEBDAV_URL/open/webui.db"
|
226 |
echo "更新主文件 URL: $main_file_url"
|
227 |
curl -v -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$main_file_url" && {
|
228 |
echo "WebDAV 更新主文件成功"
|
|
|
240 |
else
|
241 |
echo "未找到 webui.db 文件,跳过 WebDAV 同步"
|
242 |
fi
|
243 |
+
|
244 |
# 等待下一次同步间隔
|
245 |
echo "WebDAV 同步完成,下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
|
246 |
sleep $SYNC_INTERVAL
|
247 |
done
|
248 |
}
|
249 |
|
250 |
+
# 定义 Hugging Face 同步函数
|
251 |
+
#######################################
# Periodically upload ./data/webui.db to the Hugging Face dataset as a
# date-stamped backup named webui_backup_YYYYMMDD.db.
# Globals:   SYNC_INTERVAL (read; set to 7200 seconds when unset/empty)
#            HF_TOKEN, DATASET_ID (read; passed to /tmp/hf_sync.py)
# Arguments: none
# Outputs:   progress messages on stdout, copy failures on stderr
# Returns:   never returns (infinite loop); run it in the background
#######################################
hf_sync_() {
    SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
    # The first run starts after half an interval so that it is staggered
    # against the WebDAV sync; report the delay that is actually used.
    local initial_delay=$((SYNC_INTERVAL / 2))
    local backup_file
    echo "Hugging Face 同步进程启动,等待 ${initial_delay} 秒后开始同步..."
    sleep "$initial_delay"

    while true; do
        echo "开始 Hugging Face 同步 $(date)"

        if [ -f "./data/webui.db" ]; then
            # Date-only name (YYYYMMDD): uploads on the same day share one name.
            backup_file="webui_backup_$(date +%Y%m%d).db"

            # Upload a copy of the database; skip the upload if the copy
            # could not be made instead of uploading a missing file.
            if cp ./data/webui.db "/tmp/${backup_file}"; then
                echo "正在上传备份到 Hugging Face..."
                python /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file}" "${backup_file}"
            else
                echo "复制数据库文件失败,跳过本次 Hugging Face 同步" >&2
            fi
            rm -f "/tmp/${backup_file}"
        else
            echo "数据库文件不存在,跳过 Hugging Face 同步"
        fi

        echo "Hugging Face 同步完成,下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
        sleep "$SYNC_INTERVAL"
    done
}
|
277 |
|
278 |
# 后台启动同步进程
|
279 |
+
webdav_sync_ &
|
280 |
+
hf_sync_ &
|