web / sync_data.sh
nbugs's picture
Update sync_data.sh
4ba79ba verified
#!/bin/bash
# Refuse to start unless every WebDAV credential is configured.
if ! [[ -n "$WEBDAV_URL" && -n "$WEBDAV_USERNAME" && -n "$WEBDAV_PASSWORD" ]]; then
  echo "缺少必要的环境变量: WEBDAV_URL、WEBDAV_USERNAME 或 WEBDAV_PASSWORD"
  exit 1
fi

# The Hugging Face token and the target dataset id are mandatory as well.
if ! [[ -n "$HF_TOKEN" && -n "$DATASET_ID" ]]; then
  echo "缺少必要的环境变量: HF_TOKEN 或 DATASET_ID"
  exit 1
fi

# Make sure the local data directory exists before anything is restored into it.
mkdir -p ./data
#######################################
# Print the MD5 digest of a file.
# Arguments: $1 - path of the file to hash
# Outputs:   the hex digest on stdout, or the literal marker
#            "文件不存在" when $1 is not a regular file
#######################################
get_file_hash() {
  local target="$1"

  # Guard clause: anything that is not a regular file yields the marker text.
  if [ ! -f "$target" ]; then
    echo "文件不存在"
    return
  fi

  # md5sum prints "<digest>  <name>"; keep only the first space-separated field.
  md5sum "$target" | cut -d ' ' -f 1
}
# Generate the Hugging Face sync helper script at /tmp/hf_sync.py.
# The delimiter is quoted ('EOL'), so the here-document body is written
# literally: the shell performs no variable or command expansion inside it.
cat > /tmp/hf_sync.py << 'EOL'
from huggingface_hub import HfApi
import sys
import os
def manage_backups(api, repo_id, max_files=50):
    """Prune old backups in the dataset repo, keeping the newest ``max_files``.

    Backups are the repo files named ``webui_backup_<date>.db``. They are keyed
    by the ``<date>`` part of the name (one file per date) and ordered by that
    key as a string, newest first; everything past the first ``max_files``
    entries is deleted.

    Args:
        api: object exposing ``list_repo_files`` and ``delete_file`` with the
            keyword arguments used below (an ``HfApi`` instance in this script).
        repo_id: id of the dataset repository to prune.
        max_files: number of most recent backups to keep.

    A failure to delete one file is reported and does not stop the remaining
    deletions. Errors from listing the repository propagate to the caller.
    """
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]

    # Group by the date embedded in the name. The prefix filter above
    # guarantees at least three '_'-separated parts, so index 2 always exists.
    backup_by_date = {}
    for file in backup_files:
        date_part = file.split('_')[2].split('.')[0]
        backup_by_date[date_part] = file

    # Newest first; everything beyond the first max_files entries is stale.
    sorted_dates = sorted(backup_by_date.keys(), reverse=True)
    if len(sorted_dates) > max_files:
        files_to_delete = [backup_by_date[date] for date in sorted_dates[max_files:]]
        for file in files_to_delete:
            try:
                api.delete_file(path_in_repo=file, repo_id=repo_id, repo_type="dataset")
            except Exception as e:
                # Keep pruning the remaining files instead of aborting the run.
                print(f"删除旧备份失败: {file} ({e})")
                continue
            print(f"已删除旧备份: {file}")
def upload_backup(file_path, file_name, token, repo_id):
    """Upload a backup file to the Hugging Face dataset repo, then prune old ones.

    Args:
        file_path: local path of the file to upload.
        file_name: path the file gets inside the repository.
        token: Hugging Face access token passed to ``HfApi``.
        repo_id: id of the target dataset repository.

    Returns:
        True when the upload succeeded, False otherwise. Errors are printed
        rather than raised.
    """
    api = HfApi(token=token)
    try:
        # The upload is committed at path_in_repo and replaces a same-named
        # file, so the existing copy is not deleted beforehand: a failed upload
        # must never leave the repository without that day's backup.
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_name,
            repo_id=repo_id,
            repo_type="dataset"
        )
        print(f"成功上传: {file_name}")
    except Exception as e:
        print(f"上传失败: {str(e)}")
        return False

    # Retention is best-effort and reported separately, so a pruning problem is
    # not mistaken for an upload failure.
    try:
        manage_backups(api, repo_id)
    except Exception as e:
        print(f"清理旧备份失败: {str(e)}")
    return True
def download_latest_backup(token, repo_id):
    """Restore ./data/webui.db from the newest backup in the dataset repo.

    Backups are the repo files named ``webui_backup_<date>.db``; the newest is
    the one whose ``<date>`` part compares greatest as a string.

    Args:
        token: Hugging Face access token passed to ``HfApi``.
        repo_id: id of the dataset repository holding the backups.

    Returns:
        True when a backup was downloaded and copied into place, False when no
        backup exists or any step failed. Errors are printed rather than raised.
    """
    import shutil  # local import: only this function needs it

    api = HfApi(token=token)
    try:
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
        backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
        if not backup_files:
            return False

        # Pick the newest file by the date embedded in its name.
        latest_file = max(backup_files, key=lambda x: x.split('_')[2].split('.')[0])
        file_path = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest_file,
            repo_type="dataset"
        )
        if not (file_path and os.path.exists(file_path)):
            return False

        os.makedirs('./data', exist_ok=True)
        # Copy in-process instead of shelling out to `cp`: no shell quoting of
        # the path is involved, and a failed copy raises (handled below)
        # instead of being reported as a successful restore.
        shutil.copyfile(file_path, './data/webui.db')
        print(f"成功从Hugging Face恢复: {latest_file}")
        return True
    except Exception as e:
        print(f"下载失败: {str(e)}")
        return False
if __name__ == "__main__":
    # Command-line entry point:
    #   hf_sync.py upload   <token> <repo_id> <file_path> <file_name>
    #   hf_sync.py download <token> <repo_id>
    # Exit status: 0 on success, 1 when a download restored nothing,
    # 2 on a usage error.
    usage = ("用法: hf_sync.py upload <token> <repo_id> <file_path> <file_name>"
             " | hf_sync.py download <token> <repo_id>")
    if len(sys.argv) < 4:
        print(usage, file=sys.stderr)
        sys.exit(2)

    action = sys.argv[1]
    token = sys.argv[2]
    repo_id = sys.argv[3]
    if action == "upload":
        if len(sys.argv) < 6:
            print(usage, file=sys.stderr)
            sys.exit(2)
        file_path = sys.argv[4]
        file_name = sys.argv[5]
        upload_backup(file_path, file_name, token, repo_id)
    elif action == "download":
        # Propagate failure through the exit status so that a shell caller
        # using `python hf_sync.py download ... || fallback` runs its fallback.
        if not download_latest_backup(token, repo_id):
            sys.exit(1)
    else:
        print(usage, file=sys.stderr)
        sys.exit(2)
EOL
# Initial restore strategy: take the newest dated backup from WebDAV, fall back
# to Hugging Face, and finally create an empty database so that
# ./data/webui.db always exists afterwards.
echo "初始化数据恢复..."
echo "WebDAV URL: $WEBDAV_URL"
echo "WebDAV 用户名: $WEBDAV_USERNAME"
# Mask every character of the password using parameter expansion; the value is
# never passed unquoted to echo, so it cannot be word-split or glob-expanded.
echo "WebDAV 密码: ${WEBDAV_PASSWORD//?/*}"

# List the dated backups stored on WebDAV.
echo "尝试从 WebDAV 获取文件列表..."
# Extract bare file names (webui_YYYYMMDD.db) straight from the PROPFIND
# response. This does not depend on the XML namespace prefix of the href
# elements and drops the server-side path, so the names can be appended to
# "$WEBDAV_URL/openwebui/". One name per line, de-duplicated.
# webdav_files stays a global: the webdav_sync cleanup loop reads it.
webdav_files=$(
  curl -s -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$WEBDAV_URL/openwebui/" \
    | grep -oE 'webui_[0-9]{8}\.db' \
    | sort -u
)

restored=0
if [ -n "$webdav_files" ]; then
  # Names embed YYYYMMDD, so a reverse lexical sort puts the newest first.
  latest_file=$(printf '%s\n' "$webdav_files" | sort -r | head -n 1)
  download_url="$WEBDAV_URL/openwebui/$latest_file"
  # Download next to the target and move into place only on success: with -f
  # an HTTP error makes curl fail instead of saving the error page, and a
  # failed transfer never overwrites an existing database.
  restore_tmp="./data/webui.db.download"
  if curl -f -L -o "$restore_tmp" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$download_url" \
    && mv -f "$restore_tmp" "./data/webui.db"; then
    echo "成功从 WebDAV 下载最新数据库: $latest_file"
    restored=1
  else
    rm -f "$restore_tmp"
    echo "WebDAV 下载失败,尝试从 Hugging Face 恢复..."
  fi
else
  echo "WebDAV 无有效备份,尝试从 Hugging Face 恢复..."
fi

if [ "$restored" -eq 0 ]; then
  python /tmp/hf_sync.py download "$HF_TOKEN" "$DATASET_ID"
fi

# Decide by the presence of the database file rather than by the helper's exit
# status, so the empty-database fallback also covers a failed WebDAV download.
if [ ! -f "./data/webui.db" ]; then
  echo "所有恢复失败,创建空数据库..."
  touch ./data/webui.db
fi
#######################################
# Upload ./data/webui.db to the given WebDAV URL.
# Globals:   WEBDAV_USERNAME, WEBDAV_PASSWORD (read)
# Arguments: $1 - destination URL
# Returns:   curl's exit status; -f turns HTTP error responses into failures
#######################################
_webdav_put() {
  curl -f -L -T "./data/webui.db" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$1"
}

#######################################
# WebDAV sync loop: uploads the database only when it differs from today's
# remote copy, refreshes the undated main copy, and deletes dated backups
# older than 7 days. Never returns.
# Globals:   WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD (read),
#            SYNC_INTERVAL (read; defaults to 7200 seconds)
# Outputs:   progress messages on stdout
#######################################
webdav_sync() {
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200} # default: 2 hours
  local base_url="$WEBDAV_URL/openwebui"
  local cleanup_days=7
  local current_date file_name upload_url main_url
  local local_hash remote_hash remote_temp
  local cutoff_date remote_files file file_date

  echo "WebDAV 同步启动,间隔: ${SYNC_INTERVAL} 秒"
  while true; do
    echo "开始 WebDAV 同步: $(date)"
    if [ -f "./data/webui.db" ]; then
      # Today's backup name embeds the date (YYYYMMDD).
      current_date=$(date +'%Y%m%d')
      file_name="webui_${current_date}.db"
      upload_url="$base_url/${file_name}"

      local_hash=$(get_file_hash "./data/webui.db")

      # Hash today's remote copy by downloading it to a private temp file.
      # With -f a missing remote file (HTTP error) makes curl fail, leaving
      # remote_hash empty, which never equals the local hash and so triggers
      # an upload.
      remote_hash=""
      if remote_temp=$(mktemp); then
        if curl -f -s -o "$remote_temp" --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$upload_url" > /dev/null 2>&1; then
          remote_hash=$(get_file_hash "$remote_temp")
        fi
        rm -f "$remote_temp"
      fi

      if [ "$local_hash" = "$remote_hash" ]; then
        echo "文件未变化,跳过 WebDAV 上传"
      else
        echo "检测到文件变化,开始上传到 WebDAV..."
        if _webdav_put "$upload_url"; then
          echo "WebDAV 上传成功: $file_name"
          # Also refresh the undated main copy (webui.db).
          main_url="$base_url/webui.db"
          if _webdav_put "$main_url"; then
            echo "主文件更新成功"
          else
            echo "主文件更新失败"
          fi
        else
          echo "WebDAV 上传失败,等待重试..."
          sleep 10
          if ! _webdav_put "$upload_url"; then
            echo "重试失败,放弃本次上传"
          fi
        fi
      fi

      # Remove dated backups older than cleanup_days. The listing is fetched
      # on every cycle so that files uploaded after startup are seen too.
      cutoff_date=$(date -d "-${cleanup_days} days" +%Y%m%d)
      remote_files=$(
        curl -s -X PROPFIND --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -H "Depth: 1" "$base_url/" \
          | grep -oE 'webui_[0-9]{8}\.db' \
          | sort -u
      )
      while IFS= read -r file; do
        [ -n "$file" ] || continue
        # webui_YYYYMMDD.db -> YYYYMMDD
        file_date=${file#webui_}
        file_date=${file_date%.db}
        # Skip the comparison when the cutoff could not be computed.
        if [ -n "$cutoff_date" ] && [ "$file_date" -lt "$cutoff_date" ]; then
          if curl -f -s -X DELETE --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$base_url/$file"; then
            echo "删除过期文件: $file"
          fi
        fi
      done <<< "$remote_files"
    else
      echo "未找到 webui.db,跳过同步"
    fi
    sleep "$SYNC_INTERVAL"
  done
}
#######################################
# Hugging Face sync loop: every SYNC_INTERVAL seconds, copies the database to
# a private temp file and uploads it as webui_backup_YYYYMMDD.db through
# /tmp/hf_sync.py. Never returns.
# Globals:   HF_TOKEN, DATASET_ID (read),
#            SYNC_INTERVAL (read; defaults to 7200 seconds)
# Outputs:   progress messages on stdout
#######################################
hf_sync() {
  SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
  local current_date backup_file temp_path

  echo "Hugging Face 同步启动,间隔: ${SYNC_INTERVAL} 秒"
  while true; do
    echo "开始 Hugging Face 同步: $(date)"
    if [ -f "./data/webui.db" ]; then
      current_date=$(date +'%Y%m%d')
      backup_file="webui_backup_${current_date}.db"
      # Upload from a copy with an unpredictable name; the name inside the
      # repository is passed separately as $backup_file. Skip the upload when
      # the copy could not be made.
      if temp_path=$(mktemp "${TMPDIR:-/tmp}/webui_backup.XXXXXX") \
        && cp "./data/webui.db" "$temp_path"; then
        echo "正在上传到 Hugging Face..."
        python /tmp/hf_sync.py upload "$HF_TOKEN" "$DATASET_ID" "$temp_path" "$backup_file"
      else
        echo "无法创建数据库临时副本,跳过本次 Hugging Face 同步"
      fi
      if [ -n "$temp_path" ]; then
        rm -f "$temp_path"
      fi
    else
      echo "未找到数据库文件,跳过 Hugging Face 同步"
    fi
    sleep "$SYNC_INTERVAL"
  done
}
# Start both sync loops as background jobs; each runs its own endless
# upload/sleep cycle, so neither call blocks the statements that follow.
webdav_sync &
hf_sync &