#!/bin/bash
#
# Container entrypoint: starts the uvicorn app and, when HF_TOKEN and
# DATASET_ID are both set, restores the newest backup archive from a Hugging
# Face dataset repo, runs an upload/download self-test, and then uploads a
# fresh archive of /app/app every SYNC_INTERVAL seconds (default 7200).
#
# Environment:
#   HF_TOKEN       access token, re-exported as HUGGING_FACE_HUB_TOKEN
#   DATASET_ID     dataset repo id that stores the backup archives
#   SYNC_INTERVAL  optional delay between backups, handed to sleep(1)
#
# Strict mode (set -e) is deliberately not enabled: every backup step is
# best-effort and must never prevent the server from starting.

readonly DATA_DIR="/app/app"
readonly LOG_FILE="/app/data/backup.log"

# Without credentials the service still starts, just without backups.
# exec replaces this shell, so nothing after it runs on this path.
if [ -z "${HF_TOKEN:-}" ] || [ -z "${DATASET_ID:-}" ]; then
  echo "缺少环境变量HF_TOKEN或DATASET_ID,启动服务但不启用备份功能"
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
fi

export HUGGING_FACE_HUB_TOKEN="${HF_TOKEN}"
# The embedded Python snippets read the repo id from the environment instead
# of having it spliced into their source text.
export DATASET_ID

#######################################
# Download the newest backup_*.tar.gz from the dataset repo and unpack it
# into DATA_DIR.
# Globals:   DATASET_ID (read), DATA_DIR (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 when a backup was restored or none exists; non-zero when the
#            download or extraction failed
#######################################
restore_latest() {
  echo "正在检查备份..."
  RESTORE_DIR="${DATA_DIR}" python3 - <<'PY'
import os
import subprocess
import sys

from huggingface_hub import HfApi

repo_id = os.environ['DATASET_ID']
target_dir = os.environ['RESTORE_DIR']

api = HfApi()
files = api.list_repo_files(repo_id, repo_type='dataset')
backup_files = sorted(
    f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')
)
if not backup_files:
    print('未发现任何备份文件,跳过恢复步骤')
    sys.exit(0)

# Archive names embed a %Y%m%d_%H%M%S timestamp, so the lexicographically
# last name is the most recent backup.
latest = backup_files[-1]
print(f'找到备份文件: {latest}, 开始下载...')
backup_path = api.hf_hub_download(
    repo_id=repo_id, filename=latest, repo_type='dataset', local_dir='/tmp'
)
if not os.path.exists(backup_path):
    print('下载备份文件失败!')
    sys.exit(1)

print(f'备份文件已下载: {backup_path}, 大小: {os.path.getsize(backup_path)} bytes')
# Flush so our messages are not reordered after tar's own output.
sys.stdout.flush()
try:
    # Unpack straight into the data directory to avoid nested paths, and drop
    # archived ownership/permissions to avoid permission problems.
    result = subprocess.run([
        'tar', '--no-same-owner', '--no-same-permissions', '--touch',
        '--warning=no-timestamp', '-xzf', backup_path, '-C', target_dir,
    ]).returncode
    if result == 0:
        print(f'成功恢复数据!')
    else:
        print(f'解压时出现次要警告或错误,请检查数据完整性,tar返回码: {result}')
finally:
    os.remove(backup_path)
sys.exit(0 if result == 0 else 1)
PY
}

#######################################
# Verify that the token can upload to, download from and delete in the
# dataset repo by round-tripping a small text file.
# Globals:   DATASET_ID (read)
# Arguments: none
# Outputs:   one status line per step on stdout
# Returns:   0 when upload and download succeeded with identical content,
#            non-zero otherwise (a failed remote delete is only a warning)
#######################################
backup_upload_download_test() {
  echo "正在执行备份上传-下载权限完整性测试..."

  local test_content test_name work_dir status
  test_content="备份测试内容 $(date)"
  test_name="backup_test_$(date +%Y%m%d_%H%M%S).txt"

  # Private workspace: the download goes to its own sub-directory so it can
  # never be the same file as the one that was uploaded.
  if ! work_dir=$(mktemp -d); then
    echo "❌ 无法创建临时目录" >&2
    return 1
  fi
  mkdir -p "${work_dir}/download"
  echo "${test_content}" > "${work_dir}/${test_name}"

  TEST_FILE_NAME="${test_name}" \
  LOCAL_TEST_PATH="${work_dir}/${test_name}" \
  DOWNLOAD_DIR="${work_dir}/download" \
  python3 - <<'PY'
import os
import sys

from huggingface_hub import HfApi

api = HfApi()
repo_id = os.environ['DATASET_ID']
test_file_name = os.environ['TEST_FILE_NAME']
local_test_path = os.environ['LOCAL_TEST_PATH']
download_dir = os.environ['DOWNLOAD_DIR']

# Upload the test file.
try:
    api.upload_file(
        path_or_fileobj=local_test_path,
        path_in_repo=test_file_name,
        repo_id=repo_id,
        repo_type='dataset'
    )
    print('✅ 测试文件上传成功')
except Exception as e:
    print(f'❌ 测试文件上传失败: {e}')
    sys.exit(1)

# Download it again and compare byte-for-byte with what was uploaded.
passed = False
try:
    downloaded_path = api.hf_hub_download(
        repo_id=repo_id,
        filename=test_file_name,
        repo_type='dataset',
        local_dir=download_dir
    )
    with open(local_test_path, 'rb') as sent, open(downloaded_path, 'rb') as received:
        passed = sent.read() == received.read()
    if passed:
        print('✅ 测试文件下载成功且内容一致')
    else:
        print('❌ 测试文件内容不一致')
except Exception as e:
    print(f'❌ 测试文件下载失败: {e}')

# Always remove the remote test file once it has been uploaded, even when
# the download step failed, so test files do not pile up in the repo.
try:
    api.delete_file(
        path_in_repo=test_file_name,
        repo_id=repo_id,
        repo_type='dataset'
    )
    print('✅ 测试文件已成功删除')
except Exception as e:
    print(f'⚠️ 测试文件删除失败: {e}')

sys.exit(0 if passed else 1)
PY
  status=$?

  # Local clean-up happens here so it also runs when Python exits early.
  rm -rf -- "${work_dir}"
  return "${status}"
}

#######################################
# Periodic backup loop: archive DATA_DIR, upload the archive to the dataset
# repo and prune all but the 50 newest backup archives. Meant to be run in
# the background; only returns when sleep fails.
# Globals:   DATASET_ID (read), DATA_DIR (read), SYNC_INTERVAL (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   1 if sleep fails (e.g. invalid SYNC_INTERVAL); otherwise loops
#######################################
sync_data() {
  local interval="${SYNC_INTERVAL:-7200}"
  local timestamp backup_file archive tar_status

  echo "启动后首次备份将在${interval}秒后执行"
  # A failing sleep would otherwise turn the loop into back-to-back uploads.
  if ! sleep "${interval}"; then
    echo "sleep 失败,停止定期备份 (SYNC_INTERVAL=${interval})" >&2
    return 1
  fi

  while true; do
    echo "开始备份: $(date)"
    timestamp=$(date +%Y%m%d_%H%M%S)
    backup_file="backup_${timestamp}.tar.gz"
    archive="/tmp/${backup_file}"

    if [ -n "$(ls -A "${DATA_DIR}" 2>/dev/null)" ]; then
      # -C instead of cd: if the directory is unavailable tar fails instead
      # of silently archiving whatever the current directory happens to be.
      tar -czf "${archive}" -C "${DATA_DIR}" .
      tar_status=$?

      # GNU tar exits 1 when files changed while being read; that archive is
      # still worth uploading. Anything higher is a fatal error.
      if [ "${tar_status}" -le 1 ]; then
        BACKUP_ARCHIVE="${archive}" BACKUP_NAME="${backup_file}" python3 - <<'PY'
import os

from huggingface_hub import HfApi

repo_id = os.environ['DATASET_ID']

api = HfApi()
api.upload_file(
    path_or_fileobj=os.environ['BACKUP_ARCHIVE'],
    path_in_repo=os.environ['BACKUP_NAME'],
    repo_id=repo_id,
    repo_type='dataset'
)
print('备份上传成功')

# Retention: keep the 50 newest archives. Only real archives are counted, so
# stray backup_test_*.txt files cannot take up retention slots.
backup_files = sorted(
    f for f in api.list_repo_files(repo_id, repo_type='dataset')
    if f.startswith('backup_') and f.endswith('.tar.gz')
)
for old_backup in backup_files[:-50]:
    api.delete_file(path_in_repo=old_backup, repo_id=repo_id, repo_type='dataset')
    print(f'删除旧备份: {old_backup}')
PY
      else
        echo "打包失败,跳过本次上传 (tar返回码: ${tar_status})" >&2
      fi
      rm -f -- "${archive}"
    else
      echo "无数据需要备份"
    fi

    echo "下次备份将在${interval}秒后执行"
    if ! sleep "${interval}"; then
      echo "sleep 失败,停止定期备份 (SYNC_INTERVAL=${interval})" >&2
      return 1
    fi
  done
}

# Everything runs inside one subshell so that restore output, self-test
# output, background backup output and the server's own output all pass
# through the same tee into the log file.
(
  restore_latest               # restore the newest backup, if any
  backup_upload_download_test  # startup upload/download self-test
  sync_data &                  # periodic backups in the background
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
) 2>&1 | tee -a "${LOG_FILE}"