File size: 4,654 Bytes
398ee84
 
 
d905c44
398ee84
 
 
 
 
 
 
d905c44
398ee84
2ebc237
 
febf61a
9506a5d
febf61a
9506a5d
febf61a
9506a5d
 
d905c44
 
bcaf9a9
 
 
d905c44
 
 
 
 
 
 
 
 
bcaf9a9
d905c44
bcaf9a9
d905c44
9506a5d
 
2ebc237
b9b6493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9506a5d
d905c44
febf61a
 
9506a5d
d905c44
 
9506a5d
 
febf61a
d905c44
 
9506a5d
 
 
8f1f74b
 
 
 
 
 
d905c44
9506a5d
 
8f1f74b
d905c44
398ee84
9506a5d
 
d905c44
9506a5d
d905c44
9506a5d
 
398ee84
 
8f1f74b
398ee84
b9b6493
 
 
398ee84
 
b9b6493
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/bin/bash

# Required environment:
#   HF_TOKEN   - Hugging Face access token.
#   DATASET_ID - ID of the dataset repository that holds the backup archives.
# If either one is missing or empty, the service starts without backups.
if [[ -z "${HF_TOKEN}" || -z "${DATASET_ID}" ]]; then
    echo "缺少环境变量HF_TOKEN或DATASET_ID,启动服务但不启用备份功能"
    # exec replaces this shell with uvicorn, so no statement after it can run
    # on success. If exec itself fails, a non-interactive bash exits with
    # exec's error status, so a trailing `exit 0` would be dead code.
    exec uvicorn app.main:app --host 0.0.0.0 --port 7860
fi

# Re-export the token under the HUGGING_FACE_HUB_TOKEN name for the Python
# helpers launched below.
export HUGGING_FACE_HUB_TOKEN="${HF_TOKEN}"

#######################################
# Download the newest backup_*.tar.gz from the dataset repo and unpack it
# into /app/app.
# Globals:   DATASET_ID (read)
# Outputs:   progress messages on stdout
# Returns:   exit status of the embedded Python script
#######################################
restore_latest() {
    echo "正在检查备份..."
    # The script comes from a quoted heredoc (no shell expansion) and reads
    # DATASET_ID from the environment, so the value is never spliced into
    # Python source code.
    DATASET_ID="${DATASET_ID}" python3 - <<'PY'
import os
import subprocess

from huggingface_hub import HfApi

repo_id = os.environ['DATASET_ID']

api = HfApi()
files = api.list_repo_files(repo_id, repo_type='dataset')
# Archive names embed a YYYYmmdd_HHMMSS timestamp, so the lexicographically
# last name is the most recent backup.
backup_files = sorted(f for f in files if f.startswith('backup_') and f.endswith('.tar.gz'))

if backup_files:
    latest = backup_files[-1]
    print(f'找到备份文件: {latest}, 开始下载...')
    api.hf_hub_download(repo_id, latest, repo_type='dataset', local_dir='/tmp')

    backup_path = f'/tmp/{latest}'
    if os.path.exists(backup_path):
        print(f'备份文件已下载: {backup_path}, 大小: {os.path.getsize(backup_path)} bytes')
        # Extract into /app/app. The argument list bypasses the shell, so the
        # archive path needs no quoting, and tar's real exit status is kept
        # (no `|| true`), which makes the failure branch below reachable.
        result = subprocess.run([
            'tar', '--no-same-owner', '--no-same-permissions', '--touch',
            '--warning=no-timestamp', '-xzf', backup_path, '-C', '/app/app',
        ])
        if result.returncode == 0:
            print('成功恢复数据!')
        else:
            # Restore stays best-effort: report the problem but keep going.
            print(f'解压时出现次要警告或错误,请检查数据完整性,tar返回码: {result.returncode}')
        os.remove(backup_path)
    else:
        print('下载备份文件失败!')
else:
    print('未发现任何备份文件,跳过恢复步骤')
PY
}

#######################################
# Round-trip permission check against the dataset repo: upload a small test
# file, download it again, compare the bytes, then delete it from the repo.
# Globals:   DATASET_ID (read)
# Outputs:   progress messages on stdout, setup errors on stderr
# Returns:   0 when upload and download/compare succeed, non-zero otherwise
#######################################
backup_upload_download_test() {
    echo "正在执行备份上传-下载权限完整性测试..."

    local test_name work_dir status
    test_name="backup_test_$(date +%Y%m%d_%H%M%S).txt"
    # Private scratch directory: the uploaded copy and the downloaded copy get
    # distinct paths, and everything is removed on every exit path.
    work_dir=$(mktemp -d) || {
        echo "无法创建临时目录,跳过备份上传-下载测试" >&2
        return 1
    }
    mkdir -p "${work_dir}/download"
    printf '%s\n' "备份测试内容 $(date)" > "${work_dir}/${test_name}"

    # Values reach Python through the environment; the quoted heredoc is not
    # expanded by the shell.
    DATASET_ID="${DATASET_ID}" \
    BACKUP_TEST_FILE_NAME="${test_name}" \
    BACKUP_TEST_LOCAL_PATH="${work_dir}/${test_name}" \
    BACKUP_TEST_DOWNLOAD_DIR="${work_dir}/download" \
    python3 - <<'PY'
import os
import sys

from huggingface_hub import HfApi

api = HfApi()
repo_id = os.environ['DATASET_ID']
test_file_name = os.environ['BACKUP_TEST_FILE_NAME']
local_test_path = os.environ['BACKUP_TEST_LOCAL_PATH']
download_dir = os.environ['BACKUP_TEST_DOWNLOAD_DIR']

# Upload the test file. Nothing exists remotely if this fails, so exit early.
try:
    api.upload_file(
        path_or_fileobj=local_test_path,
        path_in_repo=test_file_name,
        repo_id=repo_id,
        repo_type='dataset'
    )
    print('✅ 测试文件上传成功')
except Exception as e:
    print(f'❌ 测试文件上传失败: {e}')
    sys.exit(1)

# Download into a separate directory and compare raw bytes with what was
# uploaded (no dependence on locale or text encoding).
failed = False
try:
    downloaded_path = api.hf_hub_download(
        repo_id=repo_id,
        filename=test_file_name,
        repo_type='dataset',
        local_dir=download_dir
    )
    with open(local_test_path, 'rb') as f:
        expected = f.read()
    with open(downloaded_path, 'rb') as f:
        actual = f.read()
    if actual == expected:
        print('✅ 测试文件下载成功且内容一致')
    else:
        print('❌ 测试文件内容不一致')
        failed = True
except Exception as e:
    print(f'❌ 测试文件下载失败: {e}')
    failed = True

# Always try to remove the remote test file, even after a failed download,
# so test files do not pile up in the repo.
try:
    api.delete_file(
        path_in_repo=test_file_name,
        repo_id=repo_id,
        repo_type='dataset'
    )
    print('✅ 测试文件已成功删除')
except Exception as e:
    print(f'⚠️ 测试文件删除失败: {e}')

sys.exit(1 if failed else 0)
PY
    status=$?

    # Local cleanup happens here, regardless of how the Python step ended.
    rm -rf -- "${work_dir}"
    return "${status}"
}


#######################################
# Helper for sync_data: archive /app/app once, upload the archive to the
# dataset repo and prune old archives so that at most 50 remain.
# Globals:   DATASET_ID (read)
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success or when there is nothing to back up, else non-zero
#######################################
_sync_data_once() {
    local src_dir="/app/app"
    local backup_file tmp_dir archive tar_status status

    # Never archive whatever the current directory happens to be.
    cd "${src_dir}" || {
        echo "无法进入目录 ${src_dir},跳过本次备份" >&2
        return 1
    }

    if [[ -z "$(ls -A . 2>/dev/null)" ]]; then
        echo "无数据需要备份"
        return 0
    fi

    backup_file="backup_$(date +%Y%m%d_%H%M%S).tar.gz"
    tmp_dir=$(mktemp -d) || {
        echo "无法创建临时目录,跳过本次备份" >&2
        return 1
    }
    archive="${tmp_dir}/${backup_file}"

    # GNU tar exits 1 when files changed while being read (expected for live
    # data) and 2 on fatal errors; only the latter aborts the upload.
    tar_status=0
    tar -czf "${archive}" ./ || tar_status=$?
    if (( tar_status > 1 )); then
        echo "打包失败(tar返回码: ${tar_status}),跳过本次上传" >&2
        rm -rf -- "${tmp_dir}"
        return 1
    fi

    # Values reach Python through the environment; the quoted heredoc is not
    # expanded by the shell.
    DATASET_ID="${DATASET_ID}" \
    BACKUP_ARCHIVE_PATH="${archive}" \
    BACKUP_ARCHIVE_NAME="${backup_file}" \
    python3 - <<'PY'
import os

from huggingface_hub import HfApi

repo_id = os.environ['DATASET_ID']

api = HfApi()
api.upload_file(
    path_or_fileobj=os.environ['BACKUP_ARCHIVE_PATH'],
    path_in_repo=os.environ['BACKUP_ARCHIVE_NAME'],
    repo_id=repo_id,
    repo_type='dataset'
)
print('备份上传成功')

# Same filter as the restore step: only real archives count towards the
# retention window, so stray backup_test_* files cannot push them out.
backup_files = sorted(
    f for f in api.list_repo_files(repo_id, repo_type='dataset')
    if f.startswith('backup_') and f.endswith('.tar.gz')
)
for old_backup in backup_files[:-50]:
    api.delete_file(path_in_repo=old_backup, repo_id=repo_id, repo_type='dataset')
    print(f'删除旧备份: {old_backup}')
PY
    status=$?

    rm -rf -- "${tmp_dir}"
    return "${status}"
}

#######################################
# Periodic backup loop: wait one interval, then back up forever.
# Globals:   SYNC_INTERVAL (read; seconds between backups, default 7200)
#            DATASET_ID (read, via _sync_data_once)
# Outputs:   progress on stdout, errors on stderr
# Returns:   never returns under normal operation
#######################################
sync_data() {
    local interval="${SYNC_INTERVAL:-7200}"

    echo "启动后首次备份将在${interval}秒后执行"
    # If sleep rejects the configured value, fall back to the default rather
    # than spinning through back-to-back backups.
    sleep "${interval}" || sleep 7200

    while true; do
        echo "开始备份: $(date)"
        # A failed round is reported by the helper; the loop keeps running.
        _sync_data_once
        echo "下次备份将在${interval}秒后执行"
        sleep "${interval}" || sleep 7200
    done
}


# Entry point for the backup-enabled path. All steps run inside one subshell
# whose stdout and stderr are merged, shown on the console and appended to
# /app/data/backup.log by tee.
# NOTE(review): pipefail is not enabled, so this pipeline's exit status is
# tee's rather than uvicorn's — confirm that suits whatever supervises the
# script.
(
    restore_latest          # restore the most recent backup, if any
    backup_upload_download_test  # startup upload/download round-trip test
    sync_data &             # periodic backups in the background
    # exec replaces the subshell with the uvicorn server process.
    exec uvicorn app.main:app --host 0.0.0.0 --port 7860
) 2>&1 | tee -a /app/data/backup.log