cronjob / app.py
testdeep123's picture
Update app.py
b3566cd verified
raw
history blame
20.4 kB
import os
import shutil
import zipfile
import threading
import time
import humanize
from flask import Flask, request, jsonify, render_template_string
import gdown
from huggingface_hub import HfApi, login, upload_folder, hf_hub_url
from huggingface_hub.utils import HfHubHTTPError
# --- Configuration & Initialization ---
# Ensure Hugging Face cache and other temp data writes to /tmp
# NOTE(review): this assignment runs after `huggingface_hub` has already been
# imported at the top of the file; if the library resolves HF_HOME at import
# time the override may not take effect - confirm, and move it ahead of the
# import if so.
os.environ["HF_HOME"] = "/tmp/hf_home"
# Scratch directories used by the backup job: DOWNLOAD_DIR receives the raw
# Google Drive download, EXTRACT_DIR receives the unpacked zip contents.
DOWNLOAD_DIR = "/tmp/backups"
EXTRACT_DIR = "/tmp/extracted_backups"
# Environment variables (set these in your Space secrets)
# Each of these is None when the corresponding variable is unset.
FOLDER_URL = os.getenv("FOLDER_URL")  # folder URL handed to gdown.download_folder
REPO_ID = os.getenv("REPO_ID")  # dataset repository the backup is uploaded to
TOKEN = os.getenv("HF_TOKEN")  # token passed to huggingface_hub.login
# --- Global State Management ---
# Single shared dictionary: written by the backup job and the scheduler loop,
# read (and partly written) by the HTTP handlers. No lock guards it.
app_state = {
    "backup_status": "idle",  # idle, running, success, error
    "backup_log": ["Awaiting first run."],
    "last_backup_time": "Never",
    "next_backup_time": "Scheduler disabled",
    "schedule_interval_minutes": 0,  # 0 means disabled
    "scheduler_thread": None  # This will hold the non-serializable Thread object
}
# --- Flask App Setup ---
app = Flask(__name__)
# Shared Hub client used by the backup job and the file-management endpoints.
api = HfApi()
# --- HTML, CSS, JS Template ---
# Single-page dashboard rendered by the index route through Jinja
# (`{{ repo_id }}` is the only template variable). The embedded script polls
# /api/status and drives the other /api/* endpoints.
#
# Everything the script inserts via innerHTML (log lines, file names, URLs,
# sizes) goes through escapeHtml(): log lines carry exception text and file
# names come from the repository, so neither may be treated as markup.
#
# The Bootstrap CDN URLs are pinned to 5.3.3; a 5.3.x release is needed because
# the page relies on the `data-bs-theme` attribute.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="en" data-bs-theme="dark">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Backup & Dataset Controller</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
    <style>
        body {
            background-color: #212529; /* Dark background */
        }
        .log-box {
            height: 300px;
            overflow-y: auto;
            font-family: 'Courier New', Courier, monospace;
            font-size: 0.85rem;
            color: #f8f9fa;
            background-color: #111315 !important;
            border-top: 1px solid #495057;
        }
        .log-box div {
            padding: 2px 5px;
            border-bottom: 1px solid #343a40;
        }
        .status-badge {
            padding: 0.35em 0.65em;
            font-size: .75em;
            font-weight: 700;
            line-height: 1;
            color: #fff;
            text-align: center;
            white-space: nowrap;
            vertical-align: baseline;
            border-radius: 0.25rem;
            transition: background-color 0.3s ease-in-out;
        }
        .status-idle { background-color: #6c757d; } /* Gray */
        .status-running { background-color: #0d6efd; } /* Blue */
        .status-success { background-color: #198754; } /* Green */
        .status-error { background-color: #dc3545; } /* Red */
        #files-list-container {
            max-height: 450px;
            overflow-y: auto;
        }
        .btn i, .btn .spinner-border {
            pointer-events: none; /* Make clicks on icons pass through to the button */
        }
        .card {
            border: 1px solid rgba(255, 255, 255, 0.1);
        }
    </style>
</head>
<body>
    <div class="container my-4">
        <header class="d-flex align-items-center pb-3 mb-4 border-bottom border-secondary">
            <i class="fas fa-server fa-2x me-3 text-info"></i>
            <span class="fs-4">Minecraft Backup & Dataset Controller</span>
        </header>
        <div class="row g-4">
            <!-- Left Panel: Backup Controls -->
            <div class="col-lg-6">
                <div class="card h-100 shadow-sm">
                    <div class="card-header d-flex justify-content-between align-items-center">
                        <h5 class="mb-0"><i class="fas fa-shield-alt me-2"></i>Backup Controls</h5>
                        <div id="backup-status-indicator" class="status-badge" data-bs-toggle="tooltip" title="Current Status">Idle</div>
                    </div>
                    <div class="card-body">
                        <div class="d-grid gap-2 mb-4">
                            <button id="run-now-btn" class="btn btn-lg btn-success">
                                <i class="fas fa-play-circle me-2"></i>Run Backup Now
                            </button>
                        </div>
                        <form id="schedule-form" class="row g-2 align-items-center">
                            <div class="col">
                                <label for="interval-input" class="form-label">Schedule Interval (minutes)</label>
                                <input type="number" class="form-control" id="interval-input" placeholder="0 to disable" min="0">
                            </div>
                            <div class="col-auto align-self-end">
                                <button type="submit" class="btn btn-primary">
                                    <i class="fas fa-save me-2"></i>Set
                                </button>
                            </div>
                        </form>
                        <ul class="list-group list-group-flush mt-4">
                            <li class="list-group-item d-flex justify-content-between bg-transparent">
                                <span>Last Backup:</span>
                                <strong id="last-run-time">Never</strong>
                            </li>
                            <li class="list-group-item d-flex justify-content-between bg-transparent">
                                <span>Next Scheduled:</span>
                                <strong id="next-run-time">N/A</strong>
                            </li>
                        </ul>
                    </div>
                    <div class="card-footer">
                        <strong><i class="fas fa-clipboard-list me-2"></i>Live Log</strong>
                    </div>
                    <div id="log-output" class="log-box card-body"></div>
                </div>
            </div>
            <!-- Right Panel: Dataset Management -->
            <div class="col-lg-6">
                <div class="card h-100 shadow-sm">
                    <div class="card-header d-flex justify-content-between align-items-center">
                        <h5 class="mb-0"><i class="fas fa-database me-2"></i>Dataset Management</h5>
                        <a href="https://huggingface.co/datasets/{{ repo_id }}" target="_blank" class="btn btn-sm btn-outline-info">
                            View on Hub <i class="fas fa-external-link-alt"></i>
                        </a>
                    </div>
                    <div class="card-body">
                        <div class="d-flex justify-content-between align-items-center mb-3">
                            <p class="text-muted mb-0">Files in <strong>{{ repo_id }}</strong></p>
                            <button id="refresh-files-btn" class="btn btn-sm btn-secondary">
                                <i class="fas fa-sync-alt me-1"></i> Refresh
                            </button>
                        </div>
                        <div id="files-list-container">
                            <div id="files-loader" class="text-center p-4" style="display: none;">
                                <div class="spinner-border text-primary" role="status">
                                    <span class="visually-hidden">Loading...</span>
                                </div>
                            </div>
                            <table class="table table-hover">
                                <thead>
                                    <tr>
                                        <th>File Path</th>
                                        <th>Size</th>
                                        <th>Actions</th>
                                    </tr>
                                </thead>
                                <tbody id="files-list"></tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </div>
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
    <script>
        document.addEventListener('DOMContentLoaded', () => {
            const runNowBtn = document.getElementById('run-now-btn');
            const scheduleForm = document.getElementById('schedule-form');
            const intervalInput = document.getElementById('interval-input');
            const logOutput = document.getElementById('log-output');
            const statusIndicator = document.getElementById('backup-status-indicator');
            const lastRunTimeEl = document.getElementById('last-run-time');
            const nextRunTimeEl = document.getElementById('next-run-time');
            const refreshFilesBtn = document.getElementById('refresh-files-btn');
            const filesListBody = document.getElementById('files-list');
            const filesLoader = document.getElementById('files-loader');
            // Neutralise markup in any server-provided text before it reaches innerHTML.
            function escapeHtml(value) {
                return String(value)
                    .replace(/&/g, '&amp;')
                    .replace(/</g, '&lt;')
                    .replace(/>/g, '&gt;')
                    .replace(/"/g, '&quot;')
                    .replace(/'/g, '&#39;');
            }
            async function apiCall(endpoint, options = {}) {
                try {
                    const response = await fetch(endpoint, options);
                    if (!response.ok) {
                        const errorData = await response.json().catch(() => ({ message: `HTTP error! Status: ${response.status}` }));
                        throw new Error(errorData.message);
                    }
                    return response.json();
                } catch (error) {
                    console.error(`API call to ${endpoint} failed:`, error);
                    alert(`Error: ${error.message}`);
                    return null;
                }
            }
            function updateLog(logs) {
                logOutput.innerHTML = logs.map(line => `<div>${escapeHtml(line)}</div>`).join('');
                logOutput.scrollTop = logOutput.scrollHeight;
            }
            function updateStatusUI(data) {
                statusIndicator.textContent = data.backup_status.charAt(0).toUpperCase() + data.backup_status.slice(1);
                statusIndicator.className = 'status-badge';
                statusIndicator.classList.add(`status-${data.backup_status}`);
                lastRunTimeEl.textContent = data.last_backup_time;
                nextRunTimeEl.textContent = data.next_backup_time;
                if (document.activeElement !== intervalInput) {
                    intervalInput.value = data.schedule_interval_minutes > 0 ? data.schedule_interval_minutes : '';
                }
                runNowBtn.disabled = data.backup_status === 'running';
                if (data.backup_status === 'running') {
                    runNowBtn.innerHTML = `<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span> Running...`;
                } else {
                    runNowBtn.innerHTML = `<i class="fas fa-play-circle me-2"></i>Run Backup Now`;
                }
            }
            async function fetchStatus() {
                const data = await apiCall('/api/status');
                if (data) {
                    updateLog(data.backup_log);
                    updateStatusUI(data);
                }
            }
            async function runBackup() {
                if (runNowBtn.disabled) return;
                const data = await apiCall('/api/run-backup', { method: 'POST' });
                if (data) fetchStatus();
            }
            async function setSchedule(event) {
                event.preventDefault();
                const interval = intervalInput.value;
                await apiCall('/api/set-schedule', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ interval: parseInt(interval, 10) || 0 }),
                });
                fetchStatus();
            }
            async function listFiles() {
                filesLoader.style.display = 'block';
                filesListBody.innerHTML = '';
                refreshFilesBtn.disabled = true;
                const data = await apiCall('/api/list-files');
                filesLoader.style.display = 'none';
                refreshFilesBtn.disabled = false;
                if (data && data.files) {
                    if (data.files.length === 0) {
                        filesListBody.innerHTML = '<tr><td colspan="3" class="text-center text-muted">No files found in repository.</td></tr>';
                        return;
                    }
                    data.files.forEach(file => {
                        const row = document.createElement('tr');
                        const safeName = escapeHtml(file.name);
                        row.innerHTML = `
                            <td class="text-break">
                                <a href="${escapeHtml(file.url)}" target="_blank" title="${safeName}">${safeName}</a>
                            </td>
                            <td>${escapeHtml(file.size)}</td>
                            <td>
                                <button class="btn btn-sm btn-outline-danger delete-btn" data-filename="${safeName}" title="Delete File">
                                    <i class="fas fa-trash-alt"></i>
                                </button>
                            </td>
                        `;
                        filesListBody.appendChild(row);
                    });
                }
            }
            async function deleteFile(event) {
                const button = event.target.closest('.delete-btn');
                if (!button) return;
                const filename = button.dataset.filename;
                if (!confirm(`Are you sure you want to permanently delete "${filename}"?`)) return;
                button.disabled = true;
                button.innerHTML = `<span class="spinner-border spinner-border-sm"></span>`;
                const data = await apiCall('/api/delete-file', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ filename }),
                });
                if (data) listFiles();
                else {
                    button.disabled = false;
                    button.innerHTML = `<i class="fas fa-trash-alt"></i>`;
                }
            }
            runNowBtn.addEventListener('click', runBackup);
            scheduleForm.addEventListener('submit', setSchedule);
            refreshFilesBtn.addEventListener('click', listFiles);
            filesListBody.addEventListener('click', deleteFile);
            const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
            tooltipTriggerList.map(function (tooltipTriggerEl) {
                return new bootstrap.Tooltip(tooltipTriggerEl);
            });
            fetchStatus();
            listFiles();
            setInterval(fetchStatus, 3000);
        });
    </script>
</body>
</html>
"""
# --- Core Backup Logic ---
# Held for the whole duration of a backup run. The HTTP handler and the
# scheduler both do "check status, then start a job", which leaves a window in
# which two jobs could start and fight over the same /tmp directories; the
# lock makes the second one back off.
_backup_lock = threading.Lock()


def run_backup_job():
    """Download the Drive folder, unpack its zips and upload them to the Hub.

    Progress is appended to ``app_state["backup_log"]`` and the outcome is
    recorded in ``app_state["backup_status"]`` ("running" while in progress,
    then "success" or "error"). Exceptions are never propagated: any failure
    is logged and reflected in the status, so the function is safe to use as
    a thread target.

    If another run is already in progress the call returns immediately
    without touching the shared state.
    """
    if not _backup_lock.acquire(blocking=False):
        print("Backup already in progress; ignoring duplicate start request.")
        return

    app_state["backup_status"] = "running"
    app_state["backup_log"] = ["Starting backup process..."]

    def log(message):
        print(message)
        app_state["backup_log"].append(message)

    try:
        # Fail early with a readable message instead of an obscure error from
        # gdown or huggingface_hub when a secret has not been configured.
        required = (("FOLDER_URL", FOLDER_URL), ("REPO_ID", REPO_ID), ("HF_TOKEN", TOKEN))
        missing = [name for name, value in required if not value]
        if missing:
            raise RuntimeError(
                "Missing required environment variable(s): " + ", ".join(missing)
            )

        log("Resetting temporary directories...")
        shutil.rmtree(DOWNLOAD_DIR, ignore_errors=True)
        shutil.rmtree(EXTRACT_DIR, ignore_errors=True)
        os.makedirs(DOWNLOAD_DIR, exist_ok=True)
        os.makedirs(EXTRACT_DIR, exist_ok=True)

        log("Downloading from Google Drive folder...")
        downloaded = gdown.download_folder(
            url=FOLDER_URL, output=DOWNLOAD_DIR, use_cookies=False, quiet=True
        )
        # gdown documents a None return when the folder could not be
        # retrieved; without this check the run would go on to report success
        # having backed up nothing.
        if downloaded is None:
            raise RuntimeError("Google Drive download failed (no files were retrieved).")
        log("Download finished.")

        log("Extracting zip archives...")
        extracted_count = 0
        for root, _, files in os.walk(DOWNLOAD_DIR):
            for f in files:
                if f.endswith(".zip"):
                    zp = os.path.join(root, f)
                    with zipfile.ZipFile(zp) as z:
                        z.extractall(EXTRACT_DIR)
                    log(f"Extracted: {f}")
                    extracted_count += 1
        if extracted_count == 0:
            log("Warning: No .zip files found to extract.")

        # Some archives ship the nether world under a misspelled folder name;
        # normalise it, but never overwrite a correctly named folder.
        bad_path = os.path.join(EXTRACT_DIR, "world_nither")
        good_path = os.path.join(EXTRACT_DIR, "world_nether")
        if os.path.exists(bad_path) and not os.path.exists(good_path):
            os.rename(bad_path, good_path)
            log("Fixed folder name typo: 'world_nither' -> 'world_nether'")

        log("Logging into Hugging Face Hub...")
        login(token=TOKEN)
        log(f"Ensuring dataset repository '{REPO_ID}' exists...")
        api.create_repo(repo_id=REPO_ID, repo_type="dataset", private=False, exist_ok=True)
        log("Repository is ready.")

        # Only these top-level folders are uploaded; each goes to a folder of
        # the same name in the repository, in its own commit.
        subfolders_to_upload = {
            "world": os.path.join(EXTRACT_DIR, "world"),
            "world_nether": os.path.join(EXTRACT_DIR, "world_nether"),
            "world_the_end": os.path.join(EXTRACT_DIR, "world_the_end"),
            "plugins": os.path.join(EXTRACT_DIR, "plugins"),
        }
        for name, path in subfolders_to_upload.items():
            if os.path.exists(path):
                log(f"Uploading '{name}'...")
                upload_folder(
                    repo_id=REPO_ID, folder_path=path, repo_type="dataset",
                    path_in_repo=name, commit_message=f"Backup update for {name}"
                )
                log(f"'{name}' uploaded successfully.")
            else:
                log(f"Skipping '{name}' - directory not found.")

        app_state["last_backup_time"] = time.strftime("%Y-%m-%d %H:%M:%S %Z")
        log(f"Backup completed successfully at {app_state['last_backup_time']}.")
        app_state["backup_status"] = "success"
    except Exception as e:
        # Top-level boundary of a worker thread: record the failure for the
        # UI rather than letting the thread die silently.
        log(f"AN ERROR OCCURRED: {str(e)}")
        app_state["backup_status"] = "error"
    finally:
        _backup_lock.release()
# --- Scheduler Thread ---
def scheduler_loop():
    """Run backups forever at the interval stored in ``app_state``.

    While ``schedule_interval_minutes`` is 0 (or negative) the loop idles and
    reports "Scheduler disabled". Once a positive interval is set, a backup
    is run straight away (unless one is already running) and the next one is
    due ``interval`` minutes after it finishes.

    The wait between runs is done in short slices rather than one long sleep
    so that schedule changes take effect promptly:

    * disabling the schedule ends the wait within one poll period;
    * changing to a different positive interval restarts the countdown from
      the moment the change is noticed, which keeps ``next_backup_time`` in
      line with what ``set_schedule`` reported to the user.

    Intended to be used as the target of a daemon thread; it never returns.
    """
    poll_seconds = 5
    time_format = "%Y-%m-%d %H:%M:%S %Z"

    while True:
        interval = app_state["schedule_interval_minutes"]
        if interval <= 0:
            app_state["next_backup_time"] = "Scheduler disabled"
            time.sleep(poll_seconds)
            continue

        if app_state["backup_status"] != "running":
            run_backup_job()

        deadline = time.time() + interval * 60
        app_state["next_backup_time"] = time.strftime(time_format, time.localtime(deadline))

        while True:
            remaining = deadline - time.time()
            if remaining <= 0:
                break
            time.sleep(min(poll_seconds, remaining))

            current = app_state["schedule_interval_minutes"]
            if current <= 0:
                # Disabled mid-wait: fall back to the idle branch above.
                break
            if current != interval:
                # Rescheduled mid-wait: count the new interval from now.
                interval = current
                deadline = time.time() + interval * 60
                app_state["next_backup_time"] = time.strftime(
                    time_format, time.localtime(deadline)
                )
# --- Flask Routes (API Endpoints) ---
@app.route("/")
def index():
    """Serve the dashboard page, filling in the configured repository id."""
    page = render_template_string(HTML_TEMPLATE, repo_id=REPO_ID)
    return page
# ===================================================================
# THIS IS THE CORRECTED FUNCTION
# ===================================================================
@app.route("/api/status", methods=["GET"])
def get_status():
    """Return the current application state as JSON.

    The response carries every entry of ``app_state`` except
    ``scheduler_thread``, whose value is a Thread object and cannot be
    serialized.
    """
    # Shallow copy so the shared dictionary itself is left untouched.
    snapshot = dict(app_state)
    snapshot.pop("scheduler_thread", None)
    return jsonify(snapshot)
# ===================================================================
@app.route("/api/run-backup", methods=["POST"])
def start_backup():
    """Kick off a backup in a background thread.

    Responds with 409 and an error payload when the recorded status is
    already "running"; otherwise starts a daemon thread running
    ``run_backup_job`` and responds immediately with an "ok" payload.
    """
    already_running = app_state["backup_status"] == "running"
    if not already_running:
        worker = threading.Thread(target=run_backup_job, daemon=True)
        worker.start()
        return jsonify({"status": "ok", "message": "Backup process started."})

    return jsonify({"status": "error", "message": "A backup is already in progress."}), 409
@app.route("/api/set-schedule", methods=["POST"])
def set_schedule():
    """Set the automatic backup interval from a JSON body.

    Expects a JSON object with an ``interval`` field holding a non-negative
    number of minutes; 0 (or a missing field) disables the scheduler. On
    success ``schedule_interval_minutes`` and ``next_backup_time`` in
    ``app_state`` are updated and an "ok" payload is returned.

    Responds with 400 and "Invalid interval value." when the body is not a
    JSON object, or when the interval is negative, not convertible to an
    integer, or too large to express as a timestamp.
    """
    invalid_message = {"status": "error", "message": "Invalid interval value."}

    # silent=True: a missing or malformed JSON body yields None instead of
    # an exception, so it can be answered with the same 400 as a bad value.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify(invalid_message), 400

    try:
        interval = int(payload.get("interval", 0))
        if interval < 0:
            raise ValueError("interval must be non-negative")
        if interval > 0:
            next_run_timestamp = time.time() + interval * 60
            next_backup_time = time.strftime(
                "%Y-%m-%d %H:%M:%S %Z", time.localtime(next_run_timestamp)
            )
        else:
            next_backup_time = "Scheduler disabled"
    except (ValueError, TypeError, OverflowError, OSError):
        # OverflowError/OSError: the interval is too large for float
        # arithmetic or for time.localtime on this platform.
        return jsonify(invalid_message), 400

    # Commit only after everything validated, so a rejected request never
    # leaves a half-applied schedule behind for the scheduler thread.
    app_state["schedule_interval_minutes"] = interval
    app_state["next_backup_time"] = next_backup_time
    return jsonify({"status": "ok", "message": f"Schedule set to {interval} minutes."})
@app.route("/api/list-files", methods=["GET"])
def list_repo_files():
    """List the files in the dataset repository with size and download URL.

    Returns ``{"status": "ok", "files": [...]}`` where each entry has
    ``name`` (path in the repo), ``size`` (human-readable, "N/A" when the
    Hub reports no size) and ``url``. Any failure talking to the Hub is
    reported as a 500 with the exception text as the message.

    File names and sizes are fetched with a single
    ``repo_info(files_metadata=True)`` request instead of one request per
    file, which keeps the endpoint usable for repositories holding
    thousands of files.
    """
    try:
        repo = api.repo_info(repo_id=REPO_ID, repo_type="dataset", files_metadata=True)
        files_details = []
        for sibling in repo.siblings or []:
            filename = sibling.rfilename
            raw_size = sibling.size
            if raw_size is None:
                size = "N/A"
            elif raw_size == 0:
                size = "0 B"
            else:
                size = humanize.naturalsize(raw_size)
            files_details.append({
                "name": filename,
                "size": size,
                "url": hf_hub_url(repo_id=REPO_ID, filename=filename, repo_type="dataset"),
            })
        return jsonify({"status": "ok", "files": files_details})
    except Exception as e:
        # Endpoint boundary: surface the failure to the UI as JSON.
        return jsonify({"status": "error", "message": str(e)}), 500
@app.route("/api/delete-file", methods=["POST"])
def delete_repo_file():
    """Delete one file from the dataset repository.

    Expects a JSON object with a non-empty string ``filename`` (the path in
    the repository). Responds with 400 when the body is not a JSON object or
    the filename is missing, empty or not a string; with 500 and the
    exception text when the Hub call fails; otherwise with an "ok" payload.
    """
    # silent=True: a missing or malformed JSON body yields None rather than
    # an exception, so it is answered with the same 400 as a missing field.
    payload = request.get_json(silent=True)
    filename = payload.get("filename") if isinstance(payload, dict) else None
    if not isinstance(filename, str) or not filename:
        return jsonify({"status": "error", "message": "Filename not provided."}), 400
    try:
        api.delete_file(
            repo_id=REPO_ID, path_in_repo=filename, repo_type="dataset",
            commit_message=f"Deleted file: {filename}"
        )
        return jsonify({"status": "ok", "message": f"Successfully deleted '{filename}'."})
    except Exception as e:
        # Endpoint boundary: surface the failure to the UI as JSON.
        return jsonify({"status": "error", "message": str(e)}), 500
# --- Main Execution ---
# When run as a script: launch the scheduler in a daemon thread (kept in
# app_state so the rest of the module can reach it), then serve the Flask app
# on all interfaces, port 7860.
if __name__ == "__main__":
    scheduler = threading.Thread(target=scheduler_loop, daemon=True)
    app_state["scheduler_thread"] = scheduler
    scheduler.start()
    app.run(host="0.0.0.0", port=7860)