Spaces:
Running
Running
import os | |
import shutil | |
import zipfile | |
import threading | |
import time | |
import humanize | |
from flask import Flask, request, jsonify, render_template_string | |
import gdown | |
from huggingface_hub import HfApi, login, upload_folder, hf_hub_url | |
from huggingface_hub.utils import HfHubHTTPError | |
# --- Configuration & Initialization ---
# All scratch data (Hugging Face cache, downloads, extracted archives) lives
# under /tmp.
# NOTE(review): this assignment runs after `huggingface_hub` has already been
# imported at the top of the file. If the library resolves HF_HOME at import
# time the setting may have no effect -- confirm, and if so move it ahead of
# that import.
_TMP_ROOT = "/tmp"
os.environ.update(HF_HOME=f"{_TMP_ROOT}/hf_home")
DOWNLOAD_DIR = f"{_TMP_ROOT}/backups"
EXTRACT_DIR = f"{_TMP_ROOT}/extracted_backups"

# Deployment settings, read once at import time (set these in your Space
# secrets). Each one is None when the variable is not defined.
FOLDER_URL = os.environ.get("FOLDER_URL")
REPO_ID = os.environ.get("REPO_ID")
TOKEN = os.environ.get("HF_TOKEN")
# --- Global State Management ---
# One module-level dict shared by the request handlers and the background
# threads. Individual key reads and writes are atomic in CPython, but
# multi-step sequences (check a value, then act on it) are not protected.
app_state = dict(
    backup_status="idle",  # one of: idle, running, success, error
    backup_log=["Awaiting first run."],
    last_backup_time="Never",
    next_backup_time="Scheduler disabled",
    schedule_interval_minutes=0,  # 0 means the scheduler is disabled
    scheduler_thread=None,
)
# --- Flask App Setup ---
# Flask application object for this module.
app = Flask(__name__)
# Hugging Face Hub client, constructed with no explicit arguments and shared
# by the backup job and the dataset-management handlers in this file.
api = HfApi()
# --- HTML, CSS, JS Template --- | |
# Single-page dashboard rendered by index(). `{{ repo_id }}` is the only
# server-side placeholder; the rest is static markup plus a script that talks
# to the JSON endpoints under /api/.
# NOTE(review): the Bootstrap CDN URLs had their "@<version>" segment replaced
# by an e-mail-obfuscation placeholder. 5.3.2 is pinned here because
# `data-bs-theme` needs Bootstrap 5.3+; confirm the intended version.
# Server-provided text (log lines, file names) is inserted with textContent /
# DOM APIs rather than innerHTML so it can never be interpreted as markup.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="en" data-bs-theme="dark">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Backup &amp; Dataset Controller</title>
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
  <style>
    body {
      background-color: #212529; /* Dark background */
    }
    .log-box {
      height: 300px;
      overflow-y: auto;
      font-family: 'Courier New', Courier, monospace;
      font-size: 0.85rem;
      color: #f8f9fa;
      background-color: #111315 !important;
      border-top: 1px solid #495057;
    }
    .log-box div {
      padding: 2px 5px;
      border-bottom: 1px solid #343a40;
    }
    .status-badge {
      padding: 0.35em 0.65em;
      font-size: .75em;
      font-weight: 700;
      line-height: 1;
      color: #fff;
      text-align: center;
      white-space: nowrap;
      vertical-align: baseline;
      border-radius: 0.25rem;
      transition: background-color 0.3s ease-in-out;
    }
    .status-idle { background-color: #6c757d; } /* Gray */
    .status-running { background-color: #0d6efd; } /* Blue */
    .status-success { background-color: #198754; } /* Green */
    .status-error { background-color: #dc3545; } /* Red */
    #files-list-container {
      max-height: 450px;
      overflow-y: auto;
    }
    .btn i, .btn .spinner-border {
      pointer-events: none; /* Make clicks on icons pass through to the button */
    }
    .card {
      border: 1px solid rgba(255, 255, 255, 0.1);
    }
  </style>
</head>
<body>
  <div class="container my-4">
    <header class="d-flex align-items-center pb-3 mb-4 border-bottom border-secondary">
      <i class="fas fa-server fa-2x me-3 text-info"></i>
      <span class="fs-4">Minecraft Backup &amp; Dataset Controller</span>
    </header>
    <div class="row g-4">
      <!-- Left Panel: Backup Controls -->
      <div class="col-lg-6">
        <div class="card h-100 shadow-sm">
          <div class="card-header d-flex justify-content-between align-items-center">
            <h5 class="mb-0"><i class="fas fa-shield-alt me-2"></i>Backup Controls</h5>
            <div id="backup-status-indicator" class="status-badge" data-bs-toggle="tooltip" title="Current Status">Idle</div>
          </div>
          <div class="card-body">
            <div class="d-grid gap-2 mb-4">
              <button id="run-now-btn" class="btn btn-lg btn-success">
                <i class="fas fa-play-circle me-2"></i>Run Backup Now
              </button>
            </div>
            <form id="schedule-form" class="row g-2 align-items-center">
              <div class="col">
                <label for="interval-input" class="form-label">Schedule Interval (minutes)</label>
                <input type="number" class="form-control" id="interval-input" placeholder="0 to disable" min="0">
              </div>
              <div class="col-auto align-self-end">
                <button type="submit" class="btn btn-primary">
                  <i class="fas fa-save me-2"></i>Set
                </button>
              </div>
            </form>
            <ul class="list-group list-group-flush mt-4">
              <li class="list-group-item d-flex justify-content-between bg-transparent">
                <span>Last Backup:</span>
                <strong id="last-run-time">Never</strong>
              </li>
              <li class="list-group-item d-flex justify-content-between bg-transparent">
                <span>Next Scheduled:</span>
                <strong id="next-run-time">N/A</strong>
              </li>
            </ul>
          </div>
          <div class="card-footer">
            <strong><i class="fas fa-clipboard-list me-2"></i>Live Log</strong>
          </div>
          <div id="log-output" class="log-box card-body">
            <!-- Logs will be populated here by JavaScript -->
          </div>
        </div>
      </div>
      <!-- Right Panel: Dataset Management -->
      <div class="col-lg-6">
        <div class="card h-100 shadow-sm">
          <div class="card-header d-flex justify-content-between align-items-center">
            <h5 class="mb-0"><i class="fas fa-database me-2"></i>Dataset Management</h5>
            <a href="https://huggingface.co/datasets/{{ repo_id }}" target="_blank" rel="noopener noreferrer" class="btn btn-sm btn-outline-info">
              View on Hub <i class="fas fa-external-link-alt"></i>
            </a>
          </div>
          <div class="card-body">
            <div class="d-flex justify-content-between align-items-center mb-3">
              <p class="text-muted mb-0">Files in <strong>{{ repo_id }}</strong></p>
              <button id="refresh-files-btn" class="btn btn-sm btn-secondary">
                <i class="fas fa-sync-alt me-1"></i> Refresh
              </button>
            </div>
            <div id="files-list-container">
              <div id="files-loader" class="text-center p-4" style="display: none;">
                <div class="spinner-border text-primary" role="status">
                  <span class="visually-hidden">Loading...</span>
                </div>
              </div>
              <table class="table table-hover">
                <thead>
                  <tr>
                    <th>File Path</th>
                    <th>Size</th>
                    <th>Actions</th>
                  </tr>
                </thead>
                <tbody id="files-list">
                  <!-- File list will be populated here -->
                </tbody>
              </table>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
  <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
  <script>
    document.addEventListener('DOMContentLoaded', () => {
      // --- DOM Elements ---
      const runNowBtn = document.getElementById('run-now-btn');
      const scheduleForm = document.getElementById('schedule-form');
      const intervalInput = document.getElementById('interval-input');
      const logOutput = document.getElementById('log-output');
      const statusIndicator = document.getElementById('backup-status-indicator');
      const lastRunTimeEl = document.getElementById('last-run-time');
      const nextRunTimeEl = document.getElementById('next-run-time');
      const refreshFilesBtn = document.getElementById('refresh-files-btn');
      const filesListBody = document.getElementById('files-list');
      const filesLoader = document.getElementById('files-loader');
      let statusInterval;

      // --- API Helper ---
      async function apiCall(endpoint, options = {}) {
        try {
          const response = await fetch(endpoint, options);
          if (!response.ok) {
            let message = `HTTP error! status: ${response.status}`;
            try {
              const errorData = await response.json();
              if (errorData && errorData.message) {
                message = errorData.message;
              }
            } catch (parseError) {
              // Error body was not JSON; keep the generic message.
            }
            throw new Error(message);
          }
          // Awaited so that a malformed body is handled by the catch below.
          return await response.json();
        } catch (error) {
          console.error(`API call to ${endpoint} failed:`, error);
          alert(`Error: ${error.message}`);
          return null;
        }
      }

      // --- UI Update Functions ---
      function updateLog(logs) {
        const entries = logs.map(line => {
          const entry = document.createElement('div');
          entry.textContent = line;
          return entry;
        });
        logOutput.replaceChildren(...entries);
        logOutput.scrollTop = logOutput.scrollHeight;
      }

      function updateStatusUI(data) {
        // Status Badge
        statusIndicator.textContent = data.backup_status.charAt(0).toUpperCase() + data.backup_status.slice(1);
        statusIndicator.className = 'status-badge'; // Reset classes
        statusIndicator.classList.add(`status-${data.backup_status}`);
        // Timestamps
        lastRunTimeEl.textContent = data.last_backup_time;
        nextRunTimeEl.textContent = data.next_backup_time;
        // Interval Input (left alone while the user is typing in it)
        if (document.activeElement !== intervalInput) {
          intervalInput.value = data.schedule_interval_minutes > 0 ? data.schedule_interval_minutes : '';
        }
        // Button state
        runNowBtn.disabled = data.backup_status === 'running';
        if (data.backup_status === 'running') {
          runNowBtn.innerHTML = `<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span> Running...`;
        } else {
          runNowBtn.innerHTML = `<i class="fas fa-play-circle me-2"></i>Run Backup Now`;
        }
      }

      // --- Core Functions ---
      async function fetchStatus() {
        const data = await apiCall('/api/status');
        if (data) {
          updateLog(data.backup_log);
          updateStatusUI(data);
        }
      }

      async function runBackup() {
        if (runNowBtn.disabled) return;
        const data = await apiCall('/api/run-backup', { method: 'POST' });
        if (data) {
          fetchStatus(); // Immediately update status
        }
      }

      async function setSchedule(event) {
        event.preventDefault();
        const interval = intervalInput.value;
        await apiCall('/api/set-schedule', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ interval: parseInt(interval, 10) || 0 }),
        });
        fetchStatus();
      }

      function buildFileRow(file) {
        const row = document.createElement('tr');

        const nameCell = document.createElement('td');
        nameCell.className = 'text-break';
        const link = document.createElement('a');
        link.href = file.url;
        link.target = '_blank';
        link.rel = 'noopener noreferrer';
        link.title = file.name;
        link.textContent = file.name;
        nameCell.appendChild(link);

        const sizeCell = document.createElement('td');
        sizeCell.textContent = file.size;

        const actionCell = document.createElement('td');
        const deleteBtn = document.createElement('button');
        deleteBtn.className = 'btn btn-sm btn-outline-danger delete-btn';
        deleteBtn.dataset.filename = file.name;
        deleteBtn.title = 'Delete File';
        deleteBtn.innerHTML = '<i class="fas fa-trash-alt"></i>';
        actionCell.appendChild(deleteBtn);

        row.append(nameCell, sizeCell, actionCell);
        return row;
      }

      async function listFiles() {
        filesLoader.style.display = 'block';
        filesListBody.innerHTML = '';
        refreshFilesBtn.disabled = true;
        const data = await apiCall('/api/list-files');
        filesLoader.style.display = 'none';
        refreshFilesBtn.disabled = false;
        if (data && data.files) {
          if (data.files.length === 0) {
            filesListBody.innerHTML = '<tr><td colspan="3" class="text-center text-muted">No files found in repository.</td></tr>';
            return;
          }
          data.files.forEach(file => {
            filesListBody.appendChild(buildFileRow(file));
          });
        }
      }

      async function deleteFile(event) {
        const button = event.target.closest('.delete-btn');
        if (!button) return;
        const filename = button.dataset.filename;
        if (!confirm(`Are you sure you want to permanently delete "${filename}"?`)) {
          return;
        }
        button.disabled = true;
        button.innerHTML = `<span class="spinner-border spinner-border-sm"></span>`;
        const data = await apiCall('/api/delete-file', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ filename }),
        });
        if (data) {
          listFiles();
        } else {
          button.disabled = false;
          button.innerHTML = `<i class="fas fa-trash-alt"></i>`;
        }
      }

      // --- Event Listeners & Initializers ---
      runNowBtn.addEventListener('click', runBackup);
      scheduleForm.addEventListener('submit', setSchedule);
      refreshFilesBtn.addEventListener('click', listFiles);
      filesListBody.addEventListener('click', deleteFile);

      // Initialize tooltips
      const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
      tooltipTriggerList.map(function (tooltipTriggerEl) {
        return new bootstrap.Tooltip(tooltipTriggerEl);
      });

      // Initial data load and periodic polling
      fetchStatus();
      listFiles();
      statusInterval = setInterval(fetchStatus, 3000); // Poll for status every 3 seconds
    });
  </script>
</body>
</html>
"""
# --- Core Backup Logic --- | |
# Serialises backup runs. Both the manual endpoint and the scheduler call
# run_backup_job(), and two overlapping runs would delete each other's
# temporary directories mid-flight.
_backup_lock = threading.Lock()

# Top-level folders of the extracted backup that are pushed to the dataset
# repo; each is uploaded under the same name.
_UPLOAD_SUBFOLDERS = ("world", "world_nether", "world_the_end", "plugins")


def _extract_archives(log):
    """Unpack every .zip found under DOWNLOAD_DIR into EXTRACT_DIR.

    Returns the number of archives extracted.
    """
    extracted_count = 0
    for root, _, files in os.walk(DOWNLOAD_DIR):
        for file_name in files:
            if not file_name.lower().endswith(".zip"):
                continue
            with zipfile.ZipFile(os.path.join(root, file_name)) as archive:
                archive.extractall(EXTRACT_DIR)
            log(f"Extracted: {file_name}")
            extracted_count += 1
    return extracted_count


def _upload_subfolders(log):
    """Upload each expected subfolder that exists in EXTRACT_DIR to REPO_ID."""
    for name in _UPLOAD_SUBFOLDERS:
        path = os.path.join(EXTRACT_DIR, name)
        if not os.path.isdir(path):
            log(f"Skipping '{name}' - directory not found.")
            continue
        log(f"Uploading '{name}'...")
        upload_folder(
            repo_id=REPO_ID,
            folder_path=path,
            repo_type="dataset",
            path_in_repo=name,
            commit_message=f"Backup update for {name}",
        )
        log(f"'{name}' uploaded successfully.")


def run_backup_job():
    """Run one full backup: download, extract, and upload to the dataset repo.

    Designed to be run in a background thread. Progress is appended to
    ``app_state["backup_log"]`` and the outcome is recorded in
    ``app_state["backup_status"]`` ("success" or "error"); exceptions are
    logged rather than propagated. If another run is already in progress the
    call returns immediately without touching the shared state.
    """

    def log(message):
        print(message)
        app_state["backup_log"].append(message)

    if not _backup_lock.acquire(blocking=False):
        print("Backup already in progress; ignoring duplicate trigger.")
        return

    try:
        app_state["backup_status"] = "running"
        app_state["backup_log"] = ["Starting backup process..."]

        # Fail early with a readable message instead of an obscure error (or
        # an interactive login prompt) further down.
        missing = [
            name
            for name, value in (
                ("FOLDER_URL", FOLDER_URL),
                ("REPO_ID", REPO_ID),
                ("HF_TOKEN", TOKEN),
            )
            if not value
        ]
        if missing:
            raise RuntimeError(
                "Missing required environment variable(s): " + ", ".join(missing)
            )

        # 1. Clean up old directories
        log("Resetting temporary directories...")
        for directory in (DOWNLOAD_DIR, EXTRACT_DIR):
            shutil.rmtree(directory, ignore_errors=True)
            os.makedirs(directory, exist_ok=True)
        log("Directories reset.")

        # 2. Download from Google Drive
        log("Downloading from Google Drive folder...")
        downloaded = gdown.download_folder(
            url=FOLDER_URL, output=DOWNLOAD_DIR, use_cookies=False, quiet=True
        )
        # gdown reports a failed folder download by returning None rather
        # than raising, so the result has to be checked explicitly.
        if downloaded is None:
            raise RuntimeError("Google Drive download failed (no files were retrieved).")
        log("Download finished.")

        # 3. Extract downloaded zip files
        log("Extracting zip archives...")
        if _extract_archives(log) == 0:
            log("Warning: No .zip files found to extract.")

        # 4. Fix potential folder name typo
        bad_path = os.path.join(EXTRACT_DIR, "world_nither")
        good_path = os.path.join(EXTRACT_DIR, "world_nether")
        if os.path.exists(bad_path) and not os.path.exists(good_path):
            os.rename(bad_path, good_path)
            log("Fixed folder name typo: 'world_nither' -> 'world_nether'")

        # 5. Log in to Hugging Face
        log("Logging into Hugging Face Hub...")
        login(token=TOKEN)
        log("Login successful.")

        # 6. Ensure repository exists
        log(f"Ensuring dataset repository '{REPO_ID}' exists...")
        api.create_repo(repo_id=REPO_ID, repo_type="dataset", private=False, exist_ok=True)
        log("Repository is ready.")

        # 7. Upload specified subfolders
        _upload_subfolders(log)

        app_state["last_backup_time"] = time.strftime("%Y-%m-%d %H:%M:%S %Z")
        log(f"Backup completed successfully at {app_state['last_backup_time']}.")
        app_state["backup_status"] = "success"
    except Exception as e:
        # Boundary of the background thread: record the failure for the UI
        # instead of letting the thread die silently.
        log(f"AN ERROR OCCURRED: {str(e)}")
        app_state["backup_status"] = "error"
    finally:
        _backup_lock.release()
# --- Scheduler Thread --- | |
def scheduler_loop():
    """Trigger the backup job periodically, forever.

    Reads ``app_state["schedule_interval_minutes"]`` every few seconds, so a
    changed or disabled schedule takes effect promptly instead of only after
    the previous interval has been slept out. A value of 0 (or less) disables
    scheduling. As soon as scheduling is enabled a cycle is due immediately;
    later cycles are due one interval after the previous one. A cycle that
    comes due while a backup is already running is skipped.
    """
    poll_seconds = 5
    last_cycle = None  # wall-clock time of the last due cycle; None while disabled

    while True:
        interval = app_state["schedule_interval_minutes"]
        if interval > 0:
            period = interval * 60
            if last_cycle is None or time.time() - last_cycle >= period:
                if app_state["backup_status"] != "running":
                    print(f"Scheduler triggering backup. Interval: {interval} mins.")
                    run_backup_job()
                # Count from the end of the run so cycles are spaced a full
                # interval apart.
                last_cycle = time.time()
            try:
                app_state["next_backup_time"] = time.strftime(
                    "%Y-%m-%d %H:%M:%S %Z", time.localtime(last_cycle + period)
                )
            except (OverflowError, OSError, ValueError):
                # An absurdly large interval must not kill the scheduler thread.
                app_state["next_backup_time"] = "N/A"
        else:
            last_cycle = None
            app_state["next_backup_time"] = "Scheduler disabled"
        time.sleep(poll_seconds)
# --- Flask Routes (API Endpoints) --- | |
@app.route("/")
def index():
    """Serve the dashboard page, rendering HTML_TEMPLATE with the repo id."""
    return render_template_string(HTML_TEMPLATE, repo_id=REPO_ID)
@app.route("/api/status")
def get_status():
    """Return the current application state as JSON for the frontend.

    The ``scheduler_thread`` entry is left out: once the scheduler has been
    started it holds a ``threading.Thread``, which cannot be JSON-encoded.
    """
    snapshot = {
        key: value for key, value in app_state.items() if key != "scheduler_thread"
    }
    # Copy the log so a concurrent append cannot change it during encoding.
    snapshot["backup_log"] = list(snapshot["backup_log"])
    return jsonify(snapshot)
@app.route("/api/run-backup", methods=["POST"])
def start_backup():
    """Start a manual backup run in a background thread.

    Responds with HTTP 409 when a backup is already marked as running.
    """
    if app_state["backup_status"] == "running":
        return jsonify({"status": "error", "message": "A backup is already in progress."}), 409
    threading.Thread(target=run_backup_job, daemon=True).start()
    return jsonify({"status": "ok", "message": "Backup process started."})
@app.route("/api/set-schedule", methods=["POST"])
def set_schedule():
    """Set the backup interval from a JSON body of the form ``{"interval": <minutes>}``.

    An interval of 0 disables the scheduler. Responds with HTTP 400 when the
    body is not a JSON object or the interval is missing a usable value,
    negative, or too large to produce a valid timestamp.
    """
    invalid = (jsonify({"status": "error", "message": "Invalid interval value."}), 400)

    # silent=True: a missing or malformed JSON body yields None instead of an
    # HTML error page the frontend cannot parse.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return invalid

    try:
        interval = int(payload.get("interval", 0))
        if interval < 0:
            raise ValueError("Interval must be non-negative.")
        # Format the timestamp before touching shared state so an
        # unrepresentable value cannot leave the state half-updated.
        if interval > 0:
            next_run_timestamp = time.time() + interval * 60
            next_backup_time = time.strftime(
                "%Y-%m-%d %H:%M:%S %Z", time.localtime(next_run_timestamp)
            )
        else:
            next_backup_time = "Scheduler disabled"
    except (ValueError, TypeError, OverflowError, OSError):
        return invalid

    app_state["schedule_interval_minutes"] = interval
    app_state["next_backup_time"] = next_backup_time
    return jsonify({"status": "ok", "message": f"Schedule set to {interval} minutes."})
@app.route("/api/list-files")
def list_repo_files():
    """List every file in the dataset repository with its size and download URL.

    File names and sizes come from a single ``repo_info`` request with
    ``files_metadata=True`` instead of one request per file. Responds with
    HTTP 500 and the error text if the Hub call fails.
    """
    try:
        repo = api.repo_info(repo_id=REPO_ID, repo_type="dataset", files_metadata=True)
        files_details = []
        for sibling in repo.siblings or []:
            if sibling.size is None:
                size = "N/A"  # the Hub returned no size for this entry
            elif sibling.size:
                size = humanize.naturalsize(sibling.size)
            else:
                size = "0 B"
            files_details.append({
                "name": sibling.rfilename,
                "size": size,
                "url": hf_hub_url(
                    repo_id=REPO_ID, filename=sibling.rfilename, repo_type="dataset"
                ),
            })
        return jsonify({"status": "ok", "files": files_details})
    except Exception as e:
        # Route boundary: report the failure as JSON so the frontend can show it.
        return jsonify({"status": "error", "message": str(e)}), 500
@app.route("/api/delete-file", methods=["POST"])
def delete_repo_file():
    """Delete one file from the dataset repository.

    Expects a JSON body of the form ``{"filename": "<path in repo>"}``.
    Responds with HTTP 400 when no usable filename is supplied, with the
    Hub's own error status (404 if unknown) when the Hub rejects the request,
    and with HTTP 500 on any other failure.

    NOTE(review): nothing in this handler authenticates the caller before
    performing a destructive operation -- confirm access is restricted
    elsewhere.
    """
    payload = request.get_json(silent=True)
    filename = payload.get("filename") if isinstance(payload, dict) else None
    if not filename or not isinstance(filename, str):
        return jsonify({"status": "error", "message": "Filename not provided."}), 400
    try:
        api.delete_file(
            repo_id=REPO_ID,
            path_in_repo=filename,
            repo_type="dataset",
            commit_message=f"Deleted file: {filename}",
        )
        return jsonify({"status": "ok", "message": f"Successfully deleted '{filename}'."})
    except HfHubHTTPError as e:
        # Pass through the Hub's status so an auth failure is not reported as
        # "not found"; fall back to 404 when no status is available.
        upstream = getattr(getattr(e, "response", None), "status_code", None)
        status = upstream if isinstance(upstream, int) and 400 <= upstream < 600 else 404
        return jsonify({"status": "error", "message": f"File not found or permission error: {e}"}), status
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
# --- Main Execution --- | |
if __name__ == "__main__":
    # Launch the scheduler as a daemon thread and keep a handle to it in the
    # shared state.
    scheduler = threading.Thread(target=scheduler_loop, daemon=True)
    app_state["scheduler_thread"] = scheduler
    scheduler.start()

    # Serve the web UI on all interfaces, port 7860.
    app.run(host="0.0.0.0", port=7860)