Spaces:
Running
Running
# app.py | |
import os | |
import shutil | |
import zipfile | |
import threading | |
import time | |
import logging | |
from flask import Flask, request, jsonify, render_template_string | |
import gdown | |
from huggingface_hub import HfApi, login | |
from huggingface_hub.utils import HfHubHTTPError | |
# --- CONFIGURATION ---
# Point the Hugging Face and gdown caches at the writable /tmp directory.
# NOTE(review): these variables are assigned after `huggingface_hub` and `gdown`
# were imported above -- confirm both libraries still honour values set this late.
os.environ.update({
    "HF_HOME": "/tmp/hf_home",
    "GDOWN_CACHE_DIR": "/tmp/gdown_cache",
})

# Deployment settings read from the environment (set these in your Space secret
# settings). run_backup_job() refuses to run while FOLDER_URL still holds the
# placeholder or while HF_TOKEN is unset.
FOLDER_URL = os.environ.get("FOLDER_URL", "YOUR_GOOGLE_DRIVE_FOLDER_URL_HERE")
REPO_ID = os.environ.get("REPO_ID", "your-hf-username/your-dataset-name")
TOKEN = os.environ.get("HF_TOKEN")

# Scratch directories, both under the writable /tmp.
DOWNLOAD_DIR = "/tmp/backups"
EXTRACT_DIR = "/tmp/extracted_backups"
# --- HTML TEMPLATE WITH EMBEDDED CSS AND JAVASCRIPT --- | |
HTML_TEMPLATE = """ | |
<!DOCTYPE html> | |
<html lang="en" data-theme="dark"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<title>HF Backup & Manager</title> | |
<!-- Pico.css for a clean, modern look --> | |
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@1/css/pico.min.css"> | |
<!-- Embedded Custom Styles --> | |
<style> | |
:root { | |
--pico-card-background-color: #1e2025; | |
--pico-card-border-color: #33363d; | |
} | |
body { | |
padding: 1rem; | |
} | |
main.container { | |
max-width: 1000px; | |
padding-top: 1rem; | |
} | |
header { | |
text-align: center; | |
margin-bottom: 2rem; | |
} | |
article { | |
padding: 1.5rem; | |
} | |
.grid { | |
grid-template-columns: 1fr; | |
gap: 1.5rem; | |
} | |
@media (min-width: 992px) { | |
.grid { | |
grid-template-columns: 1fr 1fr; | |
} | |
} | |
.log-box { | |
background-color: #111317; | |
border: 1px solid var(--pico-card-border-color); | |
border-radius: var(--pico-border-radius); | |
padding: 1rem; | |
height: 200px; | |
overflow-y: auto; | |
font-family: monospace; | |
font-size: 0.875em; | |
white-space: pre-wrap; | |
word-break: break-all; | |
} | |
#status-text.idle { color: var(--pico-color-green-400); } | |
#status-text.running { color: var(--pico-color-amber-400); } | |
#status-text.error { color: var(--pico-color-red-400); } | |
button { | |
display: flex; | |
align-items: center; | |
justify-content: center; | |
gap: 0.75rem; | |
} | |
.spinner { | |
border: 3px solid rgba(255, 255, 255, 0.2); | |
border-top: 3px solid var(--pico-primary); | |
border-radius: 50%; | |
width: 16px; | |
height: 16px; | |
animation: spin 1s linear infinite; | |
} | |
@keyframes spin { | |
0% { transform: rotate(0deg); } | |
100% { transform: rotate(360deg); } | |
} | |
.file-manager-container { | |
max-height: 400px; | |
overflow-y: auto; | |
margin-top: 1rem; | |
} | |
.file-manager-container table button { | |
margin: 0; | |
padding: 0.25rem 0.5rem; | |
background-color: var(--pico-color-red-600); | |
border-color: var(--pico-color-red-600); | |
} | |
small { | |
display: block; | |
margin-top: -0.5rem; | |
margin-bottom: 1rem; | |
color: var(--pico-secondary-text); | |
} | |
</style> | |
</head> | |
<body> | |
<main class="container"> | |
<header> | |
<hgroup> | |
<h1>Hugging Face Backup & Manager</h1> | |
<p>Automate server backups and manage your dataset on the Hub.</p> | |
</hgroup> | |
</header> | |
<div class="grid"> | |
<article> | |
<hgroup> | |
<h2>Control Panel</h2> | |
<h3>Manage your backup tasks and schedule.</h3> | |
</hgroup> | |
<button id="run-now-btn" onclick="runNow()"> | |
<span id="run-now-spinner" class="spinner" style="display: none;"></span> | |
Run Backup Now | |
</button> | |
<small>Manually trigger a full backup cycle.</small> | |
<form id="schedule-form" onsubmit="setSchedule(event)"> | |
<label for="interval">Automatic Backup Interval (minutes)</label> | |
<input type="number" id="interval" name="interval" placeholder="0" min="0"> | |
<small>Set to 0 to disable automatic backups.</small> | |
<button type="submit">Set Schedule</button> | |
</form> | |
</article> | |
<article> | |
<hgroup> | |
<h2>Live Status</h2> | |
<h3 id="status-text">Status: Fetching...</h3> | |
</hgroup> | |
<p><strong>Last Successful Backup:</strong> <span id="last-backup-time">Never</span></p> | |
<p><strong>Current Schedule:</strong> Every <span id="current-schedule">...</span> minutes</p> | |
<strong>Logs:</strong> | |
<pre id="logs" class="log-box"></pre> | |
</article> | |
</div> | |
<article> | |
<hgroup> | |
<h2>Dataset File Manager</h2> | |
<h3>Manage files in your repository: <a href="https://huggingface.co/datasets/{{ repo_id }}" target="_blank">{{ repo_id }}</a></h3> | |
</hgroup> | |
<button id="refresh-files-btn" onclick="fetchRepoFiles()" aria-busy="false">Refresh File List</button> | |
<div id="file-manager" class="file-manager-container"> | |
<p>Loading files...</p> | |
</div> | |
</article> | |
</main> | |
<!-- Embedded JavaScript --> | |
<script> | |
// Look up every DOM node the dashboard script touches, once, at load time.
const [
    runNowBtn,
    runNowSpinner,
    statusText,
    lastBackupTime,
    currentSchedule,
    scheduleInput,
    logsBox,
    fileManagerDiv,
    refreshFilesBtn,
] = [
    'run-now-btn',
    'run-now-spinner',
    'status-text',
    'last-backup-time',
    'current-schedule',
    'interval',
    'logs',
    'file-manager',
    'refresh-files-btn',
].map((elementId) => document.getElementById(elementId));
// Fetch `url` and resolve with the parsed JSON body.
// On a non-2xx response, throw an Error whose message is the server's `error`
// field when the body is JSON, otherwise the HTTP status. Every failure is
// logged to the console and re-thrown so callers can show their own message.
async function fetchAPI(url, options = {}) {
    try {
        const response = await fetch(url, options);
        if (!response.ok) {
            let message = `HTTP error! status: ${response.status}`;
            try {
                const errorData = await response.json();
                if (errorData && errorData.error) {
                    message = errorData.error;
                }
            } catch (parseError) {
                // Error pages are not always JSON (e.g. an HTML 404/500 page);
                // keep the status-based message instead of a JSON parse error.
            }
            throw new Error(message);
        }
        return await response.json();
    } catch (error) {
        console.error(`API Error on ${url}:`, error);
        throw error;
    }
}
// Poll /api/status and repaint the status panel; any failure (network, HTTP,
// or while repainting) is shown as a connection error.
function fetchStatus() {
    return fetchAPI('/api/status')
        .then((payload) => {
            updateStatusUI(payload);
        })
        .catch(() => {
            statusText.textContent = "Status: Connection Error";
            statusText.className = "error";
        });
}
// Ask the server to start a backup, unless the button is disabled
// (updateStatusUI disables it while a backup is running).
async function runNow() {
    if (!runNowBtn.disabled) {
        try {
            await fetchAPI('/api/start-backup', { method: 'POST' });
        } catch (err) {
            alert(`Failed to start backup: ${err.message}`);
        }
    }
}
// Submit handler for the schedule form: validate the interval locally, then
// POST it to /api/set-schedule as JSON.
async function setSchedule(event) {
    event.preventDefault();

    const interval = parseInt(scheduleInput.value, 10);
    const isValid = !isNaN(interval) && interval >= 0;
    if (!isValid) {
        alert("Please enter a valid non-negative number for the interval.");
        return;
    }

    try {
        const requestOptions = {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ interval }),
        };
        await fetchAPI('/api/set-schedule', requestOptions);
    } catch (err) {
        alert(`Error setting schedule: ${err.message}`);
    }
}
// Reload the repository file list into the file manager panel, marking the
// refresh button busy while the request is in flight.
async function fetchRepoFiles() {
    refreshFilesBtn.setAttribute('aria-busy', 'true');
    try {
        const data = await fetchAPI('/api/repo-files');
        renderFileManager(data.files);
    } catch (error) {
        // The message comes from the server, so insert it as text rather than
        // markup: characters such as < or & must not be parsed as HTML.
        const notice = document.createElement('p');
        notice.style.color = 'var(--pico-color-red-500)';
        notice.textContent = `Error loading files: ${error.message}`;
        fileManagerDiv.textContent = '';
        fileManagerDiv.appendChild(notice);
    } finally {
        refreshFilesBtn.setAttribute('aria-busy', 'false');
    }
}
// Delete one repository file after the user confirms, then reload the list.
async function deleteFile(path) {
    const confirmed = confirm(`Are you sure you want to permanently delete "${path}"? This cannot be undone.`);
    if (!confirmed) {
        return;
    }

    try {
        const requestOptions = {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ path }),
        };
        await fetchAPI('/api/delete-file', requestOptions);
        await fetchRepoFiles();
    } catch (err) {
        alert(`Error deleting file: ${err.message}`);
    }
}
// Repaint the control and status panels from one /api/status payload
// (fields read: status, last_backup_time, schedule_interval, logs).
function updateStatusUI(data) {
    const currentStatus = data.status;
    statusText.textContent = `Status: ${currentStatus}`;
    // The lowercased status doubles as the CSS class (idle / running / error).
    statusText.className = currentStatus.toLowerCase();

    const running = currentStatus === 'Running';
    runNowBtn.disabled = running;
    runNowSpinner.style.display = running ? 'inline-block' : 'none';

    lastBackupTime.textContent = data.last_backup_time;

    const interval = data.schedule_interval;
    const scheduleEnabled = interval > 0;
    currentSchedule.textContent = scheduleEnabled ? `${interval}` : '0 (disabled)';
    // Do not overwrite the field while the user is typing in it.
    if (document.activeElement !== scheduleInput) {
        scheduleInput.value = scheduleEnabled ? interval : '';
    }

    // Only touch the log box (and scroll to the bottom) when the text changed.
    const joinedLogs = data.logs.join('\\n');
    if (logsBox.textContent === joinedLogs) {
        return;
    }
    logsBox.textContent = joinedLogs;
    logsBox.scrollTop = logsBox.scrollHeight;
}
// Render the repository file list as a table with one Delete button per file.
// Rows are built with DOM APIs instead of an HTML string: file names are
// arbitrary text, and a name containing a quote or markup would otherwise
// break the inline onclick handler or be parsed as HTML.
function renderFileManager(files) {
    if (!files || files.length === 0) {
        fileManagerDiv.innerHTML = "<p>No files found in the repository.</p>";
        return;
    }

    const table = document.createElement('table');
    table.innerHTML = '<thead><tr><th>File Path</th><th style="text-align: right;">Action</th></tr></thead>';
    const tbody = document.createElement('tbody');

    files.forEach((file) => {
        const row = tbody.insertRow();
        row.insertCell().textContent = file;

        const actionCell = row.insertCell();
        actionCell.style.textAlign = 'right';

        const deleteBtn = document.createElement('button');
        deleteBtn.className = 'outline secondary';
        deleteBtn.textContent = 'Delete';
        // The closure keeps the exact file name, whatever characters it contains.
        deleteBtn.addEventListener('click', () => deleteFile(file));
        actionCell.appendChild(deleteBtn);
    });

    table.appendChild(tbody);
    fileManagerDiv.textContent = '';
    fileManagerDiv.appendChild(table);
}
// Once the DOM is ready: load the status and the file list, then keep polling
// the status every 3 seconds.
document.addEventListener('DOMContentLoaded', function () {
    const STATUS_POLL_MS = 3000;
    fetchStatus();
    fetchRepoFiles();
    setInterval(fetchStatus, STATUS_POLL_MS);
});
</script> | |
</body> | |
</html> | |
""" | |
# --- FLASK APP & STATE MANAGEMENT ---
app = Flask(__name__)
logging.basicConfig(level=logging.INFO)

# Process-wide state shared by the request handlers, the scheduler thread and
# the backup job; "lock" is the mutex they take around state updates.
app_state = dict(
    status="Idle",  # Idle, Running, Error
    logs=[],
    last_backup_time="Never",
    schedule_interval=0,  # in minutes; 0 disables automatic backups
    scheduler_thread=None,
    lock=threading.Lock(),
)
# --- HUGGING FACE HELPER CLASS --- | |
class HFManager:
    """Small wrapper around ``HfApi`` bound to one repository and one token."""

    def __init__(self, token, repo_id, repo_type="dataset"):
        """Store the repository coordinates and log in to the Hub.

        Args:
            token: Hugging Face access token.
            repo_id: Repository identifier passed to every Hub call.
            repo_type: Repository type passed to every Hub call.

        Raises:
            ValueError: If ``token`` is empty or ``None``.
        """
        if not token:
            raise ValueError("Hugging Face token (HF_TOKEN) is not set.")
        self.token = token
        self.repo_id = repo_id
        self.repo_type = repo_type
        self.api = HfApi()
        login(token=self.token)

    def ensure_repo_exists(self):
        """Create the repository if it does not exist yet (``exist_ok=True``)."""
        self.api.create_repo(
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            exist_ok=True,
            token=self.token,
        )

    def list_files(self):
        """Return the sorted list of file paths in the repository.

        A 404 from the Hub is reported as an empty list; any other Hub HTTP
        error is re-raised unchanged.
        """
        try:
            files = self.api.list_repo_files(
                repo_id=self.repo_id,
                repo_type=self.repo_type,
                token=self.token,
            )
        except HfHubHTTPError as e:
            # The error may carry no response object; reading status_code off
            # None would raise AttributeError and hide the real failure.
            response = getattr(e, "response", None)
            if getattr(response, "status_code", None) == 404:
                return []
            raise
        return sorted(files)

    def delete_file(self, path_in_repo):
        """Delete ``path_in_repo`` from the repository in its own commit."""
        self.api.delete_file(
            path_in_repo,
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            token=self.token,
            commit_message=f"Delete file: {path_in_repo}",
        )

    def upload(self, folder_path, path_in_repo, commit_message):
        """Upload the local ``folder_path`` to ``path_in_repo`` in one commit."""
        self.api.upload_folder(
            repo_id=self.repo_id,
            folder_path=folder_path,
            repo_type=self.repo_type,
            token=self.token,
            path_in_repo=path_in_repo,
            commit_message=commit_message,
        )
# --- BACKUP LOGIC --- | |
def run_backup_job():
    """Run one full backup cycle and record its progress in ``app_state``.

    Steps: wipe the scratch directories, download the Google Drive folder,
    extract every ``.zip`` found in it, then upload the known world/plugin
    folders to the Hub repository.

    Returns immediately (after logging a note) if a backup is already running.
    Never raises: any failure is logged, appended to the in-app log and
    reflected as status "Error".
    """
    with app_state['lock']:
        if app_state["status"] == "Running":
            app_state['logs'].append("Backup is already in progress. Skipping scheduled run.")
            return
        app_state["status"] = "Running"
        # Each run starts with a fresh log so the list cannot grow without bound.
        app_state["logs"] = ["Starting backup process..."]

    def log_entry(msg):
        """Append a timestamped line to the in-app log."""
        app_state['logs'].append(f"[{time.strftime('%H:%M:%S')}] {msg}")

    try:
        if not FOLDER_URL or "YOUR_GOOGLE_DRIVE" in FOLDER_URL:
            raise ValueError("FOLDER_URL is not set. Please set it in your Space secrets.")
        if not TOKEN:
            raise ValueError("HF_TOKEN is not set. Please set it in your Space secrets.")

        log_entry("Cleaning up temporary directories...")
        shutil.rmtree(DOWNLOAD_DIR, ignore_errors=True)
        shutil.rmtree(EXTRACT_DIR, ignore_errors=True)
        os.makedirs(EXTRACT_DIR, exist_ok=True)

        log_entry("Downloading from Google Drive...")
        downloaded = gdown.download_folder(url=FOLDER_URL, output=DOWNLOAD_DIR, use_cookies=False, quiet=True)
        # gdown signals some failures by returning None instead of raising;
        # without this check the job would go on to report a successful backup.
        if downloaded is None:
            raise RuntimeError("Google Drive download failed. Check FOLDER_URL and the folder's sharing settings.")
        log_entry("Download finished.")

        extracted_files = False
        for root, _, files in os.walk(DOWNLOAD_DIR):
            for f in files:
                # Match the extension case-insensitively (.ZIP is still a zip).
                if not f.lower().endswith(".zip"):
                    continue
                with zipfile.ZipFile(os.path.join(root, f), 'r') as z:
                    z.extractall(EXTRACT_DIR)
                log_entry(f"Extracted: {f}")
                extracted_files = True
        if not extracted_files:
            log_entry("Warning: No .zip files found to extract.")

        # Some backups ship the nether folder under a misspelled name.
        bad_path = os.path.join(EXTRACT_DIR, "world_nither")
        good_path = os.path.join(EXTRACT_DIR, "world_nether")
        if os.path.exists(bad_path):
            os.rename(bad_path, good_path)
            log_entry("Fixed 'world_nither' typo to 'world_nether'.")

        hf_manager = HFManager(TOKEN, REPO_ID)
        hf_manager.ensure_repo_exists()
        log_entry(f"Repo ready: {REPO_ID}")

        uploaded_any = False
        for name in ["world", "world_nether", "world_the_end", "plugins"]:
            local_path = os.path.join(EXTRACT_DIR, name)
            if os.path.exists(local_path):
                log_entry(f"Uploading '{name}'...")
                hf_manager.upload(local_path, name, f"Backup update for {name}")
                log_entry(f"Successfully uploaded '{name}'.")
                uploaded_any = True
            else:
                log_entry(f"Source folder '{name}' not found, skipping.")

        with app_state['lock']:
            if uploaded_any:
                app_state["last_backup_time"] = time.strftime('%Y-%m-%d %H:%M:%S %Z')
                log_entry("Backup complete!")
            else:
                # Nothing reached the Hub, so do not stamp a "last successful backup".
                log_entry("Warning: No backup folders were found; nothing was uploaded.")
            app_state["status"] = "Idle"
    except Exception as e:
        # Top-level boundary of a background thread: record the failure
        # instead of letting the thread die silently.
        error_message = f"An error occurred: {str(e)}"
        logging.error(error_message, exc_info=True)
        with app_state['lock']:
            app_state["logs"].append(f"ERROR: {error_message}")
            app_state["status"] = "Error"
# --- SCHEDULER THREAD --- | |
def scheduler_loop():
    """Run backups forever at the interval stored in ``app_state``.

    While the interval is 0 the loop idles, re-checking every 15 seconds.
    With a positive interval it runs a backup, then waits until one interval
    has passed since that run started. The wait is done in short slices that
    re-read the interval, so a schedule change made from the UI takes effect
    within 15 seconds instead of after the previous (possibly much longer)
    sleep has finished. Intended to run in a daemon thread; never returns.
    """
    poll_seconds = 15

    def current_interval():
        """Return the configured interval in minutes."""
        with app_state['lock']:
            return app_state['schedule_interval']

    while True:
        if current_interval() <= 0:
            time.sleep(poll_seconds)
            continue

        # Monotonic clock: the interval is not affected by wall-clock changes.
        started_at = time.monotonic()
        run_backup_job()

        while True:
            interval_minutes = current_interval()
            if interval_minutes <= 0:
                break  # Schedule was disabled while waiting.
            remaining = started_at + interval_minutes * 60 - time.monotonic()
            if remaining <= 0:
                break  # Next run is due.
            time.sleep(min(remaining, poll_seconds))
# --- FLASK ROUTES --- | |
# Without a route registration the view is never reachable; "/" serves the page.
@app.route("/")
def index():
    """Serve the dashboard page, with the repository id filled into the template."""
    return render_template_string(HTML_TEMPLATE, repo_id=REPO_ID)
# The dashboard polls this path with a GET request.
@app.route("/api/status")
def status():
    """Return the current status, logs, last backup time and schedule as JSON."""
    with app_state['lock']:
        # Only the JSON-serializable entries are exposed (the lock and the
        # thread object are not). The log list is copied so serialization,
        # which happens outside the lock, does not read a list a running
        # backup is still appending to.
        serializable_state = {
            "status": app_state["status"],
            "logs": list(app_state["logs"]),
            "last_backup_time": app_state["last_backup_time"],
            "schedule_interval": app_state["schedule_interval"],
        }
    return jsonify(serializable_state)
# The dashboard's "Run Backup Now" button POSTs to this path.
@app.route("/api/start-backup", methods=["POST"])
def start_backup():
    """Start a backup in a background thread and return immediately.

    run_backup_job() itself skips the run if another backup is in progress.
    """
    threading.Thread(target=run_backup_job).start()
    return jsonify({"message": "Backup process initiated."})
# The dashboard's schedule form POSTs {"interval": <minutes>} to this path.
@app.route("/api/set-schedule", methods=["POST"])
def set_schedule():
    """Update the automatic backup interval (minutes; 0 disables it).

    Returns a JSON 400 response when the body is missing, is not a JSON
    object, or carries an interval that is not a non-negative integer.
    """
    # silent=True yields None for a missing or malformed body instead of
    # aborting with a non-JSON error page the frontend cannot parse.
    payload = request.get_json(silent=True)
    try:
        if not isinstance(payload, dict):
            raise TypeError("Request body must be a JSON object.")
        interval = int(payload.get("interval", 0))
        if interval < 0:
            raise ValueError("Interval cannot be negative.")
    except (ValueError, TypeError, OverflowError):
        return jsonify({"error": "Invalid interval. Please provide a non-negative integer."}), 400

    with app_state['lock']:
        app_state['schedule_interval'] = interval
    return jsonify({"message": f"Schedule updated to {interval} minutes."})
# The dashboard's file manager loads its list from this path with a GET request.
@app.route("/api/repo-files")
def get_repo_files():
    """Return ``{"files": [...]}`` for the configured repository.

    Any failure (including a missing HF_TOKEN) becomes a JSON 500 response
    carrying the error text.
    """
    try:
        hf_manager = HFManager(TOKEN, REPO_ID)
        return jsonify({"files": hf_manager.list_files()})
    except Exception as e:
        # Request boundary: keep the traceback in the server log, and answer
        # in JSON so the frontend can display the message.
        logging.error("Failed to list repository files: %s", e, exc_info=True)
        return jsonify({"error": str(e)}), 500
# The dashboard's Delete buttons POST {"path": <file path>} to this path.
@app.route("/api/delete-file", methods=["POST"])
def delete_repo_file():
    """Delete one file from the configured repository.

    Returns a JSON 400 response when no path is supplied and a JSON 500
    response carrying the error text when the deletion fails.
    """
    # silent=True yields None for a missing or malformed body; a body that is
    # not a JSON object is treated the same as a missing path.
    payload = request.get_json(silent=True)
    path = payload.get("path") if isinstance(payload, dict) else None
    if not path:
        return jsonify({"error": "File path not provided."}), 400
    try:
        hf_manager = HFManager(TOKEN, REPO_ID)
        hf_manager.delete_file(path)
        return jsonify({"message": f"Successfully deleted {path}"})
    except Exception as e:
        # Request boundary: keep the traceback in the server log, and answer
        # in JSON so the frontend can display the message.
        logging.error("Failed to delete repository file: %s", e, exc_info=True)
        return jsonify({"error": str(e)}), 500
# --- MAIN EXECUTION --- | |
if __name__ == "__main__":
    # Start the scheduler as a daemon thread so it never blocks process exit,
    # keep a handle to it in the shared state, then serve on all interfaces.
    scheduler = threading.Thread(target=scheduler_loop, daemon=True)
    app_state["scheduler_thread"] = scheduler
    scheduler.start()
    app.run(host="0.0.0.0", port=7860)