# Backup & Dataset Controller

import os
import shutil
import threading
import time
import zipfile

# --- Configuration & Initialization ---
# Ensure Hugging Face cache and other temp data writes to /tmp.
# This is set BEFORE huggingface_hub is imported so the library sees the
# override when it resolves its cache locations.
os.environ["HF_HOME"] = "/tmp/hf_home"

import gdown  # noqa: E402
import humanize  # noqa: E402
from flask import Flask, jsonify, render_template_string, request  # noqa: E402
from huggingface_hub import HfApi, hf_hub_url, login, upload_folder  # noqa: E402
from huggingface_hub.utils import HfHubHTTPError  # noqa: E402

DOWNLOAD_DIR = "/tmp/backups"
EXTRACT_DIR = "/tmp/extracted_backups"

# Environment variables (set these in your Space secrets)
FOLDER_URL = os.getenv("FOLDER_URL")
REPO_ID = os.getenv("REPO_ID")
TOKEN = os.getenv("HF_TOKEN")

# Format used for every timestamp shown to the user.
_TIME_FORMAT = "%Y-%m-%d %H:%M:%S %Z"

# How often (seconds) the scheduler re-checks the configured interval.
_SCHEDULER_POLL_SECONDS = 5

# --- Global State Management ---
# Using a dictionary to hold state is thread-safe in CPython for simple reads/writes
app_state = {
    "backup_status": "idle",  # idle, running, success, error
    "backup_log": ["Awaiting first run."],
    "last_backup_time": "Never",
    "next_backup_time": "Scheduler disabled",
    "schedule_interval_minutes": 0,  # 0 means disabled
    "scheduler_thread": None,
}

# Guards the backup job itself: the status flag alone is check-then-act, and
# two overlapping runs would delete each other's working directories.
_backup_lock = threading.Lock()

# --- Flask App Setup ---
app = Flask(__name__)
api = HfApi()

# --- HTML, CSS, JS Template ---
# NOTE(review): this copy of the template contains only text; the HTML/CSS/JS
# markup appears to be missing and should be restored from the original file.
HTML_TEMPLATE = """ Backup & Dataset Controller

Minecraft Backup & Dataset Controller

Backup Controls

Idle

Last Backup: Never
Next Scheduled: N/A

Live Log

Dataset Management

View on Hub

Files in {{ repo_id }}

File Path	Size	Actions

"""


# --- Small shared helpers ---
def _format_time(timestamp=None):
    """Return *timestamp* (epoch seconds, default: now) as a local-time string."""
    return time.strftime(_TIME_FORMAT, time.localtime(timestamp))


def _log(message):
    """Print *message* and append it to the log exposed via /api/status."""
    print(message)
    app_state["backup_log"].append(message)


def _json_payload():
    """Return the request's JSON object, or {} if the body is absent/invalid."""
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else {}


# --- Core Backup Logic ---
def _require_config():
    """Raise RuntimeError naming any required environment variable that is unset."""
    required = (("FOLDER_URL", FOLDER_URL), ("REPO_ID", REPO_ID), ("HF_TOKEN", TOKEN))
    missing = [name for name, value in required if not value]
    if missing:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )


def _reset_work_dirs():
    """Delete and recreate the download and extraction directories."""
    _log("Resetting temporary directories...")
    for directory in (DOWNLOAD_DIR, EXTRACT_DIR):
        shutil.rmtree(directory, ignore_errors=True)
    for directory in (DOWNLOAD_DIR, EXTRACT_DIR):
        os.makedirs(directory, exist_ok=True)
    _log("Directories reset.")


def _extract_archives():
    """Extract every .zip found under DOWNLOAD_DIR into EXTRACT_DIR."""
    _log("Extracting zip archives...")
    extracted_count = 0
    for root, _, files in os.walk(DOWNLOAD_DIR):
        for filename in files:
            # Case-insensitive so archives named "*.ZIP" are not skipped.
            if not filename.lower().endswith(".zip"):
                continue
            with zipfile.ZipFile(os.path.join(root, filename)) as archive:
                archive.extractall(EXTRACT_DIR)
            _log(f"Extracted: {filename}")
            extracted_count += 1
    if extracted_count == 0:
        _log("Warning: No .zip files found to extract.")


def _fix_nether_typo():
    """Rename a misspelled 'world_nither' folder unless 'world_nether' exists."""
    bad_path = os.path.join(EXTRACT_DIR, "world_nither")
    good_path = os.path.join(EXTRACT_DIR, "world_nether")
    if os.path.exists(bad_path) and not os.path.exists(good_path):
        os.rename(bad_path, good_path)
        _log("Fixed folder name typo: 'world_nither' -> 'world_nether'")


def _upload_subfolders():
    """Upload each known subfolder that exists in EXTRACT_DIR to the dataset repo."""
    for name in ("world", "world_nether", "world_the_end", "plugins"):
        path = os.path.join(EXTRACT_DIR, name)
        if not os.path.exists(path):
            _log(f"Skipping '{name}' - directory not found.")
            continue
        _log(f"Uploading '{name}'...")
        upload_folder(
            repo_id=REPO_ID,
            folder_path=path,
            repo_type="dataset",
            path_in_repo=name,
            commit_message=f"Backup update for {name}",
        )
        _log(f"'{name}' uploaded successfully.")


def _perform_backup():
    """Run all backup steps, recording progress and the outcome in app_state."""
    app_state["backup_status"] = "running"
    app_state["backup_log"] = ["Starting backup process..."]
    try:
        # Fail fast with a clear log entry rather than an opaque downstream error.
        _require_config()

        # 1. Clean up old directories
        _reset_work_dirs()

        # 2. Download from Google Drive
        _log("Downloading from Google Drive folder...")
        gdown.download_folder(
            url=FOLDER_URL, output=DOWNLOAD_DIR, use_cookies=False, quiet=True
        )
        _log("Download finished.")

        # 3. Extract downloaded zip files
        _extract_archives()

        # 4. Fix potential folder name typo
        _fix_nether_typo()

        # 5. Log in to Hugging Face
        _log("Logging into Hugging Face Hub...")
        login(token=TOKEN)
        _log("Login successful.")

        # 6. Ensure repository exists
        _log(f"Ensuring dataset repository '{REPO_ID}' exists...")
        api.create_repo(
            repo_id=REPO_ID, repo_type="dataset", private=False, exist_ok=True
        )
        _log("Repository is ready.")

        # 7. Upload specified subfolders
        _upload_subfolders()

        app_state["last_backup_time"] = _format_time()
        _log(f"Backup completed successfully at {app_state['last_backup_time']}.")
        app_state["backup_status"] = "success"
    except Exception as e:
        # Top-level boundary of the background job: record the failure so the
        # UI can show it instead of letting the thread die silently.
        _log(f"AN ERROR OCCURRED: {str(e)}")
        app_state["backup_status"] = "error"


def run_backup_job():
    """The main backup logic, designed to be run in a background thread.

    Only one run executes at a time; a call made while another run holds the
    lock returns immediately without touching the working directories.
    Failures are logged to app_state and never raised to the caller.
    """
    if not _backup_lock.acquire(blocking=False):
        print("Backup already in progress; skipping this trigger.")
        return
    try:
        _perform_backup()
    finally:
        _backup_lock.release()


# --- Scheduler Thread ---
def _wait_for_next_run(interval):
    """Sleep up to *interval* minutes, returning early if the schedule changes."""
    deadline = time.time() + interval * 60
    while app_state["schedule_interval_minutes"] == interval:
        remaining = deadline - time.time()
        if remaining <= 0:
            return
        time.sleep(min(_SCHEDULER_POLL_SECONDS, remaining))


def scheduler_loop():
    """Periodically triggers the backup job based on the set interval.

    Runs forever; intended as the target of a daemon thread. The wait between
    runs is polled in short slices so that changing or disabling the schedule
    takes effect within a few seconds instead of after the full interval.
    """
    while True:
        interval = app_state["schedule_interval_minutes"]
        if interval <= 0:
            app_state["next_backup_time"] = "Scheduler disabled"
            time.sleep(_SCHEDULER_POLL_SECONDS)
            continue

        if app_state["backup_status"] != "running":
            print(f"Scheduler triggering backup. Interval: {interval} mins.")
            run_backup_job()

        app_state["next_backup_time"] = _format_time(time.time() + interval * 60)
        _wait_for_next_run(interval)


# --- Flask Routes (API Endpoints) ---
@app.route("/")
def index():
    """Serves the main HTML page by rendering the template string."""
    return render_template_string(HTML_TEMPLATE, repo_id=REPO_ID)


@app.route("/api/status", methods=["GET"])
def get_status():
    """Provides the current status of the application to the frontend."""
    # The scheduler Thread object stored in app_state is not JSON-serializable,
    # so it is left out of the response.
    status = {
        key: value for key, value in app_state.items() if key != "scheduler_thread"
    }
    return jsonify(status)


@app.route("/api/run-backup", methods=["POST"])
def start_backup():
    """Triggers a manual backup run in a background thread."""
    if app_state["backup_status"] == "running" or _backup_lock.locked():
        return (
            jsonify({"status": "error", "message": "A backup is already in progress."}),
            409,
        )
    threading.Thread(target=run_backup_job, daemon=True).start()
    return jsonify({"status": "ok", "message": "Backup process started."})


@app.route("/api/set-schedule", methods=["POST"])
def set_schedule():
    """Sets the backup interval (minutes); 0 or a missing value disables it."""
    try:
        interval = int(_json_payload().get("interval", 0))
        if interval < 0:
            raise ValueError("Interval must be non-negative.")
    except (ValueError, TypeError):
        return jsonify({"status": "error", "message": "Invalid interval value."}), 400

    app_state["schedule_interval_minutes"] = interval
    if interval > 0:
        app_state["next_backup_time"] = _format_time(time.time() + interval * 60)
    else:
        app_state["next_backup_time"] = "Scheduler disabled"
    return jsonify({"status": "ok", "message": f"Schedule set to {interval} minutes."})


@app.route("/api/list-files", methods=["GET"])
def list_repo_files():
    """Lists all files in the dataset repository with size and download URL."""
    try:
        # One metadata request for the whole repo instead of one per file.
        info = api.dataset_info(repo_id=REPO_ID, files_metadata=True)
        files_details = []
        for sibling in info.siblings or []:
            if sibling.size is None:
                size = "N/A"
            elif sibling.size == 0:
                size = "0 B"
            else:
                size = humanize.naturalsize(sibling.size)
            files_details.append(
                {
                    "name": sibling.rfilename,
                    "size": size,
                    "url": hf_hub_url(
                        repo_id=REPO_ID,
                        filename=sibling.rfilename,
                        repo_type="dataset",
                    ),
                }
            )
        return jsonify({"status": "ok", "files": files_details})
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500


@app.route("/api/delete-file", methods=["POST"])
def delete_repo_file():
    """Deletes a specific file from the dataset repository."""
    filename = _json_payload().get("filename")
    if not filename:
        return jsonify({"status": "error", "message": "Filename not provided."}), 400
    try:
        api.delete_file(
            repo_id=REPO_ID,
            path_in_repo=filename,
            repo_type="dataset",
            commit_message=f"Deleted file: {filename}",
        )
        return jsonify(
            {"status": "ok", "message": f"Successfully deleted '{filename}'."}
        )
    except HfHubHTTPError as e:
        return (
            jsonify(
                {
                    "status": "error",
                    "message": f"File not found or permission error: {e}",
                }
            ),
            404,
        )
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500


# --- Main Execution ---
if __name__ == "__main__":
    # Start the scheduler in a background thread
    app_state["scheduler_thread"] = threading.Thread(
        target=scheduler_loop, daemon=True
    )
    app_state["scheduler_thread"].start()

    # Start the Flask web server
    app.run(host="0.0.0.0", port=7860)