Spaces:
Running
Running
import os
import shutil
import tempfile
import threading
import time
import zipfile

import gdown
from flask import Flask, request, render_template_string, redirect, url_for
from huggingface_hub import HfApi, login, upload_folder, list_repo_files
# Point the Hugging Face cache at a location under /tmp.
# NOTE(review): huggingface_hub appears to resolve HF_HOME when it is first
# imported, and the import sits above this line -- confirm this assignment
# actually takes effect, or move it ahead of the import.
os.environ["HF_HOME"] = "/tmp/hf_home"

# Settings read from the environment. Each is None when the variable is unset.
FOLDER_URL = os.environ.get("FOLDER_URL")  # Drive folder passed to gdown
REPO_ID = os.environ.get("REPO_ID")  # target dataset repository id
TOKEN = os.environ.get("HF_TOKEN")  # Hugging Face access token
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")  # for future Drive API use

# Working directories; run_backup() deletes and recreates both on every run.
DOWNLOAD_DIR = "/tmp/backups"
EXTRACT_DIR = "/tmp/extracted_backups"

# Mutable application state shared by the web views and the scheduler thread.
last_backup_time = "Never"  # timestamp string of the last completed backup
schedule_interval = 0  # minutes between scheduled backups; 0 means idle

app = Flask(__name__)
# Serializes backups: the scheduler thread and a manual "Run Now" request can
# both call run_backup(), and every run wipes the shared working directories.
_backup_lock = threading.Lock()


def _extract_archives(source_dir, target_dir, log):
    """Extract every .zip file found under source_dir into target_dir."""
    for root, _, files in os.walk(source_dir):
        for name in files:
            # Case-insensitive so "BACKUP.ZIP" is not silently skipped.
            if not name.lower().endswith(".zip"):
                continue
            with zipfile.ZipFile(os.path.join(root, name)) as archive:
                archive.extractall(target_dir)
            log.append(f"Extracted {name}")


def _upload_sections(extract_dir, log):
    """Upload each known server folder that exists under extract_dir."""
    for key in ("world", "world_nether", "world_the_end", "plugins"):
        path = os.path.join(extract_dir, key)
        if not os.path.isdir(path):
            log.append(f"Skipped missing section {key}")
            continue
        log.append(f"Uploading section {key}")
        upload_folder(
            repo_id=REPO_ID,
            folder_path=path,
            repo_type="dataset",
            token=TOKEN,
            path_in_repo=key,
            commit_message=f"Add {key}",
        )
        log.append(f"Uploaded {key}")


def run_backup():
    """Download the Drive folder, unpack its archives and upload them.

    Steps: reset the working directories, download FOLDER_URL with gdown,
    extract every zip into EXTRACT_DIR, rename a misspelled "world_nither"
    folder, then upload the world/world_nether/world_the_end/plugins folders
    to the REPO_ID dataset. On success ``last_backup_time`` is updated.

    Returns:
        str: Newline-joined progress log. Failures never raise; they are
        reported as a final "Error ..." line. If another backup is already
        running, a single-line notice is returned and nothing is done.
    """
    global last_backup_time
    if not _backup_lock.acquire(blocking=False):
        return "Backup already in progress"
    log = []
    try:
        log.append("Starting backup process")
        required = (("FOLDER_URL", FOLDER_URL), ("REPO_ID", REPO_ID), ("HF_TOKEN", TOKEN))
        missing = [name for name, value in required if not value]
        if missing:
            raise RuntimeError("missing environment variables: " + ", ".join(missing))

        shutil.rmtree(DOWNLOAD_DIR, ignore_errors=True)
        shutil.rmtree(EXTRACT_DIR, ignore_errors=True)
        os.makedirs(DOWNLOAD_DIR, exist_ok=True)
        os.makedirs(EXTRACT_DIR, exist_ok=True)

        log.append(f"Downloading from {FOLDER_URL}")
        downloaded = gdown.download_folder(
            url=FOLDER_URL, output=DOWNLOAD_DIR, use_cookies=False, quiet=True
        )
        # gdown signals failure by returning None rather than raising.
        if downloaded is None:
            raise RuntimeError("download failed, see server output for details")
        log.append("Download complete")

        _extract_archives(DOWNLOAD_DIR, EXTRACT_DIR, log)

        # Some archives ship the nether folder under a misspelled name.
        bad = os.path.join(EXTRACT_DIR, "world_nither")
        good = os.path.join(EXTRACT_DIR, "world_nether")
        if os.path.exists(bad) and not os.path.exists(good):
            os.rename(bad, good)
            log.append("Fixed world_nether typo")

        login(token=TOKEN)
        api = HfApi()
        log.append("Authenticated with Hugging Face")
        # NOTE(review): private=False creates a public dataset -- confirm that
        # is intended for server backups.
        api.create_repo(
            repo_id=REPO_ID, repo_type="dataset", private=False, exist_ok=True, token=TOKEN
        )
        log.append(f"Repository ready {REPO_ID}")

        _upload_sections(EXTRACT_DIR, log)

        last_backup_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        log.append(f"Backup finished at {last_backup_time}")
    except Exception as e:
        # Top-level boundary: report the failure in the log shown to the user.
        log.append(f"Error {e}")
    finally:
        _backup_lock.release()
    return "\n".join(log)
def schedule_loop():
    """Run backups forever at the interval held in ``schedule_interval``.

    While the interval is 0 or negative the loop idles, re-checking every five
    seconds. Once it is positive a backup runs immediately, and then again
    after each interval. The wait is polled in short steps, so a new interval
    (or disabling the schedule) takes effect within seconds instead of only
    after the previous, possibly very long, sleep has finished.
    """
    poll_seconds = 5
    while True:
        interval = schedule_interval
        if interval <= 0:
            time.sleep(poll_seconds)
            continue
        run_backup()
        deadline = time.monotonic() + interval * 60
        # Leave the wait early when the user changes the interval.
        while schedule_interval == interval:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(poll_seconds, remaining))
# Start the scheduler in the background as soon as the module is loaded.
# It is a daemon thread, so it does not keep the process alive on shutdown.
threading.Thread(
    daemon=True,
    target=schedule_loop,
).start()
# Jinja template for the single-page UI. Variables it reads: interval,
# last_run, status, files, manage_status.
# The Bootstrap CDN URLs pin an explicit version. The interval field allows 0
# and "Run Now" skips form validation, so a manual run is never blocked by the
# interval input.
HTML = """
<!doctype html>
<html>
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Backup & Dataset Manager</title>
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
  <style>
    body { padding: 2rem; }
    .log-area { height: 300px; overflow-y: scroll; background: #f9f9f9; padding: 1rem; border-radius: .5rem; font-family: monospace; white-space: pre-wrap; }
    .nav-link { cursor: pointer; }
  </style>
</head>
<body>
  <h1 class="mb-4">Backup & Dataset Manager</h1>
  <ul class="nav nav-tabs mb-3" id="mainTabs">
    <li class="nav-item">
      <a class="nav-link active" data-bs-target="#backupTab">Backup Control</a>
    </li>
    <li class="nav-item">
      <a class="nav-link" data-bs-target="#datasetTab">Manage Dataset</a>
    </li>
  </ul>
  <div class="tab-content">
    <div id="backupTab" class="tab-pane fade show active">
      <form method="post" action="/">
        <div class="mb-3">
          <label class="form-label">Interval Minutes</label>
          <input type="number" class="form-control" name="interval" value="{{ interval }}" min="0">
        </div>
        <button class="btn btn-primary">Set Schedule</button>
        <button class="btn btn-secondary ms-2" name="manual_run" value="1" formnovalidate>Run Now</button>
      </form>
      <div class="mt-4">
        <strong>Last Backup:</strong> {{ last_run }}<br>
        <div class="log-area mt-2">{{ status }}</div>
      </div>
    </div>
    <div id="datasetTab" class="tab-pane fade">
      <form method="post" action="/manage">
        <div class="mb-3">
          <label class="form-label">Drive File or Folder URL</label>
          <input type="url" class="form-control" name="drive_url" placeholder="Enter Google Drive link">
        </div>
        <div class="mb-3">
          <label class="form-label">Destination Path in Dataset</label>
          <input type="text" class="form-control" name="dest_path" placeholder="eg world_extra">
        </div>
        <button class="btn btn-success">Upload to Dataset</button>
      </form>
      <hr>
      <h5>Current Dataset Contents</h5>
      <ul class="list-group">
        {% for file in files %}
        <li class="list-group-item">{{ file }}</li>
        {% endfor %}
      </ul>
      {% if manage_status %}
      <div class="alert alert-info mt-3">{{ manage_status }}</div>
      {% endif %}
    </div>
  </div>
  <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
  <script>
    document.querySelectorAll('.nav-link').forEach(function(el) {
      el.addEventListener('click', function() {
        document.querySelectorAll('.nav-link').forEach(function(x) { x.classList.remove('active') });
        document.querySelectorAll('.tab-pane').forEach(function(x) { x.classList.remove('show','active') });
        el.classList.add('active');
        document.querySelector(el.getAttribute('data-bs-target')).classList.add('show','active');
      });
    });
  </script>
</body>
</html>
"""
@app.route("/", methods=["GET", "POST"])
def index():
    """Render the control page and handle the backup form.

    GET just renders the page. A POST carrying ``manual_run`` runs a backup
    immediately and shows its log; any other POST reads ``interval`` (whole
    minutes) and stores it in ``schedule_interval``. An interval of 0 turns
    scheduling off; negative or non-numeric values are rejected and leave the
    current schedule untouched.

    Returns:
        str: The rendered HTML page.
    """
    global schedule_interval
    status = ""
    if request.method == "POST":
        if "manual_run" in request.form:
            status = run_backup()
        else:
            try:
                interval = int(request.form.get("interval", "0"))
            except (TypeError, ValueError):
                status = "Invalid interval"
            else:
                if interval < 0:
                    status = "Invalid interval"
                elif interval == 0:
                    schedule_interval = 0
                    status = "Scheduling disabled"
                else:
                    schedule_interval = interval
                    status = f"Scheduled every {schedule_interval} minutes"
    return render_template_string(
        HTML,
        last_run=last_backup_time,
        interval=schedule_interval,
        status=status,
        files=[],
        manage_status=None,
    )
@app.route("/manage", methods=["POST"])
def manage():
    """Download a Drive file or folder and upload it into the dataset.

    Form fields:
        drive_url: Google Drive link. Links containing "/folders/" are fetched
            as folders, everything else as a single file.
        dest_path: Optional path inside the dataset; empty means the root.

    The download goes into a private temporary directory that is removed
    afterwards, so it cannot collide with the working directories that
    run_backup() wipes. Errors are shown on the page instead of raising.

    Returns:
        str: The rendered HTML page, including the current dataset file list
        and a status message.
    """
    drive_url = (request.form.get("drive_url") or "").strip()
    # Surrounding slashes are dropped so the value is a relative repo path.
    dest_path = (request.form.get("dest_path") or "").strip().strip("/")
    try:
        if not drive_url:
            raise ValueError("no Drive URL provided")
        with tempfile.TemporaryDirectory(prefix="upload_") as tmp:
            if "/folders/" in drive_url:
                fetched = gdown.download_folder(
                    url=drive_url, output=tmp, use_cookies=False, quiet=True
                )
            else:
                # The trailing separator makes gdown treat the output as a
                # directory and keep the remote file name; fuzzy=True accepts
                # ordinary ".../file/d/<id>/view" share links.
                fetched = gdown.download(
                    url=drive_url, output=tmp + os.sep, quiet=True, fuzzy=True
                )
            # gdown reports failure by returning None (or nothing downloaded).
            if not fetched:
                raise RuntimeError("download failed or returned no files")
            login(token=TOKEN)
            upload_folder(
                repo_id=REPO_ID,
                folder_path=tmp,
                repo_type="dataset",
                token=TOKEN,
                path_in_repo=dest_path,
                commit_message=f"Upload via UI to {dest_path or 'root'}",
            )
        manage_log = "Upload successful"
    except Exception as e:
        manage_log = f"Error {e}"

    try:
        files = HfApi().list_repo_files(repo_id=REPO_ID, repo_type="dataset", token=TOKEN)
    except Exception as e:
        # Still render the page when the listing itself fails.
        files = []
        manage_log = f"{manage_log}. Could not list dataset files: {e}"

    return render_template_string(
        HTML,
        last_run=last_backup_time,
        interval=schedule_interval,
        status="",
        files=files,
        manage_status=manage_log,
    )
if __name__ == "__main__":
    # Direct execution: serve on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")