# cronjob / app.py - testdeep123 - "Update app.py" (1be9faa, verified) - 8.3 kB
import os
import shutil
import tempfile
import threading
import time
import zipfile

from flask import Flask, request, render_template_string, redirect, url_for
import gdown
from huggingface_hub import HfApi, login, upload_folder, list_repo_files
# Ensure Hugging Face cache writes to tmp
# NOTE(review): huggingface_hub is already imported above this line; if the
# library resolves HF_HOME at import time this assignment has no effect --
# confirm, and move it above the import if so.
os.environ["HF_HOME"] = "/tmp/hf_home"
# Environment variables (each is None when the variable is not set)
# FOLDER_URL: Google Drive folder URL that run_backup() downloads.
FOLDER_URL = os.getenv("FOLDER_URL")
# REPO_ID: Hugging Face dataset repository that receives the uploads.
REPO_ID = os.getenv("REPO_ID")
# TOKEN: Hugging Face access token passed to login() and the upload calls.
TOKEN = os.getenv("HF_TOKEN")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") # for future Drive API use
# Directories
# DOWNLOAD_DIR: where the Drive folder is downloaded; wiped by every backup.
DOWNLOAD_DIR = "/tmp/backups"
# EXTRACT_DIR: where the downloaded .zip archives are unpacked; also wiped.
EXTRACT_DIR = "/tmp/extracted_backups"
# Application state
# last_backup_time: timestamp string of the last successful backup, set by
# run_backup(); stays "Never" until one succeeds.
last_backup_time = "Never"
# schedule_interval: minutes between scheduled backups, set by the "/" route;
# the scheduler loop only runs backups while this is greater than 0.
schedule_interval = 0
# The Flask application that the route decorators below register against.
app = Flask(__name__)
# Held for the duration of a backup so that the scheduler thread and a manual
# "Run Now" request never wipe and refill the shared working directories at
# the same time.
_backup_lock = threading.Lock()


def _extract_archives(src_dir, dest_dir, log):
    """Extract every .zip archive found under *src_dir* into *dest_dir*.

    One "Extracted <name>" line is appended to *log* per archive. Any error
    raised by ``zipfile`` propagates to the caller.
    """
    for root, _, files in os.walk(src_dir):
        for name in files:
            # Case-insensitive so archives named e.g. "BACKUP.ZIP" are not skipped.
            if name.lower().endswith(".zip"):
                with zipfile.ZipFile(os.path.join(root, name)) as archive:
                    archive.extractall(dest_dir)
                log.append(f"Extracted {name}")


def run_backup():
    """Download the Drive folder, unpack its archives and upload them.

    The run wipes and recreates DOWNLOAD_DIR and EXTRACT_DIR, downloads
    FOLDER_URL into DOWNLOAD_DIR, extracts every .zip into EXTRACT_DIR,
    renames a misspelled "world_nither" folder to "world_nether", and uploads
    the "world", "world_nether", "world_the_end" and "plugins" folders that
    exist to the REPO_ID dataset, one commit per folder.

    ``last_backup_time`` is updated only when the whole run succeeds.

    Returns:
        str: the newline-joined progress log. A failure is reported as a
        final "Error ..." line rather than raised. If another backup is
        already running, nothing is done and a one-line notice is returned.
    """
    global last_backup_time
    # Non-blocking: a second caller is told a run is in progress instead of
    # queueing behind (and then repeating) a possibly long backup.
    if not _backup_lock.acquire(blocking=False):
        return "Backup already in progress"
    log = []
    try:
        log.append("Starting backup process")

        # Fail fast with a readable message instead of an opaque library error.
        missing = [
            name
            for name, value in (
                ("FOLDER_URL", FOLDER_URL),
                ("REPO_ID", REPO_ID),
                ("HF_TOKEN", TOKEN),
            )
            if not value
        ]
        if missing:
            raise RuntimeError("missing environment variables: " + ", ".join(missing))

        # Start from empty working directories on every run.
        shutil.rmtree(DOWNLOAD_DIR, ignore_errors=True)
        shutil.rmtree(EXTRACT_DIR, ignore_errors=True)
        os.makedirs(DOWNLOAD_DIR, exist_ok=True)
        os.makedirs(EXTRACT_DIR, exist_ok=True)

        log.append(f"Downloading from {FOLDER_URL}")
        downloaded = gdown.download_folder(
            url=FOLDER_URL, output=DOWNLOAD_DIR, use_cookies=False, quiet=True
        )
        # download_folder reports failure through its return value, so an
        # unchecked call would log "Download complete" for a failed download.
        if not downloaded:
            raise RuntimeError("download failed or folder is empty")
        log.append("Download complete")

        _extract_archives(DOWNLOAD_DIR, EXTRACT_DIR, log)

        # Some backups ship the nether world under a misspelled folder name.
        bad = os.path.join(EXTRACT_DIR, "world_nither")
        good = os.path.join(EXTRACT_DIR, "world_nether")
        if os.path.exists(bad) and not os.path.exists(good):
            os.rename(bad, good)
            log.append("Fixed world_nether typo")

        login(token=TOKEN)
        api = HfApi()
        log.append("Authenticated with Hugging Face")
        api.create_repo(
            repo_id=REPO_ID, repo_type="dataset", private=False, exist_ok=True, token=TOKEN
        )
        log.append(f"Repository ready {REPO_ID}")

        # Each section is uploaded to a same-named path in the dataset repo.
        for key in ("world", "world_nether", "world_the_end", "plugins"):
            path = os.path.join(EXTRACT_DIR, key)
            if os.path.isdir(path):
                log.append(f"Uploading section {key}")
                upload_folder(
                    repo_id=REPO_ID,
                    folder_path=path,
                    repo_type="dataset",
                    token=TOKEN,
                    path_in_repo=key,
                    commit_message=f"Add {key}",
                )
                log.append(f"Uploaded {key}")
            else:
                log.append(f"Skipped missing section {key}")

        last_backup_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        log.append(f"Backup finished at {last_backup_time}")
    except Exception as e:
        # Top-level boundary: callers display the log, so report, don't raise.
        log.append(f"Error {e}")
    finally:
        _backup_lock.release()
    return "\n".join(log)
def schedule_loop():
    """Run backups forever at the interval held in ``schedule_interval``.

    While the interval is greater than 0, a backup runs as soon as scheduling
    is enabled and then again each time ``interval`` minutes have passed since
    the previous backup finished. While it is 0 or less the loop idles.

    The interval is re-read every few seconds, so changing or disabling the
    schedule takes effect promptly instead of after the previous (possibly
    very long) wait has elapsed. Never returns.
    """
    poll_seconds = 5
    last_finished = None  # time.monotonic() when the previous backup ended
    while True:
        # Snapshot once per tick: the web handler may change the global at any
        # moment, and re-reading it later could hand a different (even
        # negative) value to the timing logic.
        interval = schedule_interval
        if interval <= 0:
            # Disabled; forget the last run so re-enabling starts a backup
            # right away.
            last_finished = None
        elif last_finished is None or time.monotonic() - last_finished >= interval * 60:
            run_backup()
            last_finished = time.monotonic()
        time.sleep(poll_seconds)
# Start the scheduler as soon as this module is loaded. daemon=True means the
# thread does not keep the interpreter alive at shutdown.
threading.Thread(target=schedule_loop, daemon=True).start()
# Jinja template for the single-page UI, rendered by both routes.
# Template variables: interval, last_run, status, files, manage_status.
# The "Manage Dataset" tab is shown as active whenever manage_status is set,
# so the result of an upload is visible without clicking the tab again.
HTML = """
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Backup & Dataset Manager</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
<style>
body { padding: 2rem; }
.log-area { height: 300px; overflow-y: scroll; background: #f9f9f9; padding: 1rem; border-radius: .5rem; font-family: monospace; white-space: pre-wrap; }
.nav-link { cursor: pointer; }
</style>
</head>
<body>
<h1 class="mb-4">Backup & Dataset Manager</h1>
<ul class="nav nav-tabs mb-3" id="mainTabs">
<li class="nav-item">
<a class="nav-link{% if not manage_status %} active{% endif %}" data-bs-target="#backupTab">Backup Control</a>
</li>
<li class="nav-item">
<a class="nav-link{% if manage_status %} active{% endif %}" data-bs-target="#datasetTab">Manage Dataset</a>
</li>
</ul>
<div class="tab-content">
<div id="backupTab" class="tab-pane fade{% if not manage_status %} show active{% endif %}">
<form method="post" action="/">
<div class="mb-3">
<label class="form-label">Interval Minutes</label>
<input type="number" class="form-control" name="interval" value="{{ interval }}" min="1">
</div>
<button class="btn btn-primary">Set Schedule</button>
<button class="btn btn-secondary ms-2" name="manual_run" value="1">Run Now</button>
</form>
<div class="mt-4">
<strong>Last Backup:</strong> {{ last_run }}<br>
<div class="log-area mt-2">{{ status }}</div>
</div>
</div>
<div id="datasetTab" class="tab-pane fade{% if manage_status %} show active{% endif %}">
<form method="post" action="/manage">
<div class="mb-3">
<label class="form-label">Drive File or Folder URL</label>
<input type="url" class="form-control" name="drive_url" placeholder="Enter Google Drive link">
</div>
<div class="mb-3">
<label class="form-label">Destination Path in Dataset</label>
<input type="text" class="form-control" name="dest_path" placeholder="eg world_extra">
</div>
<button class="btn btn-success">Upload to Dataset</button>
</form>
<hr>
<h5>Current Dataset Contents</h5>
<ul class="list-group">
{% for file in files %}
<li class="list-group-item">{{ file }}</li>
{% endfor %}
</ul>
{% if manage_status %}
<div class="alert alert-info mt-3">{{ manage_status }}</div>
{% endif %}
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
<script>
document.querySelectorAll('.nav-link').forEach(function(el) {
el.addEventListener('click', function() {
document.querySelectorAll('.nav-link').forEach(function(x) { x.classList.remove('active') });
document.querySelectorAll('.tab-pane').forEach(function(x) { x.classList.remove('show','active') });
el.classList.add('active');
document.querySelector(el.getAttribute('data-bs-target')).classList.add('show','active');
});
});
</script>
</body>
</html>
"""
@app.route("/", methods=["GET", "POST"])
def index():
    """Render the control page and handle the backup-control form.

    GET renders the page with the current state. POST either runs a backup
    immediately (when the "manual_run" field is present) and shows its log,
    or updates ``schedule_interval`` from the "interval" field. An interval
    that is not a whole number, or is negative, is rejected and leaves the
    current schedule untouched; 0 disables scheduling.
    """
    global schedule_interval
    status = ""
    if request.method == "POST":
        if "manual_run" in request.form:
            status = run_backup()
        else:
            try:
                minutes = int(request.form.get("interval", "0"))
            except ValueError:
                minutes = None
            # The form's min="1" is only enforced by the browser, so validate
            # the value again here.
            if minutes is None or minutes < 0:
                status = "Invalid interval"
            elif minutes == 0:
                schedule_interval = 0
                status = "Schedule disabled"
            else:
                schedule_interval = minutes
                status = f"Scheduled every {schedule_interval} minutes"
    return render_template_string(
        HTML,
        last_run=last_backup_time,
        interval=schedule_interval,
        status=status,
        files=[],
        manage_status=None,
    )
@app.route("/manage", methods=["POST"])
def manage():
    """Download a Drive file or folder and upload it into the dataset repo.

    Reads "drive_url" and "dest_path" from the form, downloads the link into
    a private temporary directory, uploads that directory to ``dest_path``
    (the repo root when empty) in the REPO_ID dataset, then re-renders the
    page with the outcome and the current list of files in the dataset.

    Download, upload and listing errors are reported in the page's status
    message rather than raised.
    """
    drive_url = (request.form.get("drive_url") or "").strip()
    dest_path = request.form.get("dest_path") or ""
    manage_log = ""
    try:
        if not drive_url:
            raise ValueError("no Drive URL provided")
        # A fresh temporary directory (removed on exit) instead of a folder
        # under DOWNLOAD_DIR, which a concurrently running backup wipes.
        with tempfile.TemporaryDirectory(prefix="upload_tmp_") as tmp:
            if "/folders/" in drive_url:
                # Folder links need the folder downloader.
                fetched = gdown.download_folder(
                    url=drive_url, output=tmp, use_cookies=False, quiet=True
                )
            else:
                # The trailing separator tells gdown that the output is a
                # directory, so the file keeps its Drive name; fuzzy=True lets
                # ordinary ".../file/d/<id>/view" share links work.
                fetched = gdown.download(
                    url=drive_url, output=tmp + os.sep, quiet=True, fuzzy=True
                )
            if not fetched:
                raise RuntimeError("download failed")
            login(token=TOKEN)
            upload_folder(
                repo_id=REPO_ID,
                folder_path=tmp,
                repo_type="dataset",
                token=TOKEN,
                path_in_repo=dest_path,
                commit_message=f"Upload via UI to {dest_path or 'root'}",
            )
        manage_log = "Upload successful"
    except Exception as e:
        manage_log = f"Error {e}"

    # Listing is best-effort: a failure here must not turn the response into
    # a 500 and hide the upload result.
    try:
        files = HfApi().list_repo_files(repo_id=REPO_ID, repo_type="dataset", token=TOKEN)
    except Exception as e:
        files = []
        manage_log = f"{manage_log} (could not list dataset files: {e})"

    return render_template_string(
        HTML,
        last_run=last_backup_time,
        interval=schedule_interval,
        status="",
        files=files,
        manage_status=manage_log,
    )
if __name__ == "__main__":
    # Direct execution only: serve on all interfaces, port 7860.
    listen_on = {"host": "0.0.0.0", "port": 7860}
    app.run(**listen_on)