"""Flask service that applies plain-English cleaning instructions to uploads.

A client POSTs a CSV/Excel file plus an instruction string to ``/process``;
the cleaned table is written to ``UPLOAD_FOLDER`` as CSV and can be fetched
from ``/download/<filename>?session_id=...``.  A daemon thread removes old
output files.
"""

import os
import re
import threading
import time

import pandas as pd
from flask import Flask, jsonify, request, send_file, send_from_directory  # noqa: F401  (send_file kept for importers)
from flask_cors import CORS

app = Flask(__name__)
CORS(app)

UPLOAD_FOLDER = "/tmp"
SESSION_KEY_PREFIX = "data_tool_session_id"
# Every file this service writes starts with this prefix; cleanup and the
# download check both rely on it.
OUTPUT_PREFIX = "cleaned_"

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 512 * 1024 * 1024  # 512 MB

# Characters allowed in a session id / output file stem.  No path separators,
# so a value that matches can never escape UPLOAD_FOLDER.
_SAFE_TOKEN = re.compile(r"[A-Za-z0-9._-]+")
_UNSAFE_CHARS = re.compile(r"[^A-Za-z0-9._-]+")
# Signed integer or decimal literal, e.g. "5", "-3", "2.75".
_NUMBER = r"(-?\d+(?:\.\d+)?)"


# === Cleanup Thread: delete files older than 60 minutes ===
def _remove_stale_files(folder, max_age_seconds, prefix):
    """Delete regular files in *folder* named ``prefix*`` older than the limit.

    Every filesystem error is reported and skipped so that one vanished or
    unreadable file cannot stop the scan (or the thread that runs it).
    """
    try:
        names = os.listdir(folder)
    except OSError as exc:
        print(f"[Cleanup Error] {exc}")
        return

    now = time.time()
    for name in names:
        if not name.startswith(prefix):
            continue
        path = os.path.join(folder, name)
        try:
            if os.path.isfile(path) and now - os.path.getmtime(path) > max_age_seconds:
                os.remove(path)
                print(f"[Cleanup] Deleted: {path}")
        except OSError as exc:
            # The file may disappear between listdir() and getmtime()/remove().
            print(f"[Cleanup Error] {exc}")


def clean_old_files(folder=UPLOAD_FOLDER, max_age=60, prefix=OUTPUT_PREFIX, interval=600):
    """Start a daemon thread that periodically deletes stale output files.

    Args:
        folder: Directory to scan.
        max_age: Age limit in minutes, compared against each file's mtime.
        prefix: Only files whose name starts with this are considered, so
            unrelated files sharing the temp directory are left alone.  Pass
            ``""`` to consider every file in *folder*.
        interval: Seconds to sleep between scans.
    """
    max_age_seconds = max_age * 60

    def cleanup_loop():
        while True:
            _remove_stale_files(folder, max_age_seconds, prefix)
            time.sleep(interval)

    threading.Thread(target=cleanup_loop, daemon=True).start()


# Start cleanup thread
clean_old_files()


# === Instruction Parser ===
def _resolve_column(df, name):
    """Map a column name typed by the user onto an actual label of *df*.

    An exact match wins; otherwise the first label whose string form matches
    case-insensitively is used.  If nothing matches, *name* is returned
    unchanged so the following pandas call raises its usual ``KeyError``.
    """
    if name in df.columns:
        return name
    wanted = name.lower()
    for label in df.columns:
        if str(label).lower() == wanted:
            return label
    return name


def _parse_fill_value(text):
    """Return *text* as a float when it parses as one, else the text itself."""
    try:
        return float(text)
    except ValueError:
        return text


def apply_instruction(df, instruction):
    """Apply every operation recognised in *instruction* to *df*.

    Keywords are matched case-insensitively, while column names, fill values
    and new column names keep the case the user typed.  Existing columns are
    looked up through ``_resolve_column``.  Several operations may appear in
    one instruction; they run in the fixed order of the checks below.

    Column-assigning operations (add / normalize / standardize / split /
    remove special characters) write into whatever frame ``df`` refers to at
    that point, which is the caller's frame when no earlier step rebound it.

    Args:
        df: The pandas DataFrame to transform.
        instruction: Free-text instruction, e.g. ``"sort by age desc"``.

    Returns:
        A ``(frame, status)`` tuple.  ``status`` is ``"success"`` or
        ``"Error: <message>"``; on error the frame reflects the operations
        that completed before the failure.
    """
    keywords = instruction.lower()

    def find(pattern):
        return re.search(pattern, instruction, flags=re.IGNORECASE)

    try:
        match = find(r"drop column (\w+)")
        if match:
            df = df.drop(columns=[_resolve_column(df, match.group(1))])

        if "remove duplicates" in keywords:
            df = df.drop_duplicates()

        if "drop missing" in keywords or "remove null" in keywords:
            df = df.dropna()

        match = find(r"fill missing.*with ([\w\.]+)")
        if match:
            df = df.fillna(_parse_fill_value(match.group(1)))

        match = find(r"sort by (\w+)( descending| desc)?")
        if match:
            column = _resolve_column(df, match.group(1))
            df = df.sort_values(by=column, ascending=not match.group(2))

        match = find(r"rename column (\w+) to (\w+)")
        if match:
            df = df.rename(columns={_resolve_column(df, match.group(1)): match.group(2)})

        # Accept decimals and negatives; a bare (\d+) would silently read
        # "> 5.5" as "> 5".
        match = find(r"filter where (\w+) > " + _NUMBER)
        if match:
            column = _resolve_column(df, match.group(1))
            df = df[df[column] > float(match.group(2))]

        match = find(r"group by (\w+) and sum (\w+)")
        if match:
            key = _resolve_column(df, match.group(1))
            value = _resolve_column(df, match.group(2))
            df = df.groupby(key)[value].sum().reset_index()

        match = find(r"add column (\w+) as (\w+) \+ (\w+)")
        if match:
            left = _resolve_column(df, match.group(2))
            right = _resolve_column(df, match.group(3))
            df[match.group(1)] = df[left] + df[right]

        match = find(r"normalize column (\w+)")
        if match:
            column = _resolve_column(df, match.group(1))
            low = df[column].min()
            span = df[column].max() - low
            centered = df[column] - low
            # A constant column has zero range; map it to 0.0 instead of 0/0.
            df[column] = centered / span if span != 0 else centered * 0.0

        match = find(r"standardize column (\w+)")
        if match:
            column = _resolve_column(df, match.group(1))
            spread = df[column].std()
            centered = df[column] - df[column].mean()
            # Same guard as above for a zero standard deviation.
            df[column] = centered / spread if spread != 0 else centered * 0.0

        match = find(r"split column (\w+) by comma")
        if match:
            column = _resolve_column(df, match.group(1))
            parts = df[column].str.split(",", expand=True)
            # Always produce at least <col>_1 and <col>_2, and as many more
            # as the data needs, rather than failing unless exactly two
            # pieces come back.
            width = max(2, parts.shape[1])
            parts = parts.reindex(columns=range(width))
            for index in range(width):
                df[f"{column}_{index + 1}"] = parts[index]

        match = find(r"remove special characters from (\w+)")
        if match:
            column = _resolve_column(df, match.group(1))
            original = df[column]
            cleaned = original.astype(str).str.replace(r"[^a-zA-Z0-9]", "", regex=True)
            # astype(str) turns missing values into the text "nan"; keep
            # them missing instead.
            df[column] = cleaned.where(original.notna())

    except Exception as e:
        # Boundary handler: any pandas/regex failure is reported to the
        # client through the status string instead of a 500.
        return df, f"Error: {e}"

    return df, "success"


def _safe_stem(filename):
    """Return a filesystem-safe stem (no directory, no extension) for *filename*."""
    base = os.path.basename(filename.replace("\\", "/"))
    stem = os.path.splitext(base)[0]
    stem = _UNSAFE_CHARS.sub("_", stem).strip("._")
    return stem or "upload"


# === File Processor Endpoint ===
@app.route("/process", methods=["POST"])
def process_file():
    """Read the uploaded table, apply the instruction and store the result.

    Expects multipart form fields ``file``, ``instruction`` and
    ``session_id``.  Responds with a JSON object holding a preview of the
    first 10 rows, the download URL and the status string returned by
    ``apply_instruction``.  Responds 400 when a field is missing, the
    session id contains characters outside ``[A-Za-z0-9._-]``, or the file
    cannot be parsed.
    """
    form = request.form
    if "file" not in request.files or "instruction" not in form or "session_id" not in form:
        return jsonify({"error": "Missing file, instruction, or session_id"}), 400

    upload = request.files["file"]
    instruction = form["instruction"]
    session_id = form["session_id"]

    # The session id becomes part of a file name on disk, so reject anything
    # that could carry a path separator.
    if not _SAFE_TOKEN.fullmatch(session_id):
        return jsonify({"error": "Invalid session_id"}), 400

    source_name = upload.filename or ""
    try:
        if source_name.lower().endswith(".csv"):
            df = pd.read_csv(upload)
        else:
            df = pd.read_excel(upload)
    except Exception as e:
        return jsonify({"error": f"File read error: {str(e)}"}), 400

    df, status = apply_instruction(df, instruction)

    # The output is always CSV, so name it .csv whatever the upload was.
    filename = f"{OUTPUT_PREFIX}{session_id}_{_safe_stem(source_name)}.csv"
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    df.to_csv(filepath, index=False)

    # NaN would be emitted as the bare token NaN, which is not valid JSON;
    # send null instead.
    head = df.head(10)
    preview = head.astype(object).where(head.notna(), None).to_dict(orient="records")

    return jsonify({
        "preview": preview,
        "download_url": f"/download/{filename}",
        "status": status
    })


# === File Download with Session ID Verification ===
@app.route("/download/<filename>", methods=["GET"])
def download_file(filename):
    """Send a processed file to the session that created it.

    The ``session_id`` query parameter must be the one embedded directly
    after the ``cleaned_`` prefix of *filename*; otherwise the response is
    403.  A missing file yields a JSON 404.
    """
    session_id = request.args.get("session_id")
    # Prefix check rather than a substring check: text inside the uploaded
    # file's own name must not be able to satisfy it.
    if not session_id or not filename.startswith(f"{OUTPUT_PREFIX}{session_id}_"):
        return jsonify({"error": "Unauthorized download attempt"}), 403

    folder = app.config['UPLOAD_FOLDER']
    if not os.path.isfile(os.path.join(folder, filename)):
        return jsonify({"error": "File not found"}), 404

    # send_from_directory refuses paths that resolve outside *folder*.
    return send_from_directory(folder, filename, as_attachment=True)


# === Run on Port 7860 for Hugging Face ===
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)