Spaces:

mike23415
/

Data-analytics

Sleeping

App Files Files Community

mike23415 committed on Jun 21

Commit

2b74b13

verified ·

1 Parent(s): 3eb14e0

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -45

app.py CHANGED Viewed

@@ -14,101 +14,86 @@ SESSION_KEY_PREFIX = "data_tool_session_id"
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['MAX_CONTENT_LENGTH'] = 512 * 1024 * 1024  # 512 MB
-# Cleanup function runs every 10 mins and deletes files older than 60 mins
 def clean_old_files(folder=UPLOAD_FOLDER, max_age=60):
-    while True:
-        now = time.time()
-        for f in os.listdir(folder):
-            path = os.path.join(folder, f)
-            if os.path.isfile(path):
-                if now - os.path.getmtime(path) > max_age * 60:
-                    try:
-                        os.remove(path)
-                        print(f"[Cleanup] Deleted: {path}")
-                    except Exception as e:
-                        print(f"[Cleanup Error] {e}")
-        time.sleep(600)  # Run every 10 minutes
-# Start cleanup thread at launch
-threading.Thread(target=clean_old_files, daemon=True).start()
 def apply_instruction(df, instruction):
     instruction = instruction.lower()
     try:
-        # Drop column
         match = re.search(r"drop column (\w+)", instruction)
         if match:
             df = df.drop(columns=[match.group(1)])
-        # Remove duplicates
         if "remove duplicates" in instruction:
             df = df.drop_duplicates()
-        # Drop missing values
         if "drop missing" in instruction or "remove null" in instruction:
             df = df.dropna()
-        # Fill missing values
         match = re.search(r"fill missing.*with ([\w\.]+)", instruction)
         if match:
             val = match.group(1)
-            try:
-                val = float(val)
-            except:
-                pass
             df = df.fillna(val)
-        # Sort
         match = re.search(r"sort by (\w+)( descending| desc)?", instruction)
         if match:
             col = match.group(1)
             ascending = not bool(match.group(2))
             df = df.sort_values(by=col, ascending=ascending)
-        # Rename
         match = re.search(r"rename column (\w+) to (\w+)", instruction)
         if match:
-            old, new = match.group(1), match.group(2)
-            df = df.rename(columns={old: new})
-        # Filter where col > val
         match = re.search(r"filter where (\w+) > (\d+)", instruction)
         if match:
-            col, val = match.group(1), float(match.group(2))
-            df = df[df[col] > val]
-        # Group by and sum
         match = re.search(r"group by (\w+) and sum (\w+)", instruction)
         if match:
-            group_col, sum_col = match.group(1), match.group(2)
-            df = df.groupby(group_col)[sum_col].sum().reset_index()
-        # Add column as sum
         match = re.search(r"add column (\w+) as (\w+) \+ (\w+)", instruction)
         if match:
-            new_col, col1, col2 = match.group(1), match.group(2), match.group(3)
-            df[new_col] = df[col1] + df[col2]
-        # Normalize column
         match = re.search(r"normalize column (\w+)", instruction)
         if match:
             col = match.group(1)
             df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())
-        # Standardize column
         match = re.search(r"standardize column (\w+)", instruction)
         if match:
             col = match.group(1)
             df[col] = (df[col] - df[col].mean()) / df[col].std()
-        # Split column by comma
         match = re.search(r"split column (\w+) by comma", instruction)
         if match:
             col = match.group(1)
             df[[f"{col}_1", f"{col}_2"]] = df[col].str.split(",", expand=True)
-        # Remove special characters
         match = re.search(r"remove special characters from (\w+)", instruction)
         if match:
             col = match.group(1)
@@ -119,6 +104,7 @@ def apply_instruction(df, instruction):
     return df, "success"
 @app.route("/process", methods=["POST"])
 def process_file():
     if "file" not in request.files or "instruction" not in request.form or "session_id" not in request.form:
@@ -131,13 +117,13 @@ def process_file():
     try:
         df = pd.read_csv(file) if file.filename.endswith(".csv") else pd.read_excel(file)
     except Exception as e:
-        return jsonify({"error": f"Failed to read file: {str(e)}"}), 400
     df, status = apply_instruction(df, instruction)
     filename = f"cleaned_{session_id}_{file.filename}"
-    output_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
-    df.to_csv(output_path, index=False)
     preview = df.head(10).to_dict(orient="records")
     return jsonify({
@@ -146,6 +132,7 @@ def process_file():
         "status": status
     })
 @app.route("/download/<filename>", methods=["GET"])
 def download_file(filename):
     session_id = request.args.get("session_id")
@@ -157,5 +144,6 @@ def download_file(filename):
         return send_file(path, as_attachment=True)
     return jsonify({"error": "File not found"}), 404
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)

 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['MAX_CONTENT_LENGTH'] = 512 * 1024 * 1024  # 512 MB
+# === Cleanup Thread: delete files older than 60 minutes ===
 def clean_old_files(folder=UPLOAD_FOLDER, max_age=60):
+    def cleanup_loop():
+        while True:
+            now = time.time()
+            for f in os.listdir(folder):
+                path = os.path.join(folder, f)
+                if os.path.isfile(path):
+                    if now - os.path.getmtime(path) > max_age * 60:
+                        try:
+                            os.remove(path)
+                            print(f"[Cleanup] Deleted: {path}")
+                        except Exception as e:
+                            print(f"[Cleanup Error] {e}")
+            time.sleep(600)  # Every 10 minutes
+    threading.Thread(target=cleanup_loop, daemon=True).start()
+# Start cleanup thread
+clean_old_files()
+# === Instruction Parser ===
 def apply_instruction(df, instruction):
     instruction = instruction.lower()
     try:
         match = re.search(r"drop column (\w+)", instruction)
         if match:
             df = df.drop(columns=[match.group(1)])
         if "remove duplicates" in instruction:
             df = df.drop_duplicates()
         if "drop missing" in instruction or "remove null" in instruction:
             df = df.dropna()
         match = re.search(r"fill missing.*with ([\w\.]+)", instruction)
         if match:
             val = match.group(1)
+            try: val = float(val)
+            except: pass
             df = df.fillna(val)
         match = re.search(r"sort by (\w+)( descending| desc)?", instruction)
         if match:
             col = match.group(1)
             ascending = not bool(match.group(2))
             df = df.sort_values(by=col, ascending=ascending)
         match = re.search(r"rename column (\w+) to (\w+)", instruction)
         if match:
+            df = df.rename(columns={match.group(1): match.group(2)})
         match = re.search(r"filter where (\w+) > (\d+)", instruction)
         if match:
+            df = df[df[match.group(1)] > float(match.group(2))]
         match = re.search(r"group by (\w+) and sum (\w+)", instruction)
         if match:
+            df = df.groupby(match.group(1))[match.group(2)].sum().reset_index()
         match = re.search(r"add column (\w+) as (\w+) \+ (\w+)", instruction)
         if match:
+            df[match.group(1)] = df[match.group(2)] + df[match.group(3)]
         match = re.search(r"normalize column (\w+)", instruction)
         if match:
             col = match.group(1)
             df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())
         match = re.search(r"standardize column (\w+)", instruction)
         if match:
             col = match.group(1)
             df[col] = (df[col] - df[col].mean()) / df[col].std()
         match = re.search(r"split column (\w+) by comma", instruction)
         if match:
             col = match.group(1)
             df[[f"{col}_1", f"{col}_2"]] = df[col].str.split(",", expand=True)
         match = re.search(r"remove special characters from (\w+)", instruction)
         if match:
             col = match.group(1)
     return df, "success"
+# === File Processor Endpoint ===
 @app.route("/process", methods=["POST"])
 def process_file():
     if "file" not in request.files or "instruction" not in request.form or "session_id" not in request.form:
     try:
         df = pd.read_csv(file) if file.filename.endswith(".csv") else pd.read_excel(file)
     except Exception as e:
+        return jsonify({"error": f"File read error: {str(e)}"}), 400
     df, status = apply_instruction(df, instruction)
     filename = f"cleaned_{session_id}_{file.filename}"
+    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+    df.to_csv(filepath, index=False)
     preview = df.head(10).to_dict(orient="records")
     return jsonify({
         "status": status
     })
+# === File Download with Session ID Verification ===
 @app.route("/download/<filename>", methods=["GET"])
 def download_file(filename):
     session_id = request.args.get("session_id")
         return send_file(path, as_attachment=True)
     return jsonify({"error": "File not found"}), 404
+# === Run on Port 7860 for Hugging Face ===
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)