Spaces:

limitedonly41
/

csv_manipulations

Sleeping

App Files Files Community

limitedonly41 commited on Oct 20

Commit

29f7300

verified ·

1 Parent(s): e5f0f5b

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -1136

app.py CHANGED Viewed

@@ -1,1142 +1,54 @@
-#!/usr/bin/env python3
-"""
-Advanced CSV Manipulation Tool with Gradio Interface
-Commercial-ready application for powerful CSV data processing
-Features:
-- File upload with 1GB limit
-- Data preview with selectable rows
-- Value replacement based on conditions
-- CSV concatenation with column selection
-- Advanced statistical analysis and visualization
-- Data validation and quality checks
-- Export to CSV, Excel, JSON
-- Batch operations and operation recipes
-- Undo/Redo functionality
-- Memory-efficient large file processing
-"""
-import gradio as gr
-import pandas as pd
-import numpy as np
-import json
-import io
-import zipfile
-from datetime import datetime, timedelta
-import re
-import matplotlib.pyplot as plt
-import seaborn as sns
-import plotly.express as px
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-import warnings
 import os
-from typing import Dict, List, Tuple, Optional, Any
-import hashlib
-import pickle
-from pathlib import Path
-warnings.filterwarnings('ignore')
-plt.style.use('seaborn-v0_8')
-sns.set_palette("husl")
-class CSVProcessor:
-    """Advanced CSV processing class with state management and history"""
-    def __init__(self):
-        self.original_df = None
-        self.current_df = None
-        self.history = []
-        self.recipes = {}
-        self.batch_files = []
-    def load_data(self, file, preview_rows=100, encoding='utf-8'):
-        """Load data file with error handling and memory optimization"""
-        try:
-            if file is None:
-                return None, "No file provided"
-            file_path = file.name if hasattr(file, 'name') else str(file)
-            file_extension = Path(file_path).suffix.lower()
-            # Chunked reading for large files
-            if file_extension == '.csv':
-                # Try different encodings
-                encodings = [encoding, 'utf-8', 'latin-1', 'cp1252']
-                df = None
-                for enc in encodings:
-                    try:
-                        df = pd.read_csv(file_path, encoding=enc, low_memory=False)
-                        break
-                    except UnicodeDecodeError:
-                        continue
-                if df is None:
-                    return None, "Failed to decode file with supported encodings"
-            elif file_extension in ['.xlsx', '.xls']:
-                df = pd.read_excel(file_path)
-            elif file_extension == '.json':
-                df = pd.read_json(file_path)
-            elif file_extension == '.parquet':
-                df = pd.read_parquet(file_path)
-            else:
-                return None, f"Unsupported file format: {file_extension}"
-            self.original_df = df.copy()
-            self.current_df = df.copy()
-            self.history = []
-            # Create preview
-            if preview_rows > 0:
-                preview = df.head(preview_rows)
-            else:
-                preview = df
-            # Memory and performance info
-            memory_mb = df.memory_usage(deep=True).sum() / 1024**2
-            info = {
-                'rows': len(df),
-                'columns': len(df.columns),
-                'memory_usage': f"{memory_mb:.2f} MB",
-                'dtypes': dict(df.dtypes.astype(str)),
-                'null_counts': dict(df.isnull().sum()),
-                'duplicates': df.duplicated().sum()
-            }
-            success_msg = f"✅ File loaded successfully!\n"
-            success_msg += f"📊 {info['rows']:,} rows × {info['columns']} columns\n"
-            success_msg += f"💾 Memory usage: {info['memory_usage']}\n"
-            success_msg += f"🔄 Duplicates: {info['duplicates']:,}\n"
-            success_msg += f"❌ Missing values: {sum(info['null_counts'].values()):,}"
-            return preview, success_msg, info
-        except Exception as e:
-            return None, f"❌ Error loading file: {str(e)}", {}
-    def save_state(self, operation_name: str):
-        """Save current state to history with memory management"""
-        if len(self.history) > 50:  # Limit history to prevent memory issues
-            self.history = self.history[-25:]  # Keep last 25 operations
-        self.history.append({
-            'operation': operation_name,
-            'timestamp': datetime.now(),
-            'df': self.current_df.copy() if self.current_df is not None else None
-        })
-    def undo_operation(self):
-        """Undo last operation"""
-        if len(self.history) > 1:
-            self.history.pop()
-            self.current_df = self.history[-1]['df'].copy()
-            return self.current_df, f"✅ Undone: {self.history[-1]['operation']}"
-        elif len(self.history) == 1:
-            self.current_df = self.original_df.copy()
-            self.history = []
-            return self.current_df, "✅ Reset to original data"
-        else:
-            return self.current_df, "❌ No operations to undo"
-    def reset_to_original(self):
-        """Reset to original data"""
-        if self.original_df is not None:
-            self.current_df = self.original_df.copy()
-            self.history = []
-            return self.current_df, "✅ Reset to original data"
-        return None, "❌ No original data available"
-# Global processor instance
-processor = CSVProcessor()
-def create_download_file(df: pd.DataFrame, format_type: str, filename: str = "processed_data"):
-    """Create downloadable file in specified format"""
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    filename_with_timestamp = f"{filename}_{timestamp}"
-    try:
-        if format_type == "csv":
-            csv_data = df.to_csv(index=False)
-            return csv_data, f"{filename_with_timestamp}.csv"
-        elif format_type == "excel":
-            buffer = io.BytesIO()
-            with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
-                df.to_excel(writer, index=False, sheet_name='Data')
-            buffer.seek(0)
-            return buffer.getvalue(), f"{filename_with_timestamp}.xlsx"
-        elif format_type == "json":
-            json_data = df.to_json(orient='records', indent=2, date_format='iso')
-            return json_data, f"{filename_with_timestamp}.json"
-    except Exception as e:
-        return None, f"Error creating {format_type} file: {str(e)}"
-def get_data_info(df: pd.DataFrame) -> str:
-    """Get comprehensive data information"""
-    if df is None or df.empty:
-        return "No data loaded"
-    info_dict = {
-        '📊 Shape': f"{df.shape[0]:,} rows × {df.shape[1]} columns",
-        '💾 Memory': f"{df.memory_usage(deep=True).sum() / 1024**2:.2f} MB",
-        '🔄 Duplicates': f"{df.duplicated().sum():,}",
-        '❌ Missing Values': f"{df.isnull().sum().sum():,}",
-        '📈 Numeric Columns': f"{len(df.select_dtypes(include=[np.number]).columns)}",
-        '📝 Text Columns': f"{len(df.select_dtypes(include=['object']).columns)}",
-        '📅 Date Columns': f"{len(df.select_dtypes(include=['datetime64']).columns)}"
-    }
-    return "\n".join([f"{k}: {v}" for k, v in info_dict.items()])
-def get_column_options(df: pd.DataFrame) -> List[str]:
-    """Get list of column names for dropdowns"""
-    return list(df.columns) if df is not None else []
-# ===========================================
-# CORE DATA MANIPULATION FUNCTIONS
-# ===========================================
-def rename_values_conditional(df: pd.DataFrame, target_col: str, condition_col: str,
-                             condition_value: str, new_value: str, match_type: str = "exact") -> Tuple[pd.DataFrame, str]:
-    """Rename values in target column based on condition in another column"""
-    try:
-        if df is None or df.empty:
-            return df, "❌ No data available"
-        if target_col not in df.columns or condition_col not in df.columns:
-            return df, "❌ One or more columns not found"
-        df_result = df.copy()
-        if match_type == "exact":
-            mask = df_result[condition_col] == condition_value
-        elif match_type == "contains":
-            mask = df_result[condition_col].astype(str).str.contains(condition_value, na=False)
-        elif match_type == "regex":
-            mask = df_result[condition_col].astype(str).str.match(condition_value, na=False)
-        elif match_type == "starts_with":
-            mask = df_result[condition_col].astype(str).str.startswith(condition_value, na=False)
-        elif match_type == "ends_with":
-            mask = df_result[condition_col].astype(str).str.endswith(condition_value, na=False)
-        affected_rows = mask.sum()
-        df_result.loc[mask, target_col] = new_value
-        processor.current_df = df_result
-        processor.save_state(f"Renamed values in '{target_col}' based on '{condition_col}'")
-        return df_result, f"✅ Updated {affected_rows:,} rows in column '{target_col}'"
-    except Exception as e:
-        return df, f"❌ Error: {str(e)}"
-def concatenate_csvs(files: List, selected_columns: str, join_type: str = "outer") -> Tuple[pd.DataFrame, str]:
-    """Concatenate multiple CSV files with column selection"""
-    try:
-        if not files:
-            return None, "❌ No files provided"
-        dfs = []
-        columns_to_use = [col.strip() for col in selected_columns.split(",") if col.strip()] if selected_columns else None
-        for file in files:
-            if hasattr(file, 'name'):
-                file_path = file.name
-                if file_path.endswith('.csv'):
-                    df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
-                elif file_path.endswith(('.xlsx', '.xls')):
-                    df = pd.read_excel(file_path)
-                else:
-                    continue
-                # Select specific columns if specified
-                if columns_to_use:
-                    available_cols = [col for col in columns_to_use if col in df.columns]
-                    if available_cols:
-                        df = df[available_cols]
-                    else:
-                        continue
-                # Add source file identifier
-                df['_source_file'] = Path(file_path).stem
-                dfs.append(df)
-        if not dfs:
-            return None, "❌ No valid files found or columns don't exist"
-        # Concatenate with specified join type
-        if join_type == "inner":
-            result_df = pd.concat(dfs, ignore_index=True, join='inner')
-        else:
-            result_df = pd.concat(dfs, ignore_index=True, join='outer')
-        processor.current_df = result_df
-        processor.save_state(f"Concatenated {len(dfs)} files")
-        return result_df, f"✅ Successfully concatenated {len(dfs)} files with {len(result_df):,} total rows"
-    except Exception as e:
-        return None, f"❌ Error concatenating files: {str(e)}"
-def get_value_counts(df: pd.DataFrame, column: str, top_n: int = 20, normalize: bool = False) -> Tuple[pd.DataFrame, str]:
-    """Get value counts for specified column"""
-    try:
-        if df is None or df.empty:
-            return None, "❌ No data available"
-        if column not in df.columns:
-            return None, f"❌ Column '{column}' not found"
-        value_counts = df[column].value_counts(normalize=normalize, dropna=False).head(top_n)
-        # Convert to DataFrame for better display
-        result_df = pd.DataFrame({
-            'Value': value_counts.index,
-            'Count' if not normalize else 'Percentage': value_counts.values
-        })
-        if normalize:
-            result_df['Percentage'] = result_df['Percentage'].map(lambda x: f"{x:.2%}")
-        return result_df, f"✅ Value counts for '{column}' (Top {min(top_n, len(result_df))})"
-    except Exception as e:
-        return None, f"❌ Error: {str(e)}"
-def filter_data(df: pd.DataFrame, column: str, condition: str, value: str) -> Tuple[pd.DataFrame, str]:
-    """Filter data based on conditions"""
-    try:
-        if df is None or df.empty:
-            return df, "❌ No data available"
-        if column not in df.columns:
-            return df, f"❌ Column '{column}' not found"
-        df_result = df.copy()
-        if condition == "equals":
-            mask = df_result[column] == value
-        elif condition == "not_equals":
-            mask = df_result[column] != value
-        elif condition == "contains":
-            mask = df_result[column].astype(str).str.contains(value, na=False)
-        elif condition == "not_contains":
-            mask = ~df_result[column].astype(str).str.contains(value, na=False)
-        elif condition == "starts_with":
-            mask = df_result[column].astype(str).str.startswith(value, na=False)
-        elif condition == "ends_with":
-            mask = df_result[column].astype(str).str.endswith(value, na=False)
-        elif condition == "greater_than":
-            mask = pd.to_numeric(df_result[column], errors='coerce') > float(value)
-        elif condition == "less_than":
-            mask = pd.to_numeric(df_result[column], errors='coerce') < float(value)
-        elif condition == "is_null":
-            mask = df_result[column].isnull()
-        elif condition == "is_not_null":
-            mask = df_result[column].notnull()
-        else:
-            return df, f"❌ Unknown condition: {condition}"
-        filtered_df = df_result[mask]
-        processor.current_df = filtered_df
-        processor.save_state(f"Filtered data: {column} {condition} {value}")
-        return filtered_df, f"✅ Filtered to {len(filtered_df):,} rows (removed {len(df) - len(filtered_df):,} rows)"
-    except Exception as e:
-        return df, f"❌ Error: {str(e)}"
-def handle_missing_values(df: pd.DataFrame, column: str, method: str, fill_value: str = "") -> Tuple[pd.DataFrame, str]:
-    """Handle missing values in specified column"""
-    try:
-        if df is None or df.empty:
-            return df, "❌ No data available"
-        if column != "ALL" and column not in df.columns:
-            return df, f"❌ Column '{column}' not found"
-        df_result = df.copy()
-        columns_to_process = [column] if column != "ALL" else df_result.columns.tolist()
-        total_missing_before = df_result.isnull().sum().sum()
-        for col in columns_to_process:
-            if method == "drop_rows":
-                df_result = df_result.dropna(subset=[col])
-            elif method == "fill_value":
-                df_result[col] = df_result[col].fillna(fill_value)
-            elif method == "fill_mean":
-                if df_result[col].dtype in ['int64', 'float64']:
-                    df_result[col] = df_result[col].fillna(df_result[col].mean())
-            elif method == "fill_median":
-                if df_result[col].dtype in ['int64', 'float64']:
-                    df_result[col] = df_result[col].fillna(df_result[col].median())
-            elif method == "fill_mode":
-                mode_val = df_result[col].mode()
-                if len(mode_val) > 0:
-                    df_result[col] = df_result[col].fillna(mode_val[0])
-            elif method == "forward_fill":
-                df_result[col] = df_result[col].fillna(method='ffill')
-            elif method == "backward_fill":
-                df_result[col] = df_result[col].fillna(method='bfill')
-        total_missing_after = df_result.isnull().sum().sum()
-        processor.current_df = df_result
-        processor.save_state(f"Handle missing values: {method}")
-        return df_result, f"✅ Processed missing values. Before: {total_missing_before:,}, After: {total_missing_after:,}"
-    except Exception as e:
-        return df, f"❌ Error: {str(e)}"
-def detect_and_remove_duplicates(df: pd.DataFrame, columns: str = "", keep: str = "first") -> Tuple[pd.DataFrame, str]:
-    """Detect and remove duplicate rows"""
-    try:
-        if df is None or df.empty:
-            return df, "❌ No data available"
-        df_result = df.copy()
-        # Parse columns
-        if columns.strip():
-            cols_list = [col.strip() for col in columns.split(",") if col.strip() in df.columns]
-            subset = cols_list if cols_list else None
         else:
-            subset = None
-        duplicates_before = df_result.duplicated(subset=subset).sum()
-        if duplicates_before == 0:
-            return df_result, "✅ No duplicate rows found"
-        df_result = df_result.drop_duplicates(subset=subset, keep=keep)
-        processor.current_df = df_result
-        processor.save_state(f"Removed {duplicates_before:,} duplicate rows")
-        return df_result, f"✅ Removed {duplicates_before:,} duplicate rows. Remaining: {len(df_result):,} rows"
-    except Exception as e:
-        return df, f"❌ Error: {str(e)}"
-def perform_column_operations(df: pd.DataFrame, operation: str, col1: str, col2: str = "",
-                            new_col_name: str = "", constant: str = "") -> Tuple[pd.DataFrame, str]:
-    """Perform mathematical and string operations on columns"""
-    try:
-        if df is None or df.empty:
-            return df, "❌ No data available"
-        if col1 not in df.columns:
-            return df, f"❌ Column '{col1}' not found"
-        df_result = df.copy()
-        if not new_col_name:
-            new_col_name = f"{col1}_{operation}"
-        if operation == "add":
-            if col2 and col2 in df.columns:
-                df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') + pd.to_numeric(df_result[col2], errors='coerce')
-            elif constant:
-                df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') + float(constant)
-        elif operation == "subtract":
-            if col2 and col2 in df.columns:
-                df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') - pd.to_numeric(df_result[col2], errors='coerce')
-            elif constant:
-                df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') - float(constant)
-        elif operation == "multiply":
-            if col2 and col2 in df.columns:
-                df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') * pd.to_numeric(df_result[col2], errors='coerce')
-            elif constant:
-                df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') * float(constant)
-        elif operation == "divide":
-            if col2 and col2 in df.columns:
-                df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') / pd.to_numeric(df_result[col2], errors='coerce')
-            elif constant:
-                df_result[new_col_name] = pd.to_numeric(df_result[col1], errors='coerce') / float(constant)
-        elif operation == "concatenate":
-            if col2 and col2 in df.columns:
-                df_result[new_col_name] = df_result[col1].astype(str) + " " + df_result[col2].astype(str)
-            elif constant:
-                df_result[new_col_name] = df_result[col1].astype(str) + constant
-        elif operation == "extract_numbers":
-            df_result[new_col_name] = df_result[col1].astype(str).str.extract(r'(\d+)')[0]
-        elif operation == "upper":
-            df_result[new_col_name] = df_result[col1].astype(str).str.upper()
-        elif operation == "lower":
-            df_result[new_col_name] = df_result[col1].astype(str).str.lower()
-        elif operation == "title":
-            df_result[new_col_name] = df_result[col1].astype(str).str.title()
-        elif operation == "length":
-            df_result[new_col_name] = df_result[col1].astype(str).str.len()
-        else:
-            return df, f"❌ Unknown operation: {operation}"
-        processor.current_df = df_result
-        processor.save_state(f"Column operation: {operation} on {col1}")
-        return df_result, f"✅ Created new column '{new_col_name}' using {operation} operation"
     except Exception as e:
-        return df, f"❌ Error: {str(e)}"
-def convert_data_types(df: pd.DataFrame, column: str, target_type: str) -> Tuple[pd.DataFrame, str]:
-    """Convert column data types"""
-    try:
-        if df is None or df.empty:
-            return df, "❌ No data available"
-        if column not in df.columns:
-            return df, f"❌ Column '{column}' not found"
-        df_result = df.copy()
-        if target_type == "string":
-            df_result[column] = df_result[column].astype(str)
-        elif target_type == "integer":
-            df_result[column] = pd.to_numeric(df_result[column], errors='coerce').astype('Int64')
-        elif target_type == "float":
-            df_result[column] = pd.to_numeric(df_result[column], errors='coerce')
-        elif target_type == "datetime":
-            df_result[column] = pd.to_datetime(df_result[column], errors='coerce')
-        elif target_type == "boolean":
-            df_result[column] = df_result[column].astype(bool)
-        elif target_type == "category":
-            df_result[column] = df_result[column].astype('category')
-        else:
-            return df, f"❌ Unknown data type: {target_type}"
-        processor.current_df = df_result
-        processor.save_state(f"Converted '{column}' to {target_type}")
-        return df_result, f"✅ Converted column '{column}' to {target_type}"
-    except Exception as e:
-        return df, f"❌ Error: {str(e)}"
-# ===========================================
-# ANALYSIS AND VISUALIZATION FUNCTIONS
-# ===========================================
-def generate_statistical_summary(df: pd.DataFrame) -> Tuple[pd.DataFrame, str]:
-    """Generate comprehensive statistical summary"""
-    try:
-        if df is None or df.empty:
-            return None, "❌ No data available"
-        numeric_cols = df.select_dtypes(include=[np.number]).columns
-        if len(numeric_cols) == 0:
-            return None, "❌ No numeric columns found"
-        stats_df = df[numeric_cols].describe()
-        # Add additional statistics
-        stats_df.loc['variance'] = df[numeric_cols].var()
-        stats_df.loc['skewness'] = df[numeric_cols].skew()
-        stats_df.loc['kurtosis'] = df[numeric_cols].kurtosis()
-        stats_df.loc['missing'] = df[numeric_cols].isnull().sum()
-        return stats_df.round(4), "✅ Statistical summary generated"
-    except Exception as e:
-        return None, f"❌ Error: {str(e)}"
-def create_correlation_matrix(df: pd.DataFrame) -> Tuple[str, str]:
-    """Create correlation matrix visualization"""
-    try:
-        if df is None or df.empty:
-            return None, "❌ No data available"
-        numeric_cols = df.select_dtypes(include=[np.number]).columns
-        if len(numeric_cols) < 2:
-            return None, "❌ Need at least 2 numeric columns for correlation"
-        # Calculate correlation matrix
-        corr_matrix = df[numeric_cols].corr()
-        # Create heatmap
-        plt.figure(figsize=(12, 8))
-        mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
-        sns.heatmap(corr_matrix, mask=mask, annot=True, cmap='coolwarm', center=0,
-                   square=True, linewidths=0.5, cbar_kws={"shrink": 0.8})
-        plt.title('Correlation Matrix Heatmap', fontsize=16, fontweight='bold')
-        plt.tight_layout()
-        # Save plot
-        plt.savefig('correlation_matrix.png', dpi=300, bbox_inches='tight')
-        plt.close()
-        return 'correlation_matrix.png', "✅ Correlation matrix created"
-    except Exception as e:
-        return None, f"❌ Error: {str(e)}"
-def create_distribution_plots(df: pd.DataFrame, column: str, plot_type: str = "histogram") -> Tuple[str, str]:
-    """Create distribution plots"""
-    try:
-        if df is None or df.empty:
-            return None, "❌ No data available"
-        if column not in df.columns:
-            return None, f"❌ Column '{column}' not found"
-        plt.figure(figsize=(12, 6))
-        if plot_type == "histogram":
-            plt.subplot(1, 2, 1)
-            df[column].hist(bins=30, edgecolor='black', alpha=0.7)
-            plt.title(f'Histogram of {column}')
-            plt.xlabel(column)
-            plt.ylabel('Frequency')
-            plt.subplot(1, 2, 2)
-            df.boxplot(column=column)
-            plt.title(f'Box Plot of {column}')
-        elif plot_type == "density":
-            plt.subplot(1, 2, 1)
-            df[column].plot(kind='density')
-            plt.title(f'Density Plot of {column}')
-            plt.xlabel(column)
-            plt.subplot(1, 2, 2)
-            df[column].plot(kind='box')
-            plt.title(f'Box Plot of {column}')
-        plt.tight_layout()
-        plt.savefig(f'distribution_{column}_{plot_type}.png', dpi=300, bbox_inches='tight')
-        plt.close()
-        return f'distribution_{column}_{plot_type}.png', f"✅ Distribution plot created for {column}"
-    except Exception as e:
-        return None, f"❌ Error: {str(e)}"
-# ===========================================
-# GRADIO INTERFACE SETUP
-# ===========================================
-def create_interface():
-    """Create the main Gradio interface"""
-    with gr.Blocks(title="Advanced CSV Manipulation Tool", theme=gr.themes.Soft()) as demo:
-        gr.HTML("""
-        <div style="text-align: center; padding: 20px;">
-            <h1 style="color: #2e7d32; margin-bottom: 10px;">🔥 Advanced CSV Manipulation Tool</h1>
-            <p style="font-size: 18px; color: #666;">Commercial-ready data processing with advanced analytics</p>
-            <hr style="margin: 20px 0;">
-        </div>
-        """)
-        # Global state variables
-        current_data = gr.State(None)
-        data_info = gr.State({})
-        with gr.Tabs():
-            # ===== FILE UPLOAD TAB =====
-            with gr.TabItem("📁 File Upload & Preview"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        file_upload = gr.File(
-                            label="Upload CSV/Excel/JSON file (Max 1GB)",
-                            file_types=[".csv", ".xlsx", ".xls", ".json"],
-                            file_count="single"
-                        )
-                        preview_rows = gr.Slider(
-                            minimum=0,
-                            maximum=1000,
-                            value=100,
-                            step=50,
-                            label="Preview Rows (0 = All)",
-                            info="Number of rows to display in preview"
-                        )
-                        upload_btn = gr.Button("📊 Load & Analyze Data", variant="primary", size="lg")
-                    with gr.Column(scale=2):
-                        upload_status = gr.Textbox(label="Status", lines=5, interactive=False)
-                        data_info_display = gr.Textbox(label="Data Information", lines=8, interactive=False)
-                data_preview = gr.DataFrame(label="Data Preview", interactive=False)
-                def load_file_handler(file, rows):
-                    if file is None:
-                        return None, "Please upload a file first", "", None, {}
-                    preview, status, info = processor.load_data(file, rows)
-                    info_text = get_data_info(processor.current_df) if processor.current_df is not None else ""
-                    return preview, status, info_text, processor.current_df, info
-                upload_btn.click(
-                    load_file_handler,
-                    inputs=[file_upload, preview_rows],
-                    outputs=[data_preview, upload_status, data_info_display, current_data, data_info]
-                )
-            # ===== VALUE REPLACEMENT TAB =====
-            with gr.TabItem("🔄 Value Replacement"):
-                gr.HTML("<h3>Replace values in one column based on conditions in another column</h3>")
-                with gr.Row():
-                    with gr.Column():
-                        target_col = gr.Dropdown(label="Target Column (to modify)", choices=[], interactive=True)
-                        condition_col = gr.Dropdown(label="Condition Column (to check)", choices=[], interactive=True)
-                        condition_value = gr.Textbox(label="Condition Value", placeholder="Value to match in condition column")
-                        new_value = gr.Textbox(label="New Value", placeholder="Replacement value for target column")
-                        match_type = gr.Radio(
-                            choices=["exact", "contains", "starts_with", "ends_with", "regex"],
-                            value="exact",
-                            label="Match Type"
-                        )
-                        replace_btn = gr.Button("🔄 Replace Values", variant="primary")
-                    with gr.Column():
-                        replace_status = gr.Textbox(label="Status", lines=3, interactive=False)
-                        # Update column choices when data changes
-                        def update_columns(df):
-                            if df is not None:
-                                cols = list(df.columns)
-                                return gr.Dropdown(choices=cols), gr.Dropdown(choices=cols)
-                            return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
-                        current_data.change(
-                            update_columns,
-                            inputs=[current_data],
-                            outputs=[target_col, condition_col]
-                        )
-                def replace_values_handler(df, tcol, ccol, cval, nval, mtype):
-                    if df is None:
-                        return None, "❌ No data loaded", ""
-                    result_df, status = rename_values_conditional(df, tcol, ccol, cval, nval, mtype)
-                    info_text = get_data_info(result_df) if result_df is not None else ""
-                    return result_df, status, info_text
-                replace_btn.click(
-                    replace_values_handler,
-                    inputs=[current_data, target_col, condition_col, condition_value, new_value, match_type],
-                    outputs=[current_data, replace_status, data_info_display]
-                )
-            # ===== CSV CONCATENATION TAB =====
-            with gr.TabItem("📋 CSV Concatenation"):
-                gr.HTML("<h3>Combine multiple CSV files with column selection</h3>")
-                with gr.Row():
-                    with gr.Column():
-                        multi_files = gr.File(
-                            label="Upload Multiple Files",
-                            file_types=[".csv", ".xlsx", ".xls"],
-                            file_count="multiple"
-                        )
-                        selected_columns = gr.Textbox(
-                            label="Columns to Include",
-                            placeholder="column1, column2, column3 (leave empty for all)",
-                            info="Comma-separated list of column names"
-                        )
-                        join_type = gr.Radio(
-                            choices=["outer", "inner"],
-                            value="outer",
-                            label="Join Type",
-                            info="Outer: keep all columns, Inner: only common columns"
-                        )
-                        concat_btn = gr.Button("📋 Concatenate Files", variant="primary")
-                    with gr.Column():
-                        concat_status = gr.Textbox(label="Status", lines=5, interactive=False)
-                def concat_handler(files, cols, jtype):
-                    if not files:
-                        return None, "❌ Please upload files first", ""
-                    result_df, status = concatenate_csvs(files, cols, jtype)
-                    info_text = get_data_info(result_df) if result_df is not None else ""
-                    return result_df, status, info_text
-                concat_btn.click(
-                    concat_handler,
-                    inputs=[multi_files, selected_columns, join_type],
-                    outputs=[current_data, concat_status, data_info_display]
-                )
-            # ===== VALUE COUNTS TAB =====
-            with gr.TabItem("📊 Value Analysis"):
-                gr.HTML("<h3>Analyze value frequencies and distributions</h3>")
-                with gr.Row():
-                    with gr.Column():
-                        analysis_col = gr.Dropdown(label="Column to Analyze", choices=[], interactive=True)
-                        top_n = gr.Slider(minimum=5, maximum=100, value=20, step=5, label="Top N Values")
-                        normalize_counts = gr.Checkbox(label="Show Percentages", value=False)
-                        analyze_btn = gr.Button("📊 Analyze Values", variant="primary")
-                    with gr.Column():
-                        analysis_status = gr.Textbox(label="Status", lines=3, interactive=False)
-                analysis_results = gr.DataFrame(label="Value Counts")
-                # Update analysis column choices
-                current_data.change(
-                    lambda df: gr.Dropdown(choices=list(df.columns) if df is not None else []),
-                    inputs=[current_data],
-                    outputs=[analysis_col]
-                )
-                def analysis_handler(df, col, n, norm):
-                    if df is None:
-                        return None, "❌ No data loaded"
-                    return get_value_counts(df, col, n, norm)
-                analyze_btn.click(
-                    analysis_handler,
-                    inputs=[current_data, analysis_col, top_n, normalize_counts],
-                    outputs=[analysis_results, analysis_status]
-                )
-            # ===== DATA CLEANING TAB =====
-            with gr.TabItem("🧹 Data Cleaning"):
-                gr.HTML("<h3>Clean and preprocess your data</h3>")
-                with gr.Tabs():
-                    # Missing Values
-                    with gr.TabItem("Missing Values"):
-                        with gr.Row():
-                            with gr.Column():
-                                missing_col = gr.Dropdown(label="Column", choices=["ALL"], value="ALL", interactive=True)
-                                missing_method = gr.Radio(
-                                    choices=["drop_rows", "fill_value", "fill_mean", "fill_median", "fill_mode", "forward_fill", "backward_fill"],
-                                    value="drop_rows",
-                                    label="Method"
-                                )
-                                fill_value_input = gr.Textbox(label="Fill Value", placeholder="For fill_value method")
-                                missing_btn = gr.Button("🧹 Handle Missing Values", variant="primary")
-                            with gr.Column():
-                                missing_status = gr.Textbox(label="Status", lines=4, interactive=False)
-                    # Duplicates
-                    with gr.TabItem("Duplicates"):
-                        with gr.Row():
-                            with gr.Column():
-                                duplicate_cols = gr.Textbox(
-                                    label="Columns to Check",
-                                    placeholder="column1, column2 (empty = all columns)"
-                                )
-                                keep_method = gr.Radio(
-                                    choices=["first", "last", "false"],
-                                    value="first",
-                                    label="Keep Method"
-                                )
-                                duplicate_btn = gr.Button("🗑️ Remove Duplicates", variant="primary")
-                            with gr.Column():
-                                duplicate_status = gr.Textbox(label="Status", lines=4, interactive=False)
-                    # Data Filtering
-                    with gr.TabItem("Filtering"):
-                        with gr.Row():
-                            with gr.Column():
-                                filter_col = gr.Dropdown(label="Column", choices=[], interactive=True)
-                                filter_condition = gr.Dropdown(
-                                    choices=["equals", "not_equals", "contains", "not_contains", "starts_with", "ends_with",
-                                           "greater_than", "less_than", "is_null", "is_not_null"],
-                                    value="equals",
-                                    label="Condition"
-                                )
-                                filter_value = gr.Textbox(label="Value")
-                                filter_btn = gr.Button("🔍 Filter Data", variant="primary")
-                            with gr.Column():
-                                filter_status = gr.Textbox(label="Status", lines=4, interactive=False)
-                # Update dropdown choices
-                current_data.change(
-                    lambda df: (
-                        gr.Dropdown(choices=["ALL"] + list(df.columns) if df is not None else ["ALL"]),
-                        gr.Dropdown(choices=list(df.columns) if df is not None else [])
-                    ),
-                    inputs=[current_data],
-                    outputs=[missing_col, filter_col]
-                )
-                # Event handlers
-                missing_btn.click(
-                    lambda df, col, method, val: handle_missing_values(df, col, method, val)[1] if df is not None else "❌ No data",
-                    inputs=[current_data, missing_col, missing_method, fill_value_input],
-                    outputs=[missing_status]
-                ).then(
-                    lambda: processor.current_df,
-                    outputs=[current_data]
-                ).then(
-                    lambda df: get_data_info(df),
-                    inputs=[current_data],
-                    outputs=[data_info_display]
-                )
-                duplicate_btn.click(
-                    lambda df, cols, keep: detect_and_remove_duplicates(df, cols, keep)[1] if df is not None else "❌ No data",
-                    inputs=[current_data, duplicate_cols, keep_method],
-                    outputs=[duplicate_status]
-                ).then(
-                    lambda: processor.current_df,
-                    outputs=[current_data]
-                ).then(
-                    lambda df: get_data_info(df),
-                    inputs=[current_data],
-                    outputs=[data_info_display]
-                )
-                filter_btn.click(
-                    lambda df, col, cond, val: filter_data(df, col, cond, val)[1] if df is not None else "❌ No data",
-                    inputs=[current_data, filter_col, filter_condition, filter_value],
-                    outputs=[filter_status]
-                ).then(
-                    lambda: processor.current_df,
-                    outputs=[current_data]
-                ).then(
-                    lambda df: get_data_info(df),
-                    inputs=[current_data],
-                    outputs=[data_info_display]
-                )
-            # ===== COLUMN OPERATIONS TAB =====
-            with gr.TabItem("⚙️ Column Operations"):
-                gr.HTML("<h3>Perform operations on columns</h3>")
-                with gr.Row():
-                    with gr.Column():
-                        op_type = gr.Dropdown(
-                            choices=["add", "subtract", "multiply", "divide", "concatenate",
-                                   "extract_numbers", "upper", "lower", "title", "length"],
-                            value="add",
-                            label="Operation"
-                        )
-                        op_col1 = gr.Dropdown(label="Primary Column", choices=[], interactive=True)
-                        op_col2 = gr.Dropdown(label="Second Column (optional)", choices=[], interactive=True)
-                        op_constant = gr.Textbox(label="Constant Value (optional)")
-                        op_new_name = gr.Textbox(label="New Column Name")
-                        op_btn = gr.Button("⚙️ Execute Operation", variant="primary")
-                    with gr.Column():
-                        op_status = gr.Textbox(label="Status", lines=5, interactive=False)
-                        # Data type conversion
-                        gr.HTML("<hr><h4>Data Type Conversion</h4>")
-                        convert_col = gr.Dropdown(label="Column", choices=[], interactive=True)
-                        convert_type = gr.Dropdown(
-                            choices=["string", "integer", "float", "datetime", "boolean", "category"],
-                            value="string",
-                            label="Target Type"
-                        )
-                        convert_btn = gr.Button("🔄 Convert Type", variant="secondary")
-                        convert_status = gr.Textbox(label="Conversion Status", lines=2, interactive=False)
-                # Update column choices
-                current_data.change(
-                    lambda df: (
-                        gr.Dropdown(choices=list(df.columns) if df is not None else []),
-                        gr.Dropdown(choices=list(df.columns) if df is not None else []),
-                        gr.Dropdown(choices=list(df.columns) if df is not None else [])
-                    ),
-                    inputs=[current_data],
-                    outputs=[op_col1, op_col2, convert_col]
-                )
-                # Event handlers
-                def operation_handler(df, op, col1, col2, const, new_name):
-                    if df is None:
-                        return None, "❌ No data loaded", ""
-                    result_df, status = perform_column_operations(df, op, col1, col2, new_name, const)
-                    info_text = get_data_info(result_df) if result_df is not None else ""
-                    return result_df, status, info_text
-                op_btn.click(
-                    operation_handler,
-                    inputs=[current_data, op_type, op_col1, op_col2, op_constant, op_new_name],
-                    outputs=[current_data, op_status, data_info_display]
-                )
-                def convert_handler(df, col, target_type):
-                    if df is None:
-                        return None, "❌ No data loaded", ""
-                    result_df, status = convert_data_types(df, col, target_type)
-                    info_text = get_data_info(result_df) if result_df is not None else ""
-                    return result_df, status, info_text
-                convert_btn.click(
-                    convert_handler,
-                    inputs=[current_data, convert_col, convert_type],
-                    outputs=[current_data, convert_status, data_info_display]
-                )
-            # ===== STATISTICS TAB =====
-            with gr.TabItem("📈 Statistics & Analysis"):
-                gr.HTML("<h3>Statistical analysis and insights</h3>")
-                with gr.Row():
-                    with gr.Column():
-                        stats_btn = gr.Button("📊 Generate Statistical Summary", variant="primary")
-                        corr_btn = gr.Button("🔗 Create Correlation Matrix", variant="secondary")
-                        # Distribution plots
-                        gr.HTML("<hr><h4>Distribution Analysis</h4>")
-                        dist_col = gr.Dropdown(label="Column", choices=[], interactive=True)
-                        plot_type = gr.Radio(choices=["histogram", "density"], value="histogram", label="Plot Type")
-                        dist_btn = gr.Button("📈 Create Distribution Plot", variant="secondary")
-                    with gr.Column():
-                        stats_status = gr.Textbox(label="Status", lines=3, interactive=False)
-                        plot_output = gr.Image(label="Visualization")
-                stats_results = gr.DataFrame(label="Statistical Summary")
-                # Update column choices
-                current_data.change(
-                    lambda df: gr.Dropdown(choices=list(df.select_dtypes(include=[np.number]).columns) if df is not None else []),
-                    inputs=[current_data],
-                    outputs=[dist_col]
-                )
-                # Event handlers
-                stats_btn.click(
-                    lambda df: generate_statistical_summary(df) if df is not None else (None, "❌ No data"),
-                    inputs=[current_data],
-                    outputs=[stats_results, stats_status]
-                )
-                corr_btn.click(
-                    lambda df: create_correlation_matrix(df) if df is not None else (None, "❌ No data"),
-                    inputs=[current_data],
-                    outputs=[plot_output, stats_status]
-                )
-                dist_btn.click(
-                    lambda df, col, ptype: create_distribution_plots(df, col, ptype) if df is not None else (None, "❌ No data"),
-                    inputs=[current_data, dist_col, plot_type],
-                    outputs=[plot_output, stats_status]
-                )
-            # ===== EXPORT TAB =====
-            with gr.TabItem("💾 Export & Download"):
-                gr.HTML("<h3>Export your processed data</h3>")
-                with gr.Row():
-                    with gr.Column():
-                        export_format = gr.Radio(
-                            choices=["csv", "excel", "json"],
-                            value="csv",
-                            label="Export Format"
-                        )
-                        export_filename = gr.Textbox(
-                            label="Filename (without extension)",
-                            value="processed_data",
-                            placeholder="Enter filename"
-                        )
-                        export_btn = gr.Button("💾 Create Download File", variant="primary", size="lg")
-                    with gr.Column():
-                        export_status = gr.Textbox(label="Status", lines=3, interactive=False)
-                        download_file = gr.File(label="Download", visible=False)
-                # History and Undo/Redo
-                with gr.Row():
-                    with gr.Column():
-                        gr.HTML("<hr><h4>History & Undo Operations</h4>")
-                        undo_btn = gr.Button("↶ Undo Last Operation", variant="secondary")
-                        reset_btn = gr.Button("🔄 Reset to Original", variant="secondary")
-                    with gr.Column():
-                        history_status = gr.Textbox(label="History Status", lines=3, interactive=False)
-                def export_handler(df, fmt, filename):
-                    if df is None:
-                        return None, "❌ No data to export", gr.File(visible=False)
-                    try:
-                        file_data, file_name = create_download_file(df, fmt, filename)
-                        # Save file temporarily
-                        with open(file_name, 'wb' if fmt == 'excel' else 'w', encoding=None if fmt == 'excel' else 'utf-8') as f:
-                            if fmt == 'excel':
-                                f.write(file_data)
-                            else:
-                                f.write(file_data)
-                        return file_name, f"✅ File created successfully: {file_name}", gr.File(value=file_name, visible=True)
-                    except Exception as e:
-                        return None, f"❌ Export error: {str(e)}", gr.File(visible=False)
-                export_btn.click(
-                    export_handler,
-                    inputs=[current_data, export_format, export_filename],
-                    outputs=[download_file, export_status, download_file]
-                )
-                def undo_handler():
-                    result_df, status = processor.undo_operation()
-                    info_text = get_data_info(result_df) if result_df is not None else ""
-                    return result_df, status, info_text
-                def reset_handler():
-                    result_df, status = processor.reset_to_original()
-                    info_text = get_data_info(result_df) if result_df is not None else ""
-                    return result_df, status, info_text
-                undo_btn.click(
-                    undo_handler,
-                    outputs=[current_data, history_status, data_info_display]
-                )
-                reset_btn.click(
-                    reset_handler,
-                    outputs=[current_data, history_status, data_info_display]
-                )
-        # Footer
-        gr.HTML("""
-        <div style="text-align: center; padding: 20px; margin-top: 30px; border-top: 1px solid #ddd;">
-            <p style="color: #666; font-size: 14px;">
-                🚀 <strong>Advanced CSV Manipulation Tool</strong> |
-                Commercial-ready data processing with enterprise features |
-                Built with Gradio & Python
-            </p>
-        </div>
-        """)
-    return demo
 if __name__ == "__main__":
-    # Create and launch the interface
-    demo = create_interface()
-    demo.launch(
-        share=True,
-        inbrowser=True,
-        server_name="0.0.0.0",
-        server_port=7860,
-        max_file_size="1gb"
-    )

 import os
+from huggingface_hub import hf_hub_download
+import importlib.util
+import sys
+# Get HF token from environment (set in Space secrets)
+HF_TOKEN = os.environ.get("HF_TOKEN")
+# Your private model/repo details
+REPO_ID = "limitedonly41/cv_all_src"
+FILENAME = "csv_manipulations_ui.py"
+def load_and_run():
+    try:
+        # Download the code file from private repo
+        file_path = hf_hub_download(
+            repo_id=REPO_ID,
+            filename=FILENAME,
+            token=HF_TOKEN,
+            repo_type="model"
+        )
+        # Load the module dynamically
+        spec = importlib.util.spec_from_file_location("csv_manipulations_module", file_path)
+        module = importlib.util.module_from_spec(spec)
+        sys.modules["csv_manipulations_module"] = module
+        spec.loader.exec_module(module)
+        # Try to find and launch the interface
+        if hasattr(module, 'interface'):
+            print("Found 'interface' object, launching...")
+            module.interface.launch()
+        elif hasattr(module, 'demo'):
+            print("Found 'demo' object, launching...")
+            module.demo.launch()
+        elif hasattr(module, 'create_interface'):
+            print("Found 'create_interface' function, creating and launching...")
+            interface = module.create_interface()
+            interface.launch()
         else:
+            print("Error: Could not find 'interface', 'demo', or 'create_interface' in the loaded module")
+            print("Available attributes:", dir(module))
     except Exception as e:
+        print(f"Error loading code: {e}")
+        import traceback
+        traceback.print_exc()
+        print("\nMake sure:")
+        print("1. HF_TOKEN is set in Space secrets")
+        print("2. REPO_ID points to your private repository")
+        print("3. The code file exists in the repository")
 if __name__ == "__main__":
+    load_and_run()