import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from smolagents import CodeAgent, DuckDuckGoSearchTool
import warnings
warnings.filterwarnings('ignore')

# Default path of the CSV file to analyse. Edit this value, or call
# set_csv_file_path() at runtime, to point at a different file.
csv_file_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"

def set_csv_file_path(new_path):
    """Store ``new_path`` as the module-level default CSV path and report it.

    Args:
        new_path: The path to remember in the global ``csv_file_path``.
    """
    # Rebind the module global through the namespace dict instead of a
    # ``global`` statement; the effect on the module is the same.
    globals()["csv_file_path"] = new_path
    print(f"✅ CSV file path updated to: {new_path}")

def get_csv_file_path():
    """Return whatever path is currently held in the global ``csv_file_path``."""
    current_path = csv_file_path
    return current_path

class EnhancedDataExplorer:
    """Interactive CSV explorer with optional SmoLagent AI analysis.

    Attributes (all set in ``__init__``):
        csv_path: Path of the CSV file that ``load_data`` reads.
        df: The loaded ``pandas.DataFrame``; ``None`` until ``load_data``
            succeeds or after ``change_csv_file`` switches files.
        agent: The smolagents ``CodeAgent``; ``None`` until ``setup_agent``
            manages to configure a model.
    """

    class _OllamaModel:
        """Callable adapter that forwards prompts to a local Ollama model.

        NOTE(review): responses and errors are both returned as plain
        strings; confirm that this matches what the installed smolagents
        version expects from a model object.
        """

        def __init__(self, model_name="llama2", client=None, timeout=30):
            """Remember the model name, the Ollama client and the timeout.

            Args:
                model_name: Name passed to ``generate`` as ``model``.
                client: Object exposing ``generate(model=..., prompt=...)``;
                    the ``ollama`` package is imported when omitted.
                timeout: Seconds to wait for one generation.
            """
            self.model_name = model_name
            if client is None:
                # Deferred import: ollama is an optional dependency.
                import ollama as client
            self.ollama = client
            self.timeout = timeout

        @staticmethod
        def _to_prompt(messages):
            """Flatten a str, a list of messages, or any object into one prompt.

            Dict messages contribute their ``'content'`` value (the whole
            dict when the key is missing). Everything is coerced to ``str``
            so that joining never fails on non-string content.
            """
            if isinstance(messages, str):
                return messages
            if not isinstance(messages, list):
                return str(messages)

            def text_of(msg):
                if not isinstance(msg, dict):
                    return str(msg)
                content = msg.get('content', msg)
                if isinstance(content, list):
                    # presumably a list of {"type": "text", "text": ...}
                    # parts — TODO confirm against the smolagents version in use
                    return "\n".join(
                        str(part.get('text', part)) if isinstance(part, dict) else str(part)
                        for part in content
                    )
                return str(content)

            return "\n".join(text_of(msg) for msg in messages)

        def __call__(self, messages, **kwargs):
            """Generate a response for ``messages``.

            Returns:
                The model's response text, or a string starting with
                ``"Error"`` when generation fails, times out or is empty.
            """
            try:
                prompt = self._to_prompt(messages)

                # A worker thread is used for the timeout so that the same
                # code path also works on Windows.
                import threading
                result = {'response': None, 'error': None}

                def generate_with_timeout():
                    try:
                        response = self.ollama.generate(model=self.model_name, prompt=prompt)
                        result['response'] = response['response']
                    except Exception as e:
                        result['error'] = str(e)

                # Daemon thread: a stuck generation must not block exit.
                thread = threading.Thread(target=generate_with_timeout, daemon=True)
                thread.start()
                thread.join(timeout=self.timeout)

                if thread.is_alive():
                    return (f"Error: Ollama response timed out after {self.timeout} seconds. "
                            "Try a simpler query.")
                if result['error']:
                    return f"Error generating response with Ollama: {result['error']}"
                if result['response']:
                    return result['response']
                return "Error: No response received from Ollama"

            except Exception as e:
                return f"Error generating response with Ollama: {e}"

        def generate(self, messages, **kwargs):
            """Alternative method name that might be expected"""
            return self.__call__(messages, **kwargs)

    def __init__(self, csv_path=None):
        """Create an explorer without loading data or configuring AI.

        Args:
            csv_path: CSV file to explore. When omitted, the module-level
                ``csv_file_path`` is read at call time, so a preceding
                ``set_csv_file_path()`` call is honoured.
        """
        self.csv_path = csv_file_path if csv_path is None else csv_path
        self.df = None
        self.agent = None
        print("🚀 Enhanced Data Explorer initialized!")
        print("💡 AI setup will be done when first needed (option 4)")
        # Don't call setup_agent() here to avoid hanging

    @staticmethod
    def _build_agent(model):
        """Wrap ``model`` in a CodeAgent equipped with the DuckDuckGo search tool."""
        return CodeAgent(
            tools=[DuckDuckGoSearchTool()],
            model=model
        )

    def _try_ollama(self):
        """Configure the agent with a local Ollama model; return True on success."""
        try:
            print("🔄 Attempting Ollama setup...")
            import ollama
            # Quick test if Ollama is available (without generation test)
            models = ollama.list()
            if models and 'models' in models and len(models['models']) > 0:
                print("✅ Ollama is running and accessible!")
                print(f"📦 Found model: {models['models'][0].get('name', 'llama2')}")
            else:
                raise RuntimeError("No models found")

            self.agent = self._build_agent(self._OllamaModel("llama2", client=ollama))
            print("✅ SmoLagent configured successfully with Ollama!")
            print("💡 Local AI model ready for analysis (with 30s timeout)")
            return True
        except Exception as e:
            print(f"⚠️  Ollama setup failed: {e}")
            print("💡 Make sure Ollama is running: ollama serve")
            return False

    def _try_openai(self):
        """Configure the agent with OpenAI when OPENAI_API_KEY is set; return True on success."""
        try:
            print("🔄 Checking for OpenAI API key...")
            from smolagents import OpenAIModel
            if os.getenv('OPENAI_API_KEY'):
                self.agent = self._build_agent(OpenAIModel(model_id="gpt-3.5-turbo"))
                print("✅ SmoLagent configured successfully with OpenAI!")
                return True
            print("⚠️  OpenAI API key not found")
        except Exception as e:
            print(f"⚠️  OpenAI setup failed: {e}")
        return False

    def _try_transformers(self):
        """Configure the agent with a small HuggingFace model; return True on success."""
        try:
            print("🔄 Attempting HuggingFace Transformers model...")
            from smolagents import TransformersModel
            model = TransformersModel(model_id="microsoft/DialoGPT-small")  # Smaller model
            self.agent = self._build_agent(model)
            print("✅ SmoLagent configured successfully with HuggingFace model!")
            print("💡 Note: First use may take time to download model")
            return True
        except Exception as e:
            print(f"⚠️  HuggingFace setup failed: {e}")
            print("   Make sure transformers are installed: pip install 'smolagents[transformers]'")
            return False

    def setup_agent(self):
        """Configure ``self.agent`` with the first model backend that works.

        Backends are tried in order: local Ollama, OpenAI (needs the
        ``OPENAI_API_KEY`` environment variable), then a HuggingFace
        Transformers model. When none succeeds, ``self.agent`` is set to
        ``None`` and setup hints are printed. This method does not raise.
        """
        print("🤖 Setting up SmoLagent AI agent...")
        print("🔄 Trying multiple model configurations...")

        try:
            for attempt in (self._try_ollama, self._try_openai, self._try_transformers):
                if attempt():
                    return

            # If all models fail
            print("\n❌ No AI model could be configured.")
            print("📋 To fix this:")
            print("   1. For local AI: Install Ollama and run 'ollama serve'")
            print("   2. For OpenAI: Set OPENAI_API_KEY environment variable")
            print("   3. For basic use: pip install 'smolagents[transformers]'")
            print("\n✅ You can still use all non-AI features!")
            self.agent = None

        except Exception as e:
            # Last-resort guard: the menu must survive any setup failure.
            print(f"⚠️  Agent setup failed: {e}")
            print("💡 Try using: python fixed_upload.py")
            self.agent = None

    def configure_model_helper(self):
        """Print setup instructions for a model backend chosen by the user.

        Returns:
            The stripped text the user typed at the prompt.
        """
        print("\n🤖 AI Model Configuration Helper")
        print("=" * 40)
        print("1. OpenAI (Recommended - Most capable)")
        print("2. Ollama (Free - Runs locally)")
        print("3. Hugging Face (Free - API based)")
        print("4. Skip AI features")

        choice = input("Choose your model (1-4): ").strip()

        if choice == "1":
            print("\n📝 OpenAI Setup:")
            print("1. Get API key from: https://platform.openai.com/")
            print("2. Set environment variable: OPENAI_API_KEY=your_key")
            print("3. Or edit the setup_agent() method with your key")

        elif choice == "2":
            print("\n📝 Ollama Setup:")
            print("1. Install Ollama from: https://ollama.ai/")
            print("2. Run: ollama pull llama2")
            print("3. Start server: ollama serve")
            print("4. Script is already configured to use SmoLagents' native OllamaModel")
            print("5. Just make sure Ollama is running and try the AI analysis!")

        elif choice == "3":
            print("\n📝 Hugging Face Setup:")
            print("1. Create account at: https://huggingface.co/")
            print("2. Get token from: https://huggingface.co/settings/tokens")
            print("3. Set environment variable: HF_TOKEN=your_token")
            print("4. Uncomment HF lines in setup_agent() method")

        elif choice == "4":
            print("✅ You can still use all non-AI features!")

        print("\n💡 Tip: Set environment variables in your system or use a .env file")
        return choice

    def load_data(self):
        """Read ``self.csv_path`` into ``self.df`` and print an overview.

        Returns:
            The loaded DataFrame, or ``None`` when the path is not an
            existing file or reading it fails (the error is printed).
        """
        print(f"\n📁 Loading data from: {self.csv_path}")

        try:
            # isfile (not exists) so that a directory path is reported as
            # missing instead of failing later inside read_csv.
            if not os.path.isfile(self.csv_path):
                print(f"❌ Error: File not found at {self.csv_path}")
                print("💡 Use option 7 to change the file path")
                return None

            # Read the CSV file into a DataFrame
            self.df = pd.read_csv(self.csv_path)

            print("=== DATA LOADED SUCCESSFULLY ===")
            print(f"📁 File: {os.path.basename(self.csv_path)}")
            print(f"📊 Dataset shape: {self.df.shape}")
            print(f"📋 Columns: {list(self.df.columns)}")
            print("\n=== FIRST 5 ROWS ===")
            print(self.df.head())

            print("\n=== DATA TYPES ===")
            print(self.df.dtypes)

            print("\n=== MISSING VALUES ===")
            print(self.df.isnull().sum())

            print("\n=== BASIC STATISTICS ===")
            print(self.df.describe())

            return self.df

        except Exception as e:
            print(f"Error loading data: {str(e)}")
            return None

    def create_visualizations(self):
        """Plot histograms and a correlation heatmap of the numeric columns.

        Saves ``data_distributions.png`` and, when there is more than one
        numeric column, ``correlation_heatmap.png`` in the current working
        directory, and shows each figure. Every figure is closed afterwards
        so repeated calls do not accumulate open figures. Errors are printed
        rather than raised.
        """
        if self.df is None:
            print("❌ No data loaded. Run load_data() first.")
            return

        try:
            # Set up plotting style
            plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'default')

            # Get numeric columns
            numeric_cols = self.df.select_dtypes(include=[np.number]).columns

            if len(numeric_cols) == 0:
                print("⚠️  No numeric columns found for visualization")
                return

            print(f"\n=== CREATING VISUALIZATIONS FOR {len(numeric_cols)} NUMERIC COLUMNS ===")

            # 1. Distribution plots, at most three per row
            n_cols = min(3, len(numeric_cols))
            n_rows = (len(numeric_cols) + n_cols - 1) // n_cols  # ceiling division

            fig = plt.figure(figsize=(15, 5*n_rows))
            try:
                for i, col in enumerate(numeric_cols):
                    ax = fig.add_subplot(n_rows, n_cols, i+1)
                    self.df[col].hist(ax=ax, bins=30, alpha=0.7, edgecolor='black')
                    ax.set_title(f'Distribution of {col}')
                    ax.set_xlabel(col)
                    ax.set_ylabel('Frequency')

                fig.tight_layout()
                fig.savefig('data_distributions.png', dpi=300, bbox_inches='tight')
                plt.show()
            finally:
                plt.close(fig)
            print("✅ Distribution plots saved as 'data_distributions.png'")

            # 2. Correlation heatmap (if more than 1 numeric column)
            if len(numeric_cols) > 1:
                fig = plt.figure(figsize=(12, 8))
                try:
                    ax = fig.add_subplot(1, 1, 1)
                    correlation_matrix = self.df[numeric_cols].corr()
                    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
                                square=True, linewidths=0.5, ax=ax)
                    ax.set_title('Correlation Heatmap')
                    fig.tight_layout()
                    fig.savefig('correlation_heatmap.png', dpi=300, bbox_inches='tight')
                    plt.show()
                finally:
                    plt.close(fig)
                print("✅ Correlation heatmap saved as 'correlation_heatmap.png'")

        except Exception as e:
            print(f"❌ Error creating visualizations: {e}")

    def analyze_data_quality(self):
        """Report missing values, duplicate rows and memory usage.

        Returns:
            A DataFrame with one row per column of ``self.df`` and the
            columns ``Column``, ``Missing_Count``, ``Missing_Percentage``
            and ``Data_Type``; ``None`` when no data is loaded.
        """
        if self.df is None:
            print("❌ No data loaded. Run load_data() first.")
            return

        print("\n=== DATA QUALITY ANALYSIS ===")

        # Missing data analysis
        missing_data = self.df.isnull().sum()
        total_rows = len(self.df)
        if total_rows:
            missing_percentage = (missing_data / total_rows) * 100
        else:
            # A zero-row frame has nothing missing; avoid 0/0 -> NaN.
            missing_percentage = missing_data.astype(float)

        quality_report = pd.DataFrame({
            'Column': self.df.columns,
            'Missing_Count': missing_data.values,
            'Missing_Percentage': missing_percentage.values,
            'Data_Type': self.df.dtypes.values
        })

        print("Missing Data Summary:")
        print(quality_report[quality_report['Missing_Count'] > 0])

        # Duplicate rows
        duplicates = self.df.duplicated().sum()
        print(f"\nDuplicate rows: {duplicates}")

        # Memory usage
        memory_usage = self.df.memory_usage(deep=True).sum() / 1024**2
        print(f"Memory usage: {memory_usage:.2f} MB")

        return quality_report

    def _build_data_context(self, query):
        """Build the text prompt describing ``self.df`` followed by ``query``.

        The prompt is assembled line by line so that the multi-line tables
        produced by ``to_string()`` keep their column alignment.
        """
        return "\n".join([
            "Dataset Analysis Request:",
            f"- Dataset Shape: {self.df.shape}",
            f"- Columns: {list(self.df.columns)}",
            f"- Data Types: {dict(self.df.dtypes)}",
            f"- Missing Values: {dict(self.df.isnull().sum())}",
            "",
            "Sample Data:",
            self.df.head(3).to_string(),
            "",
            "Statistical Summary:",
            self.df.describe().to_string(),
            "",
            f"User Question: {query}",
        ])

    def ai_analysis(self, query):
        """Ask the configured agent a question about the loaded data.

        Args:
            query: The user's natural-language question.

        Returns:
            Whatever ``self.agent.run`` returns, or ``None`` when the agent
            or the data is missing or the analysis raises (the error is
            printed together with troubleshooting hints).
        """
        print(f"\n🔍 Checking prerequisites for AI analysis...")

        if self.agent is None:
            print("❌ AI agent not configured. Please set up SmoLagent first.")
            print("💡 Try running one of these alternatives:")
            print("   • python fixed_upload.py")
            print("   • python quick_ai_demo.py")
            return

        if self.df is None:
            print("❌ No data loaded. Please load data first!")
            print("💡 Choose option 1 in the main menu to load your data.")
            return

        print("✅ Data loaded successfully")
        print("✅ AI agent configured")
        print(f"✅ Processing query: '{query}'")

        try:
            # Prepare context about the dataset
            data_context = self._build_data_context(query)

            print(f"\n🤖 SmoLagent is analyzing your data...")
            print("⏳ This may take 5-15 seconds...")

            # Use the agent with the data context and query
            response = self.agent.run(data_context)

            print("\n" + "="*60)
            print("✅ AI ANALYSIS COMPLETE")
            print("="*60)
            print(response)
            print("="*60)
            return response

        except Exception as e:
            print(f"\n❌ AI analysis failed: {e}")
            print("\n💡 Troubleshooting suggestions:")
            print("   • Check your internet connection")
            print("   • Try: python fixed_upload.py")
            print("   • Use basic analysis features (options 2-3)")
            return None

    def check_status(self):
        """Print whether the CSV file exists, data is loaded and AI is configured."""
        print("\n🔍 SYSTEM STATUS CHECK")
        print("="*40)

        # Check file path
        print(f"📁 CSV File: {self.csv_path}")
        if os.path.isfile(self.csv_path):
            print(f"✅ File exists: {os.path.basename(self.csv_path)}")
        else:
            print(f"❌ File not found")

        # Check data status
        if self.df is not None:
            print(f"✅ Data loaded: {self.df.shape[0]} rows, {self.df.shape[1]} columns")
            print(f"📋 Columns: {list(self.df.columns)}")
        else:
            print("❌ No data loaded")

        # Check AI agent status
        if self.agent is not None:
            print("✅ AI agent configured and ready")
        else:
            print("❌ AI agent not configured")

        print("="*40)

    def change_csv_file(self, new_path=None):
        """Switch to another CSV file and discard the currently loaded data.

        Args:
            new_path: Path of the new file; the user is prompted when omitted.

        The path is accepted only when it names an existing regular file;
        otherwise the current path and data are left untouched.
        """
        if new_path is None:
            print(f"\n📁 Current file path: {self.csv_path}")
            new_path = input("Enter new CSV file path: ").strip()

        # isfile (not exists): a directory must not replace a valid path.
        if os.path.isfile(new_path):
            self.csv_path = new_path
            self.df = None  # Clear current data
            print(f"✅ CSV file path updated to: {self.csv_path}")
            print("💡 Data cleared. Use option 1 to load the new file.")
        else:
            print(f"❌ File not found: {new_path}")
            print("💡 Please check the file path and try again.")

    def _run_ai_menu(self):
        """Handle menu option 4: set up AI on demand, then run one query."""
        if self.df is None:
            print("\n❌ No data loaded. Please load data first!")
            print("💡 Choose option 1 to load your data before using AI analysis.")
            input("\nPress Enter to continue...")
            return

        # Setup AI on demand if not already done
        if self.agent is None:
            print("\n🤖 Setting up AI for first use...")
            self.setup_agent()

        if self.agent is None:
            print("\n❌ AI features not available. Please configure a model first.")
            print("Edit the setup_agent() method to add your API keys.")
            self.configure_model_helper()
            return

        print("\n🤖 AI Analysis - Ask me anything about your data!")
        print("Example queries:")
        print("  • 'What are the main trends in this data?'")
        print("  • 'Find any outliers or anomalies'")
        print("  • 'Suggest data quality improvements'")
        print("  • 'Perform correlation analysis'")
        print("  • 'Identify seasonal patterns'")
        print("  • 'Recommend preprocessing steps'")

        query = input("\n💬 Your question: ").strip()
        if query:
            self.ai_analysis(query)
            # Wait for user to read the results before returning to menu
            input("\n📋 Press Enter to return to main menu...")
        else:
            print("❌ No question entered.")
            input("\nPress Enter to continue...")

    def _show_summary(self):
        """Handle menu option 5: print shape, columns and memory of the data."""
        if self.df is not None:
            print(f"\n📊 Dataset Summary:")
            print(f"Shape: {self.df.shape}")
            print(f"Columns: {list(self.df.columns)}")
            print(f"Memory: {self.df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
        else:
            print("❌ No data loaded.")

    def interactive_menu(self):
        """Run the text menu until the user picks Exit.

        Ctrl+C or end-of-input at the main prompt also leaves the menu
        instead of ending the program with a traceback.
        """
        # Show initial status
        self.check_status()

        # Options 1-7 map to one callable each; option 8 ends the loop.
        actions = {
            '1': self.load_data,
            '2': self.create_visualizations,
            '3': self.analyze_data_quality,
            '4': self._run_ai_menu,
            '5': self._show_summary,
            '6': self.check_status,
            '7': self.change_csv_file,
        }

        while True:
            print("\n" + "="*50)
            print("🤖 ENHANCED DATA EXPLORER WITH AI")
            print("="*50)
            print("1. Load and explore data")
            print("2. Create visualizations")
            print("3. Analyze data quality")
            print("4. AI-powered analysis")
            print("5. Show data summary")
            print("6. Check system status")
            print("7. Change CSV file path")
            print("8. Exit")
            print("="*50)
            print(f"📁 Current file: {os.path.basename(self.csv_path)}")

            try:
                choice = input("Enter your choice (1-8): ").strip()
            except (EOFError, KeyboardInterrupt):
                print("\n👋 Goodbye!")
                break

            if choice == '8':
                print("👋 Goodbye!")
                break

            action = actions.get(choice)
            if action is None:
                print("❌ Invalid choice. Please try again.")
            else:
                action()

def load_and_explore_data():
    """Read the CSV at the module-level ``csv_file_path`` and print an overview.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the file does not
        exist or reading it fails (the error is printed).
    """
    print(f"\n📁 Loading data from: {csv_file_path}")

    try:
        # Bail out early when nothing exists at the configured path
        found = os.path.exists(csv_file_path)
        if not found:
            print(f"❌ Error: File not found at {csv_file_path}")
            print("💡 Update the csv_file_path variable at the top of this file")
            return None

        # Parse the CSV into a DataFrame
        frame = pd.read_csv(csv_file_path)

        header_lines = (
            "=== DATA LOADED SUCCESSFULLY ===",
            f"📁 File: {os.path.basename(csv_file_path)}",
            f"📊 Dataset shape: {frame.shape}",
            f"📋 Columns: {list(frame.columns)}",
        )
        for line in header_lines:
            print(line)

        # Each section prints its heading first, then computes its content.
        sections = (
            ("\n=== FIRST 5 ROWS ===", lambda: frame.head()),
            ("\n=== DATA TYPES ===", lambda: frame.dtypes),
            ("\n=== MISSING VALUES ===", lambda: frame.isnull().sum()),
            ("\n=== BASIC STATISTICS ===", lambda: frame.describe()),
        )
        for heading, compute in sections:
            print(heading)
            print(compute())

        return frame

    except Exception as err:
        print(f"Error loading data: {err!s}")
        return None

if __name__ == "__main__":
    # Script entry point: let the user pick between the plain loader and
    # the interactive explorer.
    for banner_line in (
        "🚀 Enhanced Data Explorer with SmoLagent AI",
        "Choose your preferred mode:",
        "1. Original function (load_and_explore_data)",
        "2. Enhanced interactive mode with AI",
    ):
        print(banner_line)

    mode = input("Enter mode (1 or 2): ").strip()

    if mode == "2":
        # Enhanced mode with AI capabilities
        explorer = EnhancedDataExplorer()
        explorer.interactive_menu()
    else:
        # Anything other than "1" is invalid and falls back to the
        # original function after a notice.
        if mode != "1":
            print("Invalid choice. Running original function...")
        df = load_and_explore_data()