cosmoruler commited on
Commit
c69ba8c
Β·
1 Parent(s): 5269c7e

problems fixed

Browse files
ENHANCEMENT_GUIDE.md ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Enhanced Data Explorer Setup Guide
2
+
3
+ ## πŸŽ‰ Your script has been enhanced with SmoLagent AI capabilities!
4
+
5
+ ### What's New:
6
+
7
+ 1. **AI-Powered Analysis**: Ask natural language questions about your data
8
+ 2. **Enhanced Visualizations**: Automatic correlation heatmaps and distribution plots
9
+ 3. **Data Quality Analysis**: Comprehensive data quality reporting
10
+ 4. **Interactive Menu**: User-friendly menu system
11
+ 5. **Preserved Original**: Your original function is still available
12
+
13
+ ### How to Use:
14
+
15
+ #### Option 1: Original Function (unchanged)
16
+
17
+ ```bash
18
+ python upload.py
19
+ # Choose option 1 when prompted
20
+ ```
21
+
22
+ #### Option 2: Enhanced Interactive Mode
23
+
24
+ ```bash
25
+ python upload.py
26
+ # Choose option 2 when prompted
27
+ ```
28
+
29
+ #### Option 3: Demo Script
30
+
31
+ ```bash
32
+ python demo_enhanced.py
33
+ ```
34
+
35
+ ### Setting Up AI Features:
36
+
37
+ #### For OpenAI (Recommended):
38
+
39
+ 1. Get API key from: https://platform.openai.com/
40
+ 2. Edit `upload.py`, uncomment lines in `setup_agent()` method:
41
+ ```python
42
+ model = OpenAIServerModel(model_id="gpt-3.5-turbo", api_key="your-api-key-here")
43
+ self.agent = CodeAgent(tools=[PythonCodeTool(), DuckDuckGoSearchTool()], model=model)
44
+ ```
45
+
46
+ #### For Ollama (Free, Local):
47
+
48
+ 1. Install Ollama from: https://ollama.ai/
49
+ 2. Run: `ollama pull llama2`
50
+ 3. Start: `ollama serve`
51
+ 4. Uncomment Ollama lines in `setup_agent()` method
52
+
53
+ #### For Hugging Face (Free, API):
54
+
55
+ 1. Get token from: https://huggingface.co/settings/tokens
56
+ 2. Set environment variable: `HF_TOKEN=your_token`
57
+ 3. Uncomment HF lines in `setup_agent()` method
58
+
59
+ ### Example AI Queries:
60
+
61
+ Once configured, you can ask:
62
+
63
+ - "What are the main trends in this data?"
64
+ - "Find any outliers or anomalies"
65
+ - "Suggest data quality improvements"
66
+ - "Perform correlation analysis"
67
+ - "Identify seasonal patterns"
68
+ - "Recommend preprocessing steps"
69
+
70
+ ### Features Available Without AI:
71
+
72
+ Even without AI configuration, you get:
73
+
74
+ - βœ… Data loading and exploration (original functionality)
75
+ - βœ… Statistical summaries
76
+ - βœ… Data visualization (histograms, correlation heatmaps)
77
+ - βœ… Data quality analysis
78
+ - βœ… Missing value analysis
79
+
80
+ ### Files Structure:
81
+
82
+ - `upload.py` - Your enhanced main script
83
+ - `demo_enhanced.py` - Demonstration script
84
+ - `app.py` - Web interface (Gradio)
85
+ - `config.py` - Configuration file
86
+ - `requirements.txt` - Dependencies
87
+
88
+ ### Quick Start:
89
+
90
+ 1. **Test the script**: `python upload.py`
91
+ 2. **Try enhanced mode**: Choose option 2
92
+ 3. **Configure AI**: Edit `setup_agent()` method
93
+ 4. **Ask AI questions**: Use menu option 4
94
+
95
+ πŸš€ **Your original functionality is preserved - nothing is broken!**
__pycache__/upload.cpython-313.pyc ADDED
Binary file (19.6 kB). View file
 
auto_demo.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Auto Demo - Run Enhanced Data Explorer
3
+ =====================================
4
+
5
+ This script automatically demonstrates the enhanced data explorer
6
+ """
7
+
8
+ from upload import EnhancedDataExplorer, load_and_explore_data
9
+ import os
10
+
11
+ def auto_demo():
12
+ """Automatically run the enhanced data explorer demo"""
13
+ print("πŸš€ STARTING AUTO DEMO - ENHANCED DATA EXPLORER")
14
+ print("=" * 50)
15
+
16
+ # First check if CSV file exists
17
+ csv_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"
18
+
19
+ if not os.path.exists(csv_path):
20
+ print(f"❌ CSV file not found at: {csv_path}")
21
+ print("πŸ“ Using demo data instead...")
22
+
23
+ # Create some demo data
24
+ import pandas as pd
25
+ import numpy as np
26
+
27
+ demo_data = pd.DataFrame({
28
+ 'timestamp': pd.date_range('2024-01-01', periods=1000, freq='H'),
29
+ 'response_time': np.random.exponential(0.5, 1000),
30
+ 'status_code': np.random.choice([200, 404, 500], 1000, p=[0.8, 0.15, 0.05]),
31
+ 'user_count': np.random.poisson(10, 1000),
32
+ 'error_rate': np.random.beta(2, 20, 1000),
33
+ 'server_id': np.random.choice(['server1', 'server2', 'server3'], 1000)
34
+ })
35
+
36
+ demo_csv_path = "demo_data.csv"
37
+ demo_data.to_csv(demo_csv_path, index=False)
38
+ print(f"βœ… Demo data created: {demo_csv_path}")
39
+
40
+ # Update the path for the explorer
41
+ csv_path = demo_csv_path
42
+
43
+ # Initialize the enhanced explorer
44
+ print("\nπŸ€– Initializing Enhanced Data Explorer...")
45
+ explorer = EnhancedDataExplorer(csv_path)
46
+
47
+ # Check AI status
48
+ if explorer.agent:
49
+ print("βœ… AI Agent: Configured and ready!")
50
+ ai_status = "Available"
51
+ else:
52
+ print("⚠️ AI Agent: Not configured (non-AI features still available)")
53
+ ai_status = "Not Available"
54
+
55
+ print(f"\nπŸ“Š DATA ANALYSIS DEMO")
56
+ print("=" * 30)
57
+
58
+ # Step 1: Load data
59
+ print("\n1️⃣ Loading and exploring data...")
60
+ df = explorer.load_data()
61
+
62
+ if df is not None:
63
+ print(f"βœ… Data loaded successfully!")
64
+
65
+ # Step 2: Data quality analysis
66
+ print("\n2️⃣ Analyzing data quality...")
67
+ quality_report = explorer.analyze_data_quality()
68
+
69
+ # Step 3: Create visualizations
70
+ print("\n3️⃣ Creating visualizations...")
71
+ try:
72
+ explorer.create_visualizations()
73
+ except Exception as e:
74
+ print(f"⚠️ Visualization skipped: {e}")
75
+
76
+ # Step 4: AI Analysis (if available)
77
+ if explorer.agent:
78
+ print("\n4️⃣ Running AI analysis...")
79
+ queries = [
80
+ "Describe the main characteristics of this dataset",
81
+ "What patterns do you see in the data?",
82
+ "Are there any data quality issues I should be aware of?"
83
+ ]
84
+
85
+ for i, query in enumerate(queries, 1):
86
+ print(f"\nπŸ€– AI Query {i}: {query}")
87
+ try:
88
+ response = explorer.ai_analysis(query)
89
+ if response:
90
+ print("βœ… AI analysis completed")
91
+ else:
92
+ print("⚠️ AI analysis returned no response")
93
+ except Exception as e:
94
+ print(f"❌ AI analysis failed: {e}")
95
+ break
96
+ else:
97
+ print("\n4️⃣ AI Analysis: Skipped (no AI model configured)")
98
+
99
+ print(f"\nπŸŽ‰ DEMO COMPLETE!")
100
+ print("=" * 20)
101
+ print(f"πŸ“Š Data Status: {'Loaded' if df is not None else 'Failed'}")
102
+ print(f"πŸ€– AI Status: {ai_status}")
103
+ print(f"πŸ“ˆ Visualizations: {'Created' if df is not None else 'Skipped'}")
104
+
105
+ print(f"\nπŸ’‘ To run interactively:")
106
+ print(f" python upload.py")
107
+ print(f" Choose option 2 for enhanced mode")
108
+
109
+ if __name__ == "__main__":
110
+ auto_demo()
demo_enhanced.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Demo script showing how to use the enhanced upload.py with SmoLagent AI
3
+ ================================================================
4
+
5
+ This script demonstrates the new AI-powered data analysis capabilities
6
+ """
7
+
8
+ from upload import EnhancedDataExplorer, load_and_explore_data
9
+
10
+ def demo_enhanced_features():
11
+ """Demonstrate the enhanced features"""
12
+ print("πŸš€ ENHANCED DATA EXPLORER DEMO")
13
+ print("=" * 50)
14
+
15
+ # Initialize the enhanced explorer
16
+ explorer = EnhancedDataExplorer()
17
+
18
+ print("\n1. Loading data...")
19
+ explorer.load_data()
20
+
21
+ print("\n2. Analyzing data quality...")
22
+ quality_report = explorer.analyze_data_quality()
23
+
24
+ print("\n3. Creating visualizations...")
25
+ explorer.create_visualizations()
26
+
27
+ print("\n4. AI Analysis examples (requires model configuration):")
28
+
29
+ example_queries = [
30
+ "What are the main patterns in this dataset?",
31
+ "Identify any data quality issues",
32
+ "Suggest preprocessing steps",
33
+ "Find correlations between variables",
34
+ "Detect outliers and anomalies"
35
+ ]
36
+
37
+ for i, query in enumerate(example_queries, 1):
38
+ print(f" {i}. {query}")
39
+
40
+ print("\nπŸ’‘ To enable AI analysis:")
41
+ print(" 1. Get an API key (OpenAI, Hugging Face, etc.)")
42
+ print(" 2. Uncomment the appropriate lines in setup_agent() method")
43
+ print(" 3. Run the interactive menu with option 2")
44
+
45
+ def demo_original_function():
46
+ """Demonstrate the original function (preserved)"""
47
+ print("πŸ“Š ORIGINAL FUNCTION DEMO")
48
+ print("=" * 30)
49
+
50
+ df = load_and_explore_data()
51
+ print(f"\nβœ… Original function completed. Data shape: {df.shape if df is not None else 'Failed to load'}")
52
+
53
+ if __name__ == "__main__":
54
+ print("Choose demo mode:")
55
+ print("1. Enhanced features demo")
56
+ print("2. Original function demo")
57
+ print("3. Both")
58
+
59
+ choice = input("Enter choice (1-3): ").strip()
60
+
61
+ if choice == "1":
62
+ demo_enhanced_features()
63
+ elif choice == "2":
64
+ demo_original_function()
65
+ elif choice == "3":
66
+ demo_original_function()
67
+ print("\n" + "="*60 + "\n")
68
+ demo_enhanced_features()
69
+ else:
70
+ print("Invalid choice. Running enhanced demo...")
71
+ demo_enhanced_features()
fixed_upload.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Fixed SmoLagent Data Analysis - Working Version
4
+ """
5
+ import pandas as pd
6
+ from smolagents import CodeAgent, DuckDuckGoSearchTool
7
+ import warnings
8
+ warnings.filterwarnings('ignore')
9
+
10
+ # Your CSV file path
11
+ csv_file_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"
12
+
13
+ class FixedDataExplorer:
14
+ """Working SmoLagent data explorer"""
15
+
16
+ def __init__(self, csv_path=csv_file_path):
17
+ self.csv_path = csv_path
18
+ self.df = None
19
+ self.agent = None
20
+ self.load_data()
21
+ self.setup_agent()
22
+
23
+ def load_data(self):
24
+ """Load the CSV data"""
25
+ try:
26
+ self.df = pd.read_csv(self.csv_path)
27
+ print(f"βœ… Data loaded: {self.df.shape[0]} rows, {self.df.shape[1]} columns")
28
+ return True
29
+ except Exception as e:
30
+ print(f"❌ Data loading failed: {e}")
31
+ return False
32
+
33
+ def setup_agent(self):
34
+ """Setup SmoLagent with proper model"""
35
+ try:
36
+ print("πŸ€– Setting up SmoLagent...")
37
+
38
+ # Option 1: Try Ollama (if running locally)
39
+ try:
40
+ from smolagents import OllamaModel
41
+ model = OllamaModel(model_id="llama2")
42
+ self.agent = CodeAgent(
43
+ tools=[DuckDuckGoSearchTool()],
44
+ model=model
45
+ )
46
+ print("βœ… SmoLagent configured with Ollama")
47
+ return
48
+ except:
49
+ pass
50
+
51
+ # Option 2: Use OpenAI (requires API key)
52
+ try:
53
+ from smolagents import OpenAIModel
54
+ import os
55
+ if os.getenv('OPENAI_API_KEY'):
56
+ model = OpenAIModel(model_id="gpt-3.5-turbo")
57
+ self.agent = CodeAgent(
58
+ tools=[DuckDuckGoSearchTool()],
59
+ model=model
60
+ )
61
+ print("βœ… SmoLagent configured with OpenAI")
62
+ return
63
+ except:
64
+ pass
65
+
66
+ # Option 3: Use smaller HuggingFace model (lighter download)
67
+ try:
68
+ from smolagents import TransformersModel
69
+ model = TransformersModel(model_id="microsoft/DialoGPT-small") # Smaller model
70
+ self.agent = CodeAgent(
71
+ tools=[DuckDuckGoSearchTool()],
72
+ model=model
73
+ )
74
+ print("βœ… SmoLagent configured with small Transformers model")
75
+ return
76
+ except Exception as e:
77
+ print(f"❌ Model setup failed: {e}")
78
+
79
+ print("❌ No AI model could be configured")
80
+ print("πŸ’‘ You can still use basic data analysis features")
81
+
82
+ except Exception as e:
83
+ print(f"❌ Agent setup failed: {e}")
84
+
85
+ def basic_analysis(self):
86
+ """Run basic data analysis without AI"""
87
+ if self.df is None:
88
+ print("❌ No data loaded")
89
+ return
90
+
91
+ print("\nπŸ“Š BASIC DATA ANALYSIS")
92
+ print("=" * 40)
93
+
94
+ # Basic stats
95
+ print(f"πŸ“‹ Dataset: {self.df.shape[0]} rows, {self.df.shape[1]} columns")
96
+ print(f"πŸ“‹ Columns: {list(self.df.columns)}")
97
+
98
+ # Log level analysis
99
+ if 'LogLevel' in self.df.columns:
100
+ log_counts = self.df['LogLevel'].value_counts()
101
+ print(f"\nπŸ“ˆ Log Level Distribution:")
102
+ for level, count in log_counts.items():
103
+ pct = count / len(self.df) * 100
104
+ print(f" {level}: {count} ({pct:.1f}%)")
105
+
106
+ # Error analysis
107
+ if 'LogLevel' in self.df.columns:
108
+ errors = self.df[self.df['LogLevel'] == 'Error']
109
+ if not errors.empty and 'Module' in errors.columns:
110
+ print(f"\n🚨 Top Error Modules:")
111
+ top_errors = errors['Module'].value_counts().head(3)
112
+ for module, count in top_errors.items():
113
+ print(f" β€’ {module}: {count} errors")
114
+
115
+ # Missing data
116
+ missing = self.df.isnull().sum()
117
+ print(f"\n❌ Missing Data:")
118
+ for col, count in missing.items():
119
+ if count > 0:
120
+ pct = count / len(self.df) * 100
121
+ print(f" β€’ {col}: {count} ({pct:.1f}%)")
122
+
123
+ def ai_analysis(self, query):
124
+ """Run AI-powered analysis"""
125
+ if self.agent is None:
126
+ print("❌ AI agent not available. Please configure a model first.")
127
+ return
128
+
129
+ if self.df is None:
130
+ print("❌ No data loaded")
131
+ return
132
+
133
+ try:
134
+ # Prepare data context for AI
135
+ data_summary = f"""
136
+ Dataset: {self.df.shape[0]} rows, {self.df.shape[1]} columns
137
+ Columns: {list(self.df.columns)}
138
+ Sample data: {self.df.head(2).to_string()}
139
+ """
140
+
141
+ full_query = f"""
142
+ Analyze this OutSystems log data:
143
+ {data_summary}
144
+
145
+ User question: {query}
146
+ """
147
+
148
+ print(f"πŸ€– AI analyzing: {query}")
149
+ response = self.agent.run(full_query)
150
+ print(f"πŸ€– AI Response: {response}")
151
+
152
+ except Exception as e:
153
+ print(f"❌ AI analysis failed: {e}")
154
+
155
+ def main():
156
+ """Main function"""
157
+ print("πŸš€ FIXED SMOLAGENT DATA ANALYZER")
158
+ print("=" * 50)
159
+
160
+ # Create explorer
161
+ explorer = FixedDataExplorer()
162
+
163
+ # Run basic analysis
164
+ explorer.basic_analysis()
165
+
166
+ # Test AI if available
167
+ if explorer.agent:
168
+ print(f"\nπŸ€– AI FEATURES AVAILABLE!")
169
+ print(" Try: explorer.ai_analysis('What are the main error types?')")
170
+ else:
171
+ print(f"\nπŸ’‘ AI features not available - need model configuration")
172
+
173
+ return explorer
174
+
175
+ if __name__ == "__main__":
176
+ explorer = main()
quick_ai_demo.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Quick AI Demo - Working SmoLagent without large model downloads
4
+ """
5
+ import pandas as pd
6
+ from smolagents import CodeAgent, DuckDuckGoSearchTool
7
+ import warnings
8
+ warnings.filterwarnings('ignore')
9
+
10
+ def quick_demo():
11
+ """Quick demo that works immediately"""
12
+ print("πŸš€ QUICK AI DEMO - No Downloads Required")
13
+ print("=" * 50)
14
+
15
+ # Load the data first
16
+ csv_file_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"
17
+
18
+ try:
19
+ print("πŸ“Š Loading CSV data...")
20
+ df = pd.read_csv(csv_file_path)
21
+ print(f"βœ… Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")
22
+ print(f"πŸ“‹ Columns: {list(df.columns)}")
23
+
24
+ # Show basic analysis without AI first
25
+ print("\nπŸ“ˆ BASIC DATA ANALYSIS:")
26
+ print("-" * 30)
27
+
28
+ # Error analysis
29
+ error_df = df[df['LogLevel'] == 'Error']
30
+ info_df = df[df['LogLevel'] == 'Info']
31
+
32
+ print(f"🚨 Total Error entries: {len(error_df)}")
33
+ print(f"ℹ️ Total Info entries: {len(info_df)}")
34
+ print(f"πŸ“Š Error rate: {len(error_df)/len(df)*100:.1f}%")
35
+
36
+ # Top modules with errors
37
+ if not error_df.empty:
38
+ top_error_modules = error_df['Module'].value_counts().head(3)
39
+ print(f"\nπŸ” Top 3 modules with errors:")
40
+ for module, count in top_error_modules.items():
41
+ print(f" β€’ {module}: {count} errors")
42
+
43
+ # Check for missing data
44
+ missing_data = df.isnull().sum()
45
+ print(f"\n❌ Missing data summary:")
46
+ for col, missing_count in missing_data.items():
47
+ if missing_count > 0:
48
+ print(f" β€’ {col}: {missing_count} missing ({missing_count/len(df)*100:.1f}%)")
49
+
50
+ print("\n" + "=" * 50)
51
+ print("βœ… BASIC ANALYSIS COMPLETE!")
52
+ print("πŸ’‘ This shows your data is loading correctly.")
53
+ print("πŸ€– AI features will work once model downloads complete.")
54
+ print("=" * 50)
55
+
56
+ return df
57
+
58
+ except Exception as e:
59
+ print(f"❌ Error loading data: {e}")
60
+ return None
61
+
62
+ def test_simple_agent():
63
+ """Test if we can create an agent without heavy models"""
64
+ print("\nπŸ§ͺ Testing Simple Agent Creation...")
65
+
66
+ try:
67
+ # Just test the tools without a model first
68
+ search_tool = DuckDuckGoSearchTool()
69
+ print("βœ… DuckDuckGo search tool created successfully")
70
+
71
+ # Try to create agent (might fail without model, but we can catch it)
72
+ try:
73
+ agent = CodeAgent(tools=[search_tool])
74
+ print("βœ… Agent created (basic setup)")
75
+ except Exception as e:
76
+ print(f"ℹ️ Agent needs model: {e}")
77
+ print("πŸ’‘ This is expected - agent will work once model is ready")
78
+
79
+ except Exception as e:
80
+ print(f"❌ Tool creation failed: {e}")
81
+
82
+ if __name__ == "__main__":
83
+ # Run the quick demo
84
+ df = quick_demo()
85
+
86
+ # Test agent creation
87
+ test_simple_agent()
88
+
89
+ if df is not None:
90
+ print(f"\nπŸŽ‰ SUCCESS! Your data analysis setup is working!")
91
+ print(f"πŸ“Š Dataset ready: {df.shape[0]} OutSystems log entries")
92
+ print(f"πŸ€– AI features will be available once model download completes")
requirements.txt CHANGED
@@ -7,5 +7,8 @@ seaborn>=0.12.0
7
  plotly>=5.15.0
8
  Pillow>=10.0.0
9
  scikit-learn>=1.3.0
10
- openai>=1.0.0
 
11
  requests>=2.31.0
 
 
 
7
  plotly>=5.15.0
8
  Pillow>=10.0.0
9
  scikit-learn>=1.3.0
10
+ transformers>=4.30.0
11
+ torch>=2.0.0
12
  requests>=2.31.0
13
+ huggingface_hub>=0.16.0
14
+ duckduckgo-search>=3.8.0
setup_free_ai.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Free AI Setup Helper
3
+ ===================
4
+
5
+ This script helps you set up completely free AI models for data analysis.
6
+ """
7
+
8
+ import os
9
+ import subprocess
10
+ import sys
11
+
12
+ def setup_free_huggingface():
13
+ """Setup free Hugging Face models"""
14
+ print("πŸ†“ SETTING UP FREE HUGGING FACE AI")
15
+ print("=" * 40)
16
+
17
+ print("πŸ“ Steps to get free Hugging Face access:")
18
+ print("1. Go to: https://huggingface.co/join")
19
+ print("2. Create a free account")
20
+ print("3. Go to: https://huggingface.co/settings/tokens")
21
+ print("4. Create a new token (read access is enough)")
22
+ print("5. Copy the token")
23
+
24
+ token = input("\nπŸ”‘ Paste your Hugging Face token here (or press Enter to skip): ").strip()
25
+
26
+ if token:
27
+ # Set environment variable for current session
28
+ os.environ['HF_TOKEN'] = token
29
+ print("βœ… Token set for current session!")
30
+
31
+ # Try to test the token
32
+ try:
33
+ import requests
34
+ headers = {"Authorization": f"Bearer {token}"}
35
+ response = requests.get("https://huggingface.co/api/whoami", headers=headers)
36
+ if response.status_code == 200:
37
+ user_info = response.json()
38
+ print(f"βœ… Token verified! Hello, {user_info.get('name', 'User')}!")
39
+ return True
40
+ else:
41
+ print("⚠️ Token verification failed. Please check your token.")
42
+ return False
43
+ except Exception as e:
44
+ print(f"⚠️ Could not verify token: {e}")
45
+ return False
46
+ else:
47
+ print("⚠️ No token provided. Some models may not work without authentication.")
48
+ return False
49
+
50
+ def setup_ollama_quick():
51
+ """Quick Ollama setup guide"""
52
+ print("\nπŸ†“ SETTING UP FREE LOCAL OLLAMA AI")
53
+ print("=" * 40)
54
+
55
+ print("πŸ“ Quick Ollama setup:")
56
+ print("1. Download from: https://ollama.ai/")
57
+ print("2. Install the application")
58
+ print("3. Open terminal and run: ollama pull llama2")
59
+ print("4. Start server: ollama serve")
60
+ print("5. Your script will automatically detect it!")
61
+
62
+ choice = input("\n❓ Open Ollama website now? (y/n): ").strip().lower()
63
+ if choice == 'y':
64
+ try:
65
+ if sys.platform == 'win32':
66
+ os.startfile("https://ollama.ai/")
67
+ elif sys.platform == 'darwin':
68
+ subprocess.run(['open', "https://ollama.ai/"])
69
+ else:
70
+ subprocess.run(['xdg-open', "https://ollama.ai/"])
71
+ print("βœ… Ollama website opened!")
72
+ except Exception as e:
73
+ print(f"⚠️ Could not open website: {e}")
74
+ print("Please manually go to: https://ollama.ai/")
75
+
76
+ def test_current_setup():
77
+ """Test what AI models are currently available"""
78
+ print("\nπŸ§ͺ TESTING CURRENT AI SETUP")
79
+ print("=" * 30)
80
+
81
+ try:
82
+ from upload import EnhancedDataExplorer
83
+ explorer = EnhancedDataExplorer()
84
+
85
+ if explorer.agent is not None:
86
+ print("βœ… AI model is configured and ready!")
87
+ print("πŸš€ You can now use AI analysis in your data explorer!")
88
+ return True
89
+ else:
90
+ print("❌ No AI model configured yet.")
91
+ return False
92
+ except Exception as e:
93
+ print(f"❌ Error testing setup: {e}")
94
+ return False
95
+
96
+ def main():
97
+ print("πŸ€– FREE AI MODELS SETUP")
98
+ print("=" * 25)
99
+ print("Choose your free AI option:")
100
+ print("1. πŸ†“ Hugging Face (cloud-based, free account needed)")
101
+ print("2. πŸ†“ Ollama (local, completely free, more private)")
102
+ print("3. πŸ§ͺ Test current setup")
103
+ print("4. ❌ Skip AI setup")
104
+
105
+ choice = input("\nEnter your choice (1-4): ").strip()
106
+
107
+ if choice == "1":
108
+ setup_free_huggingface()
109
+ elif choice == "2":
110
+ setup_ollama_quick()
111
+ elif choice == "3":
112
+ test_current_setup()
113
+ elif choice == "4":
114
+ print("βœ… You can still use all non-AI features!")
115
+ else:
116
+ print("❌ Invalid choice. Please run the script again.")
117
+
118
+ print("\nπŸš€ Next steps:")
119
+ print("1. Run: python upload.py")
120
+ print("2. Choose option 2 (Enhanced mode)")
121
+ print("3. Try AI analysis with menu option 4")
122
+
123
+ if __name__ == "__main__":
124
+ main()
setup_ollama.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Ollama Setup Helper for Windows (SmoLagents Native)
3
+ ===================================================
4
+
5
+ This script helps you set up Ollama for free AI analysis using SmoLagents' native Ollama support
6
+ """
7
+
8
+ import subprocess
9
+ import time
10
+ import requests
11
+ import os
12
+
13
+ def check_ollama_installed():
14
+ """Check if Ollama is installed"""
15
+ try:
16
+ result = subprocess.run(['ollama', '--version'],
17
+ capture_output=True, text=True, timeout=10)
18
+ if result.returncode == 0:
19
+ print(f"βœ… Ollama is installed: {result.stdout.strip()}")
20
+ return True
21
+ else:
22
+ print("❌ Ollama is not installed or not working properly")
23
+ return False
24
+ except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError):
25
+ print("❌ Ollama is not installed")
26
+ return False
27
+
28
+ def check_ollama_running():
29
+ """Check if Ollama server is running"""
30
+ try:
31
+ response = requests.get("http://localhost:11434", timeout=5)
32
+ if response.status_code == 200:
33
+ print("βœ… Ollama server is running")
34
+ return True
35
+ else:
36
+ print("⚠️ Ollama server is not responding properly")
37
+ return False
38
+ except requests.exceptions.RequestException:
39
+ print("❌ Ollama server is not running")
40
+ return False
41
+
42
+ def start_ollama_server():
43
+ """Start Ollama server"""
44
+ try:
45
+ print("πŸš€ Starting Ollama server...")
46
+ # Start Ollama server in background
47
+ process = subprocess.Popen(['ollama', 'serve'],
48
+ stdout=subprocess.PIPE,
49
+ stderr=subprocess.PIPE)
50
+
51
+ # Wait a bit for server to start
52
+ time.sleep(3)
53
+
54
+ if check_ollama_running():
55
+ print("βœ… Ollama server started successfully")
56
+ return True
57
+ else:
58
+ print("❌ Failed to start Ollama server")
59
+ return False
60
+ except Exception as e:
61
+ print(f"❌ Error starting Ollama server: {e}")
62
+ return False
63
+
64
+ def download_model(model_name="llama2"):
65
+ """Download a model for Ollama"""
66
+ try:
67
+ print(f"πŸ“₯ Downloading {model_name} model (this may take a while)...")
68
+ result = subprocess.run(['ollama', 'pull', model_name],
69
+ capture_output=True, text=True, timeout=600)
70
+
71
+ if result.returncode == 0:
72
+ print(f"βœ… {model_name} model downloaded successfully")
73
+ return True
74
+ else:
75
+ print(f"❌ Failed to download {model_name} model")
76
+ print(f"Error: {result.stderr}")
77
+ return False
78
+ except subprocess.TimeoutExpired:
79
+ print(f"⏰ Download timeout for {model_name} model")
80
+ return False
81
+ except Exception as e:
82
+ print(f"❌ Error downloading {model_name} model: {e}")
83
+ return False
84
+
85
+ def list_available_models():
86
+ """List downloaded models"""
87
+ try:
88
+ result = subprocess.run(['ollama', 'list'],
89
+ capture_output=True, text=True, timeout=10)
90
+ if result.returncode == 0:
91
+ print("πŸ“‹ Available models:")
92
+ print(result.stdout)
93
+ return True
94
+ else:
95
+ print("❌ Failed to list models")
96
+ return False
97
+ except Exception as e:
98
+ print(f"❌ Error listing models: {e}")
99
+ return False
100
+
101
+ def test_ollama_chat(model_name="llama2"):
102
+ """Test Ollama with a simple chat"""
103
+ try:
104
+ print(f"πŸ§ͺ Testing {model_name} model...")
105
+ test_prompt = "Hello, can you help me analyze data? Just say yes or no."
106
+
107
+ result = subprocess.run(['ollama', 'run', model_name, test_prompt],
108
+ capture_output=True, text=True, timeout=30)
109
+
110
+ if result.returncode == 0:
111
+ print("βœ… Ollama model test successful!")
112
+ print(f"Response: {result.stdout.strip()}")
113
+ return True
114
+ else:
115
+ print("❌ Ollama model test failed")
116
+ print(f"Error: {result.stderr}")
117
+ return False
118
+ except subprocess.TimeoutExpired:
119
+ print("⏰ Ollama model test timeout")
120
+ return False
121
+ except Exception as e:
122
+ print(f"❌ Error testing Ollama model: {e}")
123
+ return False
124
+
125
+ def setup_ollama():
126
+ """Complete Ollama setup process"""
127
+ print("πŸ€– OLLAMA SETUP FOR FREE AI ANALYSIS")
128
+ print("=" * 40)
129
+
130
+ # Step 1: Check installation
131
+ if not check_ollama_installed():
132
+ print("\nπŸ“ Installation Instructions:")
133
+ print("1. Go to https://ollama.ai/")
134
+ print("2. Download the Windows installer")
135
+ print("3. Run the installer")
136
+ print("4. Restart your terminal/command prompt")
137
+ print("5. Run this script again")
138
+ return False
139
+
140
+ # Step 2: Start server
141
+ if not check_ollama_running():
142
+ if not start_ollama_server():
143
+ print("\nπŸ”§ Manual server start:")
144
+ print("Open a new terminal and run: ollama serve")
145
+ return False
146
+
147
+ # Step 3: Download model
148
+ print(f"\nπŸ“‹ Checking available models...")
149
+ list_available_models()
150
+
151
+ # Check if llama2 is available
152
+ result = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
153
+ if 'llama2' not in result.stdout:
154
+ print("\nπŸ“₯ Downloading llama2 model...")
155
+ if not download_model("llama2"):
156
+ # Try a smaller model if llama2 fails
157
+ print("πŸ”„ Trying smaller model (phi)...")
158
+ if not download_model("phi"):
159
+ print("❌ Failed to download any model")
160
+ return False
161
+
162
+ # Step 4: Test the setup
163
+ print(f"\nπŸ§ͺ Testing setup...")
164
+ model_to_test = "llama2" if 'llama2' in result.stdout else "phi"
165
+ if test_ollama_chat(model_to_test):
166
+ print("\nπŸŽ‰ OLLAMA SETUP COMPLETE!")
167
+ print("You can now use AI analysis in your upload.py script")
168
+ return True
169
+ else:
170
+ print("❌ Setup incomplete - model test failed")
171
+ return False
172
+
173
+ if __name__ == "__main__":
174
+ success = setup_ollama()
175
+
176
+ if success:
177
+ print("\nπŸš€ Next Steps:")
178
+ print("1. Run: python upload.py")
179
+ print("2. Choose option 2 (Enhanced interactive mode)")
180
+ print("3. Use menu option 4 for AI analysis")
181
+ print("\nπŸ’‘ Your script is already configured for Ollama!")
182
+ else:
183
+ print("\nπŸ”§ Setup incomplete. Please follow the instructions above.")
test_basic_agent.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple SmoLagent Test
3
+ ====================
4
+
5
+ Test the basic SmoLagent setup with just CodeAgent and DuckDuckGoSearchTool
6
+ """
7
+
8
+ from smolagents import CodeAgent, DuckDuckGoSearchTool
9
+
10
+ def test_basic_agent():
11
+ """Test basic SmoLagent setup without language model"""
12
+ print("πŸ€– TESTING BASIC SMOLAGENT SETUP")
13
+ print("=" * 35)
14
+
15
+ try:
16
+ # Create agent with just tools (no language model)
17
+ agent = CodeAgent(tools=[DuckDuckGoSearchTool()])
18
+ print("βœ… Basic SmoLagent created successfully!")
19
+ print(f"πŸ“‹ Agent has {len(agent.tools)} tool(s) available")
20
+
21
+ # List available tools
22
+ for i, tool in enumerate(agent.tools, 1):
23
+ print(f" {i}. {tool.__class__.__name__}")
24
+
25
+ # Test basic functionality
26
+ print("\nπŸ§ͺ Testing agent functionality...")
27
+
28
+ # Note: Without a language model, we can't run complex queries
29
+ # But we can verify the agent structure is correct
30
+ print("βœ… Agent structure is valid")
31
+ print("⚠️ Note: Full AI analysis requires a language model")
32
+
33
+ return agent
34
+
35
+ except Exception as e:
36
+ print(f"❌ Basic agent setup failed: {e}")
37
+ return None
38
+
39
+ def test_with_search():
40
+ """Test the search functionality"""
41
+ print("\nπŸ” TESTING SEARCH FUNCTIONALITY")
42
+ print("=" * 30)
43
+
44
+ try:
45
+ # Create search tool directly
46
+ search_tool = DuckDuckGoSearchTool()
47
+ print("βœ… DuckDuckGo search tool created")
48
+
49
+ # Test search (if possible)
50
+ print("πŸ” Search tool ready for use")
51
+ return search_tool
52
+
53
+ except Exception as e:
54
+ print(f"❌ Search tool failed: {e}")
55
+ return None
56
+
57
+ def main():
58
+ """Main test function"""
59
+ print("πŸš€ SMOLAGENT BASIC SETUP TEST")
60
+ print("=" * 30)
61
+
62
+ # Test 1: Basic agent
63
+ agent = test_basic_agent()
64
+
65
+ # Test 2: Search tool
66
+ search_tool = test_with_search()
67
+
68
+ # Summary
69
+ print("\nπŸ“Š TEST SUMMARY")
70
+ print("=" * 15)
71
+ print(f"Basic Agent: {'βœ… Working' if agent else '❌ Failed'}")
72
+ print(f"Search Tool: {'βœ… Working' if search_tool else '❌ Failed'}")
73
+
74
+ if agent:
75
+ print("\nπŸ’‘ Your basic SmoLagent is ready!")
76
+ print(" To add AI capabilities, configure a language model")
77
+ print(" Options: Ollama (free), OpenAI (paid), Hugging Face (free)")
78
+ else:
79
+ print("\n❌ Basic setup failed. Check SmoLagents installation.")
80
+
81
+ print("\nπŸš€ Next: Run 'python upload.py' to use enhanced data analysis!")
82
+
83
+ if __name__ == "__main__":
84
+ main()
test_free_ai.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple Free AI Test
3
+ ==================
4
+
5
+ Test the free AI models in your upload.py script
6
+ """
7
+
8
+ def test_free_models():
9
+ """Test free AI models availability"""
10
+ print("πŸ” Testing free AI models...")
11
+
12
+ try:
13
+ from upload import EnhancedDataExplorer
14
+
15
+ print("πŸ“Š Creating data explorer...")
16
+ explorer = EnhancedDataExplorer()
17
+
18
+ if explorer.agent is not None:
19
+ print("βœ… Free AI model configured successfully!")
20
+ print("πŸŽ‰ You can now use AI-powered data analysis!")
21
+
22
+ # Test with a simple query
23
+ print("\nπŸ§ͺ Testing AI with a simple query...")
24
+ test_query = "Hello, can you help with data analysis?"
25
+
26
+ # Simulate having some data
27
+ import pandas as pd
28
+ import numpy as np
29
+ test_data = pd.DataFrame({
30
+ 'A': np.random.randn(100),
31
+ 'B': np.random.randn(100),
32
+ 'C': ['category1', 'category2'] * 50
33
+ })
34
+ explorer.df = test_data
35
+
36
+ # Test AI analysis
37
+ try:
38
+ response = explorer.ai_analysis("Describe this test dataset briefly")
39
+ if response:
40
+ print("βœ… AI analysis test successful!")
41
+ else:
42
+ print("⚠️ AI analysis returned no response")
43
+ except Exception as e:
44
+ print(f"⚠️ AI analysis test failed: {e}")
45
+
46
+ else:
47
+ print("❌ No free AI models available.")
48
+ print("πŸ’‘ Try running: python setup_free_ai.py")
49
+
50
+ except Exception as e:
51
+ print(f"❌ Error testing free models: {e}")
52
+
53
+ if __name__ == "__main__":
54
+ test_free_models()
test_smolagent.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple test of SmoLagent functionality
4
+ """
5
+ import pandas as pd
6
+ from smolagents import CodeAgent, DuckDuckGoSearchTool
7
+ import warnings
8
+ warnings.filterwarnings('ignore')
9
+
10
+ def test_basic_smolagent():
11
+ """Test basic SmoLagent setup"""
12
+ print("πŸ§ͺ Testing SmoLagent Setup...")
13
+
14
+ try:
15
+ # Test 1: Import check
16
+ print("βœ… Imports successful")
17
+
18
+ # Test 2: Create agent without model (should fail gracefully)
19
+ try:
20
+ agent = CodeAgent(tools=[DuckDuckGoSearchTool()])
21
+ print("βœ… Agent created without model")
22
+ except Exception as e:
23
+ print(f"❌ Agent creation failed: {e}")
24
+ print("πŸ’‘ This is expected - CodeAgent needs a model parameter")
25
+
26
+ # Test 3: Try with a simple model setup
27
+ try:
28
+ from smolagents import HfApiModel
29
+ print("πŸ”„ Trying HuggingFace model...")
30
+ model = HfApiModel(model_id="microsoft/DialoGPT-medium")
31
+ agent = CodeAgent(
32
+ tools=[DuckDuckGoSearchTool()],
33
+ model=model
34
+ )
35
+ print("βœ… Agent created successfully with HuggingFace model!")
36
+
37
+ # Test a simple query
38
+ response = agent.run("What is 2 + 2?")
39
+ print(f"πŸ€– Agent response: {response}")
40
+
41
+ except Exception as e:
42
+ print(f"❌ HuggingFace model failed: {e}")
43
+
44
+ # Test 4: Try Ollama
45
+ try:
46
+ from smolagents import OllamaModel
47
+ print("πŸ”„ Trying Ollama model...")
48
+ model = OllamaModel(model_id="llama2", base_url="http://localhost:11434")
49
+ agent = CodeAgent(
50
+ tools=[DuckDuckGoSearchTool()],
51
+ model=model
52
+ )
53
+ print("βœ… Agent created successfully with Ollama!")
54
+
55
+ # Test a simple query
56
+ response = agent.run("What is 2 + 2?")
57
+ print(f"πŸ€– Agent response: {response}")
58
+
59
+ except Exception as e:
60
+ print(f"❌ Ollama model failed: {e}")
61
+ print("πŸ’‘ Make sure Ollama is running with: ollama serve")
62
+
63
+ except Exception as e:
64
+ print(f"❌ Test failed: {e}")
65
+
66
+ def test_with_data():
67
+ """Test SmoLagent with actual CSV data"""
68
+ csv_file_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"
69
+
70
+ try:
71
+ # Load data
72
+ print("\nπŸ“Š Loading CSV data...")
73
+ df = pd.read_csv(csv_file_path)
74
+ print(f"βœ… Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")
75
+ print(f"πŸ“‹ Columns: {list(df.columns)}")
76
+
77
+ # Basic analysis
78
+ error_count = df[df['LogLevel'] == 'Error'].shape[0]
79
+ print(f"🚨 Error entries: {error_count}")
80
+
81
+ return df
82
+
83
+ except Exception as e:
84
+ print(f"❌ Data loading failed: {e}")
85
+ return None
86
+
87
+ if __name__ == "__main__":
88
+ print("=" * 50)
89
+ print("πŸ€– SMOLAGENT TEST SUITE")
90
+ print("=" * 50)
91
+
92
+ # Test basic functionality
93
+ test_basic_smolagent()
94
+
95
+ # Test with data
96
+ df = test_with_data()
97
+
98
+ print("\n" + "=" * 50)
99
+ print("βœ… Test completed!")
100
+ print("=" * 50)
upload.py CHANGED
@@ -1,11 +1,320 @@
1
  import pandas as pd
2
  import os
 
 
 
 
 
 
3
 
4
  # Replace 'your_file.csv' with your CSV file path
5
  csv_file_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  def load_and_explore_data():
8
- """Load and explore the CSV data"""
9
  try:
10
  # Check if file exists
11
  if not os.path.exists(csv_file_path):
@@ -37,4 +346,20 @@ def load_and_explore_data():
37
  return None
38
 
39
  if __name__ == "__main__":
40
- df = load_and_explore_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
  import os
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from smolagents import CodeAgent, DuckDuckGoSearchTool
7
+ import warnings
8
+ warnings.filterwarnings('ignore')
9
 
10
  # Replace 'your_file.csv' with your CSV file path
11
  csv_file_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"
12
 
13
+ class EnhancedDataExplorer:
14
+ """Enhanced data explorer with SmoLagent AI capabilities"""
15
+
16
+ def __init__(self, csv_path=csv_file_path):
17
+ self.csv_path = csv_path
18
+ self.df = None
19
+ self.agent = None
20
+ self.setup_agent()
21
+
22
+ def setup_agent(self):
23
+ """Setup SmoLagent AI agent with simple configuration"""
24
+ try:
25
+ print("πŸ€– Setting up SmoLagent with basic tools...")
26
+
27
+ # Use the exact setup specified by user
28
+ try:
29
+ # Try with Ollama model first
30
+ from smolagents import OllamaModel
31
+ model = OllamaModel(model_id="llama2", base_url="http://localhost:11434")
32
+ self.agent = CodeAgent(
33
+ tools=[DuckDuckGoSearchTool()],
34
+ model=model
35
+ )
36
+ print("βœ… SmoLagent configured successfully with Ollama and search capabilities")
37
+ return
38
+ except Exception as e:
39
+ print(f"⚠️ Ollama setup failed: {e}")
40
+
41
+ # Fallback to Transformers model
42
+ try:
43
+ from smolagents import TransformersModel
44
+ model = TransformersModel(model_id="microsoft/DialoGPT-medium")
45
+ self.agent = CodeAgent(
46
+ tools=[DuckDuckGoSearchTool()],
47
+ model=model
48
+ )
49
+ print("βœ… SmoLagent configured successfully with Transformers model")
50
+ return
51
+ except Exception as e:
52
+ print(f"⚠️ Transformers setup failed: {e}")
53
+ print(" Make sure all required packages are installed")
54
+
55
+ if self.agent is None:
56
+ print("\n❌ No AI agent could be configured.")
57
+ print("πŸ“‹ To fix this:")
58
+ print(" 1. Check internet connection")
59
+ print(" 2. Install missing packages from requirements.txt")
60
+ print("\nβœ… You can still use all non-AI features!")
61
+
62
+ except Exception as e:
63
+ print(f"⚠️ Agent setup failed: {e}")
64
+ self.agent = None
65
+
66
+ def configure_model_helper(self):
67
+ """Helper function to guide model configuration"""
68
+ print("\nπŸ€– AI Model Configuration Helper")
69
+ print("=" * 40)
70
+ print("1. OpenAI (Recommended - Most capable)")
71
+ print("2. Ollama (Free - Runs locally)")
72
+ print("3. Hugging Face (Free - API based)")
73
+ print("4. Skip AI features")
74
+
75
+ choice = input("Choose your model (1-4): ").strip()
76
+
77
+ if choice == "1":
78
+ print("\nπŸ“ OpenAI Setup:")
79
+ print("1. Get API key from: https://platform.openai.com/")
80
+ print("2. Set environment variable: OPENAI_API_KEY=your_key")
81
+ print("3. Or edit the setup_agent() method with your key")
82
+
83
+ elif choice == "2":
84
+ print("\nπŸ“ Ollama Setup:")
85
+ print("1. Install Ollama from: https://ollama.ai/")
86
+ print("2. Run: ollama pull llama2")
87
+ print("3. Start server: ollama serve")
88
+ print("4. Script is already configured to use SmoLagents' native OllamaModel")
89
+ print("5. Just make sure Ollama is running and try the AI analysis!")
90
+
91
+ elif choice == "3":
92
+ print("\nπŸ“ Hugging Face Setup:")
93
+ print("1. Create account at: https://huggingface.co/")
94
+ print("2. Get token from: https://huggingface.co/settings/tokens")
95
+ print("3. Set environment variable: HF_TOKEN=your_token")
96
+ print("4. Uncomment HF lines in setup_agent() method")
97
+
98
+ elif choice == "4":
99
+ print("βœ… You can still use all non-AI features!")
100
+
101
+ print("\nπŸ’‘ Tip: Set environment variables in your system or use a .env file")
102
+ return choice
103
+
104
+
105
+ def load_data(self):
106
+ """Load the CSV data (keeping your original functionality)"""
107
+ try:
108
+ # Check if file exists
109
+ if not os.path.exists(self.csv_path):
110
+ print(f"Error: File not found at {self.csv_path}")
111
+ return None
112
+
113
+ # Read the CSV file into a DataFrame
114
+ self.df = pd.read_csv(self.csv_path)
115
+
116
+ print("=== DATA LOADED SUCCESSFULLY ===")
117
+ print(f"Dataset shape: {self.df.shape}")
118
+ print(f"Columns: {list(self.df.columns)}")
119
+ print("\n=== FIRST 5 ROWS ===")
120
+ print(self.df.head())
121
+
122
+ print("\n=== DATA TYPES ===")
123
+ print(self.df.dtypes)
124
+
125
+ print("\n=== MISSING VALUES ===")
126
+ print(self.df.isnull().sum())
127
+
128
+ print("\n=== BASIC STATISTICS ===")
129
+ print(self.df.describe())
130
+
131
+ return self.df
132
+
133
+ except Exception as e:
134
+ print(f"Error loading data: {str(e)}")
135
+ return None
136
+
137
+ def create_visualizations(self):
138
+ """Create basic visualizations"""
139
+ if self.df is None:
140
+ print("❌ No data loaded. Run load_data() first.")
141
+ return
142
+
143
+ try:
144
+ # Set up plotting style
145
+ plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'default')
146
+
147
+ # Get numeric columns
148
+ numeric_cols = self.df.select_dtypes(include=[np.number]).columns
149
+
150
+ if len(numeric_cols) == 0:
151
+ print("⚠️ No numeric columns found for visualization")
152
+ return
153
+
154
+ print(f"\n=== CREATING VISUALIZATIONS FOR {len(numeric_cols)} NUMERIC COLUMNS ===")
155
+
156
+ # 1. Distribution plots
157
+ n_cols = min(3, len(numeric_cols))
158
+ n_rows = (len(numeric_cols) + n_cols - 1) // n_cols
159
+
160
+ plt.figure(figsize=(15, 5*n_rows))
161
+ for i, col in enumerate(numeric_cols):
162
+ plt.subplot(n_rows, n_cols, i+1)
163
+ self.df[col].hist(bins=30, alpha=0.7, edgecolor='black')
164
+ plt.title(f'Distribution of {col}')
165
+ plt.xlabel(col)
166
+ plt.ylabel('Frequency')
167
+
168
+ plt.tight_layout()
169
+ plt.savefig('data_distributions.png', dpi=300, bbox_inches='tight')
170
+ plt.show()
171
+ print("βœ… Distribution plots saved as 'data_distributions.png'")
172
+
173
+ # 2. Correlation heatmap (if more than 1 numeric column)
174
+ if len(numeric_cols) > 1:
175
+ plt.figure(figsize=(12, 8))
176
+ correlation_matrix = self.df[numeric_cols].corr()
177
+ sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
178
+ square=True, linewidths=0.5)
179
+ plt.title('Correlation Heatmap')
180
+ plt.tight_layout()
181
+ plt.savefig('correlation_heatmap.png', dpi=300, bbox_inches='tight')
182
+ plt.show()
183
+ print("βœ… Correlation heatmap saved as 'correlation_heatmap.png'")
184
+
185
+ except Exception as e:
186
+ print(f"❌ Error creating visualizations: {e}")
187
+
188
+ def analyze_data_quality(self):
189
+ """Analyze data quality issues"""
190
+ if self.df is None:
191
+ print("❌ No data loaded. Run load_data() first.")
192
+ return
193
+
194
+ print("\n=== DATA QUALITY ANALYSIS ===")
195
+
196
+ # Missing data analysis
197
+ missing_data = self.df.isnull().sum()
198
+ missing_percentage = (missing_data / len(self.df)) * 100
199
+
200
+ quality_report = pd.DataFrame({
201
+ 'Column': self.df.columns,
202
+ 'Missing_Count': missing_data.values,
203
+ 'Missing_Percentage': missing_percentage.values,
204
+ 'Data_Type': self.df.dtypes.values
205
+ })
206
+
207
+ print("Missing Data Summary:")
208
+ print(quality_report[quality_report['Missing_Count'] > 0])
209
+
210
+ # Duplicate rows
211
+ duplicates = self.df.duplicated().sum()
212
+ print(f"\nDuplicate rows: {duplicates}")
213
+
214
+ # Memory usage
215
+ memory_usage = self.df.memory_usage(deep=True).sum() / 1024**2
216
+ print(f"Memory usage: {memory_usage:.2f} MB")
217
+
218
+ return quality_report
219
+
220
+ def ai_analysis(self, query):
221
+ """Use SmoLagent for AI-powered analysis"""
222
+ if self.agent is None:
223
+ print("❌ AI agent not configured. Please set up SmoLagent first.")
224
+ return
225
+
226
+ if self.df is None:
227
+ print("❌ No data loaded. Run load_data() first.")
228
+ return
229
+
230
+ # Prepare context about the dataset
231
+ data_context = f"""
232
+ Dataset Analysis Request:
233
+ - Dataset Shape: {self.df.shape}
234
+ - Columns: {list(self.df.columns)}
235
+ - Data Types: {dict(self.df.dtypes)}
236
+ - Missing Values: {dict(self.df.isnull().sum())}
237
+
238
+ Sample Data:
239
+ {self.df.head(3).to_string()}
240
+
241
+ Statistical Summary:
242
+ {self.df.describe().to_string()}
243
+
244
+ User Question: {query}
245
+ """
246
+
247
+ try:
248
+ print(f"\n=== AI ANALYSIS FOR: '{query}' ===")
249
+ print("πŸ€– Processing with SmoLagent...")
250
+
251
+ # Use the agent with the data context and query
252
+ response = self.agent.run(data_context)
253
+ print("βœ… AI Analysis Complete:")
254
+ print(response)
255
+ return response
256
+
257
+ except Exception as e:
258
+ print(f"❌ AI analysis failed: {e}")
259
+ print("πŸ’‘ Try using the data visualization and quality analysis features instead!")
260
+ return None
261
+
262
+ def interactive_menu(self):
263
+ """Interactive menu for data exploration"""
264
+ while True:
265
+ print("\n" + "="*50)
266
+ print("πŸ€– ENHANCED DATA EXPLORER WITH AI")
267
+ print("="*50)
268
+ print("1. Load and explore data")
269
+ print("2. Create visualizations")
270
+ print("3. Analyze data quality")
271
+ print("4. AI-powered analysis")
272
+ print("5. Show data summary")
273
+ print("6. Exit")
274
+ print("="*50)
275
+
276
+ choice = input("Enter your choice (1-6): ").strip()
277
+
278
+ if choice == '1':
279
+ self.load_data()
280
+ elif choice == '2':
281
+ self.create_visualizations()
282
+ elif choice == '3':
283
+ self.analyze_data_quality()
284
+ elif choice == '4':
285
+ if self.agent is None:
286
+ print("\n❌ AI features not available. Please configure a model first.")
287
+ print("Edit the setup_agent() method to add your API keys.")
288
+ self.configure_model_helper()
289
+ else:
290
+ print("\nπŸ€– AI Analysis - Ask me anything about your data!")
291
+ print("Example queries:")
292
+ print(" β€’ 'What are the main trends in this data?'")
293
+ print(" β€’ 'Find any outliers or anomalies'")
294
+ print(" β€’ 'Suggest data quality improvements'")
295
+ print(" β€’ 'Perform correlation analysis'")
296
+ print(" β€’ 'Identify seasonal patterns'")
297
+ print(" β€’ 'Recommend preprocessing steps'")
298
+
299
+ query = input("\nπŸ’¬ Your question: ").strip()
300
+ if query:
301
+ self.ai_analysis(query)
302
+ elif choice == '5':
303
+ if self.df is not None:
304
+ print(f"\nπŸ“Š Dataset Summary:")
305
+ print(f"Shape: {self.df.shape}")
306
+ print(f"Columns: {list(self.df.columns)}")
307
+ print(f"Memory: {self.df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
308
+ else:
309
+ print("❌ No data loaded.")
310
+ elif choice == '6':
311
+ print("πŸ‘‹ Goodbye!")
312
+ break
313
+ else:
314
+ print("❌ Invalid choice. Please try again.")
315
+
316
  def load_and_explore_data():
317
+ """Load and explore the CSV data (keeping your original function)"""
318
  try:
319
  # Check if file exists
320
  if not os.path.exists(csv_file_path):
 
346
  return None
347
 
348
  if __name__ == "__main__":
349
+ print("πŸš€ Enhanced Data Explorer with SmoLagent AI")
350
+ print("Choose your preferred mode:")
351
+ print("1. Original function (load_and_explore_data)")
352
+ print("2. Enhanced interactive mode with AI")
353
+
354
+ mode = input("Enter mode (1 or 2): ").strip()
355
+
356
+ if mode == "1":
357
+ # Run your original function
358
+ df = load_and_explore_data()
359
+ elif mode == "2":
360
+ # Run enhanced mode with AI capabilities
361
+ explorer = EnhancedDataExplorer()
362
+ explorer.interactive_menu()
363
+ else:
364
+ print("Invalid choice. Running original function...")
365
+ df = load_and_explore_data()