Spaces:
Runtime error
Runtime error
cosmoruler
committed on
Commit
·
c69ba8c
1
Parent(s):
5269c7e
problems fixed
Browse files- ENHANCEMENT_GUIDE.md +95 -0
- __pycache__/upload.cpython-313.pyc +0 -0
- auto_demo.py +110 -0
- demo_enhanced.py +71 -0
- fixed_upload.py +176 -0
- quick_ai_demo.py +92 -0
- requirements.txt +4 -1
- setup_free_ai.py +124 -0
- setup_ollama.py +183 -0
- test_basic_agent.py +84 -0
- test_free_ai.py +54 -0
- test_smolagent.py +100 -0
- upload.py +327 -2
ENHANCEMENT_GUIDE.md
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Enhanced Data Explorer Setup Guide
|
2 |
+
|
3 |
+
## π Your script has been enhanced with SmoLagent AI capabilities!
|
4 |
+
|
5 |
+
### What's New:
|
6 |
+
|
7 |
+
1. **AI-Powered Analysis**: Ask natural language questions about your data
|
8 |
+
2. **Enhanced Visualizations**: Automatic correlation heatmaps and distribution plots
|
9 |
+
3. **Data Quality Analysis**: Comprehensive data quality reporting
|
10 |
+
4. **Interactive Menu**: User-friendly menu system
|
11 |
+
5. **Preserved Original**: Your original function is still available
|
12 |
+
|
13 |
+
### How to Use:
|
14 |
+
|
15 |
+
#### Option 1: Original Function (unchanged)
|
16 |
+
|
17 |
+
```bash
|
18 |
+
python upload.py
|
19 |
+
# Choose option 1 when prompted
|
20 |
+
```
|
21 |
+
|
22 |
+
#### Option 2: Enhanced Interactive Mode
|
23 |
+
|
24 |
+
```bash
|
25 |
+
python upload.py
|
26 |
+
# Choose option 2 when prompted
|
27 |
+
```
|
28 |
+
|
29 |
+
#### Option 3: Demo Script
|
30 |
+
|
31 |
+
```bash
|
32 |
+
python demo_enhanced.py
|
33 |
+
```
|
34 |
+
|
35 |
+
### Setting Up AI Features:
|
36 |
+
|
37 |
+
#### For OpenAI (Recommended):
|
38 |
+
|
39 |
+
1. Get API key from: https://platform.openai.com/
|
40 |
+
2. Edit `upload.py`, uncomment lines in `setup_agent()` method:
|
41 |
+
```python
|
42 |
+
model = OpenAIServerModel(model_id="gpt-3.5-turbo", api_key="your-api-key-here")
|
43 |
+
self.agent = CodeAgent(tools=[PythonCodeTool(), DuckDuckGoSearchTool()], model=model)
|
44 |
+
```
|
45 |
+
|
46 |
+
#### For Ollama (Free, Local):
|
47 |
+
|
48 |
+
1. Install Ollama from: https://ollama.ai/
|
49 |
+
2. Run: `ollama pull llama2`
|
50 |
+
3. Start: `ollama serve`
|
51 |
+
4. Uncomment Ollama lines in `setup_agent()` method
|
52 |
+
|
53 |
+
#### For Hugging Face (Free, API):
|
54 |
+
|
55 |
+
1. Get token from: https://huggingface.co/settings/tokens
|
56 |
+
2. Set environment variable: `HF_TOKEN=your_token`
|
57 |
+
3. Uncomment HF lines in `setup_agent()` method
|
58 |
+
|
59 |
+
### Example AI Queries:
|
60 |
+
|
61 |
+
Once configured, you can ask:
|
62 |
+
|
63 |
+
- "What are the main trends in this data?"
|
64 |
+
- "Find any outliers or anomalies"
|
65 |
+
- "Suggest data quality improvements"
|
66 |
+
- "Perform correlation analysis"
|
67 |
+
- "Identify seasonal patterns"
|
68 |
+
- "Recommend preprocessing steps"
|
69 |
+
|
70 |
+
### Features Available Without AI:
|
71 |
+
|
72 |
+
Even without AI configuration, you get:
|
73 |
+
|
74 |
+
- ✅ Data loading and exploration (original functionality)
|
75 |
+
- ✅ Statistical summaries
|
76 |
+
- ✅ Data visualization (histograms, correlation heatmaps)
|
77 |
+
- ✅ Data quality analysis
|
78 |
+
- ✅ Missing value analysis
|
79 |
+
|
80 |
+
### File Structure:
|
81 |
+
|
82 |
+
- `upload.py` - Your enhanced main script
|
83 |
+
- `demo_enhanced.py` - Demonstration script
|
84 |
+
- `app.py` - Web interface (Gradio)
|
85 |
+
- `config.py` - Configuration file
|
86 |
+
- `requirements.txt` - Dependencies
|
87 |
+
|
88 |
+
### Quick Start:
|
89 |
+
|
90 |
+
1. **Test the script**: `python upload.py`
|
91 |
+
2. **Try enhanced mode**: Choose option 2
|
92 |
+
3. **Configure AI**: Edit `setup_agent()` method
|
93 |
+
4. **Ask AI questions**: Use menu option 4
|
94 |
+
|
95 |
+
π **Your original functionality is preserved - nothing is broken!**
|
__pycache__/upload.cpython-313.pyc
ADDED
Binary file (19.6 kB). View file
|
|
auto_demo.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Auto Demo - Run Enhanced Data Explorer
|
3 |
+
=====================================
|
4 |
+
|
5 |
+
This script automatically demonstrates the enhanced data explorer
|
6 |
+
"""
|
7 |
+
|
8 |
+
from upload import EnhancedDataExplorer, load_and_explore_data
|
9 |
+
import os
|
10 |
+
|
11 |
+
def auto_demo():
    """Run the enhanced data explorer end to end without user interaction.

    Uses the hard-coded OutSystems log CSV when it exists; otherwise writes a
    synthetic ``demo_data.csv`` into the current working directory and uses
    that instead. The explorer then loads the data, runs the quality
    analysis, attempts the visualizations and, when an AI agent is
    configured, sends a few sample queries. A status summary is printed at
    the end.

    Returns:
        None. All results are reported through ``print``.
    """
    print("π STARTING AUTO DEMO - ENHANCED DATA EXPLORER")
    print("=" * 50)

    # Prefer the real log export; fall back to generated data when absent.
    csv_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"

    if not os.path.exists(csv_path):
        print(f"❌ CSV file not found at: {csv_path}")
        print("π Using demo data instead...")

        # Imported lazily: only this fallback path needs pandas/numpy here.
        import pandas as pd
        import numpy as np

        # NOTE(review): newer pandas releases warn that the 'H' alias is
        # deprecated in favour of 'h' — confirm the pinned pandas version
        # before changing it.
        demo_data = pd.DataFrame({
            'timestamp': pd.date_range('2024-01-01', periods=1000, freq='H'),
            'response_time': np.random.exponential(0.5, 1000),
            'status_code': np.random.choice([200, 404, 500], 1000, p=[0.8, 0.15, 0.05]),
            'user_count': np.random.poisson(10, 1000),
            'error_rate': np.random.beta(2, 20, 1000),
            'server_id': np.random.choice(['server1', 'server2', 'server3'], 1000),
        })

        demo_csv_path = "demo_data.csv"
        demo_data.to_csv(demo_csv_path, index=False)
        print(f"✅ Demo data created: {demo_csv_path}")

        # Point the explorer at the generated file.
        csv_path = demo_csv_path

    print("\n🤖 Initializing Enhanced Data Explorer...")
    explorer = EnhancedDataExplorer(csv_path)

    if explorer.agent:
        print("✅ AI Agent: Configured and ready!")
        ai_status = "Available"
    else:
        print("⚠️ AI Agent: Not configured (non-AI features still available)")
        ai_status = "Not Available"

    print("\nπ DATA ANALYSIS DEMO")
    print("=" * 30)

    # Step 1: load data
    print("\n1️⃣ Loading and exploring data...")
    df = explorer.load_data()

    # Tracks whether the plots were really produced, so the final summary
    # does not claim "Created" after a caught visualization failure.
    visualizations_created = False

    if df is not None:
        print("✅ Data loaded successfully!")

        # Step 2: data quality analysis
        print("\n2️⃣ Analyzing data quality...")
        explorer.analyze_data_quality()

        # Step 3: visualizations are best-effort; a failure must not stop the demo.
        print("\n3️⃣ Creating visualizations...")
        try:
            explorer.create_visualizations()
            visualizations_created = True
        except Exception as e:
            print(f"⚠️ Visualization skipped: {e}")

        # Step 4: AI analysis (only when an agent is configured)
        if explorer.agent:
            print("\n4️⃣ Running AI analysis...")
            queries = [
                "Describe the main characteristics of this dataset",
                "What patterns do you see in the data?",
                "Are there any data quality issues I should be aware of?",
            ]

            for i, query in enumerate(queries, 1):
                print(f"\n🤖 AI Query {i}: {query}")
                try:
                    response = explorer.ai_analysis(query)
                    if response:
                        print("✅ AI analysis completed")
                    else:
                        print("⚠️ AI analysis returned no response")
                except Exception as e:
                    print(f"❌ AI analysis failed: {e}")
                    # One failure usually means the backend is down; stop asking.
                    break
        else:
            print("\n4️⃣ AI Analysis: Skipped (no AI model configured)")

    print("\nπ DEMO COMPLETE!")
    print("=" * 20)
    print(f"π Data Status: {'Loaded' if df is not None else 'Failed'}")
    print(f"🤖 AI Status: {ai_status}")
    print(f"π Visualizations: {'Created' if visualizations_created else 'Skipped'}")

    print("\n💡 To run interactively:")
    print(" python upload.py")
    print(" Choose option 2 for enhanced mode")
|
108 |
+
|
109 |
+
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    auto_demo()
|
demo_enhanced.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Demo script showing how to use the enhanced upload.py with SmoLagent AI
|
3 |
+
================================================================
|
4 |
+
|
5 |
+
This script demonstrates the new AI-powered data analysis capabilities
|
6 |
+
"""
|
7 |
+
|
8 |
+
from upload import EnhancedDataExplorer, load_and_explore_data
|
9 |
+
|
10 |
+
def demo_enhanced_features():
    """Walk through the non-AI features of ``EnhancedDataExplorer``.

    Builds an explorer with its default CSV path, then loads the data, runs
    the data quality analysis and creates the visualizations. Finally it
    lists sample AI queries and the steps needed to enable AI analysis;
    no query is actually sent.
    """
    print("π ENHANCED DATA EXPLORER DEMO")
    print("=" * 50)

    data_explorer = EnhancedDataExplorer()

    print("\n1. Loading data...")
    data_explorer.load_data()

    print("\n2. Analyzing data quality...")
    data_explorer.analyze_data_quality()

    print("\n3. Creating visualizations...")
    data_explorer.create_visualizations()

    print("\n4. AI Analysis examples (requires model configuration):")

    # Printed only as suggestions for the interactive mode.
    sample_questions = (
        "What are the main patterns in this dataset?",
        "Identify any data quality issues",
        "Suggest preprocessing steps",
        "Find correlations between variables",
        "Detect outliers and anomalies",
    )
    for number, question in enumerate(sample_questions, start=1):
        print(f" {number}. {question}")

    setup_hints = (
        "\n💡 To enable AI analysis:",
        " 1. Get an API key (OpenAI, Hugging Face, etc.)",
        " 2. Uncomment the appropriate lines in setup_agent() method",
        " 3. Run the interactive menu with option 2",
    )
    for hint in setup_hints:
        print(hint)
|
44 |
+
|
45 |
+
def demo_original_function():
    """Run the preserved ``load_and_explore_data`` helper and report its result.

    Prints the shape of the returned frame, or 'Failed to load' when the
    helper returned None.
    """
    print("π ORIGINAL FUNCTION DEMO")
    print("=" * 30)

    frame = load_and_explore_data()
    outcome = 'Failed to load' if frame is None else frame.shape
    print(f"\n✅ Original function completed. Data shape: {outcome}")
|
52 |
+
|
53 |
+
if __name__ == "__main__":
    # Show the menu, then run whichever demo(s) the user selected.
    for menu_line in (
        "Choose demo mode:",
        "1. Enhanced features demo",
        "2. Original function demo",
        "3. Both",
    ):
        print(menu_line)

    selection = input("Enter choice (1-3): ").strip()

    # "2" and "3" both start with the original-function demo.
    if selection in ("2", "3"):
        demo_original_function()
    if selection == "3":
        print("\n" + "=" * 60 + "\n")
    if selection not in ("1", "2", "3"):
        print("Invalid choice. Running enhanced demo...")
    # Everything except a plain "2" ends with the enhanced demo.
    if selection != "2":
        demo_enhanced_features()
|
fixed_upload.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Fixed SmoLagent Data Analysis - Working Version
|
4 |
+
"""
|
5 |
+
import pandas as pd
|
6 |
+
from smolagents import CodeAgent, DuckDuckGoSearchTool
|
7 |
+
import warnings
|
8 |
+
warnings.filterwarnings('ignore')
|
9 |
+
|
10 |
+
# Default location of the OutSystems log export analysed by FixedDataExplorer.
csv_file_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"


class FixedDataExplorer:
    """Explore an OutSystems log CSV, optionally with a smolagents ``CodeAgent``.

    Instance state (set in ``__init__``):
        csv_path: path of the CSV file to read.
        df: the loaded ``pandas.DataFrame``, or None when loading failed.
        agent: the configured ``CodeAgent``, or None when no backend worked.
    """

    def __init__(self, csv_path=csv_file_path):
        """Store the path, then load the data and try to configure an agent.

        Args:
            csv_path: CSV file to analyse; defaults to ``csv_file_path``.
        """
        self.csv_path = csv_path
        self.df = None
        self.agent = None
        self.load_data()
        self.setup_agent()

    def load_data(self):
        """Read ``self.csv_path`` into ``self.df``.

        Returns:
            bool: True on success; False when reading raised (the error is
            printed and ``self.df`` is left unchanged).
        """
        try:
            self.df = pd.read_csv(self.csv_path)
        except Exception as e:
            print(f"❌ Data loading failed: {e}")
            return False
        print(f"✅ Data loaded: {self.df.shape[0]} rows, {self.df.shape[1]} columns")
        return True

    def setup_agent(self):
        """Try the model backends in order and keep the first agent that builds.

        Order: Ollama, OpenAI (only when ``OPENAI_API_KEY`` is set), then a
        small Transformers model. On success ``self.agent`` is set; when every
        backend fails it stays None and a hint is printed. This method never
        raises: setup is best-effort so the non-AI features stay usable.
        """
        try:
            print("🤖 Setting up SmoLagent...")

            # Option 1: Ollama (if running locally).
            # ``except Exception`` rather than a bare ``except`` so that
            # Ctrl-C / SystemExit are not swallowed, and the reason is shown.
            try:
                from smolagents import OllamaModel
                model = OllamaModel(model_id="llama2")
                self.agent = CodeAgent(
                    tools=[DuckDuckGoSearchTool()],
                    model=model,
                )
                print("✅ SmoLagent configured with Ollama")
                return
            except Exception as e:
                print(f"ℹ️ Ollama backend unavailable: {e}")

            # Option 2: OpenAI (requires API key in the environment).
            try:
                from smolagents import OpenAIModel
                import os
                if os.getenv('OPENAI_API_KEY'):
                    model = OpenAIModel(model_id="gpt-3.5-turbo")
                    self.agent = CodeAgent(
                        tools=[DuckDuckGoSearchTool()],
                        model=model,
                    )
                    print("✅ SmoLagent configured with OpenAI")
                    return
            except Exception as e:
                print(f"ℹ️ OpenAI backend unavailable: {e}")

            # Option 3: smaller Hugging Face model (lighter download).
            # NOTE(review): whether DialoGPT-small can drive a CodeAgent is
            # not established by this file — confirm before relying on it.
            try:
                from smolagents import TransformersModel
                model = TransformersModel(model_id="microsoft/DialoGPT-small")
                self.agent = CodeAgent(
                    tools=[DuckDuckGoSearchTool()],
                    model=model,
                )
                print("✅ SmoLagent configured with small Transformers model")
                return
            except Exception as e:
                print(f"❌ Model setup failed: {e}")

            print("❌ No AI model could be configured")
            print("💡 You can still use basic data analysis features")

        except Exception as e:
            print(f"❌ Agent setup failed: {e}")

    def basic_analysis(self):
        """Print shape, columns, log-level, error-module and missing-value stats.

        Works without an AI agent. Prints a notice and returns early when no
        data is loaded. The log-level and error sections run only when the
        ``LogLevel`` (and, for modules, ``Module``) columns exist.
        """
        if self.df is None:
            print("❌ No data loaded")
            return

        print("\nπ BASIC DATA ANALYSIS")
        print("=" * 40)

        row_count = len(self.df)
        print(f"π Dataset: {self.df.shape[0]} rows, {self.df.shape[1]} columns")
        print(f"π Columns: {list(self.df.columns)}")

        if 'LogLevel' in self.df.columns:
            # Log level distribution
            print("\nπ Log Level Distribution:")
            for level, count in self.df['LogLevel'].value_counts().items():
                pct = count / row_count * 100
                print(f" {level}: {count} ({pct:.1f}%)")

            # Modules that produce the most 'Error' rows
            errors = self.df[self.df['LogLevel'] == 'Error']
            if not errors.empty and 'Module' in errors.columns:
                print("\n🚨 Top Error Modules:")
                for module, count in errors['Module'].value_counts().head(3).items():
                    print(f" • {module}: {count} errors")

        # Missing data: only columns with at least one null are listed.
        print("\n❓ Missing Data:")
        for col, count in self.df.isnull().sum().items():
            if count > 0:
                pct = count / row_count * 100
                print(f" • {col}: {count} ({pct:.1f}%)")

    def ai_analysis(self, query):
        """Send ``query`` plus a short data summary to the agent.

        Args:
            query: the user's natural-language question.

        Returns:
            Whatever ``self.agent.run`` returns, or None when no agent or no
            data is available or when the agent call raised (the error is
            printed).
        """
        if self.agent is None:
            print("❌ AI agent not available. Please configure a model first.")
            return None

        if self.df is None:
            print("❌ No data loaded")
            return None

        try:
            # Give the model the shape, column names and two sample rows.
            data_summary = f"""
            Dataset: {self.df.shape[0]} rows, {self.df.shape[1]} columns
            Columns: {list(self.df.columns)}
            Sample data: {self.df.head(2).to_string()}
            """

            full_query = f"""
            Analyze this OutSystems log data:
            {data_summary}

            User question: {query}
            """

            print(f"🤖 AI analyzing: {query}")
            response = self.agent.run(full_query)
            print(f"🤖 AI Response: {response}")
            return response

        except Exception as e:
            print(f"❌ AI analysis failed: {e}")
            return None
|
154 |
+
|
155 |
+
def main():
    """Build a ``FixedDataExplorer``, print the basic analysis and return it.

    Also tells the user whether AI features are available.

    Returns:
        FixedDataExplorer: the explorer that was created.
    """
    print("π FIXED SMOLAGENT DATA ANALYZER")
    print("=" * 50)

    data_explorer = FixedDataExplorer()
    data_explorer.basic_analysis()

    # Only advertise the AI call when an agent was actually configured.
    if not data_explorer.agent:
        print("\n💡 AI features not available - need model configuration")
    else:
        print("\n🤖 AI FEATURES AVAILABLE!")
        print(" Try: explorer.ai_analysis('What are the main error types?')")

    return data_explorer
|
174 |
+
|
175 |
+
# Script entry point: the explorer is kept in a module-level name when the
# file is run directly.
if __name__ == "__main__":
    explorer = main()
|
quick_ai_demo.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Quick AI Demo - Working SmoLagent without large model downloads
|
4 |
+
"""
|
5 |
+
import pandas as pd
|
6 |
+
from smolagents import CodeAgent, DuckDuckGoSearchTool
|
7 |
+
import warnings
|
8 |
+
warnings.filterwarnings('ignore')
|
9 |
+
|
10 |
+
def quick_demo(csv_file_path="C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"):
    """Load a log CSV and print a basic, model-free analysis.

    Args:
        csv_file_path: CSV file to analyse. Defaults to the original
            hard-coded OutSystems export, so existing no-argument calls
            behave as before.

    Returns:
        pandas.DataFrame | None: the loaded frame, or None when the file
        could not be read (the error is printed).
    """
    print("π QUICK AI DEMO - No Downloads Required")
    print("=" * 50)

    # Only the read is guarded, so analysis problems are not misreported as
    # load failures.
    try:
        print("π Loading CSV data...")
        df = pd.read_csv(csv_file_path)
    except Exception as e:
        print(f"❌ Error loading data: {e}")
        return None

    total_rows = len(df)
    print(f"✅ Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")
    print(f"π Columns: {list(df.columns)}")

    print("\nπ BASIC DATA ANALYSIS:")
    print("-" * 30)

    if 'LogLevel' in df.columns:
        error_df = df[df['LogLevel'] == 'Error']
        info_df = df[df['LogLevel'] == 'Info']
        # A header-only file has zero rows; avoid dividing by zero.
        error_rate = len(error_df) / total_rows * 100 if total_rows else 0.0

        print(f"🚨 Total Error entries: {len(error_df)}")
        print(f"ℹ️ Total Info entries: {len(info_df)}")
        print(f"π Error rate: {error_rate:.1f}%")

        # Top modules with errors
        if not error_df.empty and 'Module' in error_df.columns:
            top_error_modules = error_df['Module'].value_counts().head(3)
            print("\nπ Top 3 modules with errors:")
            for module, count in top_error_modules.items():
                print(f" • {module}: {count} errors")
    else:
        print("⚠️ No 'LogLevel' column found - skipping error analysis")

    # Missing data: only columns with at least one null are listed, which
    # also guarantees total_rows > 0 inside the loop body.
    print("\n❓ Missing data summary:")
    for col, missing_count in df.isnull().sum().items():
        if missing_count > 0:
            print(f" • {col}: {missing_count} missing ({missing_count/total_rows*100:.1f}%)")

    print("\n" + "=" * 50)
    print("✅ BASIC ANALYSIS COMPLETE!")
    print("💡 This shows your data is loading correctly.")
    print("🤖 AI features will work once model downloads complete.")
    print("=" * 50)

    return df
|
61 |
+
|
62 |
+
def test_simple_agent():
    """Check that the search tool and a model-less ``CodeAgent`` can be built.

    Failure to build the agent is reported as expected (it needs a model);
    failure to build the tool is reported as an error. Nothing is returned.
    """
    print("\n🧪 Testing Simple Agent Creation...")

    try:
        # The tool does not need a model, so it is created first.
        ddg_tool = DuckDuckGoSearchTool()
        print("✅ DuckDuckGo search tool created successfully")

        # Agent creation may fail without a model; that is tolerated.
        try:
            CodeAgent(tools=[ddg_tool])
        except Exception as agent_error:
            print(f"ℹ️ Agent needs model: {agent_error}")
            print("💡 This is expected - agent will work once model is ready")
        else:
            print("✅ Agent created (basic setup)")

    except Exception as tool_error:
        print(f"❌ Tool creation failed: {tool_error}")
|
81 |
+
|
82 |
+
if __name__ == "__main__":
    # Basic analysis first; the frame is kept in a module-level name.
    df = quick_demo()

    # Then probe whether the agent tooling can be instantiated.
    test_simple_agent()

    if df is not None:
        entry_count = df.shape[0]
        print("\nπ SUCCESS! Your data analysis setup is working!")
        print(f"π Dataset ready: {entry_count} OutSystems log entries")
        print("🤖 AI features will be available once model download completes")
|
requirements.txt
CHANGED
@@ -7,5 +7,8 @@ seaborn>=0.12.0
|
|
7 |
plotly>=5.15.0
|
8 |
Pillow>=10.0.0
|
9 |
scikit-learn>=1.3.0
|
10 |
-
|
|
|
11 |
requests>=2.31.0
|
|
|
|
|
|
7 |
plotly>=5.15.0
|
8 |
Pillow>=10.0.0
|
9 |
scikit-learn>=1.3.0
|
10 |
+
transformers>=4.30.0
|
11 |
+
torch>=2.0.0
|
12 |
requests>=2.31.0
|
13 |
+
huggingface_hub>=0.16.0
|
14 |
+
duckduckgo-search>=3.8.0
|
setup_free_ai.py
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Free AI Setup Helper
|
3 |
+
===================
|
4 |
+
|
5 |
+
This script helps you set up completely free AI models for data analysis.
|
6 |
+
"""
|
7 |
+
|
8 |
+
import os
|
9 |
+
import subprocess
|
10 |
+
import sys
|
11 |
+
|
12 |
+
def setup_free_huggingface():
    """Interactively collect a Hugging Face token and verify it.

    Prints the sign-up steps, prompts for a token, stores it in
    ``os.environ['HF_TOKEN']`` (this process only) and checks it against the
    Hugging Face ``whoami`` endpoint.

    Returns:
        bool: True when the endpoint answered with HTTP 200; False when no
        token was entered, the check failed, or the request raised.
    """
    print("π SETTING UP FREE HUGGING FACE AI")
    print("=" * 40)

    print("π Steps to get free Hugging Face access:")
    print("1. Go to: https://huggingface.co/join")
    print("2. Create a free account")
    print("3. Go to: https://huggingface.co/settings/tokens")
    print("4. Create a new token (read access is enough)")
    print("5. Copy the token")

    token = input("\nπ Paste your Hugging Face token here (or press Enter to skip): ").strip()

    if not token:
        print("⚠️ No token provided. Some models may not work without authentication.")
        return False

    # Visible to this process and its children only; not persisted.
    os.environ['HF_TOKEN'] = token
    print("✅ Token set for current session!")

    try:
        import requests
        headers = {"Authorization": f"Bearer {token}"}
        # A timeout keeps the helper from hanging forever on a stalled
        # connection.
        response = requests.get(
            "https://huggingface.co/api/whoami",
            headers=headers,
            timeout=10,
        )
        if response.status_code == 200:
            user_info = response.json()
            print(f"✅ Token verified! Hello, {user_info.get('name', 'User')}!")
            return True
        print("⚠️ Token verification failed. Please check your token.")
        return False
    except Exception as e:
        print(f"⚠️ Could not verify token: {e}")
        return False
|
49 |
+
|
50 |
+
def setup_ollama_quick():
    """Print the Ollama quick-start steps and optionally open its website.

    The site is opened with the standard-library ``webbrowser`` module, and
    success is reported only when a browser was actually launched.
    """
    import webbrowser

    ollama_url = "https://ollama.ai/"

    print("\nπ SETTING UP FREE LOCAL OLLAMA AI")
    print("=" * 40)

    print("π Quick Ollama setup:")
    print("1. Download from: https://ollama.ai/")
    print("2. Install the application")
    print("3. Open terminal and run: ollama pull llama2")
    print("4. Start server: ollama serve")
    print("5. Your script will automatically detect it!")

    choice = input("\n❓ Open Ollama website now? (y/n): ").strip().lower()
    if choice != 'y':
        return

    try:
        opened = webbrowser.open(ollama_url)
        failure_reason = "no usable browser found"
    except Exception as e:
        opened = False
        failure_reason = e

    if opened:
        print("✅ Ollama website opened!")
    else:
        print(f"⚠️ Could not open website: {failure_reason}")
        print("Please manually go to: https://ollama.ai/")
|
75 |
+
|
76 |
+
def test_current_setup():
    """Report whether ``EnhancedDataExplorer`` currently has an AI agent.

    Returns:
        bool: True when the explorer's ``agent`` is not None; False when it
        is None or when importing/creating the explorer raised.
    """
    print("\n🧪 TESTING CURRENT AI SETUP")
    print("=" * 30)

    try:
        # Imported here so a broken ``upload`` module is reported, not fatal.
        from upload import EnhancedDataExplorer
        probe = EnhancedDataExplorer()

        if probe.agent is None:
            print("❌ No AI model configured yet.")
            return False

        print("✅ AI model is configured and ready!")
        print("π You can now use AI analysis in your data explorer!")
        return True
    except Exception as setup_error:
        print(f"❌ Error testing setup: {setup_error}")
        return False
|
95 |
+
|
96 |
+
def main():
    """Show the free-AI setup menu and run the selected helper.

    Choices 1-3 call the matching setup/test function, 4 skips AI setup and
    anything else is reported as invalid. The "next steps" hints are printed
    in every case.
    """
    for banner_line in (
        "🤖 FREE AI MODELS SETUP",
        "=" * 25,
        "Choose your free AI option:",
        "1. π Hugging Face (cloud-based, free account needed)",
        "2. π Ollama (local, completely free, more private)",
        "3. 🧪 Test current setup",
        "4. ❌ Skip AI setup",
    ):
        print(banner_line)

    selection = input("\nEnter your choice (1-4): ").strip()

    # Menu entries that map straight onto a helper function.
    handlers = {
        "1": setup_free_huggingface,
        "2": setup_ollama_quick,
        "3": test_current_setup,
    }
    handler = handlers.get(selection)
    if handler is not None:
        handler()
    elif selection == "4":
        print("✅ You can still use all non-AI features!")
    else:
        print("❌ Invalid choice. Please run the script again.")

    for next_step in (
        "\nπ Next steps:",
        "1. Run: python upload.py",
        "2. Choose option 2 (Enhanced mode)",
        "3. Try AI analysis with menu option 4",
    ):
        print(next_step)
|
122 |
+
|
123 |
+
# Script entry point: show the setup menu only when run directly.
if __name__ == "__main__":
    main()
|
setup_ollama.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Ollama Setup Helper for Windows (SmoLagents Native)
|
3 |
+
===================================================
|
4 |
+
|
5 |
+
This script helps you set up Ollama for free AI analysis using SmoLagents' native Ollama support
|
6 |
+
"""
|
7 |
+
|
8 |
+
import subprocess
|
9 |
+
import time
|
10 |
+
import requests
|
11 |
+
import os
|
12 |
+
|
13 |
+
def check_ollama_installed():
    """Report whether the ``ollama`` command can be executed.

    Runs ``ollama --version`` with a 10 second timeout and prints the
    outcome.

    Returns:
        bool: True when the command exits with status 0; False otherwise,
        including when the executable is missing or the call times out.
    """
    version_command = ['ollama', '--version']
    try:
        probe = subprocess.run(version_command, capture_output=True,
                               text=True, timeout=10)
    except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError):
        print("❌ Ollama is not installed")
        return False

    if probe.returncode != 0:
        print("❌ Ollama is not installed or not working properly")
        return False

    print(f"✅ Ollama is installed: {probe.stdout.strip()}")
    return True
|
27 |
+
|
28 |
+
def check_ollama_running():
    """Report whether something answers HTTP 200 on ``localhost:11434``.

    Returns:
        bool: True for a 200 response; False for any other status or when
        the request raised a ``requests`` exception.
    """
    try:
        reply = requests.get("http://localhost:11434", timeout=5)
    except requests.exceptions.RequestException:
        print("❌ Ollama server is not running")
        return False

    server_ok = reply.status_code == 200
    if server_ok:
        print("✅ Ollama server is running")
    else:
        print("⚠️ Ollama server is not responding properly")
    return server_ok
|
41 |
+
|
42 |
+
def start_ollama_server():
    """Launch ``ollama serve`` in the background and check that it answers.

    The child's output is discarded. The original piped stdout/stderr
    without ever reading them, which can block a long-running server once
    the pipe buffer fills up.

    Returns:
        bool: True when ``check_ollama_running()`` succeeds about three
        seconds after launch; False otherwise or when launching raised.
    """
    try:
        print("π Starting Ollama server...")
        # Fire-and-forget: the server keeps running after this function
        # returns, so its output must not go to an unread pipe.
        subprocess.Popen(['ollama', 'serve'],
                         stdout=subprocess.DEVNULL,
                         stderr=subprocess.DEVNULL)

        # Give the server a moment to bind its port before probing it.
        time.sleep(3)

        if check_ollama_running():
            print("✅ Ollama server started successfully")
            return True

        print("❌ Failed to start Ollama server")
        return False
    except Exception as e:
        print(f"❌ Error starting Ollama server: {e}")
        return False
|
63 |
+
|
64 |
+
def download_model(model_name="llama2"):
    """Pull ``model_name`` with ``ollama pull`` (10 minute timeout).

    Args:
        model_name: name of the Ollama model to download.

    Returns:
        bool: True when the command exits with status 0; False on a
        non-zero exit, a timeout, or any other error (each is printed).
    """
    pull_command = ['ollama', 'pull', model_name]
    try:
        print(f"📥 Downloading {model_name} model (this may take a while)...")
        pull = subprocess.run(pull_command, capture_output=True,
                              text=True, timeout=600)

        if pull.returncode != 0:
            print(f"❌ Failed to download {model_name} model")
            print(f"Error: {pull.stderr}")
            return False

        print(f"✅ {model_name} model downloaded successfully")
        return True
    except subprocess.TimeoutExpired:
        print(f"⏰ Download timeout for {model_name} model")
        return False
    except Exception as pull_error:
        print(f"❌ Error downloading {model_name} model: {pull_error}")
        return False
|
84 |
+
|
85 |
+
def list_available_models():
    """Print the output of ``ollama list``.

    Returns:
        bool: True when the command exits with status 0; False on a
        non-zero exit or when running it raised.
    """
    try:
        listing = subprocess.run(['ollama', 'list'], capture_output=True,
                                 text=True, timeout=10)
        if listing.returncode != 0:
            print("❌ Failed to list models")
            return False

        print("π Available models:")
        print(listing.stdout)
        return True
    except Exception as list_error:
        print(f"❌ Error listing models: {list_error}")
        return False
|
100 |
+
|
101 |
+
def test_ollama_chat(model_name="llama2", timeout=30):
    """Send one short prompt to a model through ``ollama run``.

    Args:
        model_name: Name of the Ollama model to query.
        timeout: Seconds to wait for the reply. Defaults to 30, the value
            that used to be hard-coded.

    Returns:
        True if ``ollama run`` exited with status 0 (the reply is printed).
        False on a non-zero exit status, on timeout, or on any other error.
    """
    try:
        print(f"🧪 Testing {model_name} model...")
        test_prompt = "Hello, can you help me analyze data? Just say yes or no."

        result = subprocess.run(
            ['ollama', 'run', model_name, test_prompt],
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        if result.returncode == 0:
            print("✅ Ollama model test successful!")
            print(f"Response: {result.stdout.strip()}")
            return True

        print("❌ Ollama model test failed")
        print(f"Error: {result.stderr}")
        return False
    except subprocess.TimeoutExpired:
        print("⏰ Ollama model test timeout")
        return False
    except Exception as e:
        # Best-effort helper: any failure is reported and turned into False.
        print(f"❌ Error testing Ollama model: {e}")
        return False
|
124 |
+
|
125 |
+
def setup_ollama():
    """Run the complete Ollama setup: install check, server, model, smoke test.

    Steps, each of which aborts the setup when it fails:
      1. Check that Ollama is installed; otherwise print install instructions.
      2. Make sure the server is running, starting it if necessary.
      3. Make sure a model is available: llama2 if already listed or
         downloadable, otherwise phi.
      4. Send a test prompt to the model chosen in step 3.

    Returns:
        True if every step succeeded, False otherwise.
    """
    print("🤖 OLLAMA SETUP FOR FREE AI ANALYSIS")
    print("=" * 40)

    # Step 1: Check installation
    if not check_ollama_installed():
        print("\n📋 Installation Instructions:")
        print("1. Go to https://ollama.ai/")
        print("2. Download the Windows installer")
        print("3. Run the installer")
        print("4. Restart your terminal/command prompt")
        print("5. Run this script again")
        return False

    # Step 2: Start server
    if not check_ollama_running() and not start_ollama_server():
        print("\n🔧 Manual server start:")
        print("Open a new terminal and run: ollama serve")
        return False

    # Step 3: Download model
    print("\n📋 Checking available models...")
    list_available_models()

    # Remember which model actually ended up available. The listing below is
    # taken before any download, so it must not be consulted again afterwards
    # to decide which model to test.
    listing = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
    if 'llama2' in listing.stdout:
        model_to_test = "llama2"
    else:
        print("\n📥 Downloading llama2 model...")
        if download_model("llama2"):
            model_to_test = "llama2"
        else:
            # Try a smaller model if llama2 fails
            print("🔄 Trying smaller model (phi)...")
            if not download_model("phi"):
                print("❌ Failed to download any model")
                return False
            model_to_test = "phi"

    # Step 4: Test the setup
    print("\n🧪 Testing setup...")
    if test_ollama_chat(model_to_test):
        print("\n🎉 OLLAMA SETUP COMPLETE!")
        print("You can now use AI analysis in your upload.py script")
        return True

    print("❌ Setup incomplete - model test failed")
    return False
|
172 |
+
|
173 |
+
if __name__ == "__main__":
    # Run the full setup, then tell the user what to do next.
    success = setup_ollama()

    if success:
        print("\n🚀 Next Steps:")
        print("1. Run: python upload.py")
        print("2. Choose option 2 (Enhanced interactive mode)")
        print("3. Use menu option 4 for AI analysis")
        print("\n💡 Your script is already configured for Ollama!")
    else:
        print("\n🔧 Setup incomplete. Please follow the instructions above.")
|
test_basic_agent.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Simple SmoLagent Test
|
3 |
+
====================
|
4 |
+
|
5 |
+
Test the basic SmoLagent setup with just CodeAgent and DuckDuckGoSearchTool
|
6 |
+
"""
|
7 |
+
|
8 |
+
from smolagents import CodeAgent, DuckDuckGoSearchTool
|
9 |
+
|
10 |
+
def test_basic_agent():
    """Try to build a ``CodeAgent`` that has only the DuckDuckGo search tool.

    No language model is passed to the agent, so this only checks that the
    agent object can be constructed and lists the tools it carries.

    Returns:
        The created agent, or None if construction raised.
    """
    print("🤖 TESTING BASIC SMOLAGENT SETUP")
    print("=" * 35)

    try:
        # Create agent with just tools (no language model)
        # NOTE(review): CodeAgent may require a `model` argument, in which
        # case this call raises and the function reports failure - confirm
        # against the installed smolagents version.
        agent = CodeAgent(tools=[DuckDuckGoSearchTool()])
        print("✅ Basic SmoLagent created successfully!")
        print(f"📋 Agent has {len(agent.tools)} tool(s) available")

        # List available tools. The agent may expose them as a name -> tool
        # mapping; iterate the tool objects, not the keys, so that the class
        # names printed are those of the tools.
        tools = agent.tools
        tool_objects = tools.values() if isinstance(tools, dict) else tools
        for i, tool in enumerate(tool_objects, 1):
            print(f" {i}. {tool.__class__.__name__}")

        # Test basic functionality
        print("\n🧪 Testing agent functionality...")

        # Note: Without a language model, we can't run complex queries
        # But we can verify the agent structure is correct
        print("✅ Agent structure is valid")
        print("⚠️ Note: Full AI analysis requires a language model")

        return agent

    except Exception as e:
        print(f"❌ Basic agent setup failed: {e}")
        return None
|
38 |
+
|
39 |
+
def test_with_search():
    """Check that a ``DuckDuckGoSearchTool`` can be instantiated.

    No search is actually performed.

    Returns:
        The created search tool, or None if construction raised.
    """
    print("\n🔍 TESTING SEARCH FUNCTIONALITY")
    print("=" * 30)

    try:
        # Create search tool directly
        search_tool = DuckDuckGoSearchTool()
        print("✅ DuckDuckGo search tool created")

        # Test search (if possible)
        print("🔍 Search tool ready for use")
        return search_tool

    except Exception as e:
        print(f"❌ Search tool failed: {e}")
        return None
|
56 |
+
|
57 |
+
def main():
    """Run both setup checks and print a summary of the results."""
    print("🚀 SMOLAGENT BASIC SETUP TEST")
    print("=" * 30)

    # Test 1: Basic agent
    agent = test_basic_agent()

    # Test 2: Search tool
    search_tool = test_with_search()

    # Summary
    print("\n📊 TEST SUMMARY")
    print("=" * 15)
    print(f"Basic Agent: {'✅ Working' if agent else '❌ Failed'}")
    print(f"Search Tool: {'✅ Working' if search_tool else '❌ Failed'}")

    if agent:
        print("\n💡 Your basic SmoLagent is ready!")
        print(" To add AI capabilities, configure a language model")
        print(" Options: Ollama (free), OpenAI (paid), Hugging Face (free)")
    else:
        print("\n❌ Basic setup failed. Check SmoLagents installation.")

    print("\n🚀 Next: Run 'python upload.py' to use enhanced data analysis!")
|
82 |
+
|
83 |
+
if __name__ == "__main__":
    # Script entry point: run the basic setup checks.
    main()
|
test_free_ai.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Simple Free AI Test
|
3 |
+
==================
|
4 |
+
|
5 |
+
Test the free AI models in your upload.py script
|
6 |
+
"""
|
7 |
+
|
8 |
+
def test_free_models():
    """Check whether ``upload.EnhancedDataExplorer`` managed to set up an agent.

    If an agent is available, a small random DataFrame is attached to the
    explorer and one AI analysis request is sent. Every failure is reported
    on stdout; nothing is raised and nothing is returned.
    """
    print("🔍 Testing free AI models...")

    try:
        from upload import EnhancedDataExplorer

        print("📊 Creating data explorer...")
        explorer = EnhancedDataExplorer()

        if explorer.agent is None:
            print("❌ No free AI models available.")
            print("💡 Try running: python setup_free_ai.py")
            return

        print("✅ Free AI model configured successfully!")
        print("🎉 You can now use AI-powered data analysis!")

        # Test with a simple query
        print("\n🧪 Testing AI with a simple query...")

        # Simulate having some data
        import pandas as pd
        import numpy as np
        explorer.df = pd.DataFrame({
            'A': np.random.randn(100),
            'B': np.random.randn(100),
            'C': ['category1', 'category2'] * 50,
        })

        # Test AI analysis
        try:
            response = explorer.ai_analysis("Describe this test dataset briefly")
            if response:
                print("✅ AI analysis test successful!")
            else:
                print("⚠️ AI analysis returned no response")
        except Exception as e:
            print(f"⚠️ AI analysis test failed: {e}")

    except Exception as e:
        print(f"❌ Error testing free models: {e}")
|
52 |
+
|
53 |
+
if __name__ == "__main__":
    # Script entry point: run the free-model availability check.
    test_free_models()
|
test_smolagent.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Simple test of SmoLagent functionality
|
4 |
+
"""
|
5 |
+
import pandas as pd
|
6 |
+
from smolagents import CodeAgent, DuckDuckGoSearchTool
|
7 |
+
import warnings
|
8 |
+
warnings.filterwarnings('ignore')
|
9 |
+
|
10 |
+
def test_basic_smolagent():
    """Try three ways of building a ``CodeAgent`` and report each outcome.

    The attempts are independent of one another: without a model, with a
    Hugging Face API model, and with a local Ollama model. Each failure is
    printed and the next attempt still runs. Nothing is returned.
    """
    print("🧪 Testing SmoLagent Setup...")

    try:
        # Test 1: Import check (the module-level imports already succeeded)
        print("✅ Imports successful")

        # Test 2: Create agent without model (should fail gracefully)
        try:
            agent = CodeAgent(tools=[DuckDuckGoSearchTool()])
            print("✅ Agent created without model")
        except Exception as e:
            print(f"❌ Agent creation failed: {e}")
            print("💡 This is expected - CodeAgent needs a model parameter")

        # Test 3: Try with a simple model setup
        try:
            # NOTE(review): confirm the installed smolagents release still
            # exports HfApiModel under this name.
            from smolagents import HfApiModel
            print("🔄 Trying HuggingFace model...")
            model = HfApiModel(model_id="microsoft/DialoGPT-medium")
            agent = CodeAgent(
                tools=[DuckDuckGoSearchTool()],
                model=model,
            )
            print("✅ Agent created successfully with HuggingFace model!")

            # Test a simple query
            response = agent.run("What is 2 + 2?")
            print(f"🤖 Agent response: {response}")

        except Exception as e:
            print(f"❌ HuggingFace model failed: {e}")

        # Test 4: Try Ollama
        try:
            # NOTE(review): confirm smolagents exports OllamaModel; if it
            # does not, this import fails and the attempt is reported below.
            from smolagents import OllamaModel
            print("🔄 Trying Ollama model...")
            model = OllamaModel(model_id="llama2", base_url="http://localhost:11434")
            agent = CodeAgent(
                tools=[DuckDuckGoSearchTool()],
                model=model,
            )
            print("✅ Agent created successfully with Ollama!")

            # Test a simple query
            response = agent.run("What is 2 + 2?")
            print(f"🤖 Agent response: {response}")

        except Exception as e:
            print(f"❌ Ollama model failed: {e}")
            print("💡 Make sure Ollama is running with: ollama serve")

    except Exception as e:
        print(f"❌ Test failed: {e}")
|
65 |
+
|
66 |
+
def test_with_data(
    csv_file_path="C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv",
):
    """Load a log CSV and report its size, columns and number of error rows.

    Args:
        csv_file_path: CSV file to load. Defaults to the path that used to
            be hard-coded in the function body. The file is expected to have
            a ``LogLevel`` column.

    Returns:
        The loaded DataFrame, or None if loading or the ``LogLevel`` lookup
        raised (the error is printed).
    """
    try:
        # Load data
        print("\n📊 Loading CSV data...")
        df = pd.read_csv(csv_file_path)
        print(f"✅ Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")
        print(f"📋 Columns: {list(df.columns)}")

        # Basic analysis: count the rows whose LogLevel is exactly 'Error'
        error_count = int((df['LogLevel'] == 'Error').sum())
        print(f"🚨 Error entries: {error_count}")

        return df

    except Exception as e:
        print(f"❌ Data loading failed: {e}")
        return None
|
86 |
+
|
87 |
+
if __name__ == "__main__":
    # Script entry point: run the agent checks, then the CSV check.
    print("=" * 50)
    print("🤖 SMOLAGENT TEST SUITE")
    print("=" * 50)

    # Test basic functionality
    test_basic_smolagent()

    # Test with data
    df = test_with_data()

    print("\n" + "=" * 50)
    print("✅ Test completed!")
    print("=" * 50)
|
upload.py
CHANGED
@@ -1,11 +1,320 @@
|
|
1 |
import pandas as pd
|
2 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
# Replace 'your_file.csv' with your CSV file path
|
5 |
csv_file_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
def load_and_explore_data():
|
8 |
-
"""Load and explore the CSV data"""
|
9 |
try:
|
10 |
# Check if file exists
|
11 |
if not os.path.exists(csv_file_path):
|
@@ -37,4 +346,20 @@ def load_and_explore_data():
|
|
37 |
return None
|
38 |
|
39 |
if __name__ == "__main__":
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import pandas as pd
|
2 |
import os
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import seaborn as sns
|
6 |
+
from smolagents import CodeAgent, DuckDuckGoSearchTool
|
7 |
+
import warnings
|
8 |
+
warnings.filterwarnings('ignore')
|
9 |
|
10 |
# Replace 'your_file.csv' with your CSV file path
|
11 |
csv_file_path = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"
|
12 |
|
13 |
+
class EnhancedDataExplorer:
    """Explore a CSV file with pandas, plots and an optional SmoLagent agent.

    Attributes:
        csv_path: Path of the CSV file read by ``load_data``.
        df: The loaded DataFrame, or None until ``load_data`` succeeds.
        agent: The configured ``CodeAgent``, or None when setup failed.
    """

    def __init__(self, csv_path=None):
        """Store the CSV path and try to configure the AI agent.

        Args:
            csv_path: CSV file to explore. When omitted, the module-level
                ``csv_file_path`` is used.
        """
        # Resolved at call time so that a later change to the module-level
        # csv_file_path is honoured (a default argument would be frozen when
        # the class is defined).
        self.csv_path = csv_file_path if csv_path is None else csv_path
        self.df = None
        self.agent = None
        self.setup_agent()

    def setup_agent(self):
        """Configure ``self.agent``, trying Ollama first and Transformers second.

        Never raises: every failure is printed and ``self.agent`` is left as
        None, so the non-AI features stay usable.
        """
        try:
            print("🤖 Setting up SmoLagent with basic tools...")

            # Use the exact setup specified by user
            try:
                # Try with Ollama model first
                # NOTE(review): confirm smolagents exports OllamaModel; if it
                # does not, this attempt always falls through to the fallback.
                from smolagents import OllamaModel
                model = OllamaModel(model_id="llama2", base_url="http://localhost:11434")
                self.agent = CodeAgent(
                    tools=[DuckDuckGoSearchTool()],
                    model=model,
                )
                print("✅ SmoLagent configured successfully with Ollama and search capabilities")
                return
            except Exception as e:
                print(f"⚠️ Ollama setup failed: {e}")

            # Fallback to Transformers model
            try:
                from smolagents import TransformersModel
                model = TransformersModel(model_id="microsoft/DialoGPT-medium")
                self.agent = CodeAgent(
                    tools=[DuckDuckGoSearchTool()],
                    model=model,
                )
                print("✅ SmoLagent configured successfully with Transformers model")
                return
            except Exception as e:
                print(f"⚠️ Transformers setup failed: {e}")
                print(" Make sure all required packages are installed")

            if self.agent is None:
                print("\n❌ No AI agent could be configured.")
                print("📋 To fix this:")
                print(" 1. Check internet connection")
                print(" 2. Install missing packages from requirements.txt")
                print("\n✅ You can still use all non-AI features!")

        except Exception as e:
            print(f"⚠️ Agent setup failed: {e}")
            self.agent = None

    def configure_model_helper(self):
        """Print setup instructions for the model the user picks.

        Returns:
            The raw (stripped) menu choice entered by the user.
        """
        print("\n🤖 AI Model Configuration Helper")
        print("=" * 40)
        print("1. OpenAI (Recommended - Most capable)")
        print("2. Ollama (Free - Runs locally)")
        print("3. Hugging Face (Free - API based)")
        print("4. Skip AI features")

        choice = input("Choose your model (1-4): ").strip()

        if choice == "1":
            print("\n📝 OpenAI Setup:")
            print("1. Get API key from: https://platform.openai.com/")
            print("2. Set environment variable: OPENAI_API_KEY=your_key")
            print("3. Or edit the setup_agent() method with your key")

        elif choice == "2":
            print("\n📝 Ollama Setup:")
            print("1. Install Ollama from: https://ollama.ai/")
            print("2. Run: ollama pull llama2")
            print("3. Start server: ollama serve")
            print("4. Script is already configured to use SmoLagents' native OllamaModel")
            print("5. Just make sure Ollama is running and try the AI analysis!")

        elif choice == "3":
            print("\n📝 Hugging Face Setup:")
            print("1. Create account at: https://huggingface.co/")
            print("2. Get token from: https://huggingface.co/settings/tokens")
            print("3. Set environment variable: HF_TOKEN=your_token")
            print("4. Uncomment HF lines in setup_agent() method")

        elif choice == "4":
            print("✅ You can still use all non-AI features!")

        print("\n💡 Tip: Set environment variables in your system or use a .env file")
        return choice

    def load_data(self):
        """Read ``self.csv_path`` into ``self.df`` and print an overview.

        Returns:
            The loaded DataFrame, or None if the file does not exist or
            reading it raised (the error is printed).
        """
        try:
            # Check if file exists
            if not os.path.exists(self.csv_path):
                print(f"Error: File not found at {self.csv_path}")
                return None

            # Read the CSV file into a DataFrame
            self.df = pd.read_csv(self.csv_path)

            print("=== DATA LOADED SUCCESSFULLY ===")
            print(f"Dataset shape: {self.df.shape}")
            print(f"Columns: {list(self.df.columns)}")
            print("\n=== FIRST 5 ROWS ===")
            print(self.df.head())

            print("\n=== DATA TYPES ===")
            print(self.df.dtypes)

            print("\n=== MISSING VALUES ===")
            print(self.df.isnull().sum())

            print("\n=== BASIC STATISTICS ===")
            print(self.df.describe())

            return self.df

        except Exception as e:
            print(f"Error loading data: {str(e)}")
            return None

    def create_visualizations(self):
        """Plot histograms and a correlation heatmap of the numeric columns.

        Writes ``data_distributions.png`` and, when there is more than one
        numeric column, ``correlation_heatmap.png`` to the working directory.
        Each figure is shown and then closed. Errors are printed, not raised.
        """
        if self.df is None:
            print("❌ No data loaded. Run load_data() first.")
            return

        try:
            # Set up plotting style
            plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'default')

            # Get numeric columns
            numeric_cols = self.df.select_dtypes(include=[np.number]).columns

            if len(numeric_cols) == 0:
                print("⚠️ No numeric columns found for visualization")
                return

            print(f"\n=== CREATING VISUALIZATIONS FOR {len(numeric_cols)} NUMERIC COLUMNS ===")

            # 1. Distribution plots, laid out on a grid at most 3 wide
            n_cols = min(3, len(numeric_cols))
            n_rows = (len(numeric_cols) + n_cols - 1) // n_cols  # ceiling division

            plt.figure(figsize=(15, 5 * n_rows))
            for position, col in enumerate(numeric_cols, start=1):
                plt.subplot(n_rows, n_cols, position)
                self.df[col].hist(bins=30, alpha=0.7, edgecolor='black')
                plt.title(f'Distribution of {col}')
                plt.xlabel(col)
                plt.ylabel('Frequency')

            plt.tight_layout()
            plt.savefig('data_distributions.png', dpi=300, bbox_inches='tight')
            plt.show()
            # Release the figure; otherwise every call from the menu leaves
            # another open figure behind.
            plt.close()
            print("✅ Distribution plots saved as 'data_distributions.png'")

            # 2. Correlation heatmap (if more than 1 numeric column)
            if len(numeric_cols) > 1:
                plt.figure(figsize=(12, 8))
                correlation_matrix = self.df[numeric_cols].corr()
                sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
                            square=True, linewidths=0.5)
                plt.title('Correlation Heatmap')
                plt.tight_layout()
                plt.savefig('correlation_heatmap.png', dpi=300, bbox_inches='tight')
                plt.show()
                plt.close()
                print("✅ Correlation heatmap saved as 'correlation_heatmap.png'")

        except Exception as e:
            print(f"❌ Error creating visualizations: {e}")

    def analyze_data_quality(self):
        """Report missing values, duplicate rows and memory usage.

        Returns:
            A DataFrame with one row per column of ``self.df`` and the
            columns ``Column``, ``Missing_Count``, ``Missing_Percentage``
            and ``Data_Type``; None if no data is loaded.
        """
        if self.df is None:
            print("❌ No data loaded. Run load_data() first.")
            return

        print("\n=== DATA QUALITY ANALYSIS ===")

        # Missing data analysis
        missing_data = self.df.isnull().sum()
        missing_percentage = (missing_data / len(self.df)) * 100

        quality_report = pd.DataFrame({
            'Column': self.df.columns,
            'Missing_Count': missing_data.values,
            'Missing_Percentage': missing_percentage.values,
            'Data_Type': self.df.dtypes.values,
        })

        print("Missing Data Summary:")
        print(quality_report[quality_report['Missing_Count'] > 0])

        # Duplicate rows
        duplicates = self.df.duplicated().sum()
        print(f"\nDuplicate rows: {duplicates}")

        # Memory usage
        memory_usage = self.df.memory_usage(deep=True).sum() / 1024**2
        print(f"Memory usage: {memory_usage:.2f} MB")

        return quality_report

    def ai_analysis(self, query):
        """Ask the configured agent a question about the loaded data.

        The prompt sent to the agent contains the dataset shape, columns,
        dtypes, missing-value counts, the first three rows, the output of
        ``describe()`` and the user's question.

        Args:
            query: The user's question, in natural language.

        Returns:
            The agent's response, or None if no agent is configured, no data
            is loaded, or building the prompt / running the agent raised.
        """
        if self.agent is None:
            print("❌ AI agent not configured. Please set up SmoLagent first.")
            return

        if self.df is None:
            print("❌ No data loaded. Run load_data() first.")
            return

        try:
            # Prepare context about the dataset. Built inside the try block
            # because summarising the frame can itself raise.
            data_context = "\n".join([
                "",
                "Dataset Analysis Request:",
                f"- Dataset Shape: {self.df.shape}",
                f"- Columns: {list(self.df.columns)}",
                f"- Data Types: {dict(self.df.dtypes)}",
                f"- Missing Values: {dict(self.df.isnull().sum())}",
                "",
                "Sample Data:",
                self.df.head(3).to_string(),
                "",
                "Statistical Summary:",
                self.df.describe().to_string(),
                "",
                f"User Question: {query}",
                "",
            ])

            print(f"\n=== AI ANALYSIS FOR: '{query}' ===")
            print("🤖 Processing with SmoLagent...")

            # Use the agent with the data context and query
            response = self.agent.run(data_context)
            print("✅ AI Analysis Complete:")
            print(response)
            return response

        except Exception as e:
            print(f"❌ AI analysis failed: {e}")
            print("💡 Try using the data visualization and quality analysis features instead!")
            return None

    @staticmethod
    def _ask(prompt):
        """Return the stripped user input, or None on EOF or Ctrl-C."""
        try:
            return input(prompt).strip()
        except (EOFError, KeyboardInterrupt):
            return None

    def interactive_menu(self):
        """Run the text menu until the user chooses to exit.

        The loop also ends when standard input is closed or the user presses
        Ctrl-C at the menu prompt.
        """
        while True:
            print("\n" + "=" * 50)
            print("🤖 ENHANCED DATA EXPLORER WITH AI")
            print("=" * 50)
            print("1. Load and explore data")
            print("2. Create visualizations")
            print("3. Analyze data quality")
            print("4. AI-powered analysis")
            print("5. Show data summary")
            print("6. Exit")
            print("=" * 50)

            choice = self._ask("Enter your choice (1-6): ")
            if choice is None:
                # No more input will arrive; leave instead of crashing.
                print("\n👋 Goodbye!")
                break

            if choice == '1':
                self.load_data()
            elif choice == '2':
                self.create_visualizations()
            elif choice == '3':
                self.analyze_data_quality()
            elif choice == '4':
                if self.agent is None:
                    print("\n❌ AI features not available. Please configure a model first.")
                    print("Edit the setup_agent() method to add your API keys.")
                    self.configure_model_helper()
                else:
                    print("\n🤖 AI Analysis - Ask me anything about your data!")
                    print("Example queries:")
                    print(" • 'What are the main trends in this data?'")
                    print(" • 'Find any outliers or anomalies'")
                    print(" • 'Suggest data quality improvements'")
                    print(" • 'Perform correlation analysis'")
                    print(" • 'Identify seasonal patterns'")
                    print(" • 'Recommend preprocessing steps'")

                    query = self._ask("\n💬 Your question: ")
                    if query:
                        self.ai_analysis(query)
            elif choice == '5':
                if self.df is not None:
                    print("\n📊 Dataset Summary:")
                    print(f"Shape: {self.df.shape}")
                    print(f"Columns: {list(self.df.columns)}")
                    print(f"Memory: {self.df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
                else:
                    print("❌ No data loaded.")
            elif choice == '6':
                print("👋 Goodbye!")
                break
            else:
                print("❌ Invalid choice. Please try again.")
|
315 |
+
|
316 |
def load_and_explore_data():
|
317 |
+
"""Load and explore the CSV data (keeping your original function)"""
|
318 |
try:
|
319 |
# Check if file exists
|
320 |
if not os.path.exists(csv_file_path):
|
|
|
346 |
return None
|
347 |
|
348 |
if __name__ == "__main__":
    # Script entry point: let the user pick between the original one-shot
    # report and the interactive explorer.
    print("🚀 Enhanced Data Explorer with SmoLagent AI")
    print("Choose your preferred mode:")
    print("1. Original function (load_and_explore_data)")
    print("2. Enhanced interactive mode with AI")

    mode = input("Enter mode (1 or 2): ").strip()

    if mode == "2":
        # Run enhanced mode with AI capabilities
        explorer = EnhancedDataExplorer()
        explorer.interactive_menu()
    else:
        # Mode 1, and the fallback for anything unrecognised
        if mode != "1":
            print("Invalid choice. Running original function...")
        df = load_and_explore_data()
|