antitheft159 committed on
Commit
f75de9a
·
verified ·
1 Parent(s): 683ec33

Upload application21.py

Browse files
Files changed (1) hide show
  1. application21.py +131 -0
application21.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Exploratory analysis and a linear-regression baseline for a sleep CSV dataset."""

import warnings

import matplotlib

# Select the non-interactive Agg backend before pyplot is imported, so the
# script does not need a display to render figures.
matplotlib.use('Agg')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Suppress every warning to keep console output short.
# NOTE(review): this also hides deprecation notices from pandas/seaborn;
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

sns.set(style='whitegrid')

print('Import and setup completed successfully.')
import os

# Location of the input CSV. The historical default is kept, but it can be
# overridden through the SLEEP_DATASET_PATH environment variable so the script
# is usable on machines where that absolute path does not exist.
DEFAULT_FILE_PATH = r'C:\Users\Donte Patton\Downloads\dataset_2191_sleep.csv'
file_path = os.environ.get('SLEEP_DATASET_PATH', DEFAULT_FILE_PATH)

# UTF-8 is a superset of ASCII: pure-ASCII files load exactly as before, while
# a stray non-ASCII character no longer aborts the read.
df = pd.read_csv(file_path, encoding='utf-8', delimiter=',')

print('Dataset loaded successfull. Showing first few rows:')
print(df.head())

print('Dataset Info:')
df.info()

print('\nMissing values in each column:')
print(df.isnull().sum())

# Drop every row that has a NaN in any column.
# NOTE(review): 'total_sleep' is coerced from text later in the script, so
# textual missing-value markers presumably survive this step; confirm against
# the CSV.
df.dropna(inplace=True)
print('\nDataframe shape after dropping missing values:', df.shape)

# No dtype conversion is performed at this stage; the dtypes are reported
# exactly as pandas inferred them while loading.
print('\nData types after conversion:')
print(df.dtypes)
# Numeric columns only; both overview plots below are built from this subset.
numeric_df = df.select_dtypes(include=[np.number])

# Correlation heatmap; only drawn when at least four numeric columns exist.
if numeric_df.shape[1] >= 4:
    plt.figure(figsize=(12, 10))
    corr = numeric_df.corr()
    sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f')
    plt.title('Correlation Heatmap of Numeric Variables')
    # The script runs on the non-interactive Agg backend, where plt.show()
    # displays nothing, so the figure is written to disk first.
    plt.savefig('correlation_heatmap.png', bbox_inches='tight')
    plt.show()
    plt.close()
else:
    print('Not enough numeric columns for a correlation heatmap.')

# Pair plot over the same numeric columns (needs at least two of them).
numeric_cols = numeric_df.columns.tolist()
if len(numeric_cols) > 1:
    sns.pairplot(df[numeric_cols])
    plt.suptitle('Pair Plot of Numeric Features', y=1.02)
    # bbox_inches='tight' keeps the title, which sits above the axes area.
    plt.savefig('numeric_pairplot.png', bbox_inches='tight')
    plt.show()
    plt.close()
else:
    print('Not enough numeric columns for pair plot.')
# Each figure below is saved before plt.show(): the script uses the
# non-interactive Agg backend, under which show() displays nothing, and the
# figure is closed afterwards so it does not stay in memory.

# Histogram (with KDE overlay) of the 'body_weight' column.
plt.figure(figsize=(8, 6))
sns.histplot(df['body_weight'], kde=True, bins=30)
plt.title('Distribution of Body Weight')
plt.xlabel('Body Weight (kg)')
plt.ylabel('Frequency')
plt.savefig('body_weight_distribution.png', bbox_inches='tight')
plt.show()
plt.close()

# Bar chart of the mean 'body_weight' for each 'predation_index' value.
plt.figure(figsize=(10, 6))
body_weight_by_predation = (
    df.groupby('predation_index')['body_weight'].mean().reset_index()
)
sns.barplot(
    x='predation_index',
    y='body_weight',
    data=body_weight_by_predation,
    palette='viridis',
)
plt.title('Average Body Weight by Predation Index')
plt.xlabel('Predation Index')
plt.ylabel('Average Body Weight (kg)')
plt.savefig('body_weight_by_predation_index.png', bbox_inches='tight')
plt.show()
plt.close()

# Number of rows for each 'predation_index' value.
plt.figure(figsize=(8, 6))
sns.countplot(x='predation_index', data=df, palette='Set2')
plt.title('Count of Records by Predation Index')
plt.xlabel('Predation Index')
plt.ylabel('Count')
plt.savefig('predation_index_counts.png', bbox_inches='tight')
plt.show()
plt.close()
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

# Predictor columns and the column to be predicted.
features = ['body_weight', 'brain_weight', 'predation_index', 'sleep_exposure_index', 'danger_index']
target = 'total_sleep'
model_columns = features + [target]

# Work on a copy so the frame used by the plots above is left untouched.
model_df = df.copy()

# 'total_sleep' is loaded as text; coerce it (and, defensively, the features)
# to numbers. Values that cannot be parsed become NaN.
model_df[model_columns] = model_df[model_columns].apply(pd.to_numeric, errors='coerce')

# Discard only rows that are unusable for the model, i.e. rows with a missing
# value in a column the model actually reads.
model_df = model_df.dropna(subset=model_columns)

X = model_df[features]
y = model_df[target]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print('Training set shape:', X_train.shape)
print('Testing set shape:', X_test.shape)

# Fit an ordinary least-squares model on the training rows.
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = regressor.predict(X_test)

r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f'R^2 score for the predictor: {r2:.3f}')
print(f'RMSE for the predictor: {rmse:.3f}')
# Scatter of held-out actual vs. predicted values. The model predicts
# 'total_sleep', so the labels name that quantity.
plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, alpha=0.5, color='teal')
# Dashed diagonal marks perfect predictions (predicted == actual).
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
plt.xlabel('Actual Total Sleep')
plt.ylabel('Predicted Total Sleep')
plt.title('Actual vs Predicted Total Sleep')
# Save before show(): under the non-interactive Agg backend used by this
# script, show() displays nothing and the figure would otherwise be lost.
plt.savefig('actual_vs_predicted_total_sleep.png', bbox_inches='tight')
plt.show()
plt.close()