raymondEDS committed on
Commit
ae38d1c
·
1 Parent(s): faeb953

Updating lesson 5

Browse files
Data/Submissions.csv ADDED
The diff for this file is too large to render. See raw diff
 
Data/decision.csv ADDED
The diff for this file is too large to render. See raw diff
 
Data/reviews.csv ADDED
The diff for this file is too large to render. See raw diff
 
Data/submission_keyword.csv ADDED
The diff for this file is too large to render. See raw diff
 
app/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/app/__pycache__/__init__.cpython-311.pyc and b/app/__pycache__/__init__.cpython-311.pyc differ
 
app/__pycache__/main.cpython-311.pyc CHANGED
Binary files a/app/__pycache__/main.cpython-311.pyc and b/app/__pycache__/main.cpython-311.pyc differ
 
app/components/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/app/components/__pycache__/__init__.cpython-311.pyc and b/app/components/__pycache__/__init__.cpython-311.pyc differ
 
app/components/__pycache__/login.cpython-311.pyc CHANGED
Binary files a/app/components/__pycache__/login.cpython-311.pyc and b/app/components/__pycache__/login.cpython-311.pyc differ
 
app/main.py CHANGED
@@ -22,6 +22,7 @@ from app.pages import week_1
22
  from app.pages import week_2
23
  from app.pages import week_3
24
  from app.pages import week_4
 
25
  # Page configuration
26
  st.set_page_config(
27
  page_title="Data Science Course App",
@@ -146,6 +147,8 @@ def show_week_content():
146
  week_3.show()
147
  elif st.session_state.current_week == 4:
148
  week_4.show()
 
 
149
  else:
150
  st.warning("Content for this week is not yet available.")
151
 
@@ -158,7 +161,7 @@ def main():
158
  return
159
 
160
  # User is logged in, show course content
161
- if st.session_state.current_week in [1, 2, 3, 4]:
162
  show_week_content()
163
  else:
164
  st.title("Data Science Research Paper Course")
 
22
  from app.pages import week_2
23
  from app.pages import week_3
24
  from app.pages import week_4
25
+ from app.pages import week_5
26
  # Page configuration
27
  st.set_page_config(
28
  page_title="Data Science Course App",
 
147
  week_3.show()
148
  elif st.session_state.current_week == 4:
149
  week_4.show()
150
+ elif st.session_state.current_week == 5:
151
+ week_5.show()
152
  else:
153
  st.warning("Content for this week is not yet available.")
154
 
 
161
  return
162
 
163
  # User is logged in, show course content
164
+ if st.session_state.current_week in [1, 2, 3, 4, 5]:
165
  show_week_content()
166
  else:
167
  st.title("Data Science Research Paper Course")
app/pages/__pycache__/week_1.cpython-311.pyc CHANGED
Binary files a/app/pages/__pycache__/week_1.cpython-311.pyc and b/app/pages/__pycache__/week_1.cpython-311.pyc differ
 
app/pages/__pycache__/week_2.cpython-311.pyc CHANGED
Binary files a/app/pages/__pycache__/week_2.cpython-311.pyc and b/app/pages/__pycache__/week_2.cpython-311.pyc differ
 
app/pages/__pycache__/week_3.cpython-311.pyc CHANGED
Binary files a/app/pages/__pycache__/week_3.cpython-311.pyc and b/app/pages/__pycache__/week_3.cpython-311.pyc differ
 
app/pages/__pycache__/week_4.cpython-311.pyc CHANGED
Binary files a/app/pages/__pycache__/week_4.cpython-311.pyc and b/app/pages/__pycache__/week_4.cpython-311.pyc differ
 
app/pages/__pycache__/week_5.cpython-311.pyc ADDED
Binary file (18.4 kB). View file
 
app/pages/week_5.py CHANGED
@@ -7,6 +7,70 @@ from sklearn.linear_model import LinearRegression
7
  from sklearn.metrics import r2_score
8
  import scipy.stats as stats
9
  from nltk.tokenize import word_tokenize
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def show():
12
  st.title("Week 5: Introduction to Machine Learning and Linear Regression")
@@ -28,7 +92,7 @@ def show():
28
  """)
29
 
30
  # Learning Path
31
- st.subheader("Key Concepts You'll Master")
32
  st.write("""
33
  1. **Linear Regression (线性回归):**
34
  - Definition: A statistical method that models the relationship between a dependent variable and one or more independent variables
@@ -46,226 +110,231 @@ def show():
46
  - Confidence intervals: Range where true coefficient likely lies
47
  """)
48
 
49
- # Module 1: Setting Up Your Data Science Toolkit
50
- st.header("Module 1: Setting Up Your Data Science Toolkit")
51
- st.write("""
52
- Let's start by importing the necessary libraries for our analysis:
53
- """)
54
-
55
- st.code("""
56
- import numpy as np
57
- import pandas as pd
58
- import scipy.stats as stats
59
- import matplotlib.pyplot as plt
60
- import sklearn
61
- from nltk.tokenize import word_tokenize
62
- import seaborn as sns
63
-
64
- # Set up visualization style
65
- sns.set_style("whitegrid")
66
- sns.set_context("poster")
67
- """)
68
-
69
- # Module 2: Loading and Understanding Data
70
- st.header("Module 2: Loading and Understanding Data")
71
- st.write("""
72
- Before diving into analysis, we need to understand our data structure. What information do we have about each review? Each submission?
73
- """)
74
-
75
- if st.button("Load Sample Data"):
76
- # Create sample data for demonstration
77
- sample_reviews = pd.DataFrame({
78
- 'rating_int': [6, 6, 5, 6, 8],
79
- 'confidence_int': [4.0, 4.0, 4.0, 3.0, 3.0],
80
- 'review': [
81
- 'There is a lot of recent work on link-prediction...',
82
- 'Pros: The different attention techniques...',
83
- 'Overview of the paper: This paper studies...',
84
- 'Summary: The authors propose a near minimax...',
85
- 'This paper introduces a GPU-friendly variant...'
86
- ],
87
- 'forum': ['tGZu6DlbreV', 'uKhGRvM8QNH', 'IrM64DGB21', 'ww-7bdU6GA9', 'r1VGvBcxl']
88
- })
89
 
90
- st.write("Sample Reviews Data:")
91
- st.dataframe(sample_reviews)
92
-
93
- # Module 3: Feature Engineering
94
- st.header("Module 3: Feature Engineering")
95
- st.write("""
96
- We'll create features from our text data that can help predict paper acceptance:
97
- - Review length (word count)
98
- - Review rating
99
- - Reviewer confidence
100
- - Number of keywords in the paper
101
- """)
102
-
103
- # Interactive Feature Engineering
104
- st.subheader("Try Feature Engineering")
105
- st.write("""
106
- Let's create some features from a review:
107
- """)
108
-
109
- review_text = st.text_area(
110
- "Enter a review to analyze:",
111
- "This paper introduces a novel approach to machine learning. The methodology is sound and the results are promising.",
112
- key="review_text"
113
- )
114
-
115
- if st.button("Extract Features"):
116
- # Calculate features
117
- word_count = len(word_tokenize(review_text))
118
- sentence_count = len(review_text.split('.'))
119
 
120
- st.write("Extracted Features:")
121
- st.write(f"Word Count: {word_count}")
122
- st.write(f"Sentence Count: {sentence_count}")
123
-
124
- # Module 4: Linear Regression Analysis
125
- st.header("Module 4: Linear Regression Analysis")
126
- st.write("""
127
- Let's build a simple linear regression model to predict paper ratings based on review features.
128
- """)
129
-
130
- # Interactive Regression
131
- st.subheader("Try Linear Regression")
132
- st.write("""
133
- Let's create a simple regression model:
134
- """)
135
-
136
- if st.button("Run Sample Regression"):
137
- # Create sample data
138
- np.random.seed(42)
139
- X = np.random.rand(100, 1) * 10 # Review length
140
- y = 2 * X + np.random.randn(100, 1) * 2 # Rating with some noise
141
 
142
- # Fit regression model
143
- model = LinearRegression()
144
- model.fit(X, y)
 
 
 
 
 
145
 
146
- # Create visualization
147
- plt.figure(figsize=(10, 6))
148
- plt.scatter(X, y, color='blue', alpha=0.5)
149
- plt.plot(X, model.predict(X), color='red', linewidth=2)
150
- plt.xlabel('Review Length')
151
- plt.ylabel('Rating')
152
- plt.title('Linear Regression: Review Length vs Rating')
153
- st.pyplot(plt)
154
 
155
- # Show model metrics
156
- st.write(f"R-squared: {r2_score(y, model.predict(X)):.3f}")
157
- st.write(f"Coefficient: {model.coef_[0][0]:.3f}")
158
- st.write(f"Intercept: {model.intercept_[0]:.3f}")
159
-
160
- # Practice Exercises
161
- st.header("Practice Exercises")
162
-
163
- with st.expander("Exercise 1: Feature Engineering"):
164
  st.write("""
165
- 1. Load the reviews dataset
166
- 2. Create features from review text
167
- 3. Calculate correlation between features
168
- 4. Visualize relationships
 
169
  """)
170
 
171
- st.code("""
172
- # Solution
173
- import pandas as pd
174
- import numpy as np
175
- from nltk.tokenize import word_tokenize
176
-
177
- # Load data
178
- df_reviews = pd.read_csv('reviews.csv')
179
 
180
- # Create features
181
- df_reviews['word_count'] = df_reviews['review'].apply(
182
- lambda x: len(word_tokenize(x)))
183
- df_reviews['sentence_count'] = df_reviews['review'].apply(
184
- lambda x: len(x.split('.')))
 
 
 
 
 
 
 
 
185
 
186
- # Calculate correlation
187
- correlation = df_reviews[['word_count', 'rating_int',
188
- 'confidence_int']].corr()
189
-
190
- # Visualize
191
- sns.heatmap(correlation, annot=True)
192
- plt.show()
193
- """)
194
-
195
- with st.expander("Exercise 2: Building a Predictive Model"):
196
  st.write("""
197
- 1. Prepare features for modeling
198
- 2. Split data into training and test sets
199
- 3. Train a linear regression model
200
- 4. Evaluate model performance
201
  """)
202
 
203
- st.code("""
204
- # Solution
205
- from sklearn.model_selection import train_test_split
206
- from sklearn.linear_model import LinearRegression
207
-
208
- # Prepare features
209
  X = df_reviews[['word_count', 'confidence_int']]
210
  y = df_reviews['rating_int']
211
 
212
- # Split data
213
- X_train, X_test, y_train, y_test = train_test_split(
214
- X, y, test_size=0.2, random_state=42)
215
-
216
- # Train model
217
  model = LinearRegression()
218
- model.fit(X_train, y_train)
219
 
220
- # Evaluate
221
- train_score = model.score(X_train, y_train)
222
- test_score = model.score(X_test, y_test)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
- print(f"Training R²: {train_score:.3f}")
225
- print(f"Testing R²: {test_score:.3f}")
226
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
- # Weekly Assignment
229
- username = st.session_state.get("username", "Student")
230
- st.header(f"{username}'s Weekly Assignment")
231
-
232
- if username == "manxiii":
233
- st.markdown("""
234
- Hello **manxiii**, here is your Assignment 5: Machine Learning Analysis.
235
- 1. Complete the feature engineering pipeline for the ICLR dataset
236
- 2. Build a linear regression model to predict paper ratings
237
- 3. Analyze the relationship between review features and acceptance
238
- 4. Submit your findings in a Jupyter notebook
239
 
240
- **Due Date:** End of Week 5
241
- """)
242
- elif username == "zhu":
243
- st.markdown("""
244
- Hello **zhu**, here is your Assignment 5: Machine Learning Analysis.
245
- 1. Implement the complete machine learning workflow
246
- 2. Create insightful visualizations of model results
247
- 3. Draw conclusions from your analysis
248
- 4. Submit your work in a Jupyter notebook
249
 
250
- **Due Date:** End of Week 5
251
- """)
252
- elif username == "WK":
253
- st.markdown("""
254
- Hello **WK**, here is your Assignment 5: Machine Learning Analysis.
255
- 1. Complete the feature engineering pipeline
256
- 2. Build and evaluate a linear regression model
257
- 3. Analyze patterns in the data
258
- 4. Submit your findings
259
 
260
- **Due Date:** End of Week 5
261
- """)
262
- else:
263
- st.markdown(f"""
264
- Hello **{username}**, here is your Assignment 5: Machine Learning Analysis.
265
- 1. Complete the feature engineering pipeline
266
- 2. Build and evaluate a linear regression model
267
- 3. Analyze patterns in the data
268
- 4. Submit your findings
269
 
270
- **Due Date:** End of Week 5
271
- """)
 
 
 
 
 
7
  from sklearn.metrics import r2_score
8
  import scipy.stats as stats
9
  from nltk.tokenize import word_tokenize
10
+ import plotly.express as px
11
+ import plotly.graph_objects as go
12
+ from pathlib import Path
13
+ import os
14
+
15
+ # Set up the style for all plots
16
+ plt.style.use('default')
17
+ sns.set_theme(style="whitegrid", palette="husl")
18
+
19
def load_data():
    """Read the four course CSV datasets from the ``Data`` directory.

    The directory is resolved relative to this file: two levels above the
    folder that contains this module.

    Returns:
        A 4-tuple ``(df_reviews, df_submissions, df_dec, df_keyword)`` of
        DataFrames, in that order.  If any file is missing, the problem is
        reported with ``st.error`` and ``(None, None, None, None)`` is
        returned instead.
    """
    data_dir = Path(__file__).parent.parent.parent / "Data"

    # Order matters: it is the order of the returned tuple.
    csv_names = (
        "reviews.csv",
        "Submissions.csv",
        "decision.csv",
        "submission_keyword.csv",
    )

    try:
        frames = tuple(pd.read_csv(data_dir / csv_name) for csv_name in csv_names)
    except FileNotFoundError as e:
        st.error(f"Data files not found. Please make sure the data files are in the correct location: {data_dir}")
        st.error(f"Error details: {str(e)}")
        return None, None, None, None

    return frames
39
+
40
def create_feature_plot(df, x_col, y_col, title):
    """Build an interactive Plotly scatter plot of one column against another.

    Args:
        df: Data handed to ``px.scatter``.
        x_col: Name of the column plotted on the x-axis.
        y_col: Name of the column plotted on the y-axis.
        title: Chart title.

    Returns:
        The configured Plotly figure.
    """
    # Axis labels are the column names with underscores replaced by spaces
    # and title-cased, e.g. "word_count" -> "Word Count".
    axis_labels = {col: col.replace('_', ' ').title() for col in (x_col, y_col)}

    scatter = px.scatter(
        df,
        x=x_col,
        y=y_col,
        title=title,
        labels=axis_labels,
        template="plotly_white",
    )

    layout_options = {
        "title_x": 0.5,  # horizontally centre the title
        "title_font_size": 20,
        "showlegend": True,
        "plot_bgcolor": 'white',
        "paper_bgcolor": 'white',
    }
    scatter.update_layout(**layout_options)
    return scatter
55
+
56
def create_correlation_heatmap(df, columns):
    """Build a Plotly heatmap of the pairwise correlations of selected columns.

    Args:
        df: DataFrame holding the data.
        columns: Column names to include in the correlation matrix.

    Returns:
        A Plotly figure showing ``df[columns].corr()`` on a fixed
        ``[-1, 1]`` colour scale.
    """
    corr_matrix = df[columns].corr()
    axis_names = corr_matrix.columns  # same labels on both axes

    heatmap = go.Heatmap(
        z=corr_matrix,
        x=axis_names,
        y=axis_names,
        colorscale='RdBu',
        zmin=-1,
        zmax=1,
    )
    figure = go.Figure(data=heatmap)

    layout_options = {
        "title": 'Feature Correlation Heatmap',
        "title_x": 0.5,  # horizontally centre the title
        "title_font_size": 20,
        "plot_bgcolor": 'white',
        "paper_bgcolor": 'white',
    }
    figure.update_layout(**layout_options)
    return figure
74
 
75
  def show():
76
  st.title("Week 5: Introduction to Machine Learning and Linear Regression")
 
92
  """)
93
 
94
  # Learning Path
95
+ st.subheader("Key Concepts You'll Learn")
96
  st.write("""
97
  1. **Linear Regression (线性回归):**
98
  - Definition: A statistical method that models the relationship between a dependent variable and one or more independent variables
 
110
  - Confidence intervals: Range where true coefficient likely lies
111
  """)
112
 
113
+ # Load the data
114
+ try:
115
+ df_reviews, df_submissions, df_dec, df_keyword = load_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ # Module 1: Data Exploration
118
+ st.header("Module 1: Data Exploration")
119
+ st.write("Let's explore our dataset to understand the review patterns:")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
+ # Create features from review text
122
+ df_reviews['word_count'] = df_reviews['review'].apply(lambda x: len(str(x).split()))
123
+ df_reviews['sentence_count'] = df_reviews['review'].apply(lambda x: len(str(x).split('.')))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
+ # Show basic statistics
126
+ col1, col2 = st.columns(2)
127
+ with col1:
128
+ st.metric("Total Reviews", len(df_reviews))
129
+ st.metric("Average Rating", f"{df_reviews['rating_int'].mean():.2f}")
130
+ with col2:
131
+ st.metric("Average Word Count", f"{df_reviews['word_count'].mean():.0f}")
132
+ st.metric("Average Confidence", f"{df_reviews['confidence_int'].mean():.2f}")
133
 
134
+ # Create interactive visualizations
135
+ st.subheader("Review Length vs Rating")
136
+ fig = create_feature_plot(df_reviews, 'word_count', 'rating_int',
137
+ 'Relationship between Review Length and Rating')
138
+ st.plotly_chart(fig, use_container_width=True)
 
 
 
139
 
140
+ # Correlation analysis
141
+ st.subheader("Feature Correlations")
142
+ corr_fig = create_correlation_heatmap(df_reviews,
143
+ ['word_count', 'rating_int', 'confidence_int'])
144
+ st.plotly_chart(corr_fig, use_container_width=True)
145
+
146
+ # Module 2: Feature Engineering
147
+ st.header("Module 2: Feature Engineering")
 
148
  st.write("""
149
+ Let's create more sophisticated features from our review data:
150
+ - Review length (word count)
151
+ - Review rating
152
+ - Reviewer confidence
153
+ - Number of keywords in the paper
154
  """)
155
 
156
+ # Interactive Feature Engineering
157
+ st.subheader("Try Feature Engineering")
158
+ review_text = st.text_area(
159
+ "Enter a review to analyze:",
160
+ "This paper introduces a novel approach to machine learning. The methodology is sound and the results are promising.",
161
+ key="review_text"
162
+ )
 
163
 
164
+ if st.button("Extract Features"):
165
+ # Calculate features
166
+ word_count = len(word_tokenize(review_text))
167
+ sentence_count = len(review_text.split('.'))
168
+
169
+ # Create a nice display of features
170
+ col1, col2, col3 = st.columns(3)
171
+ with col1:
172
+ st.metric("Word Count", word_count)
173
+ with col2:
174
+ st.metric("Sentence Count", sentence_count)
175
+ with col3:
176
+ st.metric("Average Words per Sentence", f"{word_count/sentence_count:.1f}")
177
 
178
+ # Module 3: Linear Regression Analysis
179
+ st.header("Module 3: Linear Regression Analysis")
 
 
 
 
 
 
 
 
180
  st.write("""
181
+ Let's build a linear regression model to predict paper ratings based on review features.
 
 
 
182
  """)
183
 
184
+ # Prepare data for modeling
 
 
 
 
 
185
  X = df_reviews[['word_count', 'confidence_int']]
186
  y = df_reviews['rating_int']
187
 
188
+ # Fit regression model
 
 
 
 
189
  model = LinearRegression()
190
+ model.fit(X, y)
191
 
192
+ # Create 3D visualization of the regression
193
+ st.subheader("3D Visualization of Review Features")
194
+ fig = px.scatter_3d(df_reviews.sample(1000),
195
+ x='word_count',
196
+ y='confidence_int',
197
+ z='rating_int',
198
+ title='Review Features in 3D Space',
199
+ labels={
200
+ 'word_count': 'Word Count',
201
+ 'confidence_int': 'Confidence',
202
+ 'rating_int': 'Rating'
203
+ })
204
+ fig.update_layout(
205
+ title_x=0.5,
206
+ title_font_size=20,
207
+ scene = dict(
208
+ xaxis_title='Word Count',
209
+ yaxis_title='Confidence',
210
+ zaxis_title='Rating'
211
+ )
212
+ )
213
+ st.plotly_chart(fig, use_container_width=True)
214
 
215
+ # Show model metrics
216
+ st.subheader("Model Performance")
217
+ col1, col2, col3 = st.columns(3)
218
+ with col1:
219
+ st.metric("R-squared", f"{model.score(X, y):.3f}")
220
+ with col2:
221
+ st.metric("Word Count Coefficient", f"{model.coef_[0]:.3f}")
222
+ with col3:
223
+ st.metric("Confidence Coefficient", f"{model.coef_[1]:.3f}")
224
+
225
+ # Practice Exercises
226
+ st.header("Practice Exercises")
227
+
228
+ with st.expander("Exercise 1: Feature Engineering"):
229
+ st.write("""
230
+ 1. Load the reviews dataset
231
+ 2. Create features from review text
232
+ 3. Calculate correlation between features
233
+ 4. Visualize relationships
234
+ """)
235
+
236
+ st.code("""
237
+ # Solution
238
+ import pandas as pd
239
+ import numpy as np
240
+ from nltk.tokenize import word_tokenize
241
+
242
+ # Load data
243
+ df_reviews = pd.read_csv('reviews.csv')
244
+
245
+ # Create features
246
+ df_reviews['word_count'] = df_reviews['review'].apply(
247
+ lambda x: len(word_tokenize(x)))
248
+ df_reviews['sentence_count'] = df_reviews['review'].apply(
249
+ lambda x: len(x.split('.')))
250
+
251
+ # Calculate correlation
252
+ correlation = df_reviews[['word_count', 'rating_int',
253
+ 'confidence_int']].corr()
254
+
255
+ # Visualize
256
+ sns.heatmap(correlation, annot=True)
257
+ plt.show()
258
+ """)
259
+
260
+ with st.expander("Exercise 2: Building a Predictive Model"):
261
+ st.write("""
262
+ 1. Prepare features for modeling
263
+ 2. Split data into training and test sets
264
+ 3. Train a linear regression model
265
+ 4. Evaluate model performance
266
+ """)
267
+
268
+ st.code("""
269
+ # Solution
270
+ from sklearn.model_selection import train_test_split
271
+ from sklearn.linear_model import LinearRegression
272
+
273
+ # Prepare features
274
+ X = df_reviews[['word_count', 'confidence_int']]
275
+ y = df_reviews['rating_int']
276
+
277
+ # Split data
278
+ X_train, X_test, y_train, y_test = train_test_split(
279
+ X, y, test_size=0.2, random_state=42)
280
+
281
+ # Train model
282
+ model = LinearRegression()
283
+ model.fit(X_train, y_train)
284
+
285
+ # Evaluate
286
+ train_score = model.score(X_train, y_train)
287
+ test_score = model.score(X_test, y_test)
288
+
289
+ print(f"Training R²: {train_score:.3f}")
290
+ print(f"Testing R²: {test_score:.3f}")
291
+ """)
292
 
293
+ # Weekly Assignment
294
+ username = st.session_state.get("username", "Student")
295
+ st.header(f"{username}'s Weekly Assignment")
296
+
297
+ if username == "manxiii":
298
+ st.markdown("""
299
+ Hello **manxiii**, here is your Assignment 5: Machine Learning Analysis.
300
+ 1. Complete the feature engineering pipeline for the ICLR dataset
301
+ 2. Build a linear regression model to predict paper ratings
302
+ 3. Analyze the relationship between review features and acceptance
303
+ 4. Submit your findings in a Jupyter notebook
304
 
305
+ **Due Date:** End of Week 5
306
+ """)
307
+ elif username == "zhu":
308
+ st.markdown("""
309
+ Hello **zhu**, here is your Assignment 5: Machine Learning Analysis.
310
+ 1. Implement the complete machine learning workflow
311
+ 2. Create insightful visualizations of model results
312
+ 3. Draw conclusions from your analysis
313
+ 4. Submit your work in a Jupyter notebook
314
 
315
+ **Due Date:** End of Week 5
316
+ """)
317
+ elif username == "WK":
318
+ st.markdown("""
319
+ Hello **WK**, here is your Assignment 5: Machine Learning Analysis.
320
+ 1. Complete the feature engineering pipeline
321
+ 2. Build and evaluate a linear regression model
322
+ 3. Analyze patterns in the data
323
+ 4. Submit your findings
324
 
325
+ **Due Date:** End of Week 5
326
+ """)
327
+ else:
328
+ st.markdown(f"""
329
+ Hello **{username}**, here is your Assignment 5: Machine Learning Analysis.
330
+ 1. Complete the feature engineering pipeline
331
+ 2. Build and evaluate a linear regression model
332
+ 3. Analyze patterns in the data
333
+ 4. Submit your findings
334
 
335
+ **Due Date:** End of Week 5
336
+ """)
337
+
338
+ except Exception as e:
339
+ st.error(f"Error loading data: {str(e)}")
340
+ st.write("Please make sure the data files are in the correct location.")