import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Dataset location and the columns the model is trained on
file_path = "student-por.csv"
features = ["studytime", "absences", "G1", "G2"]


@st.cache_resource
def _train_model(csv_path, feature_names):
    """Fit the final-grade regressor once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction, so
    without caching the CSV would be re-read, the model re-fitted and the
    pickle re-written each time an input value changes.

    Args:
        csv_path: Path to the semicolon-separated dataset.
        feature_names: Tuple of column names used as model inputs.

    Returns:
        The LinearRegression fitted on the 80% training split.
    """
    data = pd.read_csv(csv_path, sep=';')
    X = data[list(feature_names)]
    y = data["G3"]

    # Keep the 80/20 split and seed so the fitted coefficients are the same
    # as before; the held-out 20% is not used by this script.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    fitted = LinearRegression()
    fitted.fit(X_train, y_train)

    # Still write the model to disk, but only when it is actually (re)trained.
    # The in-memory estimator is returned directly; reloading the pickle that
    # was just written added nothing.
    joblib.dump(fitted, "student_performance_model.pkl")
    return fitted


# A tuple is passed so the cache key reflects the selected feature columns.
model = _train_model(file_path, tuple(features))

# Streamlit App
st.title("📚 Student Performance Predictor")
st.write("Predict final exam scores based on study time, absences, and previous grades.")

# Input fields
# NOTE(review): in the UCI Student Performance dataset, "studytime" is an
# ordinal code (1-4), not a raw number of hours. Feeding hours (0-20) into a
# model trained on those codes puts the input on the wrong scale, so the
# widget offers the coded levels instead. Confirm against the CSV in use.
STUDYTIME_LEVELS = {
    1: "< 2 hours",
    2: "2 to 5 hours",
    3: "5 to 10 hours",
    4: "> 10 hours",
}
studytime = st.selectbox(
    "Study Time (hours per week)",
    options=list(STUDYTIME_LEVELS),
    index=1,
    format_func=STUDYTIME_LEVELS.get,
)
absences = st.number_input("Number of Absences", min_value=0, max_value=100, value=2)
G1 = st.number_input("Grade 1", min_value=0, max_value=20, value=10)
G2 = st.number_input("Grade 2", min_value=0, max_value=20, value=10)

# Prediction
if st.button("Predict Final Grade"):
    # The model was fitted on a DataFrame, so predict with the same column
    # names: this avoids scikit-learn's "X does not have valid feature names"
    # warning and ties each value to its column by name rather than position.
    input_data = pd.DataFrame([[studytime, absences, G1, G2]], columns=features)
    prediction = model.predict(input_data)[0]
    st.success(f"Predicted Final Grade: {round(prediction, 2)}")