cpv2280's picture
Upload 3 files
2b8c7ec verified
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
# Deserialize the fitted estimators and preprocessing objects.
# The names on the left are bound, in order, to the files listed on the right.
(
    linear_model,
    poly_model,
    poly_features,
    scaler,  # the saved StandardScaler
    knn_model,
    randforests_model,
) = [
    joblib.load(artifact)
    for artifact in (
        "linear_model.pkl",
        "poly_model.pkl",
        "poly_features.pkl",
        "scaler.pkl",
        "KNN.pkl",
        "random_forests.pkl",
    )
]
# Function to load and preview CSV data
def load_data(file):
    """Read a CSV and return its first five rows for display.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame holding the first five rows of the CSV.
    """
    frame = pd.read_csv(file)
    preview = frame.head()

    # Debug output: the preview rows and the inferred column dtypes.
    print("DEBUG: CSV Data in Gradio:\n", preview)
    print("DEBUG: Data Types in Gradio:\n", frame.dtypes)

    return preview
# Function to visualize population trends
def plot_population_trend(file, model_choice):
    """Plot actual population against the selected model's predictions.

    The first two CSV columns are taken positionally as year and population,
    regardless of their header names. The chart is written to
    ``population_trend.png`` in the current working directory.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        model_choice: ``"Linear Regression"``, ``"Polynomial Regression"`` or
            ``"KNN"``; any other value selects the random-forest model.

    Returns:
        The path of the saved image, ``"population_trend.png"``.

    Raises:
        ValueError: If the CSV has fewer than two columns.
    """
    df = pd.read_csv(file)
    if df.shape[1] < 2:
        raise ValueError("CSV must contain two columns (Year, Population).")

    # Normalise the header so differently named columns do not raise KeyError.
    df = df.iloc[:, :2].copy()
    df.columns = ["Year", "Population"]

    X = df["Year"].values.reshape(-1, 1)

    # Predict before creating the figure so a failing model leaves no figure
    # behind.
    if model_choice == "Linear Regression":
        predictions = linear_model.predict(scaler.transform(X))
        line_kwargs = {
            "label": "Linear Regression",
            "color": "red",
            "linestyle": "dashed",
        }
    elif model_choice == "Polynomial Regression":
        # Scale first, then expand into polynomial terms.
        X_poly = poly_features.transform(scaler.transform(X))
        predictions = poly_model.predict(X_poly)
        line_kwargs = {"label": "Polynomial Regression", "color": "green"}
    elif model_choice == "KNN":
        # KNN is fed the raw, unscaled years.
        predictions = knn_model.predict(X)
        line_kwargs = {"label": "KNN", "color": "blue"}
    else:  # Random Forests, also fed the raw years
        predictions = randforests_model.predict(X)
        line_kwargs = {"label": "Random Forests", "color": "yellow"}

    # Use an explicit figure and always close it: pyplot keeps every open
    # figure alive, which would accumulate across requests.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.scatter(
            df["Year"],
            df["Population"],
            label="Actual Population",
            color="blue",
            alpha=0.6,
        )
        ax.plot(df["Year"], predictions, **line_kwargs)
        ax.set_xlabel("Year")
        ax.set_ylabel("Population")
        ax.set_title(f"Population Growth Prediction ({model_choice})")
        ax.legend()
        ax.grid(True)
        fig.savefig("population_trend.png")
    finally:
        plt.close(fig)

    return "population_trend.png"
# Function to predict population using the selected model
def predict_population(file, model_choice):
    """Predict population for every year in the CSV and score the test years.

    The first two CSV columns are taken positionally as year and population.
    Metrics are computed only on rows whose year lies in 2016-2020 inclusive.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        model_choice: ``"Linear Regression"``, ``"Polynomial Regression"`` or
            ``"KNN"``; any other value selects the random-forest model.

    Returns:
        A 3-tuple ``(df, image_path, message)``. ``df`` has the columns
        ``Year``, ``Population`` and ``Predicted Population``; ``image_path``
        is always ``"population_trend.png"``; ``message`` is the metrics
        summary. If the CSV has fewer than two columns the result is
        ``(None, None, error_message)``, so the message sits in the same slot
        as the metrics summary.
    """
    df = pd.read_csv(file)

    if df.shape[1] < 2:
        return None, None, "ERROR: CSV must contain two columns (Year, Population)."

    # Keep only the first two columns so extra columns do not break the rename.
    df = df.iloc[:, :2].copy()
    df.columns = ["Year", "Population"]
    df = df.astype({"Year": int, "Population": float})

    X = df["Year"].values.reshape(-1, 1)

    if model_choice == "Linear Regression":
        # The linear model is fed scaled years.
        predictions = linear_model.predict(scaler.transform(X))
    elif model_choice == "Polynomial Regression":
        # Scale first, then expand into polynomial terms.
        X_poly = poly_features.transform(scaler.transform(X))
        predictions = poly_model.predict(X_poly)
    elif model_choice == "KNN":
        predictions = knn_model.predict(X)
    else:  # Random Forests
        predictions = randforests_model.predict(X)

    df["Predicted Population"] = predictions

    print("DEBUG: Model Choice =", model_choice)
    print("DEBUG: X Values for Prediction:\n", X[:5])
    print("DEBUG: Predictions:\n", predictions[:5])

    # Score on the held-out years only.
    test_rows = df[df["Year"].between(2016, 2020)]
    if test_rows.empty:
        # The metric functions reject empty input, so report instead of crashing.
        return (
            df,
            "population_trend.png",
            f"{model_choice} Results: no rows for 2016-2020, "
            "so MSE and R² could not be computed.",
        )

    y_test = test_rows["Population"].values
    y_pred_test = test_rows["Predicted Population"].values
    mse = mean_squared_error(y_test, y_pred_test)
    r2 = r2_score(y_test, y_pred_test)

    return (
        df,
        "population_trend.png",
        f"{model_choice} Results: MSE = {mse:.2f}, R² Score = {r2:.2f}",
    )
# Wrapper function for Gradio
def gradio_interface(file, model_choice):
    """Run preview, plotting and prediction for one upload.

    Args:
        file: The uploaded CSV as delivered by the file input: a path, an
            object exposing the path as ``.name``, or ``None`` when nothing
            was uploaded.
        model_choice: The selected model name, or ``None`` when no option was
            picked.

    Returns:
        A 4-tuple ``(preview, trend_image, predictions, performance)`` matching
        the four output components. On invalid input the first three items may
        be ``None`` and ``performance`` carries an ``"ERROR: ..."`` message.
    """
    if file is None:
        return None, None, None, "ERROR: Please upload a CSV file."
    if model_choice is None:
        # Without this guard a missing choice would silently run the
        # random-forest fallback branch.
        return None, None, None, "ERROR: Please choose a model."

    # Each helper re-reads the CSV, so prefer a reusable path over a one-shot
    # stream when the upload object exposes one via `.name`.
    csv_path = getattr(file, "name", file)

    preview = load_data(csv_path)
    if preview.shape[1] < 2:
        # Stop before plotting, which cannot handle a single-column CSV.
        return (
            preview,
            None,
            None,
            "ERROR: CSV must contain two columns (Year, Population).",
        )

    trend_image = plot_population_trend(csv_path, model_choice)
    predictions, _, performance = predict_population(csv_path, model_choice)
    return preview, trend_image, predictions, performance
# Build the Gradio UI: a CSV upload and a model selector feed the single
# wrapper function, whose four return values fill the four output components.
_input_components = [
    gr.File(label="Upload CSV File"),
    gr.Radio(
        ["Linear Regression", "Polynomial Regression", "KNN", "Random Forests"],
        label="Choose Model",
    ),
]
_output_components = [
    gr.Dataframe(label="Preview Data"),
    gr.Image(label="Population Trend"),
    gr.Dataframe(label="Predictions"),
    gr.Textbox(label="Model Performance"),
]
interface = gr.Interface(
    fn=gradio_interface,
    inputs=_input_components,
    outputs=_output_components,
    title="Population Prediction Tool",
    description="Upload a CSV file with Year and Population data. Choose a model (Linear or Polynomial Regression, KNN or Random Forests) to predict future population trends.",
)

# Start serving the app.
interface.launch()