Spaces:
Sleeping
Sleeping
File size: 5,960 Bytes
b3b5f1a 2b8c7ec b3b5f1a 2b8c7ec b3b5f1a 2b8c7ec b3b5f1a 2b8c7ec b3b5f1a 2b8c7ec b3b5f1a 2b8c7ec b3b5f1a 2b8c7ec b3b5f1a 2b8c7ec b3b5f1a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
# Load the trained models & transformers once, at import time.
# NOTE: joblib artifacts are pickles -- only load files from a trusted source.
(
    linear_model,
    poly_model,
    poly_features,
    scaler,  # the saved StandardScaler
    knn_model,
    randforests_model,
) = map(
    joblib.load,
    (
        "linear_model.pkl",
        "poly_model.pkl",
        "poly_features.pkl",
        "scaler.pkl",
        "KNN.pkl",
        "random_forests.pkl",
    ),
)
# Function to load and preview CSV data
def load_data(file):
    """Read the uploaded CSV and return its first five rows as a preview.

    The preview rows and the column dtypes of the full frame are also
    printed to stdout as a debugging aid.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).

    Returns:
        A DataFrame holding at most the first five rows of the CSV.
    """
    frame = pd.read_csv(file)
    preview = frame.head()
    print("DEBUG: CSV Data in Gradio:\n", preview)  # first 5 rows
    print("DEBUG: Data Types in Gradio:\n", frame.dtypes)  # column types
    return preview
# Function to visualize population trends
def plot_population_trend(file, model_choice):
    """Plot the actual population together with the chosen model's fit.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).
            Columns named "Year" and "Population" are used when present;
            otherwise the first column is taken as the year and the second
            as the population.
        model_choice: "Linear Regression", "Polynomial Regression" or "KNN";
            any other value selects the random-forest model.

    Returns:
        The path of the saved image, always "population_trend.png".

    Raises:
        ValueError: If the CSV has fewer than two columns.
    """
    df = pd.read_csv(file)

    # Prefer the named columns when the CSV already has them; otherwise fall
    # back to position so that differently-labelled headers still plot.
    if {"Year", "Population"}.issubset(df.columns):
        df = df[["Year", "Population"]]
    elif df.shape[1] >= 2:
        df = df.iloc[:, :2].copy()
        df.columns = ["Year", "Population"]
    else:
        raise ValueError("CSV must contain two columns (Year, Population).")

    years = df["Year"]
    X = years.values.reshape(-1, 1)  # models expect a 2-D feature matrix

    # Draw on an explicit figure and close it afterwards: pyplot keeps every
    # figure alive until it is closed, which leaks memory in a long-running
    # app that renders one plot per request.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.scatter(
            years,
            df["Population"],
            label="Actual Population",
            color="blue",
            alpha=0.6,
        )

        if model_choice == "Linear Regression":
            predictions = linear_model.predict(scaler.transform(X))
            ax.plot(
                years,
                predictions,
                label="Linear Regression",
                color="red",
                linestyle="dashed",
            )
        elif model_choice == "Polynomial Regression":
            # Scale first, then expand to polynomial terms.
            X_poly = poly_features.transform(scaler.transform(X))
            predictions = poly_model.predict(X_poly)
            ax.plot(years, predictions, label="Polynomial Regression", color="green")
        elif model_choice == "KNN":
            # KNN is given the raw (unscaled) years.
            predictions = knn_model.predict(X)
            ax.plot(years, predictions, label="KNN", color="blue")
        else:  # Random Forests, also on the raw years
            predictions = randforests_model.predict(X)
            ax.plot(years, predictions, label="Random Forests", color="yellow")

        ax.set_xlabel("Year")
        ax.set_ylabel("Population")
        ax.set_title(f"Population Growth Prediction ({model_choice})")
        ax.legend()
        ax.grid(True)
        # NOTE(review): a fixed output name means concurrent requests
        # overwrite each other's image; kept because callers expect this path.
        fig.savefig("population_trend.png")
    finally:
        plt.close(fig)

    return "population_trend.png"
# Function to predict population using the selected model
def predict_population(file, model_choice):
    """Predict the population for every year in the CSV and score the model.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).
            Columns named "Year" and "Population" are used when present;
            otherwise the first column is taken as the year and the second
            as the population.
        model_choice: "Linear Regression", "Polynomial Regression" or "KNN";
            any other value selects the random-forest model.

    Returns:
        A ``(df, image_path, message)`` tuple.

        * On success ``df`` is the input data with an added
          "Predicted Population" column, ``image_path`` is
          "population_trend.png" and ``message`` reports MSE and R² computed
          on the rows whose year lies in 2016-2020 (or says that no such
          rows exist).
        * If the CSV has fewer than two columns the result is
          ``(None, None, <error message>)``.
    """
    df = pd.read_csv(file)

    # Ensure correct column format. The message goes in the last slot so it
    # lands in the same position as the performance text of a successful run.
    if df.shape[1] < 2:
        return None, None, "ERROR: CSV must contain two columns (Year, Population)."

    if {"Year", "Population"}.issubset(df.columns):
        df = df[["Year", "Population"]].copy()
    else:
        # Extra columns are dropped rather than breaking the rename below.
        df = df.iloc[:, :2].copy()
        df.columns = ["Year", "Population"]
    df = df.astype({"Year": int, "Population": float})

    X = df["Year"].values.reshape(-1, 1)  # models expect a 2-D feature matrix

    if model_choice == "Linear Regression":
        # NOTE(review): the years are scaled here, matching the plotting code;
        # an older comment claimed linear regression must not be scaled --
        # confirm against how linear_model was trained.
        predictions = linear_model.predict(scaler.transform(X))
    elif model_choice == "Polynomial Regression":
        # Scale first, then expand to polynomial terms.
        X_poly = poly_features.transform(scaler.transform(X))
        predictions = poly_model.predict(X_poly)
    elif model_choice == "KNN":
        predictions = knn_model.predict(X)
    else:  # Random Forests
        predictions = randforests_model.predict(X)

    df["Predicted Population"] = predictions

    print("DEBUG: Model Choice =", model_choice)
    print("DEBUG: X Values for Prediction:\n", X[:5])  # first 5 inputs
    print("DEBUG: Predictions:\n", predictions[:5])  # first 5 predictions

    # Metrics are computed only on the held-out test years (2016-2020).
    test_rows = df[df["Year"].between(2016, 2020)]
    if test_rows.empty:
        # The metric functions raise on empty input, so report it instead.
        return (
            df,
            "population_trend.png",
            f"{model_choice} Results: no rows for 2016-2020, "
            "so MSE and R² could not be computed.",
        )

    y_test = test_rows["Population"].values
    y_pred_test = test_rows["Predicted Population"].values
    mse = mean_squared_error(y_test, y_pred_test)
    r2 = r2_score(y_test, y_pred_test)

    return (
        df,
        "population_trend.png",
        f"{model_choice} Results: MSE = {mse:.2f}, R² Score = {r2:.2f}",
    )
# Wrapper function for Gradio
def gradio_interface(file, model_choice):
    """Run preview, prediction and plotting for a single Gradio request.

    Args:
        file: The uploaded CSV, passed straight on to the helper functions.
        model_choice: Name of the model picked in the UI.

    Returns:
        ``(preview, trend_image, predictions, performance)`` in the order of
        the interface's output components. When the CSV fails validation,
        ``trend_image`` and ``predictions`` are ``None`` and ``performance``
        carries the error message.
    """
    preview = load_data(file)

    # Predict before plotting: predict_population validates the CSV shape,
    # while the plot step assumes two columns and would raise first.
    predictions, second, performance = predict_population(file, model_choice)
    if predictions is None:
        # Failure is signalled by a missing DataFrame; take the message from
        # whichever of the remaining slots carries text.
        message = performance if performance is not None else second
        return preview, None, None, message

    trend_image = plot_population_trend(file, model_choice)
    return preview, trend_image, predictions, performance
# Build the Gradio UI: one CSV upload plus a model selector in, and four
# result panes out, in the order returned by gradio_interface.
interface = gr.Interface(
    title="Population Prediction Tool",
    description="Upload a CSV file with Year and Population data. Choose a model (Linear or Polynomial Regression, KNN or Random Forests) to predict future population trends.",
    fn=gradio_interface,  # single wrapper that drives all the steps
    inputs=[
        gr.File(label="Upload CSV File"),
        gr.Radio(
            ["Linear Regression", "Polynomial Regression", "KNN", "Random Forests"],
            label="Choose Model",
        ),
    ],
    outputs=[
        gr.Dataframe(label="Preview Data"),
        gr.Image(label="Population Trend"),
        gr.Dataframe(label="Predictions"),
        gr.Textbox(label="Model Performance"),
    ],
)

# Start serving the app.
interface.launch()
|