Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files- app.py +132 -0
- linear_model.pkl +3 -0
- poly_features.pkl +3 -0
- poly_model.pkl +3 -0
- scaler.pkl +3 -0
app.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import seaborn as sns
|
| 5 |
+
import joblib
|
| 6 |
+
import numpy as np
|
| 7 |
+
from sklearn.metrics import mean_squared_error, r2_score
|
| 8 |
+
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Load the trained models & transformers.
# Each artifact is read with joblib from a path relative to the working
# directory; the names are bound in the same order as the files are listed.
linear_model, poly_model, poly_features, scaler = (
    joblib.load(artifact_path)
    for artifact_path in (
        "linear_model.pkl",
        "poly_model.pkl",
        "poly_features.pkl",
        "scaler.pkl",  # the saved StandardScaler
    )
)
|
| 16 |
+
|
| 17 |
+
# Function to load and preview CSV data
|
| 18 |
+
def load_data(file):
    """Read the uploaded CSV and return its first five rows as a preview.

    Args:
        file: Any input accepted by ``pandas.read_csv`` (the value handed
            over by the Gradio file input).

    Returns:
        A DataFrame containing the first five rows of the CSV.
    """
    frame = pd.read_csv(file)
    preview = frame.head()
    # Debug output on stdout: the preview rows and the inferred column dtypes.
    print("DEBUG: CSV Data in Gradio:\n", preview)
    print("DEBUG: Data Types in Gradio:\n", frame.dtypes)
    return preview
|
| 23 |
+
|
| 24 |
+
# Function to visualize population trends
|
| 25 |
+
def plot_population_trend(file, model_choice):
    """Plot the actual population together with the selected model's fit.

    The chart is written to ``population_trend.png`` in the working
    directory and that path is returned.

    Args:
        file: Any input accepted by ``pandas.read_csv``.
        model_choice: ``"Linear Regression"`` selects the linear model; any
            other value selects the polynomial model.

    Returns:
        The string ``"population_trend.png"``.

    Raises:
        IndexError: If the CSV has fewer than two columns and does not
            carry the ``Year`` / ``Total_Population`` headers.
    """
    df = pd.read_csv(file)

    # Keep supporting the original "Year"/"Total_Population" headers, but fall
    # back to column position so a CSV with other header names (for example
    # "Year,Population") is plotted instead of raising KeyError.
    if "Year" in df.columns and "Total_Population" in df.columns:
        years, population = df["Year"], df["Total_Population"]
    else:
        years, population = df.iloc[:, 0], df.iloc[:, 1]

    # Compute the predictions before any figure exists so a failing transform
    # cannot leave an orphaned figure behind.
    X = years.values.reshape(-1, 1)
    X_scaled = scaler.transform(X)  # both models are fed the scaled years
    if model_choice == "Linear Regression":
        predictions = linear_model.predict(X_scaled)
        line_label = "Linear Regression"
        line_style = {"color": "red", "linestyle": "dashed"}
    else:  # Polynomial Regression
        X_poly = poly_features.transform(X_scaled)
        predictions = poly_model.predict(X_poly)
        line_label = "Polynomial Regression"
        line_style = {"color": "green"}

    # A single explicit figure, always closed: pyplot keeps every figure alive
    # until it is closed, so leaving them open would grow memory per request.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.scatter(years, population, label="Actual Population", color="blue", alpha=0.6)
        ax.plot(years, predictions, label=line_label, **line_style)
        ax.set_xlabel("Year")
        ax.set_ylabel("Population")
        ax.set_title(f"Population Growth Prediction ({model_choice})")
        ax.legend()
        ax.grid(True)
        fig.savefig("population_trend.png")
    finally:
        plt.close(fig)

    return "population_trend.png"
|
| 61 |
+
|
| 62 |
+
# Function to predict population using the selected model
|
| 63 |
+
def predict_population(file, model_choice, test_years=(2016, 2020)):
    """Predict the population for every year in the CSV and score the model.

    Args:
        file: Any input accepted by ``pandas.read_csv``. The first column is
            treated as the year and the second as the population; any
            further columns are ignored.
        model_choice: ``"Linear Regression"`` selects the linear model; any
            other value selects the polynomial model.
        test_years: Inclusive ``(first, last)`` year range on which MSE and
            R² are computed. Defaults to ``(2016, 2020)``.

    Returns:
        A 3-tuple ``(table, image_path, message)``:

        * ``table`` - the DataFrame with columns ``Year``, ``Population``
          and ``Predicted Population``, or ``None`` on invalid input.
        * ``image_path`` - ``"population_trend.png"``, or ``None`` on
          invalid input.
        * ``message`` - the metrics summary, or an error/notice string.
    """
    df = pd.read_csv(file)

    # The message always travels in the last slot, where callers read it.
    if df.shape[1] < 2:
        return None, None, "ERROR: CSV must contain two columns (Year, Population)."

    # Keep only the first two columns so the rename below cannot fail with a
    # length mismatch when the CSV carries extra columns.
    df = df.iloc[:, :2].copy()
    df.columns = ["Year", "Population"]
    df = df.astype({"Year": int, "Population": float})  # Convert data types

    X = df["Year"].values.reshape(-1, 1)  # Year column as a 2-D feature matrix

    # Both models are fed the scaled years.
    X_scaled = scaler.transform(X)
    if model_choice == "Linear Regression":
        predictions = linear_model.predict(X_scaled)
    else:  # Polynomial Regression
        X_poly = poly_features.transform(X_scaled)
        predictions = poly_model.predict(X_poly)

    df["Predicted Population"] = predictions  # Append predictions to DataFrame

    print("DEBUG: Model Choice =", model_choice)
    print("DEBUG: X Values for Prediction:\n", X[:5])  # Print first 5 inputs
    print("DEBUG: Predictions:\n", predictions[:5])  # Print first 5 predictions

    # Score only on the rows that fall inside the test range.
    first_year, last_year = test_years
    test_mask = df["Year"].between(first_year, last_year)
    if not test_mask.any():
        # The metric functions raise on empty input; report that instead.
        return (
            df,
            "population_trend.png",
            f"{model_choice} Results: no rows for {first_year}-{last_year}; "
            "MSE and R² Score were not computed.",
        )

    y_test = df.loc[test_mask, "Population"].values
    y_pred_test = df.loc[test_mask, "Predicted Population"].values
    mse = mean_squared_error(y_test, y_pred_test)
    r2 = r2_score(y_test, y_pred_test)

    return df, "population_trend.png", f"{model_choice} Results: MSE = {mse:.2f}, R² Score = {r2:.2f}"
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
# Wrapper function for Gradio
|
| 107 |
+
def gradio_interface(file, model_choice):
    """Run preview, plot and prediction for one Gradio request.

    Args:
        file: The uploaded CSV as passed in by the Gradio file input, or
            ``None`` when nothing was uploaded.
        model_choice: The model name selected in the UI.

    Returns:
        A 4-tuple ``(preview, trend_image, predictions, performance)``
        matching the four output components of the interface. When no file
        was uploaded the first three entries are ``None`` and the last one
        is an error message.
    """
    # Submitting without a file would otherwise fail inside pandas with an
    # error that tells the user nothing useful.
    if file is None:
        return None, None, None, "ERROR: Please upload a CSV file."

    preview = load_data(file)
    trend_image = plot_population_trend(file, model_choice)
    # The middle element (image path) is ignored; the plot function above
    # already supplied the image.
    predictions, _, performance = predict_population(file, model_choice)
    return preview, trend_image, predictions, performance
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# Define the Gradio interface.
# Inputs: the CSV upload and the model selector, in the order expected by
# gradio_interface(file, model_choice).
_input_components = [
    gr.File(label="Upload CSV File"),
    gr.Radio(["Linear Regression", "Polynomial Regression"], label="Choose Model"),
]
# Outputs: one component per element of gradio_interface's return tuple.
_output_components = [
    gr.Dataframe(label="Preview Data"),
    gr.Image(label="Population Trend"),
    gr.Dataframe(label="Predictions"),
    gr.Textbox(label="Model Performance"),
]
interface = gr.Interface(
    fn=gradio_interface,  # single wrapper function drives all four outputs
    inputs=_input_components,
    outputs=_output_components,
    title="Population Prediction Tool",
    description="Upload a CSV file with Year and Population data. Choose a model (Linear or Polynomial Regression) to predict future population trends.",
)

# Launch the Gradio App
interface.launch()
|
linear_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e7b8ab6a96218c4a23bf8050255dae8cf468bb005574b1ecaffe2de4c3d70d4
|
| 3 |
+
size 561
|
poly_features.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55a6ff11301e34be39169450c78b5f545a4737236f6a226d315d378f47e7fa87
|
| 3 |
+
size 255
|
poly_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4195848735a33af474b374c688a41aad5ed9b29839c2c95858a3cdfd7f12ac0f
|
| 3 |
+
size 617
|
scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e664c813a5425937bb9a229017322def3636acfd71d341f8ce0a06ad528c2fd
|
| 3 |
+
size 623
|