File size: 5,960 Bytes
b3b5f1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b8c7ec
 
b3b5f1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b8c7ec
b3b5f1a
 
 
 
 
 
 
2b8c7ec
b3b5f1a
 
 
 
2b8c7ec
 
 
 
 
 
 
 
 
 
 
 
 
 
b3b5f1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b8c7ec
b3b5f1a
 
 
2b8c7ec
 
 
 
b3b5f1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b8c7ec
b3b5f1a
 
 
 
 
 
 
 
2b8c7ec
b3b5f1a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures, StandardScaler


# Load the trained models & transformers once, at import time.
# The artifacts are read in the order listed and bound to the module-level
# names used by the plotting and prediction functions.
# NOTE: joblib.load unpickles the files, so they must come from a trusted source.
(
    linear_model,
    poly_model,
    poly_features,
    scaler,  # the saved StandardScaler
    knn_model,
    randforests_model,
) = map(
    joblib.load,
    (
        "linear_model.pkl",
        "poly_model.pkl",
        "poly_features.pkl",
        "scaler.pkl",
        "KNN.pkl",
        "random_forests.pkl",
    ),
)

# Function to load and preview CSV data
def load_data(file):
    """Read the uploaded CSV and return its first five rows for preview.

    The preview rows and the column dtypes are also printed to stdout as a
    debugging aid.

    Args:
        file: Anything ``pandas.read_csv`` accepts (a path or a file-like
            object).

    Returns:
        A DataFrame holding the first five rows of the CSV.
    """
    frame = pd.read_csv(file)
    preview = frame.head()  # first 5 rows
    print("DEBUG: CSV Data in Gradio:\n", preview)
    print("DEBUG: Data Types in Gradio:\n", frame.dtypes)
    return preview

# Function to visualize population trends
def plot_population_trend(file, model_choice):
    """Plot the actual population against the selected model's predictions.

    The chart is written to ``population_trend.png`` in the working
    directory and that path is returned.

    Args:
        file: Anything ``pandas.read_csv`` accepts (a path or a file-like
            object). Columns named ``Year`` and ``Population`` are used when
            both are present; otherwise the first two columns are taken as
            (Year, Population).
        model_choice: ``"Linear Regression"``, ``"Polynomial Regression"`` or
            ``"KNN"``; any other value falls through to Random Forests.

    Returns:
        The string ``"population_trend.png"``.

    Raises:
        ValueError: If the CSV has fewer than two columns and lacks the
            ``Year`` / ``Population`` headers.
    """
    df = pd.read_csv(file)

    # Prefer the named columns; otherwise fall back to position so CSVs with
    # different headers are plotted the same way they are predicted.
    if not {"Year", "Population"}.issubset(df.columns):
        if df.shape[1] < 2:
            raise ValueError("CSV must contain two columns (Year, Population).")
        df = df.iloc[:, :2].copy()
        df.columns = ["Year", "Population"]

    # Sort by year so the prediction line is drawn left to right instead of
    # zigzagging when the CSV rows are out of order.
    df = df.sort_values("Year")

    X = df["Year"].values.reshape(-1, 1)  # Year as a single-feature matrix

    # Compute predictions before any figure exists, so a failing model call
    # cannot leave an open figure behind.
    if model_choice == "Linear Regression":
        predictions = linear_model.predict(scaler.transform(X))
        line_label = "Linear Regression"
        line_style = {"color": "red", "linestyle": "dashed"}
    elif model_choice == "Polynomial Regression":
        X_scaled = scaler.transform(X)  # apply the saved scaling
        predictions = poly_model.predict(poly_features.transform(X_scaled))
        line_label = "Polynomial Regression"
        line_style = {"color": "green"}
    elif model_choice == "KNN":
        predictions = knn_model.predict(X)  # KNN is fed the raw years
        line_label = "KNN"
        line_style = {"color": "blue"}
    else:  # Random Forests
        predictions = randforests_model.predict(X)  # fed the raw years
        line_label = "Random Forests"
        line_style = {"color": "yellow"}

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.scatter(
            df["Year"],
            df["Population"],
            label="Actual Population",
            color="blue",
            alpha=0.6,
        )
        ax.plot(df["Year"], predictions, label=line_label, **line_style)
        ax.set_xlabel("Year")
        ax.set_ylabel("Population")
        ax.set_title(f"Population Growth Prediction ({model_choice})")
        ax.legend()
        ax.grid(True)
        fig.savefig("population_trend.png")
    finally:
        # pyplot keeps every figure alive until it is closed; close it so a
        # long-running server does not accumulate one figure per request.
        plt.close(fig)

    return "population_trend.png"

# Function to predict population using the selected model
def predict_population(file, model_choice, *, test_years=(2016, 2020)):
    """Predict population for every year in the CSV and score the test window.

    Args:
        file: Anything ``pandas.read_csv`` accepts (a path or a file-like
            object). Columns named ``Year`` and ``Population`` are used when
            both are present; otherwise the first two columns are taken as
            (Year, Population). Extra columns are ignored.
        model_choice: ``"Linear Regression"``, ``"Polynomial Regression"`` or
            ``"KNN"``; any other value falls through to Random Forests.
        test_years: Inclusive ``(first, last)`` year range on which MSE and
            R² are computed. Defaults to 2016-2020.

    Returns:
        A 3-tuple ``(df, image_path, summary)`` where ``df`` holds ``Year``,
        ``Population`` and ``Predicted Population``, ``image_path`` is the
        fixed string ``"population_trend.png"``, and ``summary`` is a text
        line with the metrics. When the CSV has fewer than two columns the
        tuple is ``(None, error_message, None)`` instead.
    """
    df = pd.read_csv(file)

    # Ensure correct column format
    if df.shape[1] < 2:
        return None, "ERROR: CSV must contain two columns (Year, Population).", None

    # Keep exactly two columns: assigning two names to a wider frame raises a
    # length-mismatch error, so select/slice before renaming.
    if {"Year", "Population"}.issubset(df.columns):
        df = df[["Year", "Population"]].copy()
    else:
        df = df.iloc[:, :2].copy()
        df.columns = ["Year", "Population"]
    df = df.astype({"Year": int, "Population": float})  # Convert data types

    X = df["Year"].values.reshape(-1, 1)  # Year as a single-feature matrix

    if model_choice == "Linear Regression":
        # The linear model is fed scaled years, same as the polynomial model.
        predictions = linear_model.predict(scaler.transform(X))
    elif model_choice == "Polynomial Regression":
        X_scaled = scaler.transform(X)  # apply the saved scaling
        predictions = poly_model.predict(poly_features.transform(X_scaled))
    elif model_choice == "KNN":
        predictions = knn_model.predict(X)  # fed the raw years
    else:  # Random Forests
        predictions = randforests_model.predict(X)  # fed the raw years

    df["Predicted Population"] = predictions  # Append predictions to DataFrame

    print("DEBUG: Model Choice =", model_choice)
    print("DEBUG: X Values for Prediction:\n", X[:5])  # Print first 5 inputs
    print("DEBUG: Predictions:\n", predictions[:5])  # Print first 5 predictions

    # Score only the rows that fall inside the test window.
    first_year, last_year = test_years
    test_mask = df["Year"].between(first_year, last_year)
    y_test = df.loc[test_mask, "Population"].values
    y_pred_test = df.loc[test_mask, "Predicted Population"].values

    if y_test.size == 0:
        # The metric functions reject empty inputs; report it instead of crashing.
        summary = (
            f"{model_choice} Results: no rows for {first_year}-{last_year}, "
            "so MSE and R² were not computed."
        )
    else:
        mse = mean_squared_error(y_test, y_pred_test)
        r2 = r2_score(y_test, y_pred_test)
        summary = f"{model_choice} Results: MSE = {mse:.2f}, R² Score = {r2:.2f}"

    return df, "population_trend.png", summary



# Wrapper function for Gradio
def gradio_interface(file, model_choice):
    """Run preview, prediction and plotting for one Gradio request.

    Args:
        file: The uploaded CSV as passed in by Gradio, or ``None`` when
            nothing was uploaded.
        model_choice: The model name selected in the UI.

    Returns:
        A 4-tuple ``(preview, trend_image, predictions, performance)``
        matching the interface outputs. On invalid input the image and
        predictions are ``None`` and ``performance`` carries the error text.
    """
    if file is None:
        # No upload yet: report it in the text box instead of letting
        # pd.read_csv raise on None.
        return None, None, None, "ERROR: Please upload a CSV file."

    preview = load_data(file)

    # Validate and predict first: predict_population reports bad input as
    # (None, message, None), and that message must reach the user rather
    # than being dropped.
    predictions, message_or_image, performance = predict_population(file, model_choice)
    if predictions is None:
        return preview, None, None, message_or_image

    trend_image = plot_population_trend(file, model_choice)
    return preview, trend_image, predictions, performance


# Define the Gradio interface: a CSV upload plus a model selector feed the
# single wrapper function, whose four return values fill the four outputs.
_input_components = [
    gr.File(label="Upload CSV File"),
    gr.Radio(
        ["Linear Regression", "Polynomial Regression", "KNN", "Random Forests"],
        label="Choose Model",
    ),
]
_output_components = [
    gr.Dataframe(label="Preview Data"),
    gr.Image(label="Population Trend"),
    gr.Dataframe(label="Predictions"),
    gr.Textbox(label="Model Performance"),
]

interface = gr.Interface(
    fn=gradio_interface,
    inputs=_input_components,
    outputs=_output_components,
    title="Population Prediction Tool",
    description="Upload a CSV file with Year and Population data. Choose a model (Linear or Polynomial Regression, KNN or Random Forests) to predict future population trends.",
)

# Launch the Gradio App (runs as soon as this module is executed or imported)
interface.launch()