"""Gradio app that fits pre-trained regressors to an uploaded Year/Population CSV.

The script loads four pickled models (plus the scaler and polynomial feature
transformer used by two of them), and exposes a single Gradio interface that
previews the CSV, plots actual vs. predicted population, and reports MSE / R²
on a fixed range of test years.
"""

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

# Load the trained models & transformers
linear_model = joblib.load("linear_model.pkl")
poly_model = joblib.load("poly_model.pkl")
poly_features = joblib.load("poly_features.pkl")
scaler = joblib.load("scaler.pkl")  # The saved StandardScaler
knn_model = joblib.load("KNN.pkl")
randforests_model = joblib.load("random_forests.pkl")

# Path the trend plot is written to. NOTE(review): this is a single shared
# file, so concurrent requests overwrite each other's image.
_TREND_IMAGE_PATH = "population_trend.png"

# Inclusive range of years used as the evaluation set for MSE / R².
_TEST_YEARS = (2016, 2020)

_COLUMN_ERROR = "ERROR: CSV must contain two columns (Year, Population)."

# Line appearance per model choice; any unrecognised choice falls back to the
# Random Forests entry, mirroring the prediction dispatch in _predict().
_LINE_STYLES = {
    "Linear Regression": {
        "label": "Linear Regression",
        "color": "red",
        "linestyle": "dashed",
    },
    "Polynomial Regression": {"label": "Polynomial Regression", "color": "green"},
    "KNN": {"label": "KNN", "color": "blue"},
}
_FALLBACK_LINE_STYLE = {"label": "Random Forests", "color": "yellow"}


def _normalize_columns(df):
    """Return a copy of the first two columns of *df* as Year (int) / Population (float).

    Extra columns are ignored. Raises ``ValueError`` if *df* has fewer than two
    columns or the values cannot be converted.
    """
    if df.shape[1] < 2:
        raise ValueError(_COLUMN_ERROR)
    normalized = df.iloc[:, :2].copy()
    normalized.columns = ["Year", "Population"]
    return normalized.astype({"Year": int, "Population": float})


def _predict(model_choice, X):
    """Run the model named by *model_choice* on the (n, 1) year array *X*.

    Linear and polynomial regression receive scaled input (the polynomial
    model additionally goes through ``poly_features``); KNN and random forests
    receive the raw years. Any other choice is treated as random forests.
    """
    if model_choice == "Linear Regression":
        return linear_model.predict(scaler.transform(X))
    if model_choice == "Polynomial Regression":
        return poly_model.predict(poly_features.transform(scaler.transform(X)))
    if model_choice == "KNN":
        return knn_model.predict(X)
    return randforests_model.predict(X)


# Function to load and preview CSV data
def load_data(file):
    """Read the CSV at *file* and return its first five rows.

    Also prints the preview and the column dtypes to stdout for debugging.
    """
    df = pd.read_csv(file)
    print("DEBUG: CSV Data in Gradio:\n", df.head())  # Print first 5 rows
    print("DEBUG: Data Types in Gradio:\n", df.dtypes)  # Check column types
    return df.head()  # Show first 5 rows


# Function to visualize population trends
def plot_population_trend(file, model_choice):
    """Plot actual population and the chosen model's predictions; return the image path.

    The first two CSV columns are taken as Year and Population regardless of
    their header names. The figure is saved to ``population_trend.png`` and
    then closed so repeated requests do not accumulate open figures.
    """
    df = _normalize_columns(pd.read_csv(file))
    X = df["Year"].values.reshape(-1, 1)  # Extract Year column
    predictions = _predict(model_choice, X)

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.scatter(
            df["Year"],
            df["Population"],
            label="Actual Population",
            color="blue",
            alpha=0.6,
        )
        style = _LINE_STYLES.get(model_choice, _FALLBACK_LINE_STYLE)
        ax.plot(df["Year"], predictions, **style)
        ax.set_xlabel("Year")
        ax.set_ylabel("Population")
        ax.set_title(f"Population Growth Prediction ({model_choice})")
        ax.legend()
        ax.grid()
        fig.savefig(_TREND_IMAGE_PATH)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    return _TREND_IMAGE_PATH


# Function to predict population using the selected model
def predict_population(file, model_choice):
    """Predict population for every year in the CSV and score the test years.

    Returns a 3-tuple ``(df, image_path, message)``:

    * ``df`` - the Year/Population frame with a "Predicted Population" column,
    * ``image_path`` - the fixed path of the trend plot (this function does
      not create the file; see ``plot_population_trend``),
    * ``message`` - MSE and R² computed only on rows whose Year lies in
      2016-2020, or a note that no such rows exist.

    If the CSV has fewer than two columns, returns ``(None, None, message)``
    with the error text in the message slot.
    """
    raw = pd.read_csv(file)

    # Ensure correct column format
    if raw.shape[1] < 2:
        return None, None, _COLUMN_ERROR

    df = _normalize_columns(raw)
    X = df["Year"].values.reshape(-1, 1)  # Extract Year column

    predictions = _predict(model_choice, X)
    df["Predicted Population"] = predictions  # Append predictions to DataFrame

    # Compute metrics only on the test set
    first_year, last_year = _TEST_YEARS
    test_mask = df["Year"].between(first_year, last_year)
    y_test = df.loc[test_mask, "Population"].values
    y_pred_test = df.loc[test_mask, "Predicted Population"].values

    print("DEBUG: Model Choice =", model_choice)
    print("DEBUG: X Values for Prediction:\n", X[:5])  # Print first 5 inputs
    print("DEBUG: Predictions:\n", predictions[:5])  # Print first 5 predictions

    if y_test.size == 0:
        # sklearn's metric functions raise on empty input; report it instead.
        message = (
            f"{model_choice} Results: MSE and R² unavailable "
            f"(no rows for test years {first_year}-{last_year})"
        )
        return df, _TREND_IMAGE_PATH, message

    mse = mean_squared_error(y_test, y_pred_test)
    r2 = r2_score(y_test, y_pred_test)
    return (
        df,
        _TREND_IMAGE_PATH,
        f"{model_choice} Results: MSE = {mse:.2f}, R² Score = {r2:.2f}",
    )


# Wrapper function for Gradio
def gradio_interface(file, model_choice):
    """Produce all four Gradio outputs: preview, trend image, predictions, performance.

    Prediction runs before plotting so that an invalid CSV yields the
    validation message in the performance box instead of a plotting error.
    """
    preview = load_data(file)
    predictions, _, performance = predict_population(file, model_choice)
    if predictions is None:
        return preview, None, None, performance
    trend_image = plot_population_trend(file, model_choice)
    return preview, trend_image, predictions, performance


# Define the Gradio interface
interface = gr.Interface(
    fn=gradio_interface,  # Use the single wrapper function
    inputs=[
        gr.File(label="Upload CSV File"),
        gr.Radio(
            ["Linear Regression", "Polynomial Regression", "KNN", "Random Forests"],
            label="Choose Model",
        ),
    ],
    outputs=[
        gr.Dataframe(label="Preview Data"),
        gr.Image(label="Population Trend"),
        gr.Dataframe(label="Predictions"),
        gr.Textbox(label="Model Performance"),
    ],
    title="Population Prediction Tool",
    description="Upload a CSV file with Year and Population data. Choose a model (Linear or Polynomial Regression, KNN or Random Forests) to predict future population trends.",
)

# Launch the Gradio App
interface.launch()