Spaces:

cpv2280
/

population-prediction

Sleeping

App Files Files Community

population-prediction / app.py

cpv2280

Upload 3 files

2b8c7ec verified 7 months ago

raw

history blame contribute delete

5.96 kB

	import pandas as pd
	import gradio as gr
	import matplotlib.pyplot as plt
	import seaborn as sns
	import joblib
	import numpy as np
	from sklearn.metrics import mean_squared_error, r2_score
	from sklearn.preprocessing import PolynomialFeatures, StandardScaler


	# Trained estimators and fitted transformers, loaded once when the module is
	# imported. The relative file names are resolved against the working directory.
	# NOTE: joblib.load unpickles its input, so only trusted artifact files
	# should ever be placed at these paths.
	(
	    linear_model,
	    poly_model,
	    poly_features,
	    scaler,  # the saved StandardScaler
	    knn_model,
	    randforests_model,
	) = map(
	    joblib.load,
	    (
	        "linear_model.pkl",
	        "poly_model.pkl",
	        "poly_features.pkl",
	        "scaler.pkl",
	        "KNN.pkl",
	        "random_forests.pkl",
	    ),
	)

	# Function to load and preview CSV data
	def load_data(file):
	    """Read a CSV and return its first five rows for preview.

	    Args:
	        file: A path or file-like object accepted by ``pd.read_csv``.

	    Returns:
	        A DataFrame holding the first five rows of the CSV.
	    """
	    df = pd.read_csv(file)
	    preview = df.head()  # computed once; used for both the log and the return
	    print("DEBUG: CSV Data in Gradio:\n", preview)  # Print first 5 rows
	    print("DEBUG: Data Types in Gradio:\n", df.dtypes)  # Check column types
	    return preview

	# Function to visualize population trends
	def plot_population_trend(file, model_choice):
	    """Plot actual population against the selected model's predictions.

	    The first CSV column is treated as the year and the second as the
	    population, matching how ``predict_population`` names columns by position,
	    so the header text of the CSV does not matter.

	    Args:
	        file: A path or file-like object accepted by ``pd.read_csv``.
	        model_choice: "Linear Regression", "Polynomial Regression" or "KNN";
	            any other value selects the Random Forests model.

	    Returns:
	        The string "population_trend.png", the file the figure is saved to.
	    """
	    df = pd.read_csv(file)
	    years = df.iloc[:, 0]
	    population = df.iloc[:, 1]
	    X = years.to_numpy().reshape(-1, 1)  # single-feature matrix of years

	    # Linear and polynomial models are fed scaled years; KNN and Random
	    # Forests are fed the raw years.
	    if model_choice == "Linear Regression":
	        predictions = linear_model.predict(scaler.transform(X))
	        line_style = {"label": "Linear Regression", "color": "red", "linestyle": "dashed"}
	    elif model_choice == "Polynomial Regression":
	        X_poly = poly_features.transform(scaler.transform(X))
	        predictions = poly_model.predict(X_poly)
	        line_style = {"label": "Polynomial Regression", "color": "green"}
	    elif model_choice == "KNN":
	        predictions = knn_model.predict(X)
	        line_style = {"label": "KNN", "color": "blue"}
	    else:  # Random Forests
	        predictions = randforests_model.predict(X)
	        line_style = {"label": "Random Forests", "color": "yellow"}

	    # Only one figure is created (the one that gets saved), and it is always
	    # closed afterwards so figures do not pile up across requests.
	    fig, ax = plt.subplots(figsize=(8, 5))
	    try:
	        ax.scatter(years, population, label="Actual Population", color="blue", alpha=0.6)
	        ax.plot(years, predictions, **line_style)
	        ax.set_xlabel("Year")
	        ax.set_ylabel("Population")
	        ax.set_title(f"Population Growth Prediction ({model_choice})")
	        ax.legend()
	        ax.grid()
	        fig.savefig("population_trend.png")
	    finally:
	        plt.close(fig)

	    return "population_trend.png"

	# Function to predict population using the selected model
	def predict_population(file, model_choice, *, test_years=(2016, 2020)):
	    """Predict population for every year in the CSV and score the test years.

	    Args:
	        file: A path or file-like object accepted by ``pd.read_csv``. The first
	            two columns are used as Year and Population; extra columns are
	            ignored.
	        model_choice: "Linear Regression", "Polynomial Regression" or "KNN";
	            any other value selects the Random Forests model.
	        test_years: Inclusive ``(first, last)`` year range on which MSE and R²
	            are computed.

	    Returns:
	        A 3-tuple ``(dataframe, image_path, message)``. On success the
	        dataframe has Year, Population and Predicted Population columns,
	        ``image_path`` is the string "population_trend.png" (this function
	        does not write that file itself), and ``message`` summarises the
	        metrics. If the CSV has fewer than two columns the result is
	        ``(None, None, error_message)``.
	    """
	    df = pd.read_csv(file)

	    # Ensure correct column format. The message goes in the third slot, the
	    # same position the success path uses for its text.
	    if df.shape[1] < 2:
	        return None, None, "ERROR: CSV must contain two columns (Year, Population)."

	    # Keep only the first two columns so CSVs with extra columns do not fail
	    # on the rename below.
	    df = df.iloc[:, :2].copy()
	    df.columns = ["Year", "Population"]
	    df = df.astype({"Year": int, "Population": float})  # Convert data types

	    X = df["Year"].to_numpy().reshape(-1, 1)  # single-feature matrix of years

	    if model_choice == "Linear Regression":
	        # The linear model is fed scaled years, the same as the polynomial one.
	        predictions = linear_model.predict(scaler.transform(X))
	    elif model_choice == "Polynomial Regression":
	        X_poly = poly_features.transform(scaler.transform(X))
	        predictions = poly_model.predict(X_poly)
	    elif model_choice == "KNN":
	        predictions = knn_model.predict(X)
	    else:  # Random Forests
	        predictions = randforests_model.predict(X)

	    df["Predicted Population"] = predictions  # Append predictions to DataFrame

	    # Metrics are computed only on rows whose year lies in the test range.
	    first_year, last_year = test_years
	    test_mask = df["Year"].between(first_year, last_year)
	    y_test = df.loc[test_mask, "Population"].to_numpy()
	    y_pred_test = df.loc[test_mask, "Predicted Population"].to_numpy()

	    if y_test.size == 0:
	        # The scikit-learn metric functions raise on empty input; report the
	        # missing test rows instead of failing the whole request.
	        performance = (
	            f"{model_choice} Results: no rows for {first_year}-{last_year}; "
	            "MSE and R² were not computed."
	        )
	    else:
	        mse = mean_squared_error(y_test, y_pred_test)
	        r2 = r2_score(y_test, y_pred_test)
	        performance = f"{model_choice} Results: MSE = {mse:.2f}, R² Score = {r2:.2f}"

	    print("DEBUG: Model Choice =", model_choice)
	    print("DEBUG: X Values for Prediction:\n", X[:5])  # Print first 5 inputs
	    print("DEBUG: Predictions:\n", predictions[:5])  # Print first 5 predictions

	    return df, "population_trend.png", performance



	# Wrapper function for Gradio
	def gradio_interface(file, model_choice):
	    """Run preview, plotting and prediction for one uploaded CSV.

	    Args:
	        file: The uploaded CSV, passed unchanged to each helper.
	        model_choice: The model name, passed unchanged to the plotting and
	            prediction helpers.

	    Returns:
	        A 4-tuple ``(preview, trend_image, predictions, performance)`` in the
	        order of the interface's output components.
	    """
	    head_rows = load_data(file)
	    chart_path = plot_population_trend(file, model_choice)
	    # The middle element of the prediction result is not used here; the
	    # chart path comes from the plotting helper instead.
	    predicted_df, _unused_path, model_summary = predict_population(file, model_choice)
	    return head_rows, chart_path, predicted_df, model_summary


	# Define the Gradio interface
	# Input widgets, in the order gradio_interface receives its arguments.
	_input_components = [
	    gr.File(label="Upload CSV File"),
	    gr.Radio(
	        ["Linear Regression", "Polynomial Regression","KNN", "Random Forests"],
	        label="Choose Model",
	    ),
	]

	# Output widgets, in the order gradio_interface returns its values.
	_output_components = [
	    gr.Dataframe(label="Preview Data"),
	    gr.Image(label="Population Trend"),
	    gr.Dataframe(label="Predictions"),
	    gr.Textbox(label="Model Performance"),
	]

	# A single wrapper function drives all four outputs.
	interface = gr.Interface(
	    fn=gradio_interface,
	    inputs=_input_components,
	    outputs=_output_components,
	    title="Population Prediction Tool",
	    description="Upload a CSV file with Year and Population data. Choose a model (Linear or Polynomial Regression, KNN or Random Forests) to predict future population trends.",
	)

	# Launch the Gradio App
	interface.launch()