cpv2280 committed on
Commit
b3b5f1a
·
verified ·
1 Parent(s): 169a0e5

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +132 -0
  2. linear_model.pkl +3 -0
  3. poly_features.pkl +3 -0
  4. poly_model.pkl +3 -0
  5. scaler.pkl +3 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import joblib
6
+ import numpy as np
7
+ from sklearn.metrics import mean_squared_error, r2_score
8
+ from sklearn.preprocessing import PolynomialFeatures, StandardScaler
9
+
10
+
11
# Load the trained regressors and the fitted preprocessing objects that ship
# next to this script. The files are read in the order listed below.
# NOTE: joblib artifacts are pickles; only load files from a trusted source.
linear_model, poly_model, poly_features, scaler = (
    joblib.load(artifact_name)
    for artifact_name in (
        "linear_model.pkl",
        "poly_model.pkl",
        "poly_features.pkl",
        "scaler.pkl",  # the saved StandardScaler
    )
)
16
+
17
+ # Function to load and preview CSV data
18
def load_data(file):
    """Read the uploaded CSV and return its first five rows as a preview.

    The preview and the column dtypes are also printed to stdout as a
    debugging aid.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).

    Returns:
        A DataFrame holding the first five rows of the CSV.
    """
    frame = pd.read_csv(file)
    preview = frame.head()
    print("DEBUG: CSV Data in Gradio:\n", preview)  # first 5 rows
    print("DEBUG: Data Types in Gradio:\n", frame.dtypes)  # column types
    return preview
23
+
24
+ # Function to visualize population trends
25
def plot_population_trend(file, model_choice):
    """Plot the uploaded population data together with the chosen model's fit.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).
        model_choice: ``"Linear Regression"`` selects the linear model; any
            other value selects the polynomial model.

    Returns:
        The path of the saved image, ``"population_trend.png"``.
    """
    df = pd.read_csv(file)

    # Use the named columns when they exist; otherwise fall back to column
    # position so a plain "Year, Population" CSV does not raise KeyError.
    years = df["Year"] if "Year" in df.columns else df.iloc[:, 0]
    if "Total_Population" in df.columns:
        population = df["Total_Population"]
    else:
        population = df.iloc[:, 1]

    X = years.values.reshape(-1, 1)
    X_scaled = scaler.transform(X)  # both models are fed the scaled years

    if model_choice == "Linear Regression":
        predictions = linear_model.predict(X_scaled)
        line_kwargs = {
            "label": "Linear Regression",
            "color": "red",
            "linestyle": "dashed",
        }
    else:  # Polynomial Regression
        X_poly = poly_features.transform(X_scaled)
        predictions = poly_model.predict(X_poly)
        line_kwargs = {"label": "Polynomial Regression", "color": "green"}

    # Draw on a single figure and always close it: pyplot keeps every figure
    # alive until closed, which leaks memory in a long-running server.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.scatter(
            years, population, label="Actual Population", color="blue", alpha=0.6
        )
        ax.plot(years, predictions, **line_kwargs)
        ax.set_xlabel("Year")
        ax.set_ylabel("Population")
        ax.set_title(f"Population Growth Prediction ({model_choice})")
        ax.legend()
        ax.grid()
        # NOTE(review): a fixed file name is shared by all requests; concurrent
        # users could overwrite each other's image.
        fig.savefig("population_trend.png")
    finally:
        plt.close(fig)

    return "population_trend.png"
61
+
62
+ # Function to predict population using the selected model
63
def predict_population(file, model_choice):
    """Predict population for every year in the CSV and score the 2016-2020 rows.

    The first two CSV columns are taken as year and population regardless of
    their header names; any further columns are ignored.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).
        model_choice: ``"Linear Regression"`` selects the linear model; any
            other value selects the polynomial model.

    Returns:
        A 3-tuple ``(dataframe, image_path, performance_text)``. On a
        validation failure the DataFrame is ``None`` and the error message is
        returned in both the second slot (as before) and the third slot, so a
        caller that only displays the performance text still sees it.
    """
    df = pd.read_csv(file)

    # Ensure correct column format
    if df.shape[1] < 2:
        message = "ERROR: CSV must contain two columns (Year, Population)."
        return None, message, message

    # Keep only the first two columns; assigning two names to a wider frame
    # would raise a length-mismatch ValueError.
    df = df.iloc[:, :2].copy()
    df.columns = ["Year", "Population"]
    df = df.astype({"Year": int, "Population": float})  # Convert data types

    X = df["Year"].values.reshape(-1, 1)  # Extract Year column

    # Both models receive the scaled years (the linear branch scales as well).
    X_scaled = scaler.transform(X)
    if model_choice == "Linear Regression":
        predictions = linear_model.predict(X_scaled)
    else:  # Polynomial Regression
        X_poly = poly_features.transform(X_scaled)
        predictions = poly_model.predict(X_poly)

    df["Predicted Population"] = predictions  # Append predictions to DataFrame

    print("DEBUG: Model Choice =", model_choice)
    print("DEBUG: X Values for Prediction:\n", X[:5])  # Print first 5 inputs
    print("DEBUG: Predictions:\n", predictions[:5])  # Print first 5 predictions

    # Metrics are computed only on the rows for the years 2016-2020.
    test_mask = df["Year"].between(2016, 2020)
    if not test_mask.any():
        # The metric functions raise on empty input, so report instead.
        return (
            df,
            "population_trend.png",
            f"{model_choice} Results: no rows for 2016-2020, "
            "so MSE and R² Score were not computed.",
        )

    y_test = df.loc[test_mask, "Population"].values
    y_pred_test = df.loc[test_mask, "Predicted Population"].values

    mse = mean_squared_error(y_test, y_pred_test)
    r2 = r2_score(y_test, y_pred_test)

    return df, "population_trend.png", f"{model_choice} Results: MSE = {mse:.2f}, R² Score = {r2:.2f}"
103
+
104
+
105
+
106
+ # Wrapper function for Gradio
107
def gradio_interface(file, model_choice):
    """Run preview, prediction and plotting for one Gradio request.

    Args:
        file: The uploaded CSV as provided by the ``gr.File`` input, or
            ``None`` when nothing was uploaded.
        model_choice: The value selected in the model radio button.

    Returns:
        A 4-tuple ``(preview, trend_image, predictions, performance)`` matching
        the four output components of the interface.
    """
    if file is None:
        # Nothing uploaded: report it instead of failing inside read_csv.
        return None, None, None, "ERROR: Please upload a CSV file."

    preview = load_data(file)

    # Validate and predict before plotting so a malformed CSV yields a
    # message rather than an exception raised from the plotting code.
    predictions, message, performance = predict_population(file, model_choice)
    if predictions is None:
        # predict_population reports validation errors in its second slot.
        return preview, None, None, performance or message

    trend_image = plot_population_trend(file, model_choice)
    return preview, trend_image, predictions, performance
112
+
113
+
114
# Build the Gradio UI: a CSV upload plus a model selector feed the single
# wrapper function, whose four return values fill the four outputs in order.
_input_components = [
    gr.File(label="Upload CSV File"),
    gr.Radio(["Linear Regression", "Polynomial Regression"], label="Choose Model"),
]
_output_components = [
    gr.Dataframe(label="Preview Data"),
    gr.Image(label="Population Trend"),
    gr.Dataframe(label="Predictions"),
    gr.Textbox(label="Model Performance"),
]

interface = gr.Interface(
    fn=gradio_interface,
    inputs=_input_components,
    outputs=_output_components,
    title="Population Prediction Tool",
    description="Upload a CSV file with Year and Population data. Choose a model (Linear or Polynomial Regression) to predict future population trends.",
)

# Start serving the app.
interface.launch()
linear_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e7b8ab6a96218c4a23bf8050255dae8cf468bb005574b1ecaffe2de4c3d70d4
3
+ size 561
poly_features.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55a6ff11301e34be39169450c78b5f545a4737236f6a226d315d378f47e7fa87
3
+ size 255
poly_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4195848735a33af474b374c688a41aad5ed9b29839c2c95858a3cdfd7f12ac0f
3
+ size 617
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e664c813a5425937bb9a229017322def3636acfd71d341f8ce0a06ad528c2fd
3
+ size 623