"""Train a linear-regression car price model and save it to disk.

Pipeline, in order:

1. Read ``Cleaned_Car_data.csv`` from the current working directory.
2. Drop the ``Unnamed: 0`` and ``name`` columns.
3. Replace ``year`` with ``car_age`` (``REFERENCE_YEAR - year``).
4. One-hot encode ``company`` and ``fuel_type``.
5. Fit ``LinearRegression`` on an 80/20 train/test split and print the
   R² score measured on the held-out 20%.
6. Pickle the fitted model to ``car_price_model.pkl`` and the ordered list
   of feature column names to ``model_columns.pkl``.
"""
import pickle

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Input / output locations, relative to the current working directory.
DATA_PATH = "Cleaned_Car_data.csv"
MODEL_PATH = "car_price_model.pkl"
COLUMNS_PATH = "model_columns.pkl"

# Year that car age is measured from.
# NOTE(review): any code that builds features for prediction presumably has
# to use the same reference year, otherwise ``car_age`` shifts relative to
# what the model was trained on -- confirm against the consumer of the pickles.
REFERENCE_YEAR = 2025

TARGET_COLUMN = "Price"
# NOTE(review): "Unnamed: 0" looks like a leftover index column from an
# earlier ``to_csv`` call -- confirm.
UNUSED_COLUMNS = ["Unnamed: 0", "name"]
CATEGORICAL_COLUMNS = ["company", "fuel_type"]

# Load the data
df = pd.read_csv(DATA_PATH)

# Drop unnecessary columns
df = df.drop(columns=UNUSED_COLUMNS)

# Feature engineering: express the model year as an age.
df["car_age"] = REFERENCE_YEAR - df["year"]
df = df.drop(columns=["year"])

# One-hot encoding. drop_first=True omits the first level of each encoded
# column, so that level is represented by all of its dummies being zero.
df = pd.get_dummies(df, columns=CATEGORICAL_COLUMNS, drop_first=True)

# Define X and y
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# Train/test split (fixed seed so the split, and therefore the reported
# score, is reproducible from run to run).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluation on the held-out split
y_pred = model.predict(X_test)
print("R² Score:", r2_score(y_test, y_pred))

# Save the model together with the exact feature column order it was fitted
# on, so that the same layout can be rebuilt when loading the model later.
with open(MODEL_PATH, "wb") as f:
    pickle.dump(model, f)

with open(COLUMNS_PATH, "wb") as f:
    pickle.dump(list(X.columns), f)

print("✅ Model trained and saved successfully.")