File size: 161,714 Bytes
0ee0725 |
|
{
"cells": [
{
"cell_type": "markdown",
"id": "76d2fade",
"metadata": {},
"source": [
" # Iris Flower Detection ML "
]
},
{
"cell_type": "markdown",
"id": "44c89a08",
"metadata": {},
"source": [
"by: Lucky Sharma"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f36bf0de",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "90f1d6b2",
"metadata": {},
"outputs": [],
"source": [
"iris=pd.read_csv('https://raw.githubusercontent.com/itsluckysharma01/Datasets/refs/heads/main/IRIS.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "cab03872",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length</th>\n",
" <th>sepal_width</th>\n",
" <th>petal_length</th>\n",
" <th>petal_width</th>\n",
" <th>species</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.1</td>\n",
" <td>3.5</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.9</td>\n",
" <td>3.0</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4.7</td>\n",
" <td>3.2</td>\n",
" <td>1.3</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.6</td>\n",
" <td>3.1</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.0</td>\n",
" <td>3.6</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>Iris-setosa</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width species\n",
"0 5.1 3.5 1.4 0.2 Iris-setosa\n",
"1 4.9 3.0 1.4 0.2 Iris-setosa\n",
"2 4.7 3.2 1.3 0.2 Iris-setosa\n",
"3 4.6 3.1 1.5 0.2 Iris-setosa\n",
"4 5.0 3.6 1.4 0.2 Iris-setosa"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1cffbbaf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"sepal_length 0\n",
"sepal_width 0\n",
"petal_length 0\n",
"petal_width 0\n",
"species 0\n",
"dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c170d962",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<function matplotlib.pyplot.show(close=None, block=None)>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"iris.hist()\n",
"plt.show"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "56bed7c7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(150, 5)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8a2e6a9d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<function matplotlib.pyplot.show(close=None, block=None)>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x1000 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"iris.hist(linewidth=2,edgecolor='black',figsize=(10,10))\n",
"plt.show"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "f48d2fea",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: xlabel='petal_length', ylabel='petal_width'>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"iris.plot(kind=\"scatter\", x=\"sepal_length\", y=\"sepal_width\")\n",
"iris.plot(kind=\"scatter\", x=\"petal_length\", y=\"petal_width\" )\n"
]
},
{
"cell_type": "markdown",
"id": "4ce75462",
"metadata": {},
"source": [
" # Predicting the species of iris flower based on its features using machine learning."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c2c1edef",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.svm import SVC\n",
"from sklearn import svm\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn import metrics\n",
"from sklearn.preprocessing import LabelEncoder"
]
},
{
"cell_type": "markdown",
"id": "388c2509",
"metadata": {},
"source": [
"# Label Encoding The Non-Numeric Data on Species Column"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c08414fc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 0\n",
" ..\n",
"145 2\n",
"146 2\n",
"147 2\n",
"148 2\n",
"149 2\n",
"Name: species, Length: 150, dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Label Encoding the Non-Numeric Data on Species Column\n",
"le = LabelEncoder()\n",
"iris['species'] = le.fit_transform(iris['species'])\n",
"iris['species']"
]
},
{
"cell_type": "markdown",
"id": "d1519888",
"metadata": {},
"source": [
"Divide the Dataset into test and train data\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "ae844616",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train shape: (112, 5)\n",
"Test shape: (38, 5)\n"
]
}
],
"source": [
"train, test = train_test_split(iris, test_size=0.25)\n",
"print(\"Train shape:\", train.shape) # Divide into 25 and 75 percent\n",
"print(\"Test shape:\", test.shape)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "17fa3fce",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length</th>\n",
" <th>sepal_width</th>\n",
" <th>petal_length</th>\n",
" <th>petal_width</th>\n",
" <th>species</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.1</td>\n",
" <td>3.5</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.9</td>\n",
" <td>3.0</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width species\n",
"0 5.1 3.5 1.4 0.2 0\n",
"1 4.9 3.0 1.4 0.2 0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "55ff100f",
"metadata": {},
"outputs": [],
"source": [
"train_x = train.drop(columns=['species'])\n",
"train_y = train['species']\n",
"\n",
"test_x = test.drop(columns=['species'])\n",
"test_y = test['species']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "5ad99bf0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length</th>\n",
" <th>sepal_width</th>\n",
" <th>petal_length</th>\n",
" <th>petal_width</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>5.4</td>\n",
" <td>3.7</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>5.0</td>\n",
" <td>3.4</td>\n",
" <td>1.6</td>\n",
" <td>0.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69</th>\n",
" <td>5.6</td>\n",
" <td>2.5</td>\n",
" <td>3.9</td>\n",
" <td>1.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>111</th>\n",
" <td>6.4</td>\n",
" <td>2.7</td>\n",
" <td>5.3</td>\n",
" <td>1.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>6.7</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>1.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width\n",
"10 5.4 3.7 1.5 0.2\n",
"26 5.0 3.4 1.6 0.4\n",
"69 5.6 2.5 3.9 1.1\n",
"111 6.4 2.7 5.3 1.9\n",
"77 6.7 3.0 5.0 1.7"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_x.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "b0139547",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10 0\n",
"26 0\n",
"69 1\n",
"111 2\n",
"77 1\n",
"Name: species, dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_y.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "dd735025",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length</th>\n",
" <th>sepal_width</th>\n",
" <th>petal_length</th>\n",
" <th>petal_width</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>105</th>\n",
" <td>7.6</td>\n",
" <td>3.0</td>\n",
" <td>6.6</td>\n",
" <td>2.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>6.1</td>\n",
" <td>2.8</td>\n",
" <td>4.0</td>\n",
" <td>1.3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width\n",
"105 7.6 3.0 6.6 2.1\n",
"71 6.1 2.8 4.0 1.3"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_x.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "7fd8e9ff",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"105 2\n",
"71 1\n",
"76 1\n",
"134 2\n",
"29 0\n",
"Name: species, dtype: int64"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_y.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "e7570b57",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((112, 4), (112,), (38, 4), (38,))"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_x.shape, train_y.shape, test_x.shape, test_y.shape"
]
},
{
"cell_type": "markdown",
"id": "1b51fbe7",
"metadata": {},
"source": [
"# There are various split mehod to divide the dataset into test and train data."
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "99cfe47f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length</th>\n",
" <th>sepal_width</th>\n",
" <th>petal_length</th>\n",
" <th>petal_width</th>\n",
" <th>species</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.1</td>\n",
" <td>3.5</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.9</td>\n",
" <td>3.0</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4.7</td>\n",
" <td>3.2</td>\n",
" <td>1.3</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.6</td>\n",
" <td>3.1</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.0</td>\n",
" <td>3.6</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width species\n",
"0 5.1 3.5 1.4 0.2 0\n",
"1 4.9 3.0 1.4 0.2 0\n",
"2 4.7 3.2 1.3 0.2 0\n",
"3 4.6 3.1 1.5 0.2 0\n",
"4 5.0 3.6 1.4 0.2 0"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris.head()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "3e75c275",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" sepal_length sepal_width petal_length petal_width\n",
"0 5.1 3.5 1.4 0.2\n",
"1 4.9 3.0 1.4 0.2\n",
"2 4.7 3.2 1.3 0.2\n",
"3 4.6 3.1 1.5 0.2\n",
"4 5.0 3.6 1.4 0.2\n",
".. ... ... ... ...\n",
"145 6.7 3.0 5.2 2.3\n",
"146 6.3 2.5 5.0 1.9\n",
"147 6.5 3.0 5.2 2.0\n",
"148 6.2 3.4 5.4 2.3\n",
"149 5.9 3.0 5.1 1.8\n",
"\n",
"[150 rows x 4 columns]\n"
]
},
{
"data": {
"text/plain": [
"((150, 4), (150,))"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x1=iris.drop(columns=['species'])\n",
"y1=iris['species']\n",
"print(x1)\n",
"x1.shape, y1.shape"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "fce62130",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 0\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 0\n",
" ..\n",
"145 2\n",
"146 2\n",
"147 2\n",
"148 2\n",
"149 2\n",
"Name: species, Length: 150, dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"((150, 3), (150,))"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x=iris.iloc[:,0:3]\n",
"y=iris.iloc[:,4]\n",
"print(y)\n",
"x.shape, y.shape"
]
},
{
"cell_type": "markdown",
"id": "2ac3b2f6",
"metadata": {},
"source": [
"# Model Evaluation"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "cdd946c9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy : 100.0\n"
]
}
],
"source": [
"model = LogisticRegression()\n",
"model.fit(train_x, train_y)\n",
"\n",
"predictions = model.predict(test_x)\n",
"print(\"Accuracy : \", metrics.accuracy_score(predictions, test_y)*100)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "d0cde07b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy: 94.73684210526315\n"
]
}
],
"source": [
"model = DecisionTreeClassifier()\n",
"model.fit(train_x,train_y)\n",
"predictions=model.predict(test_x)\n",
"print('accuracy:',metrics.accuracy_score(predictions,test_y) * 100)"
]
},
{
"cell_type": "markdown",
"id": "c0a68eb1",
"metadata": {},
"source": [
"\n",
"# 1. Preprocessing\n",
"\n",
"# In this case:\n",
"\n",
"# No missing values\n",
"\n",
"# Features are all numeric\n",
"\n",
"# scikit-learn automatically handles string Labels(Target) in classification problems, but not for categorical features.\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "8875d616",
"metadata": {},
"source": [
"# 2. Feature and Target Variable"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "d0d7e7df",
"metadata": {},
"outputs": [],
"source": [
"# x=train_x\n",
"# y=train_y\n",
"\n",
"#Feature set\n",
"x=iris[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]\n",
"\n",
"#Target variable\n",
"y=iris['species']"
]
},
{
"cell_type": "markdown",
"id": "ab718a4c",
"metadata": {},
"source": [
"Encode target labels (setosa → 0, versicolor → 1, virginica → 2)\n",
"#scikit-learn automatically handles string Labels(Target) in classification problems, but not for categorical features."
]
},
{
"cell_type": "markdown",
"id": "32c1e5dc",
"metadata": {},
"source": [
"# Split the dataset into Test train data"
]
},
{
"cell_type": "markdown",
"id": "c64426fb",
"metadata": {},
"source": [
"# 3. Train Model"
]
},
{
"cell_type": "markdown",
"id": "0c490a38",
"metadata": {},
"source": [
"Train a model (Logistic Regression model)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "13770e59",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"predicted_species is : Iris-versicolor\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\PANDIT JI\\AppData\\Roaming\\Python\\Python311\\site-packages\\sklearn\\base.py:493: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"model=LogisticRegression()\n",
"model.fit(x,y)\n",
"\n",
"pred=model.predict([[5.4,3.0,4.5,1.5]]) # Fill values in the list to predict\n",
"predicted_species = le.inverse_transform(pred) # Inverse transform to get original species name\n",
"print(\"predicted_species is : \", predicted_species[0])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "fe301287",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length</th>\n",
" <th>sepal_width</th>\n",
" <th>petal_length</th>\n",
" <th>petal_width</th>\n",
" <th>species</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.1</td>\n",
" <td>3.5</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.9</td>\n",
" <td>3.0</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4.7</td>\n",
" <td>3.2</td>\n",
" <td>1.3</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.6</td>\n",
" <td>3.1</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.0</td>\n",
" <td>3.6</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>6.7</td>\n",
" <td>3.0</td>\n",
" <td>5.2</td>\n",
" <td>2.3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>6.3</td>\n",
" <td>2.5</td>\n",
" <td>5.0</td>\n",
" <td>1.9</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>6.5</td>\n",
" <td>3.0</td>\n",
" <td>5.2</td>\n",
" <td>2.0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>6.2</td>\n",
" <td>3.4</td>\n",
" <td>5.4</td>\n",
" <td>2.3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>149</th>\n",
" <td>5.9</td>\n",
" <td>3.0</td>\n",
" <td>5.1</td>\n",
" <td>1.8</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>150 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width species\n",
"0 5.1 3.5 1.4 0.2 0\n",
"1 4.9 3.0 1.4 0.2 0\n",
"2 4.7 3.2 1.3 0.2 0\n",
"3 4.6 3.1 1.5 0.2 0\n",
"4 5.0 3.6 1.4 0.2 0\n",
".. ... ... ... ... ...\n",
"145 6.7 3.0 5.2 2.3 2\n",
"146 6.3 2.5 5.0 1.9 2\n",
"147 6.5 3.0 5.2 2.0 2\n",
"148 6.2 3.4 5.4 2.3 2\n",
"149 5.9 3.0 5.1 1.8 2\n",
"\n",
"[150 rows x 5 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris"
]
},
{
"cell_type": "markdown",
"id": "dc8b6b6e",
"metadata": {},
"source": [
"# 5. Test Model"
]
},
{
"cell_type": "markdown",
"id": "7145f84e",
"metadata": {},
"source": [
"# Check for another values"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "de54ebdc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"predicted_species is : Iris-virginica\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\PANDIT JI\\AppData\\Roaming\\Python\\Python311\\site-packages\\sklearn\\base.py:493: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"model=LogisticRegression()\n",
"model.fit(x,y)\n",
"\n",
"pred=model.predict([[5.9,3.0,5.1,1.8]]) # Fill values in the list to predict\n",
"predicted_species = le.inverse_transform(pred) # Inverse transform to get original species name\n",
"print(\"predicted_species is : \", predicted_species[0])"
]
},
{
"cell_type": "markdown",
"id": "aa658d1c",
"metadata": {},
"source": [
"# Predict For Nearest Neighbors"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "181afd43",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"predicted_species is : Iris-setosa\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\PANDIT JI\\AppData\\Roaming\\Python\\Python311\\site-packages\\sklearn\\base.py:493: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"model=LogisticRegression()\n",
"model.fit(x,y)\n",
"\n",
"pred=model.predict([[5.,3.0,1,.5]]) # Fill values in the list to predict\n",
"predicted_species = le.inverse_transform(pred) # Inverse transform to get original species name\n",
"print(\"predicted_species is : \", predicted_species[0])"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "77b7b266",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"predicted_species is : Iris-setosa\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\PANDIT JI\\AppData\\Roaming\\Python\\Python311\\site-packages\\sklearn\\base.py:493: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"model=LogisticRegression()\n",
"model.fit(x,y)\n",
"\n",
"pred=model.predict([[5,3,2,.6]]) # Fill values in the list to predict\n",
"predicted_species = le.inverse_transform(pred) # Inverse transform to get original species name\n",
"print(\"predicted_species is : \", predicted_species[0])"
]
},
{
"cell_type": "markdown",
"id": "5861a560",
"metadata": {},
"source": [
"we put nearest neighbors values to predict the species of iris flower. It can be used to predict the species of iris flower based on its features."
]
},
{
"cell_type": "markdown",
"id": "53520c6e",
"metadata": {},
"source": [
"# 4. Model Evaluation\n",
"***\n",
"Accuracy Check"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "53ede795",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 97.33333333333334\n"
]
}
],
"source": [
"model = LogisticRegression(max_iter=200)\n",
"model.fit(x, y)\n",
"prediction = model.predict(x)\n",
"print('Accuracy:',metrics.accuracy_score(prediction,y)*100)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "cf68bbb9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy: 100.0\n"
]
}
],
"source": [
"model = DecisionTreeClassifier()\n",
"model.fit(x,y)\n",
"prediction=model.predict(x)\n",
"print('accuracy:',metrics.accuracy_score(prediction,y)*100)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "cc6674dc",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.neighbors import KNeighborsClassifier"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "9260465a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Accuracy with k=3: 96.0 %\n"
]
}
],
"source": [
"# Train model using k=3 and print accuracy\n",
"model = KNeighborsClassifier(n_neighbors=3)\n",
"model.fit(x, y)\n",
"prediction = model.predict(x)\n",
"print(\" Accuracy with k=3:\", metrics.accuracy_score(y, prediction) * 100, \"%\")"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "e40b19c2",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "460366db",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 96.0 %\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" 0 1.00 1.00 1.00 50\n",
" 1 0.94 0.94 0.94 50\n",
" 2 0.94 0.94 0.94 50\n",
"\n",
" accuracy 0.96 150\n",
" macro avg 0.96 0.96 0.96 150\n",
"weighted avg 0.96 0.96 0.96 150\n",
"\n"
]
}
],
"source": [
"y_pred = model.predict(x)\n",
"print(\"Accuracy:\", accuracy_score(y, y_pred)*100, \"%\")\n",
"print(\"\\nClassification Report:\\n\", classification_report(y, y_pred))"
]
},
{
"cell_type": "markdown",
"id": "35b1c120",
"metadata": {},
"source": [
"# Saving the Model with joblib"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "0d0ca1c2",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import load_iris\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"import joblib"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "3a13ddc7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model saved as 'iris_model.pkl'\n"
]
}
],
"source": [
"model.fit(x, y)\n",
"joblib.dump(model, 'iris_model.pkl') #save the model to a file\n",
"print(\"Model saved as 'iris_model.pkl'\")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "5e358195",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['iris_model.pkl']"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"\n",
"iris = load_iris()\n",
"X, y = iris.data, iris.target\n",
"model = RandomForestClassifier()\n",
"model.fit(X, y)\n",
"joblib.dump(model, 'iris_model.pkl')\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|