{ "cells": [
 { "cell_type": "code", "execution_count": null, "id": "6219ac96", "metadata": {}, "outputs": [], "source": [
  "# Single import cell: every dependency used anywhere in the notebook is declared here,\n",
  "# so Restart Kernel -> Run All reports a missing package immediately.\n",
  "import matplotlib.pyplot as plt\n",
  "import numpy as np\n",
  "import pandas as pd\n",
  "\n",
  "from sklearn import svm\n",
  "from sklearn.metrics import accuracy_score\n",
  "from sklearn.model_selection import GridSearchCV, train_test_split\n",
  "from sklearn.neighbors import KNeighborsClassifier\n",
  "from sklearn.preprocessing import StandardScaler" ] },
 { "cell_type": "code", "execution_count": 7, "id": "e7b1cf66", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model Accuracy: 72.07792207792207%\n" ] } ], "source": [
  "# Run the standalone training script in a subprocess; it prints the model accuracy.\n",
  "# NOTE(review): `python` is resolved from PATH and may not be the kernel's interpreter.\n",
  "!python train.py" ] },
 { "cell_type": "code", "execution_count": 10, "id": "a6a89418", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " -V:3.13 * Python 3.13 (Store)\n", " -V:3.10 Python 3.10 (64-bit)\n", " -V:ContinuumAnalytics/Anaconda39-64 Anaconda 2022.05\n" ] } ], "source": [
  "# List the interpreters registered with the Windows `py` launcher (environment check only).\n",
  "!py --list" ] },
 { "cell_type": "code", "execution_count": 4, "id": "16859c5b", "metadata": {}, "outputs": [], "source": [
  "# Load the dataset; the path is relative to the notebook's working directory.\n",
  "data = pd.read_csv('diabetes.csv')" ] },
 { "cell_type": "code", "execution_count": 5, "id": "69c54515", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", "
" ], "text/plain": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", "0 6 148 72 35 0 33.6 \n", "1 1 85 66 29 0 26.6 \n", "2 8 183 64 0 0 23.3 \n", "3 1 89 66 23 94 28.1 \n", "4 0 137 40 35 168 43.1 \n", "\n", " DiabetesPedigreeFunction Age Outcome \n", "0 0.627 50 1 \n", "1 0.351 31 0 \n", "2 0.672 32 1 \n", "3 0.167 21 0 \n", "4 2.288 33 1 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head()" ] },
 { "cell_type": "code", "execution_count": 11, "id": "4dd13ad6", "metadata": {}, "outputs": [], "source": [
  "# Features are every column except the label; `Outcome` is the target.\n",
  "x = data.drop(columns=['Outcome'])\n",
  "y = data['Outcome']" ] },
 { "cell_type": "code", "execution_count": 29, "id": "a93dffad", "metadata": {}, "outputs": [], "source": [
  "# Hold out 20% of the rows, stratified on the label; the fixed seed makes the split reproducible.\n",
  "x_train, x_test, y_train, y_test = train_test_split(\n",
  "    x, y, test_size=0.2, stratify=y, random_state=2\n",
  ")" ] },
 { "cell_type": "code", "execution_count": null, "id": "f5945059", "metadata": {}, "outputs": [], "source": [
  "# Fit the scaler on the training split only, so test-set statistics never leak into\n",
  "# the preprocessing, then apply the same transform to the held-out rows.\n",
  "scaler = StandardScaler()\n",
  "x_train_scaled = scaler.fit_transform(x_train)\n",
  "x_test_scaled = scaler.transform(x_test)\n",
  "# Whole feature matrix, standardised with the training-set mean and std.\n",
  "scale_data = scaler.transform(x)\n",
  "# NOTE(review): the classifiers in later cells are fitted on the unscaled x_train / x_test;\n",
  "# switch them to the *_scaled arrays if standardisation is intended." ] },
 { "cell_type": "code", "execution_count": 27, "id": "583174fe", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt" ] },
 { "cell_type": "code", "execution_count": 32, "id": "bc4473ce", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAge
619011900032.40.14124
329610570326830.80.12237
131189602384630.10.39859
4762105804519133.70.71129
4501806639042.01.89325
...........................
3035115980052.90.20928
5923132800034.40.40244
5591185740030.10.30035
72541127840039.40.23638
2530866832035.80.23825
\n", "

614 rows × 8 columns

\n", "
" ], "text/plain": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", "619 0 119 0 0 0 32.4 \n", "329 6 105 70 32 68 30.8 \n", "13 1 189 60 23 846 30.1 \n", "476 2 105 80 45 191 33.7 \n", "45 0 180 66 39 0 42.0 \n", ".. ... ... ... ... ... ... \n", "303 5 115 98 0 0 52.9 \n", "592 3 132 80 0 0 34.4 \n", "559 11 85 74 0 0 30.1 \n", "725 4 112 78 40 0 39.4 \n", "253 0 86 68 32 0 35.8 \n", "\n", " DiabetesPedigreeFunction Age \n", "619 0.141 24 \n", "329 0.122 37 \n", "13 0.398 59 \n", "476 0.711 29 \n", "45 1.893 25 \n", ".. ... ... \n", "303 0.209 28 \n", "592 0.402 44 \n", "559 0.300 35 \n", "725 0.236 38 \n", "253 0.238 25 \n", "\n", "[614 rows x 8 columns]" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_train" ] },
 { "cell_type": "code", "execution_count": 37, "id": "abafb2c2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7833876221498371" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Fit a linear-kernel SVM and report its accuracy on the data it was trained on.\n",
  "classifier = svm.SVC(kernel='linear')\n",
  "classifier.fit(x_train, y_train)\n",
  "y_predict = classifier.predict(x_train)\n",
  "# accuracy_score expects (y_true, y_pred).\n",
  "accuracy_score(y_train, y_predict)" ] },
 { "cell_type": "code", "execution_count": 39, "id": "9b013f42", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7727272727272727" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Accuracy of the same SVM on the held-out rows.\n",
  "y_test_pred = classifier.predict(x_test)\n",
  "accuracy_score(y_test, y_test_pred)" ] },
 { "cell_type": "code", "execution_count": 56, "id": "55e83896", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0]\n", "0.0\n" ] } ], "source": [
  "# Classify one hand-entered record. A one-row DataFrame carrying the training column\n",
  "# names is used instead of a bare array, so SVC does not emit the\n",
  "# 'X does not have valid feature names' warning.\n",
  "input_data = pd.DataFrame(\n",
  "    [(7, 107, 74, 0, 0, 29.6, 0.254, 31)],\n",
  "    columns=x_train.columns,\n",
  ")\n",
  "\n",
  "predict = classifier.predict(input_data)\n",
  "print(predict)\n",
  "# Compare against the hard-coded label [1] (presumably the known outcome for this record).\n",
  "print(accuracy_score([1], predict))" ] },
 { "cell_type": "markdown", "id": "9dab6b96", "metadata": {}, "source": [ "# Using k-nearest neighbours (KNN)" ] },
 { "cell_type": "code", "execution_count": 63, "id": "4438351c", "metadata": {}, "outputs": [], "source": [ "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.model_selection import GridSearchCV" ] },
 { "cell_type": "code", "execution_count": 64, "id": "277f89e6", "metadata": {}, "outputs": [], "source": [
  "# Baseline KNN; n_neighbors is tuned by the grid search further down.\n",
  "model = KNeighborsClassifier(n_neighbors=2)" ] },
 { "cell_type": "code", "execution_count": 65, "id": "b9916c2c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "KNeighborsClassifier(n_neighbors=2)" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit(x_train, y_train)" ] },
 { "cell_type": "code", "execution_count": 66, "id": "18a66aa2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8306188925081434" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Accuracy on the training rows themselves.\n",
  "model.score(x_train, y_train)" ] },
 { "cell_type": "code", "execution_count": 71, "id": "2c3a9475", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GridSearchCV(cv=5, estimator=KNeighborsClassifier(n_neighbors=2),\n", " param_grid={'n_neighbors': [3, 5, 7, 9, 11], 'p': [1, 2],\n", " 'weights': ['uniform', 'distance']},\n", " scoring='accuracy')" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# 5-fold cross-validated search over the KNN hyper-parameters, scored by accuracy.\n",
  "param_grid_k = {\n",
  "    'n_neighbors': [3, 5, 7, 9, 11],     # number of neighbours\n",
  "    'weights': ['uniform', 'distance'],  # weight function\n",
  "    'p': [1, 2],                         # power parameter for the Minkowski distance\n",
  "}\n",
  "grid = GridSearchCV(model, param_grid=param_grid_k, cv=5, scoring='accuracy')\n",
  "grid.fit(x_train, y_train)" ] },
{ "cell_type": "code", "execution_count": 73, "id": "204e8231", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7361322137811541" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Mean cross-validated accuracy of the best parameter combination.\n",
  "grid.best_score_" ] },
 { "cell_type": "code", "execution_count": 77, "id": "3e4634f6", "metadata": {}, "outputs": [], "source": [
  "# GridSearchCV (refit=True by default) has already refitted the best configuration on the\n",
  "# full training split, so reuse that estimator instead of building and fitting a copy.\n",
  "opt_model_k = grid.best_estimator_\n",
  "y_pred = opt_model_k.predict(x_test)" ] },
 { "cell_type": "code", "execution_count": 78, "id": "103db5f7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.7467532467532467\n", "0.7931596091205212\n" ] } ], "source": [
  "# First line: accuracy on the held-out rows; second line: accuracy on the training rows.\n",
  "# accuracy_score expects (y_true, y_pred).\n",
  "print(accuracy_score(y_test, y_pred))\n",
  "print(opt_model_k.score(x_train, y_train))" ] } ],
 "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }