{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"id": "aabfc9b7",
"metadata": {
"id": "aabfc9b7"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"source": [
"# Pin the numpy / pandas versions used by this notebook.\n",
"# Explicit %pip magic: installs into the running kernel's environment and does not\n",
"# depend on IPython automagic the way a bare `pip install ...` line does.\n",
"# NOTE: numpy and pandas are imported in the cell above; if this install changes a\n",
"# version, restart the runtime so the pinned versions are the ones actually loaded.\n",
"%pip install numpy==1.22.0 pandas==1.5.3"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SFDDgqRzxfTB",
"outputId": "a652677b-ae2d-4052-d971-3990bc6b0186"
},
"id": "SFDDgqRzxfTB",
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: numpy==1.22.0 in /usr/local/lib/python3.10/dist-packages (1.22.0)\n",
"Requirement already satisfied: pandas==1.5.3 in /usr/local/lib/python3.10/dist-packages (1.5.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas==1.5.3) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas==1.5.3) (2023.4)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas==1.5.3) (1.16.0)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Print pip's metadata (version, location, dependents) for numpy and pandas\n",
"# to confirm which releases are installed.\n",
"!pip show numpy\n",
"!pip show pandas"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FGPxBWkrxkct",
"outputId": "d01503b5-51fa-43c5-c674-2f20340ee904"
},
"id": "FGPxBWkrxkct",
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Name: numpy\n",
"Version: 1.22.0\n",
"Summary: NumPy is the fundamental package for array computing with Python.\n",
"Home-page: https://www.numpy.org\n",
"Author: Travis E. Oliphant et al.\n",
"Author-email: \n",
"License: BSD\n",
"Location: /usr/local/lib/python3.10/dist-packages\n",
"Requires: \n",
"Required-by: albumentations, altair, arviz, astropy, autograd, blis, bokeh, bqplot, category-encoders, chex, cmdstanpy, contourpy, cudf-cu12, cufflinks, cupy-cuda12x, cvxpy, datascience, db-dtypes, dopamine_rl, ecos, flax, folium, geemap, gensim, gym, h5py, holoviews, hyperopt, ibis-framework, imageio, imbalanced-learn, imgaug, jax, jaxlib, librosa, lightgbm, matplotlib, matplotlib-venn, missingno, mizani, ml-dtypes, mlxtend, moviepy, music21, nibabel, numba, numexpr, opencv-contrib-python, opencv-python, opencv-python-headless, opt-einsum, optax, orbax-checkpoint, osqp, pandas, pandas-gbq, pandas-stubs, patsy, plotnine, prophet, pyarrow, pycocotools, pyerfa, pymc, pytensor, python-louvain, PyWavelets, qdldl, qudida, rmm-cu12, scikit-image, scikit-learn, scipy, scs, seaborn, shapely, sklearn-pandas, soxr, spacy, stanio, statsmodels, tables, tensorboard, tensorflow, tensorflow-datasets, tensorflow-hub, tensorflow-probability, tensorstore, thinc, tifffile, torchtext, torchvision, transformers, wordcloud, xarray, xarray-einstats, xgboost, yellowbrick, yfinance\n",
"Name: pandas\n",
"Version: 1.5.3\n",
"Summary: Powerful data structures for data analysis, time series, and statistics\n",
"Home-page: https://pandas.pydata.org\n",
"Author: The Pandas Development Team\n",
"Author-email: pandas-dev@python.org\n",
"License: BSD-3-Clause\n",
"Location: /usr/local/lib/python3.10/dist-packages\n",
"Requires: numpy, python-dateutil, pytz\n",
"Required-by: altair, arviz, bigframes, bokeh, bqplot, category-encoders, cmdstanpy, cudf-cu12, cufflinks, datascience, db-dtypes, dopamine_rl, fastai, geemap, geopandas, google-colab, gspread-dataframe, holoviews, ibis-framework, mizani, mlxtend, pandas-datareader, pandas-gbq, panel, plotnine, prophet, pymc, seaborn, sklearn-pandas, statsmodels, vega-datasets, xarray, yfinance\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "079ed1b4",
"metadata": {
"id": "079ed1b4"
},
"outputs": [],
"source": [
"# Match-level table (its `id` column is the join key used later).\n",
"match = pd.read_csv(filepath_or_buffer='/content/matches.csv')\n",
"# Ball-by-ball table; `match_id` ties each delivery back to a row of `match`.\n",
"delivery = pd.read_csv(filepath_or_buffer='/content/deliveries.csv')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "bfadbf7d",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 451
},
"id": "bfadbf7d",
"outputId": "d1a2bb15-e98c-44bb-a21a-ffabac7b4aab"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" id Season city date team1 \\\n",
"0 1 IPL-2017 Hyderabad 05-04-2017 Sunrisers Hyderabad \n",
"1 2 IPL-2017 Pune 06-04-2017 Mumbai Indians \n",
"2 3 IPL-2017 Rajkot 07-04-2017 Gujarat Lions \n",
"3 4 IPL-2017 Indore 08-04-2017 Rising Pune Supergiant \n",
"4 5 IPL-2017 Bangalore 08-04-2017 Royal Challengers Bangalore \n",
"\n",
" team2 toss_winner toss_decision \\\n",
"0 Royal Challengers Bangalore Royal Challengers Bangalore field \n",
"1 Rising Pune Supergiant Rising Pune Supergiant field \n",
"2 Kolkata Knight Riders Kolkata Knight Riders field \n",
"3 Kings XI Punjab Kings XI Punjab field \n",
"4 Delhi Daredevils Royal Challengers Bangalore bat \n",
"\n",
" result dl_applied winner win_by_runs \\\n",
"0 normal 0 Sunrisers Hyderabad 35 \n",
"1 normal 0 Rising Pune Supergiant 0 \n",
"2 normal 0 Kolkata Knight Riders 0 \n",
"3 normal 0 Kings XI Punjab 0 \n",
"4 normal 0 Royal Challengers Bangalore 15 \n",
"\n",
" win_by_wickets player_of_match venue \\\n",
"0 0 Yuvraj Singh Rajiv Gandhi International Stadium, Uppal \n",
"1 7 SPD Smith Maharashtra Cricket Association Stadium \n",
"2 10 CA Lynn Saurashtra Cricket Association Stadium \n",
"3 6 GJ Maxwell Holkar Cricket Stadium \n",
"4 0 KM Jadhav M Chinnaswamy Stadium \n",
"\n",
" umpire1 umpire2 umpire3 \n",
"0 AY Dandekar NJ Llong NaN \n",
"1 A Nand Kishore S Ravi NaN \n",
"2 Nitin Menon CK Nandan NaN \n",
"3 AK Chaudhary C Shamshuddin NaN \n",
"4 NaN NaN NaN "
],
"text/html": [
"\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" id \n",
" Season \n",
" city \n",
" date \n",
" team1 \n",
" team2 \n",
" toss_winner \n",
" toss_decision \n",
" result \n",
" dl_applied \n",
" winner \n",
" win_by_runs \n",
" win_by_wickets \n",
" player_of_match \n",
" venue \n",
" umpire1 \n",
" umpire2 \n",
" umpire3 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" IPL-2017 \n",
" Hyderabad \n",
" 05-04-2017 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" Royal Challengers Bangalore \n",
" field \n",
" normal \n",
" 0 \n",
" Sunrisers Hyderabad \n",
" 35 \n",
" 0 \n",
" Yuvraj Singh \n",
" Rajiv Gandhi International Stadium, Uppal \n",
" AY Dandekar \n",
" NJ Llong \n",
" NaN \n",
" \n",
" \n",
" 1 \n",
" 2 \n",
" IPL-2017 \n",
" Pune \n",
" 06-04-2017 \n",
" Mumbai Indians \n",
" Rising Pune Supergiant \n",
" Rising Pune Supergiant \n",
" field \n",
" normal \n",
" 0 \n",
" Rising Pune Supergiant \n",
" 0 \n",
" 7 \n",
" SPD Smith \n",
" Maharashtra Cricket Association Stadium \n",
" A Nand Kishore \n",
" S Ravi \n",
" NaN \n",
" \n",
" \n",
" 2 \n",
" 3 \n",
" IPL-2017 \n",
" Rajkot \n",
" 07-04-2017 \n",
" Gujarat Lions \n",
" Kolkata Knight Riders \n",
" Kolkata Knight Riders \n",
" field \n",
" normal \n",
" 0 \n",
" Kolkata Knight Riders \n",
" 0 \n",
" 10 \n",
" CA Lynn \n",
" Saurashtra Cricket Association Stadium \n",
" Nitin Menon \n",
" CK Nandan \n",
" NaN \n",
" \n",
" \n",
" 3 \n",
" 4 \n",
" IPL-2017 \n",
" Indore \n",
" 08-04-2017 \n",
" Rising Pune Supergiant \n",
" Kings XI Punjab \n",
" Kings XI Punjab \n",
" field \n",
" normal \n",
" 0 \n",
" Kings XI Punjab \n",
" 0 \n",
" 6 \n",
" GJ Maxwell \n",
" Holkar Cricket Stadium \n",
" AK Chaudhary \n",
" C Shamshuddin \n",
" NaN \n",
" \n",
" \n",
" 4 \n",
" 5 \n",
" IPL-2017 \n",
" Bangalore \n",
" 08-04-2017 \n",
" Royal Challengers Bangalore \n",
" Delhi Daredevils \n",
" Royal Challengers Bangalore \n",
" bat \n",
" normal \n",
" 0 \n",
" Royal Challengers Bangalore \n",
" 15 \n",
" 0 \n",
" KM Jadhav \n",
" M Chinnaswamy Stadium \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "match",
"summary": "{\n \"name\": \"match\",\n \"rows\": 756,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3464,\n \"min\": 1,\n \"max\": 11415,\n \"num_unique_values\": 756,\n \"samples\": [\n 409,\n 98,\n 425\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Season\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 12,\n \"samples\": [\n \"IPL-2018\",\n \"IPL-2016\",\n \"IPL-2017\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"city\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 32,\n \"samples\": [\n \"Sharjah\",\n \"Centurion\",\n \"Kochi\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 546,\n \"samples\": [\n \"26-05-2013\",\n \"20-05-2008\",\n \"10-04-2015\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"team1\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Rajasthan Royals\",\n \"Kochi Tuskers Kerala\",\n \"Sunrisers Hyderabad\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"team2\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Chennai Super Kings\",\n \"Pune Warriors\",\n \"Royal Challengers Bangalore\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"toss_winner\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Rajasthan Royals\",\n \"Kochi Tuskers Kerala\",\n \"Royal Challengers Bangalore\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"toss_decision\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"bat\",\n \"field\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"result\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"normal\",\n \"tie\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"dl_applied\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"winner\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Rajasthan Royals\",\n \"Pune Warriors\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"win_by_runs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 23,\n \"min\": 0,\n \"max\": 146,\n \"num_unique_values\": 89,\n \"samples\": [\n 53,\n 40\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"win_by_wickets\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 0,\n \"max\": 10,\n \"num_unique_values\": 11,\n \"samples\": [\n 4,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"player_of_match\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 226,\n \"samples\": [\n \"JJ Bumrah\",\n \"MA Agarwal\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"venue\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 41,\n \"samples\": [\n \"Barabati Stadium\",\n \"Dr DY Patil Sports Academy\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"umpire1\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 61,\n \"samples\": [\n \"AY Dandekar\",\n \"KN Ananthapadmanabhan\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"umpire2\",\n \"properties\": {\n 
\"dtype\": \"category\",\n \"num_unique_values\": 65,\n \"samples\": [\n \"O Nandan\",\n \"Nanda Kishore\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"umpire3\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 25,\n \"samples\": [\n \"Chris Gaffaney\",\n \"Marais Erasmus\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 9
}
],
"source": [
"# Preview the first five match records.\n",
"match.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "d4616531",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "d4616531",
"outputId": "5a409bd2-6f6a-440b-e7f7-93927fb7c8a1"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(756, 18)"
]
},
"metadata": {},
"execution_count": 10
}
],
"source": [
"# (rows, columns) of the match table.\n",
"match.shape"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "b9576f6a",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 429
},
"id": "b9576f6a",
"outputId": "3ea808cf-236d-43a1-d013-c948f6e75ac2"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" match_id inning batting_team bowling_team over \\\n",
"0 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 \n",
"1 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 \n",
"2 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 \n",
"3 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 \n",
"4 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 \n",
"\n",
" ball batsman non_striker bowler is_super_over ... bye_runs \\\n",
"0 1 DA Warner S Dhawan TS Mills 0 ... 0 \n",
"1 2 DA Warner S Dhawan TS Mills 0 ... 0 \n",
"2 3 DA Warner S Dhawan TS Mills 0 ... 0 \n",
"3 4 DA Warner S Dhawan TS Mills 0 ... 0 \n",
"4 5 DA Warner S Dhawan TS Mills 0 ... 0 \n",
"\n",
" legbye_runs noball_runs penalty_runs batsman_runs extra_runs \\\n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 4 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 2 \n",
"\n",
" total_runs player_dismissed dismissal_kind fielder \n",
"0 0 NaN NaN NaN \n",
"1 0 NaN NaN NaN \n",
"2 4 NaN NaN NaN \n",
"3 0 NaN NaN NaN \n",
"4 2 NaN NaN NaN \n",
"\n",
"[5 rows x 21 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" match_id \n",
" inning \n",
" batting_team \n",
" bowling_team \n",
" over \n",
" ball \n",
" batsman \n",
" non_striker \n",
" bowler \n",
" is_super_over \n",
" ... \n",
" bye_runs \n",
" legbye_runs \n",
" noball_runs \n",
" penalty_runs \n",
" batsman_runs \n",
" extra_runs \n",
" total_runs \n",
" player_dismissed \n",
" dismissal_kind \n",
" fielder \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 1 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" 1 \n",
" 1 \n",
" DA Warner \n",
" S Dhawan \n",
" TS Mills \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 1 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" 1 \n",
" 2 \n",
" DA Warner \n",
" S Dhawan \n",
" TS Mills \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 2 \n",
" 1 \n",
" 1 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" 1 \n",
" 3 \n",
" DA Warner \n",
" S Dhawan \n",
" TS Mills \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 4 \n",
" 0 \n",
" 4 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" 1 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" 1 \n",
" 4 \n",
" DA Warner \n",
" S Dhawan \n",
" TS Mills \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 4 \n",
" 1 \n",
" 1 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" 1 \n",
" 5 \n",
" DA Warner \n",
" S Dhawan \n",
" TS Mills \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
"
\n",
"
5 rows × 21 columns
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "delivery"
}
},
"metadata": {},
"execution_count": 11
}
],
"source": [
"# Preview the first five ball-by-ball records.\n",
"delivery.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "be21b391",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "be21b391",
"outputId": "760b10fd-5c79-4b6b-8317-8b193061dab8"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
":1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" total_score_df = delivery.groupby(['match_id','inning']).sum()['total_runs'].reset_index()\n"
]
}
],
"source": [
"# Total runs scored in each innings of each match.\n",
"# Select `total_runs` *before* aggregating: summing the whole frame also tries to sum\n",
"# the non-numeric columns, which raises pandas' numeric_only FutureWarning and does\n",
"# needless work. The result has the same three columns: match_id, inning, total_runs.\n",
"total_score_df = (\n",
"    delivery\n",
"    .groupby(['match_id', 'inning'])['total_runs']\n",
"    .sum()\n",
"    .reset_index()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "cbf8c553",
"metadata": {
"id": "cbf8c553"
},
"outputs": [],
"source": [
"# Keep only the first-innings rows (inning == 1) of the per-innings totals.\n",
"total_score_df = total_score_df.loc[total_score_df['inning'].eq(1)]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "0e59930d",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 423
},
"id": "0e59930d",
"outputId": "31c0886d-c4a5-41d2-8a9c-7e7c81afffe4"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" match_id inning total_runs\n",
"0 1 1 207\n",
"2 2 1 184\n",
"4 3 1 183\n",
"6 4 1 163\n",
"8 5 1 157\n",
"... ... ... ...\n",
"1518 11347 1 143\n",
"1520 11412 1 136\n",
"1522 11413 1 171\n",
"1524 11414 1 155\n",
"1526 11415 1 152\n",
"\n",
"[756 rows x 3 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" match_id \n",
" inning \n",
" total_runs \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 207 \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 1 \n",
" 184 \n",
" \n",
" \n",
" 4 \n",
" 3 \n",
" 1 \n",
" 183 \n",
" \n",
" \n",
" 6 \n",
" 4 \n",
" 1 \n",
" 163 \n",
" \n",
" \n",
" 8 \n",
" 5 \n",
" 1 \n",
" 157 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 1518 \n",
" 11347 \n",
" 1 \n",
" 143 \n",
" \n",
" \n",
" 1520 \n",
" 11412 \n",
" 1 \n",
" 136 \n",
" \n",
" \n",
" 1522 \n",
" 11413 \n",
" 1 \n",
" 171 \n",
" \n",
" \n",
" 1524 \n",
" 11414 \n",
" 1 \n",
" 155 \n",
" \n",
" \n",
" 1526 \n",
" 11415 \n",
" 1 \n",
" 152 \n",
" \n",
" \n",
"
\n",
"
756 rows × 3 columns
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "total_score_df",
"summary": "{\n \"name\": \"total_score_df\",\n \"rows\": 756,\n \"fields\": [\n {\n \"column\": \"match_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3464,\n \"min\": 1,\n \"max\": 11415,\n \"num_unique_values\": 756,\n \"samples\": [\n 409,\n 98,\n 425\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"inning\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 1,\n \"num_unique_values\": 1,\n \"samples\": [\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_runs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 30,\n \"min\": 56,\n \"max\": 263,\n \"num_unique_values\": 150,\n \"samples\": [\n 175\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 14
}
],
"source": [
"# Inspect the first-innings totals (pandas truncates the display to head and tail).\n",
"total_score_df"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "78c81c64",
"metadata": {
"id": "78c81c64"
},
"outputs": [],
"source": [
"# Attach each match's first-innings total to its match record.\n",
"# Inner join (the merge default) on match.id == total_score_df.match_id; the joined\n",
"# frame keeps both key columns, so `match_id` duplicates `id`.\n",
"match_df = pd.merge(\n",
"    match,\n",
"    total_score_df[['match_id', 'total_runs']],\n",
"    how='inner',\n",
"    left_on='id',\n",
"    right_on='match_id',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "6dad8a91",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 842
},
"id": "6dad8a91",
"outputId": "a851ec52-1f86-4514-c15d-1dfce4b5c91d"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" id Season city date team1 \\\n",
"0 1 IPL-2017 Hyderabad 05-04-2017 Sunrisers Hyderabad \n",
"1 2 IPL-2017 Pune 06-04-2017 Mumbai Indians \n",
"2 3 IPL-2017 Rajkot 07-04-2017 Gujarat Lions \n",
"3 4 IPL-2017 Indore 08-04-2017 Rising Pune Supergiant \n",
"4 5 IPL-2017 Bangalore 08-04-2017 Royal Challengers Bangalore \n",
".. ... ... ... ... ... \n",
"751 11347 IPL-2019 Mumbai 05-05-2019 Kolkata Knight Riders \n",
"752 11412 IPL-2019 Chennai 07-05-2019 Chennai Super Kings \n",
"753 11413 IPL-2019 Visakhapatnam 08-05-2019 Sunrisers Hyderabad \n",
"754 11414 IPL-2019 Visakhapatnam 10-05-2019 Delhi Capitals \n",
"755 11415 IPL-2019 Hyderabad 12-05-2019 Mumbai Indians \n",
"\n",
" team2 toss_winner toss_decision \\\n",
"0 Royal Challengers Bangalore Royal Challengers Bangalore field \n",
"1 Rising Pune Supergiant Rising Pune Supergiant field \n",
"2 Kolkata Knight Riders Kolkata Knight Riders field \n",
"3 Kings XI Punjab Kings XI Punjab field \n",
"4 Delhi Daredevils Royal Challengers Bangalore bat \n",
".. ... ... ... \n",
"751 Mumbai Indians Mumbai Indians field \n",
"752 Mumbai Indians Chennai Super Kings bat \n",
"753 Delhi Capitals Delhi Capitals field \n",
"754 Chennai Super Kings Chennai Super Kings field \n",
"755 Chennai Super Kings Mumbai Indians bat \n",
"\n",
" result dl_applied winner win_by_runs \\\n",
"0 normal 0 Sunrisers Hyderabad 35 \n",
"1 normal 0 Rising Pune Supergiant 0 \n",
"2 normal 0 Kolkata Knight Riders 0 \n",
"3 normal 0 Kings XI Punjab 0 \n",
"4 normal 0 Royal Challengers Bangalore 15 \n",
".. ... ... ... ... \n",
"751 normal 0 Mumbai Indians 0 \n",
"752 normal 0 Mumbai Indians 0 \n",
"753 normal 0 Delhi Capitals 0 \n",
"754 normal 0 Chennai Super Kings 0 \n",
"755 normal 0 Mumbai Indians 1 \n",
"\n",
" win_by_wickets player_of_match \\\n",
"0 0 Yuvraj Singh \n",
"1 7 SPD Smith \n",
"2 10 CA Lynn \n",
"3 6 GJ Maxwell \n",
"4 0 KM Jadhav \n",
".. ... ... \n",
"751 9 HH Pandya \n",
"752 6 AS Yadav \n",
"753 2 RR Pant \n",
"754 6 F du Plessis \n",
"755 0 JJ Bumrah \n",
"\n",
" venue umpire1 \\\n",
"0 Rajiv Gandhi International Stadium, Uppal AY Dandekar \n",
"1 Maharashtra Cricket Association Stadium A Nand Kishore \n",
"2 Saurashtra Cricket Association Stadium Nitin Menon \n",
"3 Holkar Cricket Stadium AK Chaudhary \n",
"4 M Chinnaswamy Stadium NaN \n",
".. ... ... \n",
"751 Wankhede Stadium Nanda Kishore \n",
"752 M. A. Chidambaram Stadium Nigel Llong \n",
"753 ACA-VDCA Stadium NaN \n",
"754 ACA-VDCA Stadium Sundaram Ravi \n",
"755 Rajiv Gandhi Intl. Cricket Stadium Nitin Menon \n",
"\n",
" umpire2 umpire3 match_id total_runs \n",
"0 NJ Llong NaN 1 207 \n",
"1 S Ravi NaN 2 184 \n",
"2 CK Nandan NaN 3 183 \n",
"3 C Shamshuddin NaN 4 163 \n",
"4 NaN NaN 5 157 \n",
".. ... ... ... ... \n",
"751 O Nandan S Ravi 11347 143 \n",
"752 Nitin Menon Ian Gould 11412 136 \n",
"753 NaN NaN 11413 171 \n",
"754 Bruce Oxenford Chettithody Shamshuddin 11414 155 \n",
"755 Ian Gould Nigel Llong 11415 152 \n",
"\n",
"[756 rows x 20 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" id \n",
" Season \n",
" city \n",
" date \n",
" team1 \n",
" team2 \n",
" toss_winner \n",
" toss_decision \n",
" result \n",
" dl_applied \n",
" winner \n",
" win_by_runs \n",
" win_by_wickets \n",
" player_of_match \n",
" venue \n",
" umpire1 \n",
" umpire2 \n",
" umpire3 \n",
" match_id \n",
" total_runs \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" IPL-2017 \n",
" Hyderabad \n",
" 05-04-2017 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" Royal Challengers Bangalore \n",
" field \n",
" normal \n",
" 0 \n",
" Sunrisers Hyderabad \n",
" 35 \n",
" 0 \n",
" Yuvraj Singh \n",
" Rajiv Gandhi International Stadium, Uppal \n",
" AY Dandekar \n",
" NJ Llong \n",
" NaN \n",
" 1 \n",
" 207 \n",
" \n",
" \n",
" 1 \n",
" 2 \n",
" IPL-2017 \n",
" Pune \n",
" 06-04-2017 \n",
" Mumbai Indians \n",
" Rising Pune Supergiant \n",
" Rising Pune Supergiant \n",
" field \n",
" normal \n",
" 0 \n",
" Rising Pune Supergiant \n",
" 0 \n",
" 7 \n",
" SPD Smith \n",
" Maharashtra Cricket Association Stadium \n",
" A Nand Kishore \n",
" S Ravi \n",
" NaN \n",
" 2 \n",
" 184 \n",
" \n",
" \n",
" 2 \n",
" 3 \n",
" IPL-2017 \n",
" Rajkot \n",
" 07-04-2017 \n",
" Gujarat Lions \n",
" Kolkata Knight Riders \n",
" Kolkata Knight Riders \n",
" field \n",
" normal \n",
" 0 \n",
" Kolkata Knight Riders \n",
" 0 \n",
" 10 \n",
" CA Lynn \n",
" Saurashtra Cricket Association Stadium \n",
" Nitin Menon \n",
" CK Nandan \n",
" NaN \n",
" 3 \n",
" 183 \n",
" \n",
" \n",
" 3 \n",
" 4 \n",
" IPL-2017 \n",
" Indore \n",
" 08-04-2017 \n",
" Rising Pune Supergiant \n",
" Kings XI Punjab \n",
" Kings XI Punjab \n",
" field \n",
" normal \n",
" 0 \n",
" Kings XI Punjab \n",
" 0 \n",
" 6 \n",
" GJ Maxwell \n",
" Holkar Cricket Stadium \n",
" AK Chaudhary \n",
" C Shamshuddin \n",
" NaN \n",
" 4 \n",
" 163 \n",
" \n",
" \n",
" 4 \n",
" 5 \n",
" IPL-2017 \n",
" Bangalore \n",
" 08-04-2017 \n",
" Royal Challengers Bangalore \n",
" Delhi Daredevils \n",
" Royal Challengers Bangalore \n",
" bat \n",
" normal \n",
" 0 \n",
" Royal Challengers Bangalore \n",
" 15 \n",
" 0 \n",
" KM Jadhav \n",
" M Chinnaswamy Stadium \n",
" NaN \n",
" NaN \n",
" NaN \n",
" 5 \n",
" 157 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 751 \n",
" 11347 \n",
" IPL-2019 \n",
" Mumbai \n",
" 05-05-2019 \n",
" Kolkata Knight Riders \n",
" Mumbai Indians \n",
" Mumbai Indians \n",
" field \n",
" normal \n",
" 0 \n",
" Mumbai Indians \n",
" 0 \n",
" 9 \n",
" HH Pandya \n",
" Wankhede Stadium \n",
" Nanda Kishore \n",
" O Nandan \n",
" S Ravi \n",
" 11347 \n",
" 143 \n",
" \n",
" \n",
" 752 \n",
" 11412 \n",
" IPL-2019 \n",
" Chennai \n",
" 07-05-2019 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" Chennai Super Kings \n",
" bat \n",
" normal \n",
" 0 \n",
" Mumbai Indians \n",
" 0 \n",
" 6 \n",
" AS Yadav \n",
" M. A. Chidambaram Stadium \n",
" Nigel Llong \n",
" Nitin Menon \n",
" Ian Gould \n",
" 11412 \n",
" 136 \n",
" \n",
" \n",
" 753 \n",
" 11413 \n",
" IPL-2019 \n",
" Visakhapatnam \n",
" 08-05-2019 \n",
" Sunrisers Hyderabad \n",
" Delhi Capitals \n",
" Delhi Capitals \n",
" field \n",
" normal \n",
" 0 \n",
" Delhi Capitals \n",
" 0 \n",
" 2 \n",
" RR Pant \n",
" ACA-VDCA Stadium \n",
" NaN \n",
" NaN \n",
" NaN \n",
" 11413 \n",
" 171 \n",
" \n",
" \n",
" 754 \n",
" 11414 \n",
" IPL-2019 \n",
" Visakhapatnam \n",
" 10-05-2019 \n",
" Delhi Capitals \n",
" Chennai Super Kings \n",
" Chennai Super Kings \n",
" field \n",
" normal \n",
" 0 \n",
" Chennai Super Kings \n",
" 0 \n",
" 6 \n",
" F du Plessis \n",
" ACA-VDCA Stadium \n",
" Sundaram Ravi \n",
" Bruce Oxenford \n",
" Chettithody Shamshuddin \n",
" 11414 \n",
" 155 \n",
" \n",
" \n",
" 755 \n",
" 11415 \n",
" IPL-2019 \n",
" Hyderabad \n",
" 12-05-2019 \n",
" Mumbai Indians \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" bat \n",
" normal \n",
" 0 \n",
" Mumbai Indians \n",
" 1 \n",
" 0 \n",
" JJ Bumrah \n",
" Rajiv Gandhi Intl. Cricket Stadium \n",
" Nitin Menon \n",
" Ian Gould \n",
" Nigel Llong \n",
" 11415 \n",
" 152 \n",
" \n",
" \n",
"
\n",
"
756 rows × 20 columns
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "match_df",
"summary": "{\n \"name\": \"match_df\",\n \"rows\": 756,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3464,\n \"min\": 1,\n \"max\": 11415,\n \"num_unique_values\": 756,\n \"samples\": [\n 409,\n 98,\n 425\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Season\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 12,\n \"samples\": [\n \"IPL-2018\",\n \"IPL-2016\",\n \"IPL-2017\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"city\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 32,\n \"samples\": [\n \"Sharjah\",\n \"Centurion\",\n \"Kochi\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 546,\n \"samples\": [\n \"26-05-2013\",\n \"20-05-2008\",\n \"10-04-2015\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"team1\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Rajasthan Royals\",\n \"Kochi Tuskers Kerala\",\n \"Sunrisers Hyderabad\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"team2\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Chennai Super Kings\",\n \"Pune Warriors\",\n \"Royal Challengers Bangalore\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"toss_winner\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Rajasthan Royals\",\n \"Kochi Tuskers Kerala\",\n \"Royal Challengers Bangalore\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"toss_decision\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"bat\",\n \"field\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"result\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"normal\",\n \"tie\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"dl_applied\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"winner\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Rajasthan Royals\",\n \"Pune Warriors\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"win_by_runs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 23,\n \"min\": 0,\n \"max\": 146,\n \"num_unique_values\": 89,\n \"samples\": [\n 53,\n 40\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"win_by_wickets\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 0,\n \"max\": 10,\n \"num_unique_values\": 11,\n \"samples\": [\n 4,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"player_of_match\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 226,\n \"samples\": [\n \"JJ Bumrah\",\n \"MA Agarwal\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"venue\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 41,\n \"samples\": [\n \"Barabati Stadium\",\n \"Dr DY Patil Sports Academy\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"umpire1\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 61,\n \"samples\": [\n \"AY Dandekar\",\n \"KN Ananthapadmanabhan\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"umpire2\",\n \"properties\": {\n 
\"dtype\": \"category\",\n \"num_unique_values\": 65,\n \"samples\": [\n \"O Nandan\",\n \"Nanda Kishore\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"umpire3\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 25,\n \"samples\": [\n \"Chris Gaffaney\",\n \"Marais Erasmus\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"match_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3464,\n \"min\": 1,\n \"max\": 11415,\n \"num_unique_values\": 756,\n \"samples\": [\n 409,\n 98\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_runs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 30,\n \"min\": 56,\n \"max\": 263,\n \"num_unique_values\": 150,\n \"samples\": [\n 175,\n 159\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 16
}
],
"source": [
"match_df"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "46d110b1",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "46d110b1",
"outputId": "a1a1e3a1-a75c-42f6-b686-23e51b1a656b"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array(['Sunrisers Hyderabad', 'Mumbai Indians', 'Gujarat Lions',\n",
" 'Rising Pune Supergiant', 'Royal Challengers Bangalore',\n",
" 'Kolkata Knight Riders', 'Delhi Daredevils', 'Kings XI Punjab',\n",
" 'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers',\n",
" 'Kochi Tuskers Kerala', 'Pune Warriors', 'Rising Pune Supergiants',\n",
" 'Delhi Capitals'], dtype=object)"
]
},
"metadata": {},
"execution_count": 17
}
],
"source": [
"# Distinct team1 values: the list mixes current franchises with renamed or\n",
"# discontinued ones (e.g. both 'Delhi Daredevils' and 'Delhi Capitals').\n",
"match_df['team1'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "9f048dbf",
"metadata": {
"id": "9f048dbf"
},
"outputs": [],
"source": [
"# Franchises kept for modelling (order unchanged from the original list).\n",
"teams = [\n",
"    'Sunrisers Hyderabad', 'Mumbai Indians',\n",
"    'Royal Challengers Bangalore', 'Kolkata Knight Riders',\n",
"    'Kings XI Punjab', 'Chennai Super Kings',\n",
"    'Rajasthan Royals', 'Delhi Capitals',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4ca212ee",
"metadata": {
"id": "4ca212ee"
},
"outputs": [],
"source": [
"# Map renamed franchises onto their current names in both team columns.\n",
"for team_col in ('team1', 'team2'):\n",
"    for old_name, new_name in (\n",
"        ('Delhi Daredevils', 'Delhi Capitals'),\n",
"        ('Deccan Chargers', 'Sunrisers Hyderabad'),\n",
"    ):\n",
"        match_df[team_col] = match_df[team_col].str.replace(old_name, new_name)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "ec3d2992",
"metadata": {
"id": "ec3d2992"
},
"outputs": [],
"source": [
"# Keep only matches in which both sides appear in `teams`.\n",
"both_sides_known = match_df['team1'].isin(teams) & match_df['team2'].isin(teams)\n",
"match_df = match_df[both_sides_known]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "456148f0",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "456148f0",
"outputId": "46a14d66-2a98-4124-d3e4-9a8f1466f640"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(641, 20)"
]
},
"metadata": {},
"execution_count": 21
}
],
"source": [
"# (rows, columns) of match_df at this point in the notebook.\n",
"match_df.shape"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "82af99c7",
"metadata": {
"id": "82af99c7"
},
"outputs": [],
"source": [
"# Keep only matches with dl_applied == 0. The column may be missing, so guard\n",
"# the lookup and report instead of raising a KeyError.\n",
"if 'dl_applied' not in match_df.columns:\n",
"    print(\"Column 'dl_applied' not found in the DataFrame.\")\n",
"else:\n",
"    match_df = match_df[match_df['dl_applied'] == 0]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "bb7e68ce",
"metadata": {
"id": "bb7e68ce"
},
"outputs": [],
"source": [
"# Only these match-level fields are carried forward into the delivery join.\n",
"match_cols = ['match_id', 'city', 'winner', 'total_runs']\n",
"match_df = match_df[match_cols]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "cfa8b802",
"metadata": {
"id": "cfa8b802"
},
"outputs": [],
"source": [
"# Attach the selected match-level columns to every delivery of the same match.\n",
"delivery_df = match_df.merge(delivery, on='match_id')\n",
"\n",
"# team1/team2 were mapped to current franchise names earlier, but the\n",
"# ball-by-ball team columns and `winner` still carry the old names. Normalise\n",
"# all three together so the features agree with `teams` and the\n",
"# batting_team == winner comparison used for the label stays consistent.\n",
"franchise_renames = {\n",
"    'Delhi Daredevils': 'Delhi Capitals',\n",
"    'Deccan Chargers': 'Sunrisers Hyderabad',\n",
"}\n",
"team_name_cols = ['batting_team', 'bowling_team', 'winner']\n",
"delivery_df[team_name_cols] = delivery_df[team_name_cols].replace(franchise_renames)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "bb9e3301",
"metadata": {
"id": "bb9e3301"
},
"outputs": [],
"source": [
"# Restrict to deliveries bowled in the second innings.\n",
"is_second_innings = delivery_df['inning'] == 2\n",
"delivery_df = delivery_df[is_second_innings]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "ed062c89",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 704
},
"id": "ed062c89",
"outputId": "dda09727-05b3-43bb-d849-a77f9c992733"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" match_id city winner total_runs_x inning \\\n",
"125 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"126 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"127 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"128 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"129 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"... ... ... ... ... ... \n",
"149573 11415 Hyderabad Mumbai Indians 152 2 \n",
"149574 11415 Hyderabad Mumbai Indians 152 2 \n",
"149575 11415 Hyderabad Mumbai Indians 152 2 \n",
"149576 11415 Hyderabad Mumbai Indians 152 2 \n",
"149577 11415 Hyderabad Mumbai Indians 152 2 \n",
"\n",
" batting_team bowling_team over ball \\\n",
"125 Royal Challengers Bangalore Sunrisers Hyderabad 1 1 \n",
"126 Royal Challengers Bangalore Sunrisers Hyderabad 1 2 \n",
"127 Royal Challengers Bangalore Sunrisers Hyderabad 1 3 \n",
"128 Royal Challengers Bangalore Sunrisers Hyderabad 1 4 \n",
"129 Royal Challengers Bangalore Sunrisers Hyderabad 1 5 \n",
"... ... ... ... ... \n",
"149573 Chennai Super Kings Mumbai Indians 20 2 \n",
"149574 Chennai Super Kings Mumbai Indians 20 3 \n",
"149575 Chennai Super Kings Mumbai Indians 20 4 \n",
"149576 Chennai Super Kings Mumbai Indians 20 5 \n",
"149577 Chennai Super Kings Mumbai Indians 20 6 \n",
"\n",
" batsman ... bye_runs legbye_runs noball_runs penalty_runs \\\n",
"125 CH Gayle ... 0 0 0 0 \n",
"126 Mandeep Singh ... 0 0 0 0 \n",
"127 Mandeep Singh ... 0 0 0 0 \n",
"128 Mandeep Singh ... 0 0 0 0 \n",
"129 Mandeep Singh ... 0 0 0 0 \n",
"... ... ... ... ... ... ... \n",
"149573 RA Jadeja ... 0 0 0 0 \n",
"149574 SR Watson ... 0 0 0 0 \n",
"149575 SR Watson ... 0 0 0 0 \n",
"149576 SN Thakur ... 0 0 0 0 \n",
"149577 SN Thakur ... 0 0 0 0 \n",
"\n",
" batsman_runs extra_runs total_runs_y player_dismissed \\\n",
"125 1 0 1 NaN \n",
"126 0 0 0 NaN \n",
"127 0 0 0 NaN \n",
"128 2 0 2 NaN \n",
"129 4 0 4 NaN \n",
"... ... ... ... ... \n",
"149573 1 0 1 NaN \n",
"149574 2 0 2 NaN \n",
"149575 1 0 1 SR Watson \n",
"149576 2 0 2 NaN \n",
"149577 0 0 0 SN Thakur \n",
"\n",
" dismissal_kind fielder \n",
"125 NaN NaN \n",
"126 NaN NaN \n",
"127 NaN NaN \n",
"128 NaN NaN \n",
"129 NaN NaN \n",
"... ... ... \n",
"149573 NaN NaN \n",
"149574 NaN NaN \n",
"149575 run out KH Pandya \n",
"149576 NaN NaN \n",
"149577 lbw NaN \n",
"\n",
"[72413 rows x 24 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" match_id \n",
" city \n",
" winner \n",
" total_runs_x \n",
" inning \n",
" batting_team \n",
" bowling_team \n",
" over \n",
" ball \n",
" batsman \n",
" ... \n",
" bye_runs \n",
" legbye_runs \n",
" noball_runs \n",
" penalty_runs \n",
" batsman_runs \n",
" extra_runs \n",
" total_runs_y \n",
" player_dismissed \n",
" dismissal_kind \n",
" fielder \n",
" \n",
" \n",
" \n",
" \n",
" 125 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 1 \n",
" CH Gayle \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 126 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 2 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 127 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 3 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 128 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 4 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 0 \n",
" 2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 129 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 5 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 4 \n",
" 0 \n",
" 4 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 149573 \n",
" 11415 \n",
" Hyderabad \n",
" Mumbai Indians \n",
" 152 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 2 \n",
" RA Jadeja \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 149574 \n",
" 11415 \n",
" Hyderabad \n",
" Mumbai Indians \n",
" 152 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 3 \n",
" SR Watson \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 0 \n",
" 2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 149575 \n",
" 11415 \n",
" Hyderabad \n",
" Mumbai Indians \n",
" 152 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 4 \n",
" SR Watson \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" SR Watson \n",
" run out \n",
" KH Pandya \n",
" \n",
" \n",
" 149576 \n",
" 11415 \n",
" Hyderabad \n",
" Mumbai Indians \n",
" 152 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 5 \n",
" SN Thakur \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 0 \n",
" 2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 149577 \n",
" 11415 \n",
" Hyderabad \n",
" Mumbai Indians \n",
" 152 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 6 \n",
" SN Thakur \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" SN Thakur \n",
" lbw \n",
" NaN \n",
" \n",
" \n",
"
\n",
"
72413 rows × 24 columns
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "delivery_df"
}
},
"metadata": {},
"execution_count": 26
}
],
"source": [
"# Inspect the merged frame after the inning == 2 filter.\n",
"delivery_df"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "3a2aed14",
"metadata": {
"id": "3a2aed14"
},
"outputs": [],
"source": [
"# Make sure per-ball runs are integers, then accumulate them delivery by\n",
"# delivery within each match to get the running score.\n",
"runs_per_ball = delivery_df['total_runs_y'].astype(int)\n",
"delivery_df['total_runs_y'] = runs_per_ball\n",
"delivery_df['current_score'] = runs_per_ball.groupby(delivery_df['match_id']).cumsum()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "a37ab264",
"metadata": {
"id": "a37ab264"
},
"outputs": [],
"source": [
"# Runs still required: match-level total minus the running score.\n",
"# NOTE(review): presumably total_runs_x is the first-innings total; a chasing\n",
"# side needs one more than that to win - confirm the target is as intended.\n",
"delivery_df['runs_left'] = delivery_df['total_runs_x'].sub(delivery_df['current_score'])"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "91142ecc",
"metadata": {
"id": "91142ecc"
},
"outputs": [],
"source": [
"# Balls remaining out of 120. `over` starts at 1, so (over - 1) * 6 + ball is\n",
"# the number of balls accounted for so far.\n",
"balls_so_far = (delivery_df['over'] - 1) * 6 + delivery_df['ball']\n",
"delivery_df['balls_left'] = 120 - balls_so_far"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "e49251b7",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 704
},
"id": "e49251b7",
"outputId": "351c5980-6210-49fb-9e22-7741392e52a1"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" match_id city winner total_runs_x inning \\\n",
"125 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"126 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"127 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"128 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"129 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"... ... ... ... ... ... \n",
"149573 11415 Hyderabad Mumbai Indians 152 2 \n",
"149574 11415 Hyderabad Mumbai Indians 152 2 \n",
"149575 11415 Hyderabad Mumbai Indians 152 2 \n",
"149576 11415 Hyderabad Mumbai Indians 152 2 \n",
"149577 11415 Hyderabad Mumbai Indians 152 2 \n",
"\n",
" batting_team bowling_team over ball \\\n",
"125 Royal Challengers Bangalore Sunrisers Hyderabad 1 1 \n",
"126 Royal Challengers Bangalore Sunrisers Hyderabad 1 2 \n",
"127 Royal Challengers Bangalore Sunrisers Hyderabad 1 3 \n",
"128 Royal Challengers Bangalore Sunrisers Hyderabad 1 4 \n",
"129 Royal Challengers Bangalore Sunrisers Hyderabad 1 5 \n",
"... ... ... ... ... \n",
"149573 Chennai Super Kings Mumbai Indians 20 2 \n",
"149574 Chennai Super Kings Mumbai Indians 20 3 \n",
"149575 Chennai Super Kings Mumbai Indians 20 4 \n",
"149576 Chennai Super Kings Mumbai Indians 20 5 \n",
"149577 Chennai Super Kings Mumbai Indians 20 6 \n",
"\n",
" batsman ... penalty_runs batsman_runs extra_runs \\\n",
"125 CH Gayle ... 0 1 0 \n",
"126 Mandeep Singh ... 0 0 0 \n",
"127 Mandeep Singh ... 0 0 0 \n",
"128 Mandeep Singh ... 0 2 0 \n",
"129 Mandeep Singh ... 0 4 0 \n",
"... ... ... ... ... ... \n",
"149573 RA Jadeja ... 0 1 0 \n",
"149574 SR Watson ... 0 2 0 \n",
"149575 SR Watson ... 0 1 0 \n",
"149576 SN Thakur ... 0 2 0 \n",
"149577 SN Thakur ... 0 0 0 \n",
"\n",
" total_runs_y player_dismissed dismissal_kind fielder \\\n",
"125 1 NaN NaN NaN \n",
"126 0 NaN NaN NaN \n",
"127 0 NaN NaN NaN \n",
"128 2 NaN NaN NaN \n",
"129 4 NaN NaN NaN \n",
"... ... ... ... ... \n",
"149573 1 NaN NaN NaN \n",
"149574 2 NaN NaN NaN \n",
"149575 1 SR Watson run out KH Pandya \n",
"149576 2 NaN NaN NaN \n",
"149577 0 SN Thakur lbw NaN \n",
"\n",
" current_score runs_left balls_left \n",
"125 1 206 119 \n",
"126 1 206 118 \n",
"127 1 206 117 \n",
"128 3 204 116 \n",
"129 7 200 115 \n",
"... ... ... ... \n",
"149573 152 0 4 \n",
"149574 154 -2 3 \n",
"149575 155 -3 2 \n",
"149576 157 -5 1 \n",
"149577 157 -5 0 \n",
"\n",
"[72413 rows x 27 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" match_id \n",
" city \n",
" winner \n",
" total_runs_x \n",
" inning \n",
" batting_team \n",
" bowling_team \n",
" over \n",
" ball \n",
" batsman \n",
" ... \n",
" penalty_runs \n",
" batsman_runs \n",
" extra_runs \n",
" total_runs_y \n",
" player_dismissed \n",
" dismissal_kind \n",
" fielder \n",
" current_score \n",
" runs_left \n",
" balls_left \n",
" \n",
" \n",
" \n",
" \n",
" 125 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 1 \n",
" CH Gayle \n",
" ... \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" 1 \n",
" 206 \n",
" 119 \n",
" \n",
" \n",
" 126 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 2 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" 1 \n",
" 206 \n",
" 118 \n",
" \n",
" \n",
" 127 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 3 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" 1 \n",
" 206 \n",
" 117 \n",
" \n",
" \n",
" 128 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 4 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 2 \n",
" 0 \n",
" 2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" 3 \n",
" 204 \n",
" 116 \n",
" \n",
" \n",
" 129 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 5 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 4 \n",
" 0 \n",
" 4 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" 7 \n",
" 200 \n",
" 115 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 149573 \n",
" 11415 \n",
" Hyderabad \n",
" Mumbai Indians \n",
" 152 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 2 \n",
" RA Jadeja \n",
" ... \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" 152 \n",
" 0 \n",
" 4 \n",
" \n",
" \n",
" 149574 \n",
" 11415 \n",
" Hyderabad \n",
" Mumbai Indians \n",
" 152 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 3 \n",
" SR Watson \n",
" ... \n",
" 0 \n",
" 2 \n",
" 0 \n",
" 2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" 154 \n",
" -2 \n",
" 3 \n",
" \n",
" \n",
" 149575 \n",
" 11415 \n",
" Hyderabad \n",
" Mumbai Indians \n",
" 152 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 4 \n",
" SR Watson \n",
" ... \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" SR Watson \n",
" run out \n",
" KH Pandya \n",
" 155 \n",
" -3 \n",
" 2 \n",
" \n",
" \n",
" 149576 \n",
" 11415 \n",
" Hyderabad \n",
" Mumbai Indians \n",
" 152 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 5 \n",
" SN Thakur \n",
" ... \n",
" 0 \n",
" 2 \n",
" 0 \n",
" 2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" 157 \n",
" -5 \n",
" 1 \n",
" \n",
" \n",
" 149577 \n",
" 11415 \n",
" Hyderabad \n",
" Mumbai Indians \n",
" 152 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 6 \n",
" SN Thakur \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" SN Thakur \n",
" lbw \n",
" NaN \n",
" 157 \n",
" -5 \n",
" 0 \n",
" \n",
" \n",
"
\n",
"
72413 rows × 27 columns
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "delivery_df"
}
},
"metadata": {},
"execution_count": 30
}
],
"source": [
"# Inspect the frame with the new current_score, runs_left and balls_left columns.\n",
"delivery_df"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "5ee97c37",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 704
},
"id": "5ee97c37",
"outputId": "5499b493-3e92-4453-beca-0ddd62f0e1f1"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" match_id inning batting_team bowling_team \\\n",
"0 1 1 Sunrisers Hyderabad Royal Challengers Bangalore \n",
"1 1 1 Sunrisers Hyderabad Royal Challengers Bangalore \n",
"2 1 1 Sunrisers Hyderabad Royal Challengers Bangalore \n",
"3 1 1 Sunrisers Hyderabad Royal Challengers Bangalore \n",
"4 1 1 Sunrisers Hyderabad Royal Challengers Bangalore \n",
"... ... ... ... ... \n",
"179073 11415 2 Chennai Super Kings Mumbai Indians \n",
"179074 11415 2 Chennai Super Kings Mumbai Indians \n",
"179075 11415 2 Chennai Super Kings Mumbai Indians \n",
"179076 11415 2 Chennai Super Kings Mumbai Indians \n",
"179077 11415 2 Chennai Super Kings Mumbai Indians \n",
"\n",
" over ball batsman non_striker bowler is_super_over ... \\\n",
"0 1 1 DA Warner S Dhawan TS Mills 0 ... \n",
"1 1 2 DA Warner S Dhawan TS Mills 0 ... \n",
"2 1 3 DA Warner S Dhawan TS Mills 0 ... \n",
"3 1 4 DA Warner S Dhawan TS Mills 0 ... \n",
"4 1 5 DA Warner S Dhawan TS Mills 0 ... \n",
"... ... ... ... ... ... ... ... \n",
"179073 20 2 RA Jadeja SR Watson SL Malinga 0 ... \n",
"179074 20 3 SR Watson RA Jadeja SL Malinga 0 ... \n",
"179075 20 4 SR Watson RA Jadeja SL Malinga 0 ... \n",
"179076 20 5 SN Thakur RA Jadeja SL Malinga 0 ... \n",
"179077 20 6 SN Thakur RA Jadeja SL Malinga 0 ... \n",
"\n",
" bye_runs legbye_runs noball_runs penalty_runs batsman_runs \\\n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 4 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"... ... ... ... ... ... \n",
"179073 0 0 0 0 1 \n",
"179074 0 0 0 0 2 \n",
"179075 0 0 0 0 1 \n",
"179076 0 0 0 0 2 \n",
"179077 0 0 0 0 0 \n",
"\n",
" extra_runs total_runs player_dismissed dismissal_kind fielder \n",
"0 0 0 NaN NaN NaN \n",
"1 0 0 NaN NaN NaN \n",
"2 0 4 NaN NaN NaN \n",
"3 0 0 NaN NaN NaN \n",
"4 2 2 NaN NaN NaN \n",
"... ... ... ... ... ... \n",
"179073 0 1 NaN NaN NaN \n",
"179074 0 2 NaN NaN NaN \n",
"179075 0 1 SR Watson run out KH Pandya \n",
"179076 0 2 NaN NaN NaN \n",
"179077 0 0 SN Thakur lbw NaN \n",
"\n",
"[179078 rows x 21 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" match_id \n",
" inning \n",
" batting_team \n",
" bowling_team \n",
" over \n",
" ball \n",
" batsman \n",
" non_striker \n",
" bowler \n",
" is_super_over \n",
" ... \n",
" bye_runs \n",
" legbye_runs \n",
" noball_runs \n",
" penalty_runs \n",
" batsman_runs \n",
" extra_runs \n",
" total_runs \n",
" player_dismissed \n",
" dismissal_kind \n",
" fielder \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 1 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" 1 \n",
" 1 \n",
" DA Warner \n",
" S Dhawan \n",
" TS Mills \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 1 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" 1 \n",
" 2 \n",
" DA Warner \n",
" S Dhawan \n",
" TS Mills \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 2 \n",
" 1 \n",
" 1 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" 1 \n",
" 3 \n",
" DA Warner \n",
" S Dhawan \n",
" TS Mills \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 4 \n",
" 0 \n",
" 4 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" 1 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" 1 \n",
" 4 \n",
" DA Warner \n",
" S Dhawan \n",
" TS Mills \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 4 \n",
" 1 \n",
" 1 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" 1 \n",
" 5 \n",
" DA Warner \n",
" S Dhawan \n",
" TS Mills \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 179073 \n",
" 11415 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 2 \n",
" RA Jadeja \n",
" SR Watson \n",
" SL Malinga \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 179074 \n",
" 11415 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 3 \n",
" SR Watson \n",
" RA Jadeja \n",
" SL Malinga \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 0 \n",
" 2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 179075 \n",
" 11415 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 4 \n",
" SR Watson \n",
" RA Jadeja \n",
" SL Malinga \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" SR Watson \n",
" run out \n",
" KH Pandya \n",
" \n",
" \n",
" 179076 \n",
" 11415 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 5 \n",
" SN Thakur \n",
" RA Jadeja \n",
" SL Malinga \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 0 \n",
" 2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 179077 \n",
" 11415 \n",
" 2 \n",
" Chennai Super Kings \n",
" Mumbai Indians \n",
" 20 \n",
" 6 \n",
" SN Thakur \n",
" RA Jadeja \n",
" SL Malinga \n",
" 0 \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" SN Thakur \n",
" lbw \n",
" NaN \n",
" \n",
" \n",
"
\n",
"
179078 rows × 21 columns
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "delivery"
}
},
"metadata": {},
"execution_count": 31
}
],
"source": [
"# Turn player_dismissed into a 0/1 wicket flag (1 when a name is present).\n",
"# Guarded so re-running the cell is a no-op: once the column is integer every\n",
"# value is non-null, and converting again would mark every delivery as a wicket.\n",
"if not pd.api.types.is_integer_dtype(delivery_df['player_dismissed']):\n",
"    delivery_df['player_dismissed'] = delivery_df['player_dismissed'].notna().astype('int')\n",
"\n",
"# Running count of dismissals within each match, aligned to delivery_df's index.\n",
"wickets = delivery_df.groupby('match_id')['player_dismissed'].cumsum()\n",
"# NOTE(review): this displays the raw `delivery` table, not `delivery_df` - confirm that is intended.\n",
"delivery"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "030b9c43",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 429
},
"id": "030b9c43",
"outputId": "6bc62058-c398-43cb-aca4-b15850608469"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" match_id city winner total_runs_x inning \\\n",
"125 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"126 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"127 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"128 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"129 1 Hyderabad Sunrisers Hyderabad 207 2 \n",
"\n",
" batting_team bowling_team over ball \\\n",
"125 Royal Challengers Bangalore Sunrisers Hyderabad 1 1 \n",
"126 Royal Challengers Bangalore Sunrisers Hyderabad 1 2 \n",
"127 Royal Challengers Bangalore Sunrisers Hyderabad 1 3 \n",
"128 Royal Challengers Bangalore Sunrisers Hyderabad 1 4 \n",
"129 Royal Challengers Bangalore Sunrisers Hyderabad 1 5 \n",
"\n",
" batsman ... penalty_runs batsman_runs extra_runs total_runs_y \\\n",
"125 CH Gayle ... 0 1 0 1 \n",
"126 Mandeep Singh ... 0 0 0 0 \n",
"127 Mandeep Singh ... 0 0 0 0 \n",
"128 Mandeep Singh ... 0 2 0 2 \n",
"129 Mandeep Singh ... 0 4 0 4 \n",
"\n",
" player_dismissed dismissal_kind fielder current_score runs_left \\\n",
"125 0 NaN NaN 1 206 \n",
"126 0 NaN NaN 1 206 \n",
"127 0 NaN NaN 1 206 \n",
"128 0 NaN NaN 3 204 \n",
"129 0 NaN NaN 7 200 \n",
"\n",
" balls_left \n",
"125 119 \n",
"126 118 \n",
"127 117 \n",
"128 116 \n",
"129 115 \n",
"\n",
"[5 rows x 27 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" match_id \n",
" city \n",
" winner \n",
" total_runs_x \n",
" inning \n",
" batting_team \n",
" bowling_team \n",
" over \n",
" ball \n",
" batsman \n",
" ... \n",
" penalty_runs \n",
" batsman_runs \n",
" extra_runs \n",
" total_runs_y \n",
" player_dismissed \n",
" dismissal_kind \n",
" fielder \n",
" current_score \n",
" runs_left \n",
" balls_left \n",
" \n",
" \n",
" \n",
" \n",
" 125 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 1 \n",
" CH Gayle \n",
" ... \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" 1 \n",
" 206 \n",
" 119 \n",
" \n",
" \n",
" 126 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 2 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" 1 \n",
" 206 \n",
" 118 \n",
" \n",
" \n",
" 127 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 3 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" 1 \n",
" 206 \n",
" 117 \n",
" \n",
" \n",
" 128 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 4 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 2 \n",
" 0 \n",
" 2 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" 3 \n",
" 204 \n",
" 116 \n",
" \n",
" \n",
" 129 \n",
" 1 \n",
" Hyderabad \n",
" Sunrisers Hyderabad \n",
" 207 \n",
" 2 \n",
" Royal Challengers Bangalore \n",
" Sunrisers Hyderabad \n",
" 1 \n",
" 5 \n",
" Mandeep Singh \n",
" ... \n",
" 0 \n",
" 4 \n",
" 0 \n",
" 4 \n",
" 0 \n",
" NaN \n",
" NaN \n",
" 7 \n",
" 200 \n",
" 115 \n",
" \n",
" \n",
"
\n",
"
5 rows × 27 columns
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "delivery_df"
}
},
"metadata": {},
"execution_count": 32
}
],
"source": [
"# First rows of delivery_df; player_dismissed is now a 0/1 flag.\n",
"delivery_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "f9fe60c7",
"metadata": {
"id": "f9fe60c7"
},
"outputs": [],
"source": [
"# Current run rate: runs scored per over so far (runs * 6 / balls bowled).\n",
"balls_bowled = 120 - delivery_df['balls_left']\n",
"delivery_df['crr'] = (delivery_df['current_score'] * 6) / balls_bowled"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "7d484dea",
"metadata": {
"id": "7d484dea"
},
"outputs": [],
"source": [
"# Required run rate: runs still needed per over remaining.\n",
"runs_left_x6 = delivery_df['runs_left'] * 6\n",
"delivery_df['rrr'] = runs_left_x6 / delivery_df['balls_left']"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "730c19d4",
"metadata": {
"id": "730c19d4"
},
"outputs": [],
"source": [
"def result(row):\n",
"    \"\"\"Return 1 when the row's batting_team equals its winner, otherwise 0.\"\"\"\n",
"    batting_side_won = row['batting_team'] == row['winner']\n",
"    if batting_side_won:\n",
"        return 1\n",
"    return 0"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "a49caf70",
"metadata": {
"id": "a49caf70"
},
"outputs": [],
"source": [
"# Label each delivery via result(); only the two columns it reads are passed in.\n",
"label_inputs = delivery_df[['batting_team', 'winner']]\n",
"delivery_df['result'] = label_inputs.apply(result, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "2999909b",
"metadata": {
"id": "2999909b"
},
"outputs": [],
"source": [
"# Store the per-match running dismissal count as a column.\n",
"delivery_df['wickets'] = wickets\n",
"\n",
"# Model table: feature columns first, with the `result` label last.\n",
"model_cols = [\n",
"    'batting_team', 'bowling_team', 'city',\n",
"    'runs_left', 'balls_left', 'wickets',\n",
"    'total_runs_x', 'crr', 'rrr',\n",
"    'result',\n",
"]\n",
"final_df = delivery_df[model_cols]"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "fb242ffd",
"metadata": {
"id": "fb242ffd"
},
"outputs": [],
"source": [
"# Shuffle all rows. The seed makes the row order - and therefore the seeded\n",
"# train/test split further down - reproducible across kernel restarts.\n",
"final_df = final_df.sample(frac=1, random_state=1)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "3dc0b91d",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 118
},
"id": "3dc0b91d",
"outputId": "908b84ce-4ebe-414a-fef9-f4bf10f007df"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" batting_team bowling_team city runs_left \\\n",
"135691 Kolkata Knight Riders Sunrisers Hyderabad Kolkata 51 \n",
"\n",
" balls_left wickets total_runs_x crr rrr result \n",
"135691 17 4 189 8.038835 18.0 1 "
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" batting_team \n",
" bowling_team \n",
" city \n",
" runs_left \n",
" balls_left \n",
" wickets \n",
" total_runs_x \n",
" crr \n",
" rrr \n",
" result \n",
" \n",
" \n",
" \n",
" \n",
" 135691 \n",
" Kolkata Knight Riders \n",
" Sunrisers Hyderabad \n",
" Kolkata \n",
" 51 \n",
" 17 \n",
" 4 \n",
" 189 \n",
" 8.038835 \n",
" 18.0 \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"final_df\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"batting_team\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Kolkata Knight Riders\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bowling_team\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Sunrisers Hyderabad\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"city\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Kolkata\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"runs_left\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 51,\n \"max\": 51,\n \"num_unique_values\": 1,\n \"samples\": [\n 51\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"balls_left\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 17,\n \"max\": 17,\n \"num_unique_values\": 1,\n \"samples\": [\n 17\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"wickets\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 4,\n \"max\": 4,\n \"num_unique_values\": 1,\n \"samples\": [\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_runs_x\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 189,\n \"max\": 189,\n \"num_unique_values\": 1,\n \"samples\": [\n 189\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"crr\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 8.03883495145631,\n \"max\": 8.03883495145631,\n \"num_unique_values\": 1,\n \"samples\": [\n 8.03883495145631\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rrr\",\n \"properties\": {\n 
\"dtype\": \"number\",\n \"std\": null,\n \"min\": 18.0,\n \"max\": 18.0,\n \"num_unique_values\": 1,\n \"samples\": [\n 18.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"result\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 1,\n \"max\": 1,\n \"num_unique_values\": 1,\n \"samples\": [\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 39
}
],
"source": [
"final_df.sample()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "dfec0834",
"metadata": {
"id": "dfec0834"
},
"outputs": [],
"source": [
"final_df.dropna(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "bafcba9c",
"metadata": {
"id": "bafcba9c"
},
"outputs": [],
"source": [
"final_df = final_df[final_df['balls_left'] != 0]"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "54edf23b",
"metadata": {
"id": "54edf23b"
},
"outputs": [],
"source": [
"X = final_df.iloc[:,:-1]\n",
"y = final_df.iloc[:,-1]\n",
"from sklearn.model_selection import train_test_split\n",
"X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=1)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "3aa219a5",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 634
},
"id": "3aa219a5",
"outputId": "699970f0-9c51-465c-c3fe-a3b57cbed3af"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" batting_team bowling_team city \\\n",
"64488 Mumbai Indians Delhi Daredevils Delhi \n",
"85786 Mumbai Indians Chennai Super Kings Delhi \n",
"82269 Sunrisers Hyderabad Chennai Super Kings Hyderabad \n",
"65470 Mumbai Indians Deccan Chargers Mumbai \n",
"135896 Mumbai Indians Delhi Capitals Mumbai \n",
"... ... ... ... \n",
"41310 Deccan Chargers Mumbai Indians Mumbai \n",
"43512 Deccan Chargers Chennai Super Kings Nagpur \n",
"126242 Delhi Daredevils Kings XI Punjab Delhi \n",
"73409 Sunrisers Hyderabad Royal Challengers Bangalore Hyderabad \n",
"101346 Kings XI Punjab Rajasthan Royals Pune \n",
"\n",
" runs_left balls_left wickets total_runs_x crr rrr \n",
"64488 92 45 3 207 9.200000 12.266667 \n",
"85786 103 64 2 192 9.535714 9.656250 \n",
"82269 199 101 1 223 7.578947 11.821782 \n",
"65470 37 48 3 100 5.250000 4.625000 \n",
"135896 121 56 3 219 9.187500 12.964286 \n",
"... ... ... ... ... ... ... \n",
"41310 96 48 5 178 6.833333 12.000000 \n",
"43512 48 36 3 138 6.428571 8.000000 \n",
"126242 111 89 2 157 8.903226 7.483146 \n",
"73409 107 94 2 130 5.307692 6.829787 \n",
"101346 158 117 1 162 8.000000 8.102564 \n",
"\n",
"[57073 rows x 9 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" batting_team \n",
" bowling_team \n",
" city \n",
" runs_left \n",
" balls_left \n",
" wickets \n",
" total_runs_x \n",
" crr \n",
" rrr \n",
" \n",
" \n",
" \n",
" \n",
" 64488 \n",
" Mumbai Indians \n",
" Delhi Daredevils \n",
" Delhi \n",
" 92 \n",
" 45 \n",
" 3 \n",
" 207 \n",
" 9.200000 \n",
" 12.266667 \n",
" \n",
" \n",
" 85786 \n",
" Mumbai Indians \n",
" Chennai Super Kings \n",
" Delhi \n",
" 103 \n",
" 64 \n",
" 2 \n",
" 192 \n",
" 9.535714 \n",
" 9.656250 \n",
" \n",
" \n",
" 82269 \n",
" Sunrisers Hyderabad \n",
" Chennai Super Kings \n",
" Hyderabad \n",
" 199 \n",
" 101 \n",
" 1 \n",
" 223 \n",
" 7.578947 \n",
" 11.821782 \n",
" \n",
" \n",
" 65470 \n",
" Mumbai Indians \n",
" Deccan Chargers \n",
" Mumbai \n",
" 37 \n",
" 48 \n",
" 3 \n",
" 100 \n",
" 5.250000 \n",
" 4.625000 \n",
" \n",
" \n",
" 135896 \n",
" Mumbai Indians \n",
" Delhi Capitals \n",
" Mumbai \n",
" 121 \n",
" 56 \n",
" 3 \n",
" 219 \n",
" 9.187500 \n",
" 12.964286 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 41310 \n",
" Deccan Chargers \n",
" Mumbai Indians \n",
" Mumbai \n",
" 96 \n",
" 48 \n",
" 5 \n",
" 178 \n",
" 6.833333 \n",
" 12.000000 \n",
" \n",
" \n",
" 43512 \n",
" Deccan Chargers \n",
" Chennai Super Kings \n",
" Nagpur \n",
" 48 \n",
" 36 \n",
" 3 \n",
" 138 \n",
" 6.428571 \n",
" 8.000000 \n",
" \n",
" \n",
" 126242 \n",
" Delhi Daredevils \n",
" Kings XI Punjab \n",
" Delhi \n",
" 111 \n",
" 89 \n",
" 2 \n",
" 157 \n",
" 8.903226 \n",
" 7.483146 \n",
" \n",
" \n",
" 73409 \n",
" Sunrisers Hyderabad \n",
" Royal Challengers Bangalore \n",
" Hyderabad \n",
" 107 \n",
" 94 \n",
" 2 \n",
" 130 \n",
" 5.307692 \n",
" 6.829787 \n",
" \n",
" \n",
" 101346 \n",
" Kings XI Punjab \n",
" Rajasthan Royals \n",
" Pune \n",
" 158 \n",
" 117 \n",
" 1 \n",
" 162 \n",
" 8.000000 \n",
" 8.102564 \n",
" \n",
" \n",
"
\n",
"
57073 rows × 9 columns
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "X_train",
"summary": "{\n \"name\": \"X_train\",\n \"rows\": 57073,\n \"fields\": [\n {\n \"column\": \"batting_team\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Kolkata Knight Riders\",\n \"Sunrisers Hyderabad\",\n \"Rajasthan Royals\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bowling_team\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Royal Challengers Bangalore\",\n \"Chennai Super Kings\",\n \"Mumbai Indians\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"city\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 29,\n \"samples\": [\n \"Indore\",\n \"Ahmedabad\",\n \"Mohali\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"runs_left\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 50,\n \"min\": -16,\n \"max\": 248,\n \"num_unique_values\": 254,\n \"samples\": [\n 206,\n 117,\n 153\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"balls_left\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 33,\n \"min\": -2,\n \"max\": 119,\n \"num_unique_values\": 121,\n \"samples\": [\n 95,\n 16,\n 56\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"wickets\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 0,\n \"max\": 10,\n \"num_unique_values\": 11,\n \"samples\": [\n 0,\n 3,\n 9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_runs_x\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29,\n \"min\": 65,\n \"max\": 250,\n \"num_unique_values\": 142,\n \"samples\": [\n 232,\n 170,\n 95\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"crr\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.2750805067157467,\n \"min\": 0.0,\n \"max\": 
42.0,\n \"num_unique_values\": 5313,\n \"samples\": [\n 8.225806451612904,\n 10.88659793814433,\n 4.645161290322581\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rrr\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.14860217304409,\n \"min\": -510.0,\n \"max\": 678.0,\n \"num_unique_values\": 8548,\n \"samples\": [\n 11.493975903614459,\n 29.47826086956522,\n 11.866666666666667\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 43
}
],
"source": [
"X_train"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "45c6fffa",
"metadata": {
"id": "45c6fffa"
},
"outputs": [],
"source": [
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"trf = ColumnTransformer([\n",
" ('trf',OneHotEncoder(sparse=False,drop='first'),['batting_team','bowling_team','city'])\n",
"]\n",
",remainder='passthrough')"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "9be108ac",
"metadata": {
"id": "9be108ac"
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.pipeline import Pipeline"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "92dfbfcb",
"metadata": {
"id": "92dfbfcb"
},
"outputs": [],
"source": [
"pipe = Pipeline(steps=[\n",
" ('step1',trf),\n",
" ('step2',LogisticRegression(solver='liblinear'))\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "12679868",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 245
},
"id": "12679868",
"outputId": "826f4fa5-fa60-4e0a-e2cb-f3079e381635"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/sklearn/preprocessing/_encoders.py:868: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n",
" warnings.warn(\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Pipeline(steps=[('step1',\n",
" ColumnTransformer(remainder='passthrough',\n",
" transformers=[('trf',\n",
" OneHotEncoder(drop='first',\n",
" sparse=False),\n",
" ['batting_team',\n",
" 'bowling_team', 'city'])])),\n",
" ('step2', LogisticRegression(solver='liblinear'))])"
],
"text/html": [
"Pipeline(steps=[('step1',\n",
" ColumnTransformer(remainder='passthrough',\n",
" transformers=[('trf',\n",
" OneHotEncoder(drop='first',\n",
" sparse=False),\n",
" ['batting_team',\n",
" 'bowling_team', 'city'])])),\n",
" ('step2', LogisticRegression(solver='liblinear'))]) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. Pipeline Pipeline(steps=[('step1',\n",
" ColumnTransformer(remainder='passthrough',\n",
" transformers=[('trf',\n",
" OneHotEncoder(drop='first',\n",
" sparse=False),\n",
" ['batting_team',\n",
" 'bowling_team', 'city'])])),\n",
" ('step2', LogisticRegression(solver='liblinear'))]) "
]
},
"metadata": {},
"execution_count": 47
}
],
"source": [
"pipe.fit(X_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "cf3fde3b",
"metadata": {
"id": "cf3fde3b"
},
"outputs": [],
"source": [
"y_pred = pipe.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "b43ea121",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "b43ea121",
"outputId": "e60f3bba-4636-4914-b251-00f35e1a41e8"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.7987245076739785"
]
},
"metadata": {},
"execution_count": 49
}
],
"source": [
"from sklearn.metrics import accuracy_score\n",
"accuracy_score(y_test,y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "01205f46",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "01205f46",
"outputId": "ed532931-5c10-407b-d56c-4d77829869eb"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([0.11028987, 0.88971013])"
]
},
"metadata": {},
"execution_count": 50
}
],
"source": [
"pipe.predict_proba(X_test)[10]"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "cf6fbd69",
"metadata": {
"id": "cf6fbd69"
},
"outputs": [],
"source": [
"def match_summary(row):\n",
" print(\"Batting Team-\" + row['batting_team'] + \" | Bowling Team-\" + row['bowling_team'] + \" | Target- \" + str(row['total_runs_x']))\n",
""
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "41c62b45",
"metadata": {
"id": "41c62b45"
},
"outputs": [],
"source": [
"def match_progression(x_df,match_id,pipe):\n",
" match = x_df[x_df['match_id'] == match_id]\n",
" match = match[(match['ball'] == 6)]\n",
" temp_df = match[['batting_team','bowling_team','city','runs_left','balls_left','wickets','total_runs_x','crr','rrr']].dropna()\n",
" temp_df = temp_df[temp_df['balls_left'] != 0]\n",
" result = pipe.predict_proba(temp_df)\n",
" temp_df['lose'] = np.round(result.T[0]*100,1)\n",
" temp_df['win'] = np.round(result.T[1]*100,1)\n",
" temp_df['end_of_over'] = range(1,temp_df.shape[0]+1)\n",
"\n",
" target = temp_df['total_runs_x'].values[0]\n",
" runs = list(temp_df['runs_left'].values)\n",
" new_runs = runs[:]\n",
" runs.insert(0,target)\n",
" temp_df['runs_after_over'] = np.array(runs)[:-1] - np.array(new_runs)\n",
" wickets = list(temp_df['wickets'].values)\n",
" new_wickets = wickets[:]\n",
" new_wickets.insert(0,10)\n",
" wickets.append(0)\n",
" w = np.array(wickets)\n",
" nw = np.array(new_wickets)\n",
" temp_df['wickets_in_over'] = (nw - w)[0:temp_df.shape[0]]\n",
"\n",
" print(\"Target-\",target)\n",
" temp_df = temp_df[['end_of_over','runs_after_over','wickets_in_over','lose','win']]\n",
" return temp_df,target\n",
""
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "d3238e65",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 662
},
"id": "d3238e65",
"outputId": "3556213a-5988-4b94-e3e7-8a1fb26eec0b"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Target- 178\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" end_of_over runs_after_over wickets_in_over lose win\n",
"10459 1 4 10 57.4 42.6\n",
"10467 2 8 0 52.3 47.7\n",
"10473 3 1 0 59.2 40.8\n",
"10479 4 7 -1 70.7 29.3\n",
"10485 5 12 0 60.8 39.2\n",
"10491 6 13 0 48.5 51.5\n",
"10497 7 9 0 42.5 57.5\n",
"10505 8 15 0 28.4 71.6\n",
"10511 9 7 0 26.2 73.8\n",
"10518 10 17 0 14.3 85.7\n",
"10524 11 9 -1 19.9 80.1\n",
"10530 12 9 0 16.4 83.6\n",
"10536 13 8 0 14.1 85.9\n",
"10542 14 8 0 12.1 87.9\n",
"10548 15 5 -1 21.2 78.8\n",
"10555 16 8 -1 30.0 70.0\n",
"10561 17 8 -2 56.4 43.6\n",
"10567 18 6 -1 71.3 28.7\n",
"10573 19 8 -2 89.6 10.4"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" end_of_over \n",
" runs_after_over \n",
" wickets_in_over \n",
" lose \n",
" win \n",
" \n",
" \n",
" \n",
" \n",
" 10459 \n",
" 1 \n",
" 4 \n",
" 10 \n",
" 57.4 \n",
" 42.6 \n",
" \n",
" \n",
" 10467 \n",
" 2 \n",
" 8 \n",
" 0 \n",
" 52.3 \n",
" 47.7 \n",
" \n",
" \n",
" 10473 \n",
" 3 \n",
" 1 \n",
" 0 \n",
" 59.2 \n",
" 40.8 \n",
" \n",
" \n",
" 10479 \n",
" 4 \n",
" 7 \n",
" -1 \n",
" 70.7 \n",
" 29.3 \n",
" \n",
" \n",
" 10485 \n",
" 5 \n",
" 12 \n",
" 0 \n",
" 60.8 \n",
" 39.2 \n",
" \n",
" \n",
" 10491 \n",
" 6 \n",
" 13 \n",
" 0 \n",
" 48.5 \n",
" 51.5 \n",
" \n",
" \n",
" 10497 \n",
" 7 \n",
" 9 \n",
" 0 \n",
" 42.5 \n",
" 57.5 \n",
" \n",
" \n",
" 10505 \n",
" 8 \n",
" 15 \n",
" 0 \n",
" 28.4 \n",
" 71.6 \n",
" \n",
" \n",
" 10511 \n",
" 9 \n",
" 7 \n",
" 0 \n",
" 26.2 \n",
" 73.8 \n",
" \n",
" \n",
" 10518 \n",
" 10 \n",
" 17 \n",
" 0 \n",
" 14.3 \n",
" 85.7 \n",
" \n",
" \n",
" 10524 \n",
" 11 \n",
" 9 \n",
" -1 \n",
" 19.9 \n",
" 80.1 \n",
" \n",
" \n",
" 10530 \n",
" 12 \n",
" 9 \n",
" 0 \n",
" 16.4 \n",
" 83.6 \n",
" \n",
" \n",
" 10536 \n",
" 13 \n",
" 8 \n",
" 0 \n",
" 14.1 \n",
" 85.9 \n",
" \n",
" \n",
" 10542 \n",
" 14 \n",
" 8 \n",
" 0 \n",
" 12.1 \n",
" 87.9 \n",
" \n",
" \n",
" 10548 \n",
" 15 \n",
" 5 \n",
" -1 \n",
" 21.2 \n",
" 78.8 \n",
" \n",
" \n",
" 10555 \n",
" 16 \n",
" 8 \n",
" -1 \n",
" 30.0 \n",
" 70.0 \n",
" \n",
" \n",
" 10561 \n",
" 17 \n",
" 8 \n",
" -2 \n",
" 56.4 \n",
" 43.6 \n",
" \n",
" \n",
" 10567 \n",
" 18 \n",
" 6 \n",
" -1 \n",
" 71.3 \n",
" 28.7 \n",
" \n",
" \n",
" 10573 \n",
" 19 \n",
" 8 \n",
" -2 \n",
" 89.6 \n",
" 10.4 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "temp_df",
"summary": "{\n \"name\": \"temp_df\",\n \"rows\": 19,\n \"fields\": [\n {\n \"column\": \"end_of_over\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5,\n \"min\": 1,\n \"max\": 19,\n \"num_unique_values\": 19,\n \"samples\": [\n 1,\n 6,\n 12\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"runs_after_over\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 17,\n \"num_unique_values\": 11,\n \"samples\": [\n 13,\n 4,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"wickets_in_over\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": -2,\n \"max\": 10,\n \"num_unique_values\": 4,\n \"samples\": [\n 0,\n -2,\n 10\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"lose\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 23.312713947092202,\n \"min\": 12.1,\n \"max\": 89.6,\n \"num_unique_values\": 19,\n \"samples\": [\n 57.4,\n 48.5,\n 16.4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"win\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 23.312713947092202,\n \"min\": 10.4,\n \"max\": 87.9,\n \"num_unique_values\": 19,\n \"samples\": [\n 42.6,\n 51.5,\n 83.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 53
}
],
"source": [
"temp_df,target = match_progression(delivery_df,74,pipe)\n",
"temp_df"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "256b9c2d",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 351
},
"id": "256b9c2d",
"outputId": "3831dfb2-e24c-4f7c-81bc-25406b42a8cc"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Target-178')"
]
},
"metadata": {},
"execution_count": 54
},
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.figure(figsize=(18,8))\n",
"plt.plot(temp_df['end_of_over'],temp_df['wickets_in_over'],color='yellow',linewidth=3)\n",
"plt.plot(temp_df['end_of_over'],temp_df['win'],color='#00a65a',linewidth=4)\n",
"plt.plot(temp_df['end_of_over'],temp_df['lose'],color='red',linewidth=4)\n",
"plt.bar(temp_df['end_of_over'],temp_df['runs_after_over'])\n",
"plt.title('Target-' + str(target))"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "5731378e",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5731378e",
"outputId": "60891b1a-b758-4485-b7f9-82e014c18491"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['Sunrisers Hyderabad',\n",
" 'Mumbai Indians',\n",
" 'Royal Challengers Bangalore',\n",
" 'Kolkata Knight Riders',\n",
" 'Kings XI Punjab',\n",
" 'Chennai Super Kings',\n",
" 'Rajasthan Royals',\n",
" 'Delhi Capitals']"
]
},
"metadata": {},
"execution_count": 55
}
],
"source": [
"teams"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "fb7e305d",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fb7e305d",
"outputId": "b9540df9-3c1d-4746-83d3-60a0d3403299"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array(['Hyderabad', 'Bangalore', 'Mumbai', 'Indore', 'Kolkata', 'Delhi',\n",
" 'Chandigarh', 'Jaipur', 'Chennai', 'Cape Town', 'Port Elizabeth',\n",
" 'Durban', 'Centurion', 'East London', 'Johannesburg', 'Kimberley',\n",
" 'Bloemfontein', 'Ahmedabad', 'Cuttack', 'Nagpur', 'Dharamsala',\n",
" 'Visakhapatnam', 'Pune', 'Raipur', 'Ranchi', 'Abu Dhabi',\n",
" 'Sharjah', nan, 'Mohali', 'Bengaluru'], dtype=object)"
]
},
"metadata": {},
"execution_count": 56
}
],
"source": [
"delivery_df['city'].unique()"
]
},
{
"cell_type": "code",
"source": [
"!pip install category_encoders\n",
"import category_encoders as ce\n",
"\n",
"def transform_new_data(new_data):\n",
" # Assuming you want to use a specific encoder like OneHotEncoder\n",
" encoder = ce.OneHotEncoder(cols=['city']) # Replace 'city' with the actual column name\n",
" encoder.fit(new_data) # Fit the encoder to your data\n",
" transformed_data = encoder.transform(new_data)\n",
" return transformed_data\n",
"\n",
"# Example of transforming new data\n",
"new_data = pd.DataFrame({'city': ['Abu Dhabi', 'New York']})\n",
"transformed_new_data = transform_new_data(new_data)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7nDHbpX3qugB",
"outputId": "eecf14d8-d853-4cf8-cceb-3cfed53a5b15"
},
"id": "7nDHbpX3qugB",
"execution_count": 57,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: category_encoders in /usr/local/lib/python3.10/dist-packages (2.6.3)\n",
"Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from category_encoders) (1.22.0)\n",
"Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from category_encoders) (1.2.2)\n",
"Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from category_encoders) (1.11.4)\n",
"Requirement already satisfied: statsmodels>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from category_encoders) (0.14.2)\n",
"Requirement already satisfied: pandas>=1.0.5 in /usr/local/lib/python3.10/dist-packages (from category_encoders) (1.5.3)\n",
"Requirement already satisfied: patsy>=0.5.1 in /usr/local/lib/python3.10/dist-packages (from category_encoders) (0.5.6)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.5->category_encoders) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.5->category_encoders) (2023.4)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from patsy>=0.5.1->category_encoders) (1.16.0)\n",
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->category_encoders) (1.4.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->category_encoders) (3.5.0)\n",
"Collecting numpy>=1.14.0 (from category_encoders)\n",
" Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.2/18.2 MB\u001b[0m \u001b[31m33.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: packaging>=21.3 in /usr/local/lib/python3.10/dist-packages (from statsmodels>=0.9.0->category_encoders) (24.1)\n",
"Installing collected packages: numpy\n",
" Attempting uninstall: numpy\n",
" Found existing installation: numpy 1.22.0\n",
" Uninstalling numpy-1.22.0:\n",
" Successfully uninstalled numpy-1.22.0\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"cudf-cu12 24.4.1 requires pandas<2.2.2dev0,>=2.0, but you have pandas 1.5.3 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed numpy-1.26.4\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"handle_unknown='ignore'"
],
"metadata": {
"id": "on3bGFY1re6z"
},
"id": "on3bGFY1re6z",
"execution_count": 58,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"# Sample data\n",
"data = pd.DataFrame({'City': ['New York', 'Los Angeles', 'San Francisco', 'Chicago', 'Houston']})\n",
"\n",
"# Define all possible categories\n",
"all_possible_categories = ['New York', 'Los Angeles', 'San Francisco', 'Chicago', 'Houston', 'Abu Dhabi']\n",
"\n",
"# Initialize the encoder\n",
"encoder = OneHotEncoder(categories=[all_possible_categories], handle_unknown='ignore')\n",
"\n",
"# Fit the encoder\n",
"encoder.fit(data[['City']])\n",
"\n",
"# Function to transform new data\n",
"def transform_new_data(new_data):\n",
" transformed_data = encoder.transform(new_data[['City']])\n",
" return transformed_data\n",
"\n",
"# Example of transforming new data\n",
"new_data = pd.DataFrame({'City': ['Abu Dhabi', 'New York']})\n",
"transformed_new_data = transform_new_data(new_data)\n",
"print(transformed_new_data.toarray())\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yi1uqXWUuNva",
"outputId": "3479abdd-83a6-428a-b8ec-ec0a134932a8"
},
"id": "yi1uqXWUuNva",
"execution_count": 59,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[0. 0. 0. 0. 0. 1.]\n",
" [1. 0. 0. 0. 0. 0.]]\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "99e08b54",
"metadata": {
"id": "99e08b54"
},
"outputs": [],
"source": [
"import pickle\n",
"pickle.dump(pipe,open('pipe.pkl','wb'))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
},
"colab": {
"provenance": []
}
},
"nbformat": 4,
"nbformat_minor": 5
}