{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "pp68FAQf9aMN" }, "source": [ "# Sarcasm Detection\n", " **Acknowledgement**\n", "\n", "Misra, Rishabh, and Prahal Arora. \"Sarcasm Detection using Hybrid Neural Network.\" arXiv preprint arXiv:1908.07414 (2019).\n", "\n", "**Required Files given in below link.**\n", "\n", "https://drive.google.com/drive/folders/1xUnF35naPGU63xwRDVGc-DkZ3M8V5mMk" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "S3Wj_mIZ8S3K" }, "source": [ "## Install `Tensorflow2.0` " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 679 }, "colab_type": "code", "id": "jW2Uk8otQvi8", "outputId": "08f2d715-7e92-4ea5-d85c-2b97e0cf9f2e" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting tensorflow==2.0.0\n", " Using cached https://files.pythonhosted.org/packages/46/0f/7bd55361168bb32796b360ad15a25de6966c9c1beb58a8e30c01c8279862/tensorflow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl\n", "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (3.10.0)\n", "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (3.2.1)\n", "Requirement already satisfied: tensorflow-estimator<2.1.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (2.0.1)\n", "Requirement already satisfied: keras-applications>=1.0.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.0.8)\n", "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.12.1)\n", "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.18.4)\n", "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.28.1)\n", "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.34.2)\n", "Requirement already satisfied: gast==0.2.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.2.2)\n", "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.2.0)\n", "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.8.1)\n", "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.12.0)\n", "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.1.0)\n", "Requirement already satisfied: tensorboard<2.1.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (2.0.2)\n", "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.1.0)\n", "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.9.0)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.6.1->tensorflow==2.0.0) (46.1.3)\n", "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow==2.0.0) (2.10.0)\n", "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.0.1)\n", "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.2.1)\n", "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.7.2)\n", "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (2.23.0)\n", "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (0.4.1)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (0.2.8)\n", "Requirement already satisfied: cachetools<3.2,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.1.1)\n", "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (4.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (2020.4.5.1)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.24.3)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.0.4)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (2.9)\n", "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.3.0)\n", "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (0.4.8)\n", "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.1.0)\n", "Installing collected packages: tensorflow\n", "Successfully installed tensorflow-2.0.0\n" ] } ], "source": [ "!!pip uninstall tensorflow\n", "!pip install tensorflow==2.0.0" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "v9kv9tyJ77eF" }, "source": [ "## Get Required Files from Drive" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "colab_type": "code", "id": "D0O_n6OIEVyL", "outputId": "2f8098b6-740a-4e8d-ac4e-597c8727791a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount(\"/content/drive/\", force_remount=True).\n" ] } ], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive/')" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "0mgRpOvFMjKR" }, "outputs": [], "source": [ "#Set your project path \n", "project_path = '/content/drive/My Drive/Colab Notebooks/NLP/'" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "WXYwajPeQbRq" }, "source": [ "#**## Reading and Exploring Data**" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "vAk6BRUh8CqL" }, "source": [ "## Read Data \"Sarcasm_Headlines_Dataset.json\". Explore the data and get some insights about the data.\n", "Hint - As its in json format you need to use pandas.read_json function. Give paraemeter lines = True." ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "StSLB-T8PuGr" }, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "\n", "data = pd.read_json(os.path.join(project_path,'Sarcasm_Headlines_Dataset.json'),lines=True)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 415 }, "colab_type": "code", "id": "m6LXx4qHqgWN", "outputId": "02f65927-d795-43a0-c991-eb338634eb3c" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
article_linkheadlineis_sarcastic
0https://www.huffingtonpost.com/entry/versace-b...former versace store clerk sues over secret 'b...0
1https://www.huffingtonpost.com/entry/roseanne-...the 'roseanne' revival catches up to our thorn...0
2https://local.theonion.com/mom-starting-to-fea...mom starting to fear son's web series closest ...1
3https://politics.theonion.com/boehner-just-wan...boehner just wants wife to listen, not come up...1
4https://www.huffingtonpost.com/entry/jk-rowlin...j.k. rowling wishes snape happy birthday in th...0
............
26704https://www.huffingtonpost.com/entry/american-...american politics in moral free-fall0
26705https://www.huffingtonpost.com/entry/americas-...america's best 20 hikes0
26706https://www.huffingtonpost.com/entry/reparatio...reparations and obama0
26707https://www.huffingtonpost.com/entry/israeli-b...israeli ban targeting boycott supporters raise...0
26708https://www.huffingtonpost.com/entry/gourmet-g...gourmet gifts for the foodie 20140
\n", "

26709 rows × 3 columns

\n", "
" ], "text/plain": [ " article_link ... is_sarcastic\n", "0 https://www.huffingtonpost.com/entry/versace-b... ... 0\n", "1 https://www.huffingtonpost.com/entry/roseanne-... ... 0\n", "2 https://local.theonion.com/mom-starting-to-fea... ... 1\n", "3 https://politics.theonion.com/boehner-just-wan... ... 1\n", "4 https://www.huffingtonpost.com/entry/jk-rowlin... ... 0\n", "... ... ... ...\n", "26704 https://www.huffingtonpost.com/entry/american-... ... 0\n", "26705 https://www.huffingtonpost.com/entry/americas-... ... 0\n", "26706 https://www.huffingtonpost.com/entry/reparatio... ... 0\n", "26707 https://www.huffingtonpost.com/entry/israeli-b... ... 0\n", "26708 https://www.huffingtonpost.com/entry/gourmet-g... ... 0\n", "\n", "[26709 rows x 3 columns]" ] }, "execution_count": 5, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 311 }, "colab_type": "code", "id": "wsNtC1fDqnH_", "outputId": "5bc63360-23a8-4b3b-fac5-8d2a9c00cd87" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(26709, 3)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
is_sarcastic
count26709.000000
mean0.438953
std0.496269
min0.000000
25%0.000000
50%0.000000
75%1.000000
max1.000000
\n", "
" ], "text/plain": [ " is_sarcastic\n", "count 26709.000000\n", "mean 0.438953\n", "std 0.496269\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.000000\n", "75% 1.000000\n", "max 1.000000" ] }, "execution_count": 6, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "print (data.shape)\n", "data.describe()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "colab_type": "code", "id": "tNpztCllqwJT", "outputId": "c18539c0-ff8d-4ba6-86e1-a5d318c22fa6" }, "outputs": [ { "data": { "text/plain": [ "\"the 'roseanne' revival catches up to our thorny political mood, for better and worse\"" ] }, "execution_count": 7, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "data['headline'][1]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 52 }, "colab_type": "code", "id": "DxLSk1n9rFL2", "outputId": "92fb9f7e-9738-49ee-fc6e-60bd28d4c79a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n" ] } ], "source": [ "##The column headline needs to be cleaned up as we have special characters and numbers in the column\n", "\n", "import re\n", "from nltk.corpus import stopwords\n", "import nltk\n", "import string\n", "nltk.download('stopwords')\n", "stopwords = set(stopwords.words('english'))\n", "def cleanData(text):\n", " text = re.sub(r'\\d+', '', text)\n", " text = \"\".join([char for char in text if char not in string.punctuation])\n", " return text\n", "\n", "data['headline']=data['headline'].apply(cleanData)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "colab_type": "code", "id": "e_FfMsSr3W73", "outputId": "5bcdc31d-d5c3-4ca0-a072-5540f57fe079" }, "outputs": [ { "data": { "text/plain": [ "'the roseanne revival catches up to our thorny political mood for better and worse'" ] }, "execution_count": 9, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "data['headline'][1]" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "z6pXf7A78E2H" }, "source": [ "## Drop `article_link` from dataset.\n", "As we only need headline text data and is_sarcastic column for this project. We can drop artical link column here." ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "VLSVsvrlP9qD" }, "outputs": [], "source": [ "data.drop('article_link',inplace=True,axis=1)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "D0h6IOxU8OdH" }, "source": [ "## Get the Length of each line and find the maximum length.\n", "As different lines are of different length. We need to pad the our sequences using the max length." ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "BRAsChZAQmr3" }, "outputs": [], "source": [ "maxlen = max([len(text) for text in data['headline']])" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "VPPd0YuPXi2M" }, "source": [ "#**## Modelling**" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "35abKfRx8as3" }, "source": [ "## Import required modules required for modelling." ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "DVel73hYEV4r" }, "outputs": [], "source": [ "import numpy as np\n", "import tensorflow as tf\n", "from tensorflow.keras.preprocessing.text import Tokenizer\n", "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "from tensorflow.keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, Flatten, Bidirectional, GlobalMaxPool1D\n", "from tensorflow.keras.models import Model, Sequential" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "9ziybaD1RdD9" }, "source": [ "# Set Different Parameters for the model." ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "jPw9gAN_EV6m" }, "outputs": [], "source": [ "max_features = 10000\n", "maxlen = max([len(text) for text in data['headline']])\n", "embedding_size = 200" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "9abSe-bM8fn9" }, "source": [ "## Apply Keras Tokenizer of headline column of your data.\n", "Hint - First create a tokenizer instance using Tokenizer(num_words=max_features) \n", "And then fit this tokenizer instance on your data column df['headline'] using .fit_on_texts()" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "T9Ad26HfTFMS" }, "outputs": [], "source": [ "tokenizer = Tokenizer(num_words=max_features,filters='!\"#$%&()*+,-./:;<=>?@[\\\\]^_`{|}~\\t\\n',lower=True,split=' ', char_level=False)\n", "tokenizer.fit_on_texts(data['headline'])" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "0Ffi63KsST3P" }, "source": [ "# Define X and y for your model." ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 381 }, "colab_type": "code", "id": "wnjxBdqmSS4s", "outputId": "50110aa7-1293-4bf9-bfdc-ee0d7a2a7675" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of Samples: 26709\n", "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 287 780 3505 2213 47 353 92 2111 5\n", " 2476 8139]\n", "Number of Labels: 26709\n", "0\n" ] } ], "source": [ "X = tokenizer.texts_to_sequences(data['headline'])\n", "X = pad_sequences(X, maxlen = maxlen)\n", "y = np.asarray(data['is_sarcastic'])\n", "\n", "print(\"Number of Samples:\", len(X))\n", "print(X[0])\n", "print(\"Number of Labels: \", len(y))\n", "print(y[0])" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "WJLyKg-98rH_" }, "source": [ "## Get the Vocabulary size\n", "You can use tokenizer.word_index." ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "colab_type": "code", "id": "q-2w0gHEUUIo", "outputId": "3a2f9a93-fce6-4066-d3e7-ab84096b6e6a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "27667\n" ] } ], "source": [ "num_words=len(tokenizer.word_index)\n", "print (num_words)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "5hjeMi40XcB1" }, "source": [ "#**## Word Embedding**" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "bUF1TuQa8ux0" }, "source": [ "## Get Glove Word Embeddings" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "vq5AIfRtMeZh" }, "outputs": [], "source": [ "glove_file = project_path + \"glove.6B.zip\"" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "DJLX_n2WMecA" }, "outputs": [], "source": [ "#Extract Glove embedding zip file\n", "from zipfile import ZipFile\n", "with ZipFile(glove_file, 'r') as z:\n", " z.extractall()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "9IuXlu8-U3HG" }, "source": [ "# Get the Word Embeddings using Embedding file as given below." ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "elZ-T5aFGZmZ" }, "outputs": [], "source": [ "EMBEDDING_FILE = './glove.6B.200d.txt'\n", "\n", "embeddings = {}\n", "for o in open(EMBEDDING_FILE):\n", " word = o.split(\" \")[0]\n", " # print(word)\n", " embd = o.split(\" \")[1:]\n", " embd = np.asarray(embd, dtype='float32')\n", " # print(embd)\n", " embeddings[word] = embd\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "bTPxveDmVCrA" }, "source": [ "# Create a weight matrix for words in training docs" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 235 }, "colab_type": "code", "id": "xQgOhiywU9nU", "outputId": "3e43d444-0fec-4777-a46c-fc2a20530eb5" }, "outputs": [ { "ename": "IndexError", "evalue": "ignored", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0membedding_vector\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membeddings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0membedding_vector\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0membedding_matrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membedding_vector\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membeddings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mIndexError\u001b[0m: index 27667 is out of bounds for axis 0 with size 27667" ] } ], "source": [ "embedding_matrix = np.zeros((num_words, 200))\n", "\n", "for word, i in tokenizer.word_index.items():\n", " embedding_vector = embeddings.get(word)\n", " if embedding_vector is not None:\n", " embedding_matrix[i] = embedding_vector\n", "\n", "len(embeddings.values())" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "u7IbWuEX82Ra" }, "source": [ "## Create and Compile your Model\n", "Hint - Use Sequential model instance and then add Embedding layer, Bidirectional(LSTM) layer, then dense and dropout layers as required. \n", "In the end add a final dense layer with sigmoid activation for binary classification.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "d7jhsSgYXG4l" }, "outputs": [], "source": [ "### Embedding layer for hint \n", "## model.add(Embedding(num_words, embedding_size, weights = [embedding_matrix]))\n", "### Bidirectional LSTM layer for hint \n", "## model.add(Bidirectional(LSTM(128, return_sequences = True)))\n", "import tensorflow as tf\n", "\n", "input_layer = Input(shape=(maxlen,),dtype=tf.int64)\n", "embed = Embedding(embedding_matrix.shape[0],output_dim=200,weights=[embedding_matrix],input_length=maxlen, trainable=True)(input_layer)\n", "lstm=Bidirectional(LSTM(128))(embed)\n", "drop=Dropout(0.3)(lstm)\n", "dense =Dense(100,activation='relu')(drop)\n", "out=Dense(2,activation='softmax')(dense)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "IJFMxZwMWoTw" }, "source": [ "# Fit your model with a batch size of 100 and validation_split = 0.2. and state the validation accuracy\n" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 364 }, "colab_type": "code", "id": "ZpVkajCcWnRK", "outputId": "6463c9ee-b14b-4792-e27a-9b950b0997a4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model_1\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "input_2 (InputLayer) [(None, 240)] 0 \n", "_________________________________________________________________\n", "embedding_1 (Embedding) (None, 240, 200) 5533400 \n", "_________________________________________________________________\n", "bidirectional_1 (Bidirection (None, 256) 336896 \n", "_________________________________________________________________\n", "dropout_1 (Dropout) (None, 256) 0 \n", "_________________________________________________________________\n", "dense_2 (Dense) (None, 100) 25700 \n", "_________________________________________________________________\n", "dense_3 (Dense) (None, 2) 202 \n", "=================================================================\n", "Total params: 5,896,198\n", "Trainable params: 5,896,198\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "batch_size = 100\n", "epochs = 5\n", "\n", "model = Model(input_layer,out)\n", "model.compile(loss='sparse_categorical_crossentropy',optimizer=\"adam\",metrics=['accuracy'])\n", "model.summary()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 225 }, "colab_type": "code", "id": "TNDkfYCWte4Q", "outputId": "10c71378-2dcb-4ee3-fcdd-bdae376e9ca9" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train on 21367 samples\n", "Epoch 1/5\n", "21367/21367 [==============================] - 460s 22ms/sample - loss: 0.4514 - accuracy: 0.7802\n", "Epoch 2/5\n", "21367/21367 [==============================] - 457s 21ms/sample - loss: 0.2630 - accuracy: 0.8925\n", "Epoch 3/5\n", "21367/21367 [==============================] - 459s 22ms/sample - loss: 0.1763 - accuracy: 0.9320\n", "Epoch 4/5\n", "21367/21367 [==============================] - 468s 22ms/sample - loss: 0.1181 - accuracy: 0.9570\n", "Epoch 5/5\n", "21367/21367 [==============================] - 468s 22ms/sample - loss: 0.0718 - accuracy: 0.9758\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 31, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)\n", "\n", "model.fit(X_train,y_train,batch_size=batch_size, epochs=epochs, verbose=1)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 55 }, "colab_type": "code", "id": "kqvqFFu7tjqH", "outputId": "d31ef4a5-9371-4b1f-b83d-f2095a6e23ba" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r", "5342/1 [====================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================] - 54s 10ms/sample\n" ] } ], "source": [ "test_pred = model.predict(np.array(X_test), verbose=1)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "YkgjPw-ctohS" }, "outputs": [], "source": [ "test_pred = [1 if j>i else 0 for i,j in test_pred]" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 52 }, "colab_type": "code", "id": "683aglhutqwx", "outputId": "da6dcbdf-89d9-4aba-c6ee-03548c3d889a" }, "outputs": [ { "data": { "text/plain": [ "array([[2651, 380],\n", " [ 326, 1985]])" ] }, "execution_count": 34, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "from sklearn.metrics import confusion_matrix\n", "confusion_matrix(y_test, test_pred)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 173 }, "colab_type": "code", "id": "PNjr1BQBtxGe", "outputId": "da88dbb7-9caf-41d2-d85b-ae3cc0c4fd18" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.89 0.87 0.88 3031\n", " 1 0.84 0.86 0.85 2311\n", "\n", " accuracy 0.87 5342\n", " macro avg 0.86 0.87 0.87 5342\n", "weighted avg 0.87 0.87 0.87 5342\n", "\n" ] } ], "source": [ "from sklearn.metrics import classification_report\n", "\n", "print(classification_report(y_test, test_pred))" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab": {}, "colab_type": "code", "id": "ssLxdHIGt6Nl" }, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [], "name": "NLP_Project_Sarcasm_Detection_Kavitha.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 1 }