diff --git "a/Sarcasm-Detection-NLP-master/Sarcasm_Detection_NLP.ipynb" "b/Sarcasm-Detection-NLP-master/Sarcasm_Detection_NLP.ipynb" new file mode 100644--- /dev/null +++ "b/Sarcasm-Detection-NLP-master/Sarcasm_Detection_NLP.ipynb" @@ -0,0 +1,1176 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "pp68FAQf9aMN" + }, + "source": [ + "# Sarcasm Detection\n", + " **Acknowledgement**\n", + "\n", + "Misra, Rishabh, and Prahal Arora. \"Sarcasm Detection using Hybrid Neural Network.\" arXiv preprint arXiv:1908.07414 (2019).\n", + "\n", + "**Required Files given in below link.**\n", + "\n", + "https://drive.google.com/drive/folders/1xUnF35naPGU63xwRDVGc-DkZ3M8V5mMk" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "S3Wj_mIZ8S3K" + }, + "source": [ + "## Install `Tensorflow2.0` " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 679 + }, + "colab_type": "code", + "id": "jW2Uk8otQvi8", + "outputId": "08f2d715-7e92-4ea5-d85c-2b97e0cf9f2e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting tensorflow==2.0.0\n", + " Using cached https://files.pythonhosted.org/packages/46/0f/7bd55361168bb32796b360ad15a25de6966c9c1beb58a8e30c01c8279862/tensorflow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl\n", + "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (3.10.0)\n", + "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (3.2.1)\n", + "Requirement already satisfied: tensorflow-estimator<2.1.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (2.0.1)\n", + "Requirement already satisfied: keras-applications>=1.0.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.0.8)\n", + "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.12.1)\n", + "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.18.4)\n", + "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.28.1)\n", + "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.34.2)\n", + "Requirement already satisfied: gast==0.2.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.2.2)\n", + "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.2.0)\n", + "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.8.1)\n", + "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.12.0)\n", + "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.1.0)\n", + "Requirement already satisfied: tensorboard<2.1.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (2.0.2)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.1.0)\n", + "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.9.0)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.6.1->tensorflow==2.0.0) (46.1.3)\n", + "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow==2.0.0) (2.10.0)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.0.1)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.2.1)\n", + "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.7.2)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (2.23.0)\n", + "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (0.4.1)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (0.2.8)\n", + "Requirement already satisfied: cachetools<3.2,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.1.1)\n", + "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (4.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (2020.4.5.1)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.24.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.0.4)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (2.9)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.3.0)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (0.4.8)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.1.0)\n", + "Installing collected packages: tensorflow\n", + "Successfully installed tensorflow-2.0.0\n" + ] + } + ], + "source": [ + "!!pip uninstall tensorflow\n", + "!pip install tensorflow==2.0.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "v9kv9tyJ77eF" + }, + "source": [ + "## Get Required Files from Drive" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "colab_type": "code", + "id": "D0O_n6OIEVyL", + "outputId": "2f8098b6-740a-4e8d-ac4e-597c8727791a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount(\"/content/drive/\", force_remount=True).\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive/')" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "0mgRpOvFMjKR" + }, + "outputs": [], + "source": [ + "#Set your project path \n", + "project_path = '/content/drive/My Drive/Colab Notebooks/NLP/'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "WXYwajPeQbRq" + }, + "source": [ + "#**## Reading and Exploring Data**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "vAk6BRUh8CqL" + }, + "source": [ + "## Read Data \"Sarcasm_Headlines_Dataset.json\". Explore the data and get some insights about the data.\n", + "Hint - As its in json format you need to use pandas.read_json function. Give paraemeter lines = True." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "StSLB-T8PuGr" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "\n", + "data = pd.read_json(os.path.join(project_path,'Sarcasm_Headlines_Dataset.json'),lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 415 + }, + "colab_type": "code", + "id": "m6LXx4qHqgWN", + "outputId": "02f65927-d795-43a0-c991-eb338634eb3c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
article_linkheadlineis_sarcastic
0https://www.huffingtonpost.com/entry/versace-b...former versace store clerk sues over secret 'b...0
1https://www.huffingtonpost.com/entry/roseanne-...the 'roseanne' revival catches up to our thorn...0
2https://local.theonion.com/mom-starting-to-fea...mom starting to fear son's web series closest ...1
3https://politics.theonion.com/boehner-just-wan...boehner just wants wife to listen, not come up...1
4https://www.huffingtonpost.com/entry/jk-rowlin...j.k. rowling wishes snape happy birthday in th...0
............
26704https://www.huffingtonpost.com/entry/american-...american politics in moral free-fall0
26705https://www.huffingtonpost.com/entry/americas-...america's best 20 hikes0
26706https://www.huffingtonpost.com/entry/reparatio...reparations and obama0
26707https://www.huffingtonpost.com/entry/israeli-b...israeli ban targeting boycott supporters raise...0
26708https://www.huffingtonpost.com/entry/gourmet-g...gourmet gifts for the foodie 20140
\n", + "

26709 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " article_link ... is_sarcastic\n", + "0 https://www.huffingtonpost.com/entry/versace-b... ... 0\n", + "1 https://www.huffingtonpost.com/entry/roseanne-... ... 0\n", + "2 https://local.theonion.com/mom-starting-to-fea... ... 1\n", + "3 https://politics.theonion.com/boehner-just-wan... ... 1\n", + "4 https://www.huffingtonpost.com/entry/jk-rowlin... ... 0\n", + "... ... ... ...\n", + "26704 https://www.huffingtonpost.com/entry/american-... ... 0\n", + "26705 https://www.huffingtonpost.com/entry/americas-... ... 0\n", + "26706 https://www.huffingtonpost.com/entry/reparatio... ... 0\n", + "26707 https://www.huffingtonpost.com/entry/israeli-b... ... 0\n", + "26708 https://www.huffingtonpost.com/entry/gourmet-g... ... 0\n", + "\n", + "[26709 rows x 3 columns]" + ] + }, + "execution_count": 5, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 311 + }, + "colab_type": "code", + "id": "wsNtC1fDqnH_", + "outputId": "5bc63360-23a8-4b3b-fac5-8d2a9c00cd87" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(26709, 3)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
is_sarcastic
count26709.000000
mean0.438953
std0.496269
min0.000000
25%0.000000
50%0.000000
75%1.000000
max1.000000
\n", + "
" + ], + "text/plain": [ + " is_sarcastic\n", + "count 26709.000000\n", + "mean 0.438953\n", + "std 0.496269\n", + "min 0.000000\n", + "25% 0.000000\n", + "50% 0.000000\n", + "75% 1.000000\n", + "max 1.000000" + ] + }, + "execution_count": 6, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "print (data.shape)\n", + "data.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "colab_type": "code", + "id": "tNpztCllqwJT", + "outputId": "c18539c0-ff8d-4ba6-86e1-a5d318c22fa6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "\"the 'roseanne' revival catches up to our thorny political mood, for better and worse\"" + ] + }, + "execution_count": 7, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data['headline'][1]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "colab_type": "code", + "id": "DxLSk1n9rFL2", + "outputId": "92fb9f7e-9738-49ee-fc6e-60bd28d4c79a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n" + ] + } + ], + "source": [ + "##The column headline needs to be cleaned up as we have special characters and numbers in the column\n", + "\n", + "import re\n", + "from nltk.corpus import stopwords\n", + "import nltk\n", + "import string\n", + "nltk.download('stopwords')\n", + "stopwords = set(stopwords.words('english'))\n", + "def cleanData(text):\n", + " text = re.sub(r'\\d+', '', text)\n", + " text = \"\".join([char for char in text if char not in string.punctuation])\n", + " return text\n", + "\n", + "data['headline']=data['headline'].apply(cleanData)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "colab_type": "code", + "id": "e_FfMsSr3W73", + "outputId": "5bcdc31d-d5c3-4ca0-a072-5540f57fe079" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'the roseanne revival catches up to our thorny political mood for better and worse'" + ] + }, + "execution_count": 9, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data['headline'][1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "z6pXf7A78E2H" + }, + "source": [ + "## Drop `article_link` from dataset.\n", + "As we only need headline text data and is_sarcastic column for this project. We can drop artical link column here." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "VLSVsvrlP9qD" + }, + "outputs": [], + "source": [ + "data.drop('article_link',inplace=True,axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "D0h6IOxU8OdH" + }, + "source": [ + "## Get the Length of each line and find the maximum length.\n", + "As different lines are of different length. We need to pad the our sequences using the max length." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "BRAsChZAQmr3" + }, + "outputs": [], + "source": [ + "maxlen = max([len(text) for text in data['headline']])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "VPPd0YuPXi2M" + }, + "source": [ + "#**## Modelling**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "35abKfRx8as3" + }, + "source": [ + "## Import required modules required for modelling." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "DVel73hYEV4r" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow.keras.preprocessing.text import Tokenizer\n", + "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", + "from tensorflow.keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, Flatten, Bidirectional, GlobalMaxPool1D\n", + "from tensorflow.keras.models import Model, Sequential" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9ziybaD1RdD9" + }, + "source": [ + "# Set Different Parameters for the model." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "jPw9gAN_EV6m" + }, + "outputs": [], + "source": [ + "max_features = 10000\n", + "maxlen = max([len(text) for text in data['headline']])\n", + "embedding_size = 200" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9abSe-bM8fn9" + }, + "source": [ + "## Apply Keras Tokenizer of headline column of your data.\n", + "Hint - First create a tokenizer instance using Tokenizer(num_words=max_features) \n", + "And then fit this tokenizer instance on your data column df['headline'] using .fit_on_texts()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "T9Ad26HfTFMS" + }, + "outputs": [], + "source": [ + "tokenizer = Tokenizer(num_words=max_features,filters='!\"#$%&()*+,-./:;<=>?@[\\\\]^_`{|}~\\t\\n',lower=True,split=' ', char_level=False)\n", + "tokenizer.fit_on_texts(data['headline'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "0Ffi63KsST3P" + }, + "source": [ + "# Define X and y for your model." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 381 + }, + "colab_type": "code", + "id": "wnjxBdqmSS4s", + "outputId": "50110aa7-1293-4bf9-bfdc-ee0d7a2a7675" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of Samples: 26709\n", + "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 287 780 3505 2213 47 353 92 2111 5\n", + " 2476 8139]\n", + "Number of Labels: 26709\n", + "0\n" + ] + } + ], + "source": [ + "X = tokenizer.texts_to_sequences(data['headline'])\n", + "X = pad_sequences(X, maxlen = maxlen)\n", + "y = np.asarray(data['is_sarcastic'])\n", + "\n", + "print(\"Number of Samples:\", len(X))\n", + "print(X[0])\n", + "print(\"Number of Labels: \", len(y))\n", + "print(y[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "WJLyKg-98rH_" + }, + "source": [ + "## Get the Vocabulary size\n", + "You can use tokenizer.word_index." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "colab_type": "code", + "id": "q-2w0gHEUUIo", + "outputId": "3a2f9a93-fce6-4066-d3e7-ab84096b6e6a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "27667\n" + ] + } + ], + "source": [ + "num_words=len(tokenizer.word_index)\n", + "print (num_words)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "5hjeMi40XcB1" + }, + "source": [ + "#**## Word Embedding**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "bUF1TuQa8ux0" + }, + "source": [ + "## Get Glove Word Embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "vq5AIfRtMeZh" + }, + "outputs": [], + "source": [ + "glove_file = project_path + \"glove.6B.zip\"" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "DJLX_n2WMecA" + }, + "outputs": [], + "source": [ + "#Extract Glove embedding zip file\n", + "from zipfile import ZipFile\n", + "with ZipFile(glove_file, 'r') as z:\n", + " z.extractall()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9IuXlu8-U3HG" + }, + "source": [ + "# Get the Word Embeddings using Embedding file as given below." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "elZ-T5aFGZmZ" + }, + "outputs": [], + "source": [ + "EMBEDDING_FILE = './glove.6B.200d.txt'\n", + "\n", + "embeddings = {}\n", + "for o in open(EMBEDDING_FILE):\n", + " word = o.split(\" \")[0]\n", + " # print(word)\n", + " embd = o.split(\" \")[1:]\n", + " embd = np.asarray(embd, dtype='float32')\n", + " # print(embd)\n", + " embeddings[word] = embd\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "bTPxveDmVCrA" + }, + "source": [ + "# Create a weight matrix for words in training docs" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + }, + "colab_type": "code", + "id": "xQgOhiywU9nU", + "outputId": "3e43d444-0fec-4777-a46c-fc2a20530eb5" + }, + "outputs": [ + { + "ename": "IndexError", + "evalue": "ignored", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0membedding_vector\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membeddings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0membedding_vector\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0membedding_matrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membedding_vector\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membeddings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: index 27667 is out of bounds for axis 0 with size 27667" + ] + } + ], + "source": [ + "embedding_matrix = np.zeros((num_words, 200))\n", + "\n", + "for word, i in tokenizer.word_index.items():\n", + " embedding_vector = embeddings.get(word)\n", + " if embedding_vector is not None:\n", + " embedding_matrix[i] = embedding_vector\n", + "\n", + "len(embeddings.values())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "u7IbWuEX82Ra" + }, + "source": [ + "## Create and Compile your Model\n", + "Hint - Use Sequential model instance and then add Embedding layer, Bidirectional(LSTM) layer, then dense and dropout layers as required. \n", + "In the end add a final dense layer with sigmoid activation for binary classification.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "d7jhsSgYXG4l" + }, + "outputs": [], + "source": [ + "### Embedding layer for hint \n", + "## model.add(Embedding(num_words, embedding_size, weights = [embedding_matrix]))\n", + "### Bidirectional LSTM layer for hint \n", + "## model.add(Bidirectional(LSTM(128, return_sequences = True)))\n", + "import tensorflow as tf\n", + "\n", + "input_layer = Input(shape=(maxlen,),dtype=tf.int64)\n", + "embed = Embedding(embedding_matrix.shape[0],output_dim=200,weights=[embedding_matrix],input_length=maxlen, trainable=True)(input_layer)\n", + "lstm=Bidirectional(LSTM(128))(embed)\n", + "drop=Dropout(0.3)(lstm)\n", + "dense =Dense(100,activation='relu')(drop)\n", + "out=Dense(2,activation='softmax')(dense)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "IJFMxZwMWoTw" + }, + "source": [ + "# Fit your model with a batch size of 100 and validation_split = 0.2. and state the validation accuracy\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 364 + }, + "colab_type": "code", + "id": "ZpVkajCcWnRK", + "outputId": "6463c9ee-b14b-4792-e27a-9b950b0997a4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"model_1\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "input_2 (InputLayer) [(None, 240)] 0 \n", + "_________________________________________________________________\n", + "embedding_1 (Embedding) (None, 240, 200) 5533400 \n", + "_________________________________________________________________\n", + "bidirectional_1 (Bidirection (None, 256) 336896 \n", + "_________________________________________________________________\n", + "dropout_1 (Dropout) (None, 256) 0 \n", + "_________________________________________________________________\n", + "dense_2 (Dense) (None, 100) 25700 \n", + "_________________________________________________________________\n", + "dense_3 (Dense) (None, 2) 202 \n", + "=================================================================\n", + "Total params: 5,896,198\n", + "Trainable params: 5,896,198\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "batch_size = 100\n", + "epochs = 5\n", + "\n", + "model = Model(input_layer,out)\n", + "model.compile(loss='sparse_categorical_crossentropy',optimizer=\"adam\",metrics=['accuracy'])\n", + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "colab_type": "code", + "id": "TNDkfYCWte4Q", + "outputId": "10c71378-2dcb-4ee3-fcdd-bdae376e9ca9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train on 21367 samples\n", + "Epoch 1/5\n", + "21367/21367 [==============================] - 460s 22ms/sample - loss: 0.4514 - accuracy: 0.7802\n", + "Epoch 2/5\n", + "21367/21367 [==============================] - 457s 21ms/sample - loss: 0.2630 - accuracy: 0.8925\n", + "Epoch 3/5\n", + "21367/21367 [==============================] - 459s 22ms/sample - loss: 0.1763 - accuracy: 0.9320\n", + "Epoch 4/5\n", + "21367/21367 [==============================] - 468s 22ms/sample - loss: 0.1181 - accuracy: 0.9570\n", + "Epoch 5/5\n", + "21367/21367 [==============================] - 468s 22ms/sample - loss: 0.0718 - accuracy: 0.9758\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)\n", + "\n", + "model.fit(X_train,y_train,batch_size=batch_size, epochs=epochs, verbose=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 55 + }, + "colab_type": "code", + "id": "kqvqFFu7tjqH", + "outputId": "d31ef4a5-9371-4b1f-b83d-f2095a6e23ba" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5342/1 [====================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================] - 54s 10ms/sample\n" + ] + } + ], + "source": [ + "test_pred = model.predict(np.array(X_test), verbose=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "YkgjPw-ctohS" + }, + "outputs": [], + "source": [ + "test_pred = [1 if j>i else 0 for i,j in test_pred]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "colab_type": "code", + "id": "683aglhutqwx", + "outputId": "da6dcbdf-89d9-4aba-c6ee-03548c3d889a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2651, 380],\n", + " [ 326, 1985]])" + ] + }, + "execution_count": 34, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "confusion_matrix(y_test, test_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 173 + }, + "colab_type": "code", + "id": "PNjr1BQBtxGe", + "outputId": "da88dbb7-9caf-41d2-d85b-ae3cc0c4fd18" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.89 0.87 0.88 3031\n", + " 1 0.84 0.86 0.85 2311\n", + "\n", + " accuracy 0.87 5342\n", + " macro avg 0.86 0.87 0.87 5342\n", + "weighted avg 0.87 0.87 0.87 5342\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.metrics import classification_report\n", + "\n", + "print(classification_report(y_test, test_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ssLxdHIGt6Nl" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "NLP_Project_Sarcasm_Detection_Kavitha.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}