diff --git "a/Sarcasm-Detection-NLP-master/Sarcasm_Detection_NLP.ipynb" "b/Sarcasm-Detection-NLP-master/Sarcasm_Detection_NLP.ipynb"
new file mode 100644--- /dev/null
+++ "b/Sarcasm-Detection-NLP-master/Sarcasm_Detection_NLP.ipynb"
@@ -0,0 +1,1176 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "pp68FAQf9aMN"
+ },
+ "source": [
+ "# Sarcasm Detection\n",
+ " **Acknowledgement**\n",
+ "\n",
+ "Misra, Rishabh, and Prahal Arora. \"Sarcasm Detection using Hybrid Neural Network.\" arXiv preprint arXiv:1908.07414 (2019).\n",
+ "\n",
+ "**Required Files given in below link.**\n",
+ "\n",
+ "https://drive.google.com/drive/folders/1xUnF35naPGU63xwRDVGc-DkZ3M8V5mMk"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "S3Wj_mIZ8S3K"
+ },
+ "source": [
+ "## Install `Tensorflow2.0` "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 679
+ },
+ "colab_type": "code",
+ "id": "jW2Uk8otQvi8",
+ "outputId": "08f2d715-7e92-4ea5-d85c-2b97e0cf9f2e"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Collecting tensorflow==2.0.0\n",
+ " Using cached https://files.pythonhosted.org/packages/46/0f/7bd55361168bb32796b360ad15a25de6966c9c1beb58a8e30c01c8279862/tensorflow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl\n",
+ "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (3.10.0)\n",
+ "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (3.2.1)\n",
+ "Requirement already satisfied: tensorflow-estimator<2.1.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (2.0.1)\n",
+ "Requirement already satisfied: keras-applications>=1.0.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.0.8)\n",
+ "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.12.1)\n",
+ "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.18.4)\n",
+ "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.28.1)\n",
+ "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.34.2)\n",
+ "Requirement already satisfied: gast==0.2.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.2.2)\n",
+ "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.2.0)\n",
+ "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.8.1)\n",
+ "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.12.0)\n",
+ "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.1.0)\n",
+ "Requirement already satisfied: tensorboard<2.1.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (2.0.2)\n",
+ "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (1.1.0)\n",
+ "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==2.0.0) (0.9.0)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.6.1->tensorflow==2.0.0) (46.1.3)\n",
+ "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow==2.0.0) (2.10.0)\n",
+ "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.0.1)\n",
+ "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.2.1)\n",
+ "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.7.2)\n",
+ "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (2.23.0)\n",
+ "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (0.4.1)\n",
+ "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (0.2.8)\n",
+ "Requirement already satisfied: cachetools<3.2,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.1.1)\n",
+ "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (4.0)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (2020.4.5.1)\n",
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.24.3)\n",
+ "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.0.4)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (2.9)\n",
+ "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (1.3.0)\n",
+ "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (0.4.8)\n",
+ "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow==2.0.0) (3.1.0)\n",
+ "Installing collected packages: tensorflow\n",
+ "Successfully installed tensorflow-2.0.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "!!pip uninstall tensorflow\n",
+ "!pip install tensorflow==2.0.0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "v9kv9tyJ77eF"
+ },
+ "source": [
+ "## Get Required Files from Drive"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "colab_type": "code",
+ "id": "D0O_n6OIEVyL",
+ "outputId": "2f8098b6-740a-4e8d-ac4e-597c8727791a"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount(\"/content/drive/\", force_remount=True).\n"
+ ]
+ }
+ ],
+ "source": [
+ "from google.colab import drive\n",
+ "drive.mount('/content/drive/')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "0mgRpOvFMjKR"
+ },
+ "outputs": [],
+ "source": [
+ "#Set your project path \n",
+ "project_path = '/content/drive/My Drive/Colab Notebooks/NLP/'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "WXYwajPeQbRq"
+ },
+ "source": [
+ "#**## Reading and Exploring Data**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "vAk6BRUh8CqL"
+ },
+ "source": [
+ "## Read Data \"Sarcasm_Headlines_Dataset.json\". Explore the data and get some insights about the data.\n",
+ "Hint - As its in json format you need to use pandas.read_json function. Give paraemeter lines = True."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "StSLB-T8PuGr"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import os\n",
+ "\n",
+ "data = pd.read_json(os.path.join(project_path,'Sarcasm_Headlines_Dataset.json'),lines=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 415
+ },
+ "colab_type": "code",
+ "id": "m6LXx4qHqgWN",
+ "outputId": "02f65927-d795-43a0-c991-eb338634eb3c"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " article_link | \n",
+ " headline | \n",
+ " is_sarcastic | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " https://www.huffingtonpost.com/entry/versace-b... | \n",
+ " former versace store clerk sues over secret 'b... | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " https://www.huffingtonpost.com/entry/roseanne-... | \n",
+ " the 'roseanne' revival catches up to our thorn... | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " https://local.theonion.com/mom-starting-to-fea... | \n",
+ " mom starting to fear son's web series closest ... | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " https://politics.theonion.com/boehner-just-wan... | \n",
+ " boehner just wants wife to listen, not come up... | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " https://www.huffingtonpost.com/entry/jk-rowlin... | \n",
+ " j.k. rowling wishes snape happy birthday in th... | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 26704 | \n",
+ " https://www.huffingtonpost.com/entry/american-... | \n",
+ " american politics in moral free-fall | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 26705 | \n",
+ " https://www.huffingtonpost.com/entry/americas-... | \n",
+ " america's best 20 hikes | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 26706 | \n",
+ " https://www.huffingtonpost.com/entry/reparatio... | \n",
+ " reparations and obama | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 26707 | \n",
+ " https://www.huffingtonpost.com/entry/israeli-b... | \n",
+ " israeli ban targeting boycott supporters raise... | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 26708 | \n",
+ " https://www.huffingtonpost.com/entry/gourmet-g... | \n",
+ " gourmet gifts for the foodie 2014 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
26709 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " article_link ... is_sarcastic\n",
+ "0 https://www.huffingtonpost.com/entry/versace-b... ... 0\n",
+ "1 https://www.huffingtonpost.com/entry/roseanne-... ... 0\n",
+ "2 https://local.theonion.com/mom-starting-to-fea... ... 1\n",
+ "3 https://politics.theonion.com/boehner-just-wan... ... 1\n",
+ "4 https://www.huffingtonpost.com/entry/jk-rowlin... ... 0\n",
+ "... ... ... ...\n",
+ "26704 https://www.huffingtonpost.com/entry/american-... ... 0\n",
+ "26705 https://www.huffingtonpost.com/entry/americas-... ... 0\n",
+ "26706 https://www.huffingtonpost.com/entry/reparatio... ... 0\n",
+ "26707 https://www.huffingtonpost.com/entry/israeli-b... ... 0\n",
+ "26708 https://www.huffingtonpost.com/entry/gourmet-g... ... 0\n",
+ "\n",
+ "[26709 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 311
+ },
+ "colab_type": "code",
+ "id": "wsNtC1fDqnH_",
+ "outputId": "5bc63360-23a8-4b3b-fac5-8d2a9c00cd87"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(26709, 3)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " is_sarcastic | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 26709.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 0.438953 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 0.496269 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " is_sarcastic\n",
+ "count 26709.000000\n",
+ "mean 0.438953\n",
+ "std 0.496269\n",
+ "min 0.000000\n",
+ "25% 0.000000\n",
+ "50% 0.000000\n",
+ "75% 1.000000\n",
+ "max 1.000000"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "print (data.shape)\n",
+ "data.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "colab_type": "code",
+ "id": "tNpztCllqwJT",
+ "outputId": "c18539c0-ff8d-4ba6-86e1-a5d318c22fa6"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\"the 'roseanne' revival catches up to our thorny political mood, for better and worse\""
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data['headline'][1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 52
+ },
+ "colab_type": "code",
+ "id": "DxLSk1n9rFL2",
+ "outputId": "92fb9f7e-9738-49ee-fc6e-60bd28d4c79a"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+ "[nltk_data] Package stopwords is already up-to-date!\n"
+ ]
+ }
+ ],
+ "source": [
+ "##The column headline needs to be cleaned up as we have special characters and numbers in the column\n",
+ "\n",
+ "import re\n",
+ "from nltk.corpus import stopwords\n",
+ "import nltk\n",
+ "import string\n",
+ "nltk.download('stopwords')\n",
+ "stopwords = set(stopwords.words('english'))\n",
+ "def cleanData(text):\n",
+ " text = re.sub(r'\\d+', '', text)\n",
+ " text = \"\".join([char for char in text if char not in string.punctuation])\n",
+ " return text\n",
+ "\n",
+ "data['headline']=data['headline'].apply(cleanData)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "colab_type": "code",
+ "id": "e_FfMsSr3W73",
+ "outputId": "5bcdc31d-d5c3-4ca0-a072-5540f57fe079"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'the roseanne revival catches up to our thorny political mood for better and worse'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data['headline'][1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "z6pXf7A78E2H"
+ },
+ "source": [
+ "## Drop `article_link` from dataset.\n",
+ "As we only need headline text data and is_sarcastic column for this project. We can drop artical link column here."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "VLSVsvrlP9qD"
+ },
+ "outputs": [],
+ "source": [
+ "data.drop('article_link',inplace=True,axis=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "D0h6IOxU8OdH"
+ },
+ "source": [
+ "## Get the Length of each line and find the maximum length.\n",
+ "As different lines are of different length. We need to pad the our sequences using the max length."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "BRAsChZAQmr3"
+ },
+ "outputs": [],
+ "source": [
+ "maxlen = max([len(text) for text in data['headline']])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "VPPd0YuPXi2M"
+ },
+ "source": [
+ "#**## Modelling**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "35abKfRx8as3"
+ },
+ "source": [
+ "## Import required modules required for modelling."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "DVel73hYEV4r"
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "from tensorflow.keras.preprocessing.text import Tokenizer\n",
+ "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
+ "from tensorflow.keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, Flatten, Bidirectional, GlobalMaxPool1D\n",
+ "from tensorflow.keras.models import Model, Sequential"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "9ziybaD1RdD9"
+ },
+ "source": [
+ "# Set Different Parameters for the model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "jPw9gAN_EV6m"
+ },
+ "outputs": [],
+ "source": [
+ "max_features = 10000\n",
+ "maxlen = max([len(text) for text in data['headline']])\n",
+ "embedding_size = 200"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "9abSe-bM8fn9"
+ },
+ "source": [
+ "## Apply Keras Tokenizer of headline column of your data.\n",
+ "Hint - First create a tokenizer instance using Tokenizer(num_words=max_features) \n",
+ "And then fit this tokenizer instance on your data column df['headline'] using .fit_on_texts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "T9Ad26HfTFMS"
+ },
+ "outputs": [],
+ "source": [
+ "tokenizer = Tokenizer(num_words=max_features,filters='!\"#$%&()*+,-./:;<=>?@[\\\\]^_`{|}~\\t\\n',lower=True,split=' ', char_level=False)\n",
+ "tokenizer.fit_on_texts(data['headline'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "0Ffi63KsST3P"
+ },
+ "source": [
+ "# Define X and y for your model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 381
+ },
+ "colab_type": "code",
+ "id": "wnjxBdqmSS4s",
+ "outputId": "50110aa7-1293-4bf9-bfdc-ee0d7a2a7675"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of Samples: 26709\n",
+ "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 287 780 3505 2213 47 353 92 2111 5\n",
+ " 2476 8139]\n",
+ "Number of Labels: 26709\n",
+ "0\n"
+ ]
+ }
+ ],
+ "source": [
+ "X = tokenizer.texts_to_sequences(data['headline'])\n",
+ "X = pad_sequences(X, maxlen = maxlen)\n",
+ "y = np.asarray(data['is_sarcastic'])\n",
+ "\n",
+ "print(\"Number of Samples:\", len(X))\n",
+ "print(X[0])\n",
+ "print(\"Number of Labels: \", len(y))\n",
+ "print(y[0])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "WJLyKg-98rH_"
+ },
+ "source": [
+ "## Get the Vocabulary size\n",
+ "You can use tokenizer.word_index."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "colab_type": "code",
+ "id": "q-2w0gHEUUIo",
+ "outputId": "3a2f9a93-fce6-4066-d3e7-ab84096b6e6a"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "27667\n"
+ ]
+ }
+ ],
+ "source": [
+ "num_words=len(tokenizer.word_index)\n",
+ "print (num_words)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "5hjeMi40XcB1"
+ },
+ "source": [
+ "#**## Word Embedding**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "bUF1TuQa8ux0"
+ },
+ "source": [
+ "## Get Glove Word Embeddings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "vq5AIfRtMeZh"
+ },
+ "outputs": [],
+ "source": [
+ "glove_file = project_path + \"glove.6B.zip\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "DJLX_n2WMecA"
+ },
+ "outputs": [],
+ "source": [
+ "#Extract Glove embedding zip file\n",
+ "from zipfile import ZipFile\n",
+ "with ZipFile(glove_file, 'r') as z:\n",
+ " z.extractall()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "9IuXlu8-U3HG"
+ },
+ "source": [
+ "# Get the Word Embeddings using Embedding file as given below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "elZ-T5aFGZmZ"
+ },
+ "outputs": [],
+ "source": [
+ "EMBEDDING_FILE = './glove.6B.200d.txt'\n",
+ "\n",
+ "embeddings = {}\n",
+ "for o in open(EMBEDDING_FILE):\n",
+ " word = o.split(\" \")[0]\n",
+ " # print(word)\n",
+ " embd = o.split(\" \")[1:]\n",
+ " embd = np.asarray(embd, dtype='float32')\n",
+ " # print(embd)\n",
+ " embeddings[word] = embd\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "bTPxveDmVCrA"
+ },
+ "source": [
+ "# Create a weight matrix for words in training docs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 235
+ },
+ "colab_type": "code",
+ "id": "xQgOhiywU9nU",
+ "outputId": "3e43d444-0fec-4777-a46c-fc2a20530eb5"
+ },
+ "outputs": [
+ {
+ "ename": "IndexError",
+ "evalue": "ignored",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0membedding_vector\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membeddings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0membedding_vector\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0membedding_matrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membedding_vector\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membeddings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mIndexError\u001b[0m: index 27667 is out of bounds for axis 0 with size 27667"
+ ]
+ }
+ ],
+ "source": [
+ "embedding_matrix = np.zeros((num_words, 200))\n",
+ "\n",
+ "for word, i in tokenizer.word_index.items():\n",
+ " embedding_vector = embeddings.get(word)\n",
+ " if embedding_vector is not None:\n",
+ " embedding_matrix[i] = embedding_vector\n",
+ "\n",
+ "len(embeddings.values())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "u7IbWuEX82Ra"
+ },
+ "source": [
+ "## Create and Compile your Model\n",
+ "Hint - Use Sequential model instance and then add Embedding layer, Bidirectional(LSTM) layer, then dense and dropout layers as required. \n",
+ "In the end add a final dense layer with sigmoid activation for binary classification.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "d7jhsSgYXG4l"
+ },
+ "outputs": [],
+ "source": [
+ "### Embedding layer for hint \n",
+ "## model.add(Embedding(num_words, embedding_size, weights = [embedding_matrix]))\n",
+ "### Bidirectional LSTM layer for hint \n",
+ "## model.add(Bidirectional(LSTM(128, return_sequences = True)))\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "input_layer = Input(shape=(maxlen,),dtype=tf.int64)\n",
+ "embed = Embedding(embedding_matrix.shape[0],output_dim=200,weights=[embedding_matrix],input_length=maxlen, trainable=True)(input_layer)\n",
+ "lstm=Bidirectional(LSTM(128))(embed)\n",
+ "drop=Dropout(0.3)(lstm)\n",
+ "dense =Dense(100,activation='relu')(drop)\n",
+ "out=Dense(2,activation='softmax')(dense)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "IJFMxZwMWoTw"
+ },
+ "source": [
+ "# Fit your model with a batch size of 100 and validation_split = 0.2. and state the validation accuracy\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 364
+ },
+ "colab_type": "code",
+ "id": "ZpVkajCcWnRK",
+ "outputId": "6463c9ee-b14b-4792-e27a-9b950b0997a4"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model: \"model_1\"\n",
+ "_________________________________________________________________\n",
+ "Layer (type) Output Shape Param # \n",
+ "=================================================================\n",
+ "input_2 (InputLayer) [(None, 240)] 0 \n",
+ "_________________________________________________________________\n",
+ "embedding_1 (Embedding) (None, 240, 200) 5533400 \n",
+ "_________________________________________________________________\n",
+ "bidirectional_1 (Bidirection (None, 256) 336896 \n",
+ "_________________________________________________________________\n",
+ "dropout_1 (Dropout) (None, 256) 0 \n",
+ "_________________________________________________________________\n",
+ "dense_2 (Dense) (None, 100) 25700 \n",
+ "_________________________________________________________________\n",
+ "dense_3 (Dense) (None, 2) 202 \n",
+ "=================================================================\n",
+ "Total params: 5,896,198\n",
+ "Trainable params: 5,896,198\n",
+ "Non-trainable params: 0\n",
+ "_________________________________________________________________\n"
+ ]
+ }
+ ],
+ "source": [
+ "batch_size = 100\n",
+ "epochs = 5\n",
+ "\n",
+ "model = Model(input_layer,out)\n",
+ "model.compile(loss='sparse_categorical_crossentropy',optimizer=\"adam\",metrics=['accuracy'])\n",
+ "model.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 225
+ },
+ "colab_type": "code",
+ "id": "TNDkfYCWte4Q",
+ "outputId": "10c71378-2dcb-4ee3-fcdd-bdae376e9ca9"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Train on 21367 samples\n",
+ "Epoch 1/5\n",
+ "21367/21367 [==============================] - 460s 22ms/sample - loss: 0.4514 - accuracy: 0.7802\n",
+ "Epoch 2/5\n",
+ "21367/21367 [==============================] - 457s 21ms/sample - loss: 0.2630 - accuracy: 0.8925\n",
+ "Epoch 3/5\n",
+ "21367/21367 [==============================] - 459s 22ms/sample - loss: 0.1763 - accuracy: 0.9320\n",
+ "Epoch 4/5\n",
+ "21367/21367 [==============================] - 468s 22ms/sample - loss: 0.1181 - accuracy: 0.9570\n",
+ "Epoch 5/5\n",
+ "21367/21367 [==============================] - 468s 22ms/sample - loss: 0.0718 - accuracy: 0.9758\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)\n",
+ "\n",
+ "model.fit(X_train,y_train,batch_size=batch_size, epochs=epochs, verbose=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 55
+ },
+ "colab_type": "code",
+ "id": "kqvqFFu7tjqH",
+ "outputId": "d31ef4a5-9371-4b1f-b83d-f2095a6e23ba"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ "5342/1 [====================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================] - 54s 10ms/sample\n"
+ ]
+ }
+ ],
+ "source": [
+ "test_pred = model.predict(np.array(X_test), verbose=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "YkgjPw-ctohS"
+ },
+ "outputs": [],
+ "source": [
+ "test_pred = [1 if j>i else 0 for i,j in test_pred]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 52
+ },
+ "colab_type": "code",
+ "id": "683aglhutqwx",
+ "outputId": "da6dcbdf-89d9-4aba-c6ee-03548c3d889a"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[2651, 380],\n",
+ " [ 326, 1985]])"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.metrics import confusion_matrix\n",
+ "confusion_matrix(y_test, test_pred)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 173
+ },
+ "colab_type": "code",
+ "id": "PNjr1BQBtxGe",
+ "outputId": "da88dbb7-9caf-41d2-d85b-ae3cc0c4fd18"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.89 0.87 0.88 3031\n",
+ " 1 0.84 0.86 0.85 2311\n",
+ "\n",
+ " accuracy 0.87 5342\n",
+ " macro avg 0.86 0.87 0.87 5342\n",
+ "weighted avg 0.87 0.87 0.87 5342\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.metrics import classification_report\n",
+ "\n",
+ "print(classification_report(y_test, test_pred))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "ssLxdHIGt6Nl"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "collapsed_sections": [],
+ "name": "NLP_Project_Sarcasm_Detection_Kavitha.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}