laureBe committed on
Commit
48bd5aa
·
verified ·
1 Parent(s): 0ae53cb

Upload notebooks_submitted-text.ipynb

notebooks/notebooks_submitted-text.ipynb ADDED
@@ -0,0 +1,945 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Text task notebook template\n",
8
+ "## Loading the necessary libraries"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "metadata": {},
15
+ "outputs": [
16
+ {
17
+ "name": "stderr",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "2025-01-29 12:18:59.954133: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
21
+ "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
22
+ ]
23
+ },
24
+ {
25
+ "name": "stdout",
26
+ "output_type": "stream",
27
+ "text": [
28
+ "{'quote': 'Interesting to note that Oklahoma minimum temperatures in 2011 were in the bottom ten, including the coldest Oklahoma temperature ever recorded, -31F on February 10, 2011.', 'label': '0_not_relevant', 'source': 'FLICC', 'url': 'https://huggingface.co/datasets/fzanartu/FLICCdataset', 'language': 'en', 'subsource': 'CARDS', 'id': None, '__index_level_0__': 1109}\n"
29
+ ]
30
+ },
31
+ {
32
+ "data": {
33
+ "text/plain": [
34
+ "DatasetDict({\n",
35
+ " train: Dataset({\n",
36
+ " features: ['quote', 'label', 'source', 'url', 'language', 'subsource', 'id', '__index_level_0__'],\n",
37
+ " num_rows: 4872\n",
38
+ " })\n",
39
+ " test: Dataset({\n",
40
+ " features: ['quote', 'label', 'source', 'url', 'language', 'subsource', 'id', '__index_level_0__'],\n",
41
+ " num_rows: 1219\n",
42
+ " })\n",
43
+ "})"
44
+ ]
45
+ },
46
+ "execution_count": 1,
47
+ "metadata": {},
48
+ "output_type": "execute_result"
49
+ }
50
+ ],
51
+ "source": [
52
+ "from codecarbon import EmissionsTracker\n",
53
+ "import huggingface_hub\n",
54
+ "from fastapi import APIRouter\n",
55
+ "from datetime import datetime\n",
56
+ "from datasets import load_dataset\n",
57
+ "from sklearn.metrics import accuracy_score\n",
58
+ "import pandas as pd\n",
59
+ "from tqdm import tqdm\n",
60
+ "from sklearn.model_selection import train_test_split\n",
61
+ "import tensorflow as tf\n",
62
+ "from sklearn import preprocessing, decomposition, model_selection, metrics, pipeline\n",
63
+ "from keras.layers import GlobalMaxPooling1D, Conv1D, MaxPooling1D, Flatten, Bidirectional, SpatialDropout1D\n",
64
+ "\n",
65
+ "\n",
66
+ "import sys\n",
67
+ "sys.path.append('../tasks')\n",
68
+ "\n",
69
+ "#from utils.evaluation import TextEvaluationRequest\n",
70
+ "#from utils.emissions import tracker, clean_emissions_data, get_space_info\n",
71
+ "\n",
72
+ "dataset = load_dataset(\"quotaclimat/frugalaichallenge-text-train\")\n",
73
+ "print(next(iter(dataset['train'])))\n",
74
+ " # Convert string labels to integers\n",
75
+ "LABEL_MAPPING = {\n",
76
+ " \"0_not_relevant\": 0,\n",
77
+ " \"1_not_happening\": 1,\n",
78
+ " \"2_not_human\": 2,\n",
79
+ " \"3_not_bad\": 3,\n",
80
+ " \"4_solutions_harmful_unnecessary\": 4,\n",
81
+ " \"5_science_unreliable\": 5,\n",
82
+ " \"6_proponents_biased\": 6,\n",
83
+ " \"7_fossil_fuels_needed\": 7\n",
84
+ " }\n",
85
+ "dataset = dataset.map(lambda x: {\"label\": LABEL_MAPPING[x[\"label\"]]})\n",
86
+ "dataset\n"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "markdown",
91
+ "metadata": {},
92
+ "source": [
93
+ "## Loading the datasets and splitting them"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "code",
98
+ "execution_count": 2,
99
+ "metadata": {},
100
+ "outputs": [],
101
+ "source": [
102
+ "#request = TextEvaluationRequest()\n",
103
+ "\n",
104
+ "# Load and prepare the dataset\n",
105
+ "#dataset = load_dataset(request.dataset_name)\n",
106
+ "\n",
107
+ "# Convert string labels to integers\n",
108
+ "#dataset = dataset.map(lambda x: {\"label\": LABEL_MAPPING[x[\"label\"]]})\n",
109
+ "\n",
110
+ "# Split dataset\n",
111
+ "train_test = dataset[\"train\"].train_test_split(test_size=.2, #request.test_size, \n",
112
+ " seed=42 )#request.test_seed)\n"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": 3,
118
+ "metadata": {},
119
+ "outputs": [],
120
+ "source": [
121
+ "train_dataset = train_test[\"train\"]\n",
122
+ "test_dataset = train_test[\"test\"]\n"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": 4,
128
+ "metadata": {},
129
+ "outputs": [
130
+ {
131
+ "name": "stderr",
132
+ "output_type": "stream",
133
+ "text": [
134
+ "[nltk_data] Downloading package stopwords to\n",
135
+ "[nltk_data] /Users/laureberti/nltk_data...\n",
136
+ "[nltk_data] Package stopwords is already up-to-date!\n",
137
+ "[nltk_data] Downloading package wordnet to\n",
138
+ "[nltk_data] /Users/laureberti/nltk_data...\n",
139
+ "[nltk_data] Package wordnet is already up-to-date!\n"
140
+ ]
141
+ },
142
+ {
143
+ "data": {
144
+ "text/html": [
145
+ "<div>\n",
146
+ "<style scoped>\n",
147
+ " .dataframe tbody tr th:only-of-type {\n",
148
+ " vertical-align: middle;\n",
149
+ " }\n",
150
+ "\n",
151
+ " .dataframe tbody tr th {\n",
152
+ " vertical-align: top;\n",
153
+ " }\n",
154
+ "\n",
155
+ " .dataframe thead th {\n",
156
+ " text-align: right;\n",
157
+ " }\n",
158
+ "</style>\n",
159
+ "<table border=\"1\" class=\"dataframe\">\n",
160
+ " <thead>\n",
161
+ " <tr style=\"text-align: right;\">\n",
162
+ " <th></th>\n",
163
+ " <th>quote</th>\n",
164
+ " <th>clean_text</th>\n",
165
+ " <th>length_clean_text</th>\n",
166
+ " </tr>\n",
167
+ " </thead>\n",
168
+ " <tbody>\n",
169
+ " <tr>\n",
170
+ " <th>0</th>\n",
171
+ " <td>Americans for Tax Reform opposes a carbon tax ...</td>\n",
172
+ " <td>american tax reform oppose carbon tax work tir...</td>\n",
173
+ " <td>79</td>\n",
174
+ " </tr>\n",
175
+ " <tr>\n",
176
+ " <th>1</th>\n",
177
+ " <td>More than 100 climate models over the past 30 ...</td>\n",
178
+ " <td>100 climate model past 30 year predict actuall...</td>\n",
179
+ " <td>152</td>\n",
180
+ " </tr>\n",
181
+ " <tr>\n",
182
+ " <th>2</th>\n",
183
+ " <td>As an oil and gas operator who has been in the...</td>\n",
184
+ " <td>oil gas operator ha industry 30 year im fortun...</td>\n",
185
+ " <td>362</td>\n",
186
+ " </tr>\n",
187
+ " <tr>\n",
188
+ " <th>3</th>\n",
189
+ " <td>Climate has always changed, there've been many...</td>\n",
190
+ " <td>climate ha always change thereve many extincti...</td>\n",
191
+ " <td>141</td>\n",
192
+ " </tr>\n",
193
+ " <tr>\n",
194
+ " <th>4</th>\n",
195
+ " <td>People have made a mistake. They’ve started to...</td>\n",
196
+ " <td>people make mistake theyve start believe human...</td>\n",
197
+ " <td>118</td>\n",
198
+ " </tr>\n",
199
+ " </tbody>\n",
200
+ "</table>\n",
201
+ "</div>"
202
+ ],
203
+ "text/plain": [
204
+ " quote \\\n",
205
+ "0 Americans for Tax Reform opposes a carbon tax ... \n",
206
+ "1 More than 100 climate models over the past 30 ... \n",
207
+ "2 As an oil and gas operator who has been in the... \n",
208
+ "3 Climate has always changed, there've been many... \n",
209
+ "4 People have made a mistake. They’ve started to... \n",
210
+ "\n",
211
+ " clean_text length_clean_text \n",
212
+ "0 american tax reform oppose carbon tax work tir... 79 \n",
213
+ "1 100 climate model past 30 year predict actuall... 152 \n",
214
+ "2 oil gas operator ha industry 30 year im fortun... 362 \n",
215
+ "3 climate ha always change thereve many extincti... 141 \n",
216
+ "4 people make mistake theyve start believe human... 118 "
217
+ ]
218
+ },
219
+ "execution_count": 4,
220
+ "metadata": {},
221
+ "output_type": "execute_result"
222
+ }
223
+ ],
224
+ "source": [
225
+ "import nltk\n",
226
+ "nltk.download('stopwords')\n",
227
+ "nltk.download('wordnet')\n",
228
+ "\n",
229
+ "import re\n",
230
+ "from nltk.stem import WordNetLemmatizer\n",
231
+ "from nltk.corpus import stopwords\n",
232
+ "\n",
233
+ "stop_words = set(stopwords.words(\"english\")) \n",
234
+ "lemmatizer = WordNetLemmatizer()\n",
235
+ "\n",
236
+ "\n",
237
+ "def clean_text(text):\n",
238
+ " text = re.sub(r'[^\\w\\s]','',text, re.UNICODE)\n",
239
+ " text = text.lower()\n",
240
+ " text = [lemmatizer.lemmatize(token) for token in text.split(\" \")]\n",
241
+ " text = [lemmatizer.lemmatize(token, \"v\") for token in text]\n",
242
+ " text = [word for word in text if not word in stop_words]\n",
243
+ " text = \" \".join(text)\n",
244
+ " return text\n",
245
+ "\n",
246
+ "train_df= pd.DataFrame(train_dataset[\"quote\"], columns=['quote']) \n",
247
+ "train_df['clean_text'] = train_df.map(clean_text) \n",
248
+ "train_df['length_clean_text'] = train_df['clean_text'].map(len)\n",
249
+ "\n",
250
+ "train_df.head()\n"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": 5,
256
+ "metadata": {},
257
+ "outputs": [
258
+ {
259
+ "data": {
260
+ "text/html": [
261
+ "<div>\n",
262
+ "<style scoped>\n",
263
+ " .dataframe tbody tr th:only-of-type {\n",
264
+ " vertical-align: middle;\n",
265
+ " }\n",
266
+ "\n",
267
+ " .dataframe tbody tr th {\n",
268
+ " vertical-align: top;\n",
269
+ " }\n",
270
+ "\n",
271
+ " .dataframe thead th {\n",
272
+ " text-align: right;\n",
273
+ " }\n",
274
+ "</style>\n",
275
+ "<table border=\"1\" class=\"dataframe\">\n",
276
+ " <thead>\n",
277
+ " <tr style=\"text-align: right;\">\n",
278
+ " <th></th>\n",
279
+ " <th>quote</th>\n",
280
+ " <th>clean_text</th>\n",
281
+ " <th>length_clean_text</th>\n",
282
+ " </tr>\n",
283
+ " </thead>\n",
284
+ " <tbody>\n",
285
+ " <tr>\n",
286
+ " <th>0</th>\n",
287
+ " <td>The term climate change was hijacked by β€œprogr...</td>\n",
288
+ " <td>term climate change wa hijack progressive term...</td>\n",
289
+ " <td>76</td>\n",
290
+ " </tr>\n",
291
+ " <tr>\n",
292
+ " <th>1</th>\n",
293
+ " <td>Climate change is a scam.Banks and Home Owner'...</td>\n",
294
+ " <td>climate change scambanks home owner insurance ...</td>\n",
295
+ " <td>82</td>\n",
296
+ " </tr>\n",
297
+ " <tr>\n",
298
+ " <th>2</th>\n",
299
+ " <td>Against the half-trillion in benefits you can ...</td>\n",
300
+ " <td>halftrillion benefit weigh global warm impact ...</td>\n",
301
+ " <td>337</td>\n",
302
+ " </tr>\n",
303
+ " <tr>\n",
304
+ " <th>3</th>\n",
305
+ " <td>Do you agree with the vast majority of climate...</td>\n",
306
+ " <td>agree vast majority climate scientist climate ...</td>\n",
307
+ " <td>59</td>\n",
308
+ " </tr>\n",
309
+ " <tr>\n",
310
+ " <th>4</th>\n",
311
+ " <td>Global warming and climate change, even if it ...</td>\n",
312
+ " <td>global warm climate change even 100 cause huma...</td>\n",
313
+ " <td>165</td>\n",
314
+ " </tr>\n",
315
+ " </tbody>\n",
316
+ "</table>\n",
317
+ "</div>"
318
+ ],
319
+ "text/plain": [
320
+ " quote \\\n",
321
+ "0 The term climate change was hijacked by β€œprogr... \n",
322
+ "1 Climate change is a scam.Banks and Home Owner'... \n",
323
+ "2 Against the half-trillion in benefits you can ... \n",
324
+ "3 Do you agree with the vast majority of climate... \n",
325
+ "4 Global warming and climate change, even if it ... \n",
326
+ "\n",
327
+ " clean_text length_clean_text \n",
328
+ "0 term climate change wa hijack progressive term... 76 \n",
329
+ "1 climate change scambanks home owner insurance ... 82 \n",
330
+ "2 halftrillion benefit weigh global warm impact ... 337 \n",
331
+ "3 agree vast majority climate scientist climate ... 59 \n",
332
+ "4 global warm climate change even 100 cause huma... 165 "
333
+ ]
334
+ },
335
+ "execution_count": 5,
336
+ "metadata": {},
337
+ "output_type": "execute_result"
338
+ }
339
+ ],
340
+ "source": [
341
+ "test_df= pd.DataFrame(test_dataset[\"quote\"], columns=['quote']) \n",
342
+ "test_df['clean_text'] = test_df.map(clean_text) \n",
343
+ "test_df['length_clean_text'] = test_df['clean_text'].map(len)\n",
344
+ "\n",
345
+ "test_df.head()"
346
+ ]
347
+ },
348
+ {
349
+ "cell_type": "code",
350
+ "execution_count": 6,
351
+ "metadata": {},
352
+ "outputs": [
353
+ {
354
+ "data": {
355
+ "text/plain": [
356
+ "27.92250449063382"
357
+ ]
358
+ },
359
+ "execution_count": 6,
360
+ "metadata": {},
361
+ "output_type": "execute_result"
362
+ }
363
+ ],
364
+ "source": [
365
+ "train_df['clean_text'].apply(lambda x: len(x.split(\" \"))).mean()"
366
+ ]
367
+ },
368
+ {
369
+ "cell_type": "code",
370
+ "execution_count": 7,
371
+ "metadata": {},
372
+ "outputs": [
373
+ {
374
+ "data": {
375
+ "text/plain": [
376
+ "27.25948717948718"
377
+ ]
378
+ },
379
+ "execution_count": 7,
380
+ "metadata": {},
381
+ "output_type": "execute_result"
382
+ }
383
+ ],
384
+ "source": [
385
+ "test_df['clean_text'].apply(lambda x: len(x.split(\" \"))).mean()"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "code",
390
+ "execution_count": 32,
391
+ "metadata": {},
392
+ "outputs": [],
393
+ "source": [
394
+ "import tensorflow as tf\n",
395
+ "import tensorflow.keras as keras\n",
396
+ "from tensorflow.keras.preprocessing.text import Tokenizer\n",
397
+ "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
398
+ "from tensorflow.keras.layers import Concatenate, Dense, Input, LSTM, Embedding, Dropout, Activation, GRU, Flatten\n",
399
+ "from tensorflow.keras.layers import Bidirectional, GlobalMaxPool1D\n",
400
+ "from tensorflow.keras.models import Model, Sequential\n",
401
+ "from tensorflow.keras.layers import Convolution1D\n",
402
+ "from tensorflow.keras import initializers, regularizers, constraints, optimizers, layers\n",
403
+ "\n",
404
+ "\n",
405
+ "MAX_FEATURES = 6000\n",
406
+ "EMBED_SIZE = 28\n",
407
+ "tokenizer = Tokenizer(num_words=MAX_FEATURES)\n",
408
+ "tokenizer.fit_on_texts(train_df['clean_text'])\n",
409
+ "list_tokenized_train = tokenizer.texts_to_sequences(train_df['clean_text'])\n",
410
+ "\n",
411
+ "RNN_CELL_SIZE = 32\n",
412
+ "\n",
413
+ "MAX_LEN = 30 \n",
414
+ "\n",
415
+ "X_train = pad_sequences(list_tokenized_train, maxlen=MAX_LEN)\n"
416
+ ]
417
+ },
418
+ {
419
+ "cell_type": "code",
420
+ "execution_count": 33,
421
+ "metadata": {},
422
+ "outputs": [],
423
+ "source": [
424
+ "true_labels = test_dataset[\"label\"]\n",
425
+ "y_train = train_dataset[\"label\"]\n",
426
+ "y_test = test_dataset[\"label\"]"
427
+ ]
428
+ },
429
+ {
430
+ "cell_type": "code",
431
+ "execution_count": 34,
432
+ "metadata": {},
433
+ "outputs": [],
434
+ "source": [
435
+ "class Attention(tf.keras.Model):\n",
436
+ " def __init__(self, units):\n",
437
+ " super(Attention, self).__init__()\n",
438
+ " self.W1 = tf.keras.layers.Dense(units)\n",
439
+ " self.W2 = tf.keras.layers.Dense(units)\n",
440
+ " self.V = tf.keras.layers.Dense(1)\n",
441
+ " \n",
442
+ " def call(self, features, hidden):\n",
443
+ " # hidden shape == (batch_size, hidden size)\n",
444
+ " # hidden_with_time_axis shape == (batch_size, 1, hidden size)\n",
445
+ " # we are doing this to perform addition to calculate the score\n",
446
+ " hidden_with_time_axis = tf.expand_dims(hidden, 1)\n",
447
+ "\n",
448
+ " # score shape == (batch_size, max_length, 1)\n",
449
+ " # we get 1 at the last axis because we are applying score to self.V\n",
450
+ " # the shape of the tensor before applying self.V is (batch_size, max_length, units)\n",
451
+ " score = tf.nn.tanh(\n",
452
+ " self.W1(features) + self.W2(hidden_with_time_axis))\n",
453
+ " \n",
454
+ " # attention_weights shape == (batch_size, max_length, 1)\n",
455
+ " attention_weights = tf.nn.softmax(self.V(score), axis=1)\n",
456
+ "\n",
457
+ " # context_vector shape after sum == (batch_size, hidden_size)\n",
458
+ " context_vector = attention_weights * features\n",
459
+ " context_vector = tf.reduce_sum(context_vector, axis=1)\n",
460
+ " \n",
461
+ " return context_vector, attention_weights"
462
+ ]
463
+ },
464
+ {
465
+ "cell_type": "code",
466
+ "execution_count": 35,
467
+ "metadata": {},
468
+ "outputs": [],
469
+ "source": [
470
+ "sequence_input = Input(shape=(MAX_LEN,), dtype=\"int32\")\n",
471
+ "embedded_sequences = Embedding(MAX_FEATURES, EMBED_SIZE)(sequence_input)"
472
+ ]
473
+ },
474
+ {
475
+ "cell_type": "code",
476
+ "execution_count": 36,
477
+ "metadata": {},
478
+ "outputs": [],
479
+ "source": [
480
+ "lstm = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences = True), name=\"bi_lstm_0\")(embedded_sequences)\n",
481
+ "\n",
482
+ "# Getting our LSTM outputs\n",
483
+ "(lstm, forward_h, forward_c, backward_h, backward_c) = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences=True, return_state=True), name=\"bi_lstm_1\")(lstm)"
484
+ ]
485
+ },
486
+ {
487
+ "cell_type": "code",
488
+ "execution_count": 37,
489
+ "metadata": {},
490
+ "outputs": [],
491
+ "source": [
492
+ "state_h = Concatenate()([forward_h, backward_h])\n",
493
+ "state_c = Concatenate()([forward_c, backward_c])\n",
494
+ "\n",
495
+ "context_vector, attention_weights = Attention(10)(lstm, state_h)\n",
496
+ "\n",
497
+ "# Removal of the globalMaxPool1D could be trouble\n",
498
+ "#globmax = GlobalMaxPool1D()(context_vector)\n",
499
+ "dense1 = Dense(20, activation=\"relu\")(context_vector)\n",
500
+ "dropout = Dropout(0.05)(dense1)\n",
501
+ "output = Dense(8, activation=\"sigmoid\")(dropout)\n",
502
+ "\n",
503
+ "model = keras.Model(inputs=sequence_input, outputs=output)"
504
+ ]
505
+ },
506
+ {
507
+ "cell_type": "code",
508
+ "execution_count": 38,
509
+ "metadata": {},
510
+ "outputs": [
511
+ {
512
+ "data": {
513
+ "text/html": [
514
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional_1\"</span>\n",
515
+ "</pre>\n"
516
+ ],
517
+ "text/plain": [
518
+ "\u001b[1mModel: \"functional_1\"\u001b[0m\n"
519
+ ]
520
+ },
521
+ "metadata": {},
522
+ "output_type": "display_data"
523
+ },
524
+ {
525
+ "data": {
526
+ "text/html": [
527
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
528
+ "┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃<span style=\"font-weight: bold\"> Connected to </span>┃\n",
529
+ "┑━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
530
+ "β”‚ input_layer_1 β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">30</span>) β”‚ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> β”‚ - β”‚\n",
531
+ "β”‚ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) β”‚ β”‚ β”‚ β”‚\n",
532
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
533
+ "β”‚ embedding_1 β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">30</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">28</span>) β”‚ <span style=\"color: #00af00; text-decoration-color: #00af00\">168,000</span> β”‚ input_layer_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… β”‚\n",
534
+ "β”‚ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) β”‚ β”‚ β”‚ β”‚\n",
535
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
536
+ "β”‚ bi_lstm_0 β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">30</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) β”‚ <span style=\"color: #00af00; text-decoration-color: #00af00\">15,616</span> β”‚ embedding_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] β”‚\n",
537
+ "β”‚ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) β”‚ β”‚ β”‚ β”‚\n",
538
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
539
+ "β”‚ bi_lstm_1 β”‚ [(<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">30</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>), β”‚ <span style=\"color: #00af00; text-decoration-color: #00af00\">24,832</span> β”‚ bi_lstm_0[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] β”‚\n",
540
+ "β”‚ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">32</span>), β”‚ β”‚ β”‚\n",
541
+ "β”‚ β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">32</span>), β”‚ β”‚ β”‚\n",
542
+ "β”‚ β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">32</span>), β”‚ β”‚ β”‚\n",
543
+ "β”‚ β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">32</span>)] β”‚ β”‚ β”‚\n",
544
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
545
+ "β”‚ concatenate_2 β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) β”‚ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> β”‚ bi_lstm_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">1</span>], β”‚\n",
546
+ "β”‚ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Concatenate</span>) β”‚ β”‚ β”‚ bi_lstm_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">3</span>] β”‚\n",
547
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
548
+ "β”‚ attention_1 β”‚ [(<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>), β”‚ <span style=\"color: #00af00; text-decoration-color: #00af00\">1,311</span> β”‚ bi_lstm_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>], β”‚\n",
549
+ "β”‚ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Attention</span>) β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">30</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">1</span>)] β”‚ β”‚ concatenate_2[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]… β”‚\n",
550
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
551
+ "β”‚ dense_8 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">20</span>) β”‚ <span style=\"color: #00af00; text-decoration-color: #00af00\">1,300</span> β”‚ attention_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] β”‚\n",
552
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
553
+ "β”‚ dropout_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">20</span>) β”‚ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> β”‚ dense_8[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] β”‚\n",
554
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
555
+ "β”‚ dense_9 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">8</span>) β”‚ <span style=\"color: #00af00; text-decoration-color: #00af00\">168</span> β”‚ dropout_1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] β”‚\n",
556
+ "β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜\n",
557
+ "</pre>\n"
558
+ ],
559
+ "text/plain": [
560
+ "┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
561
+ "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0m┃\n",
562
+ "┑━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
563
+ "β”‚ input_layer_1 β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m30\u001b[0m) β”‚ \u001b[38;5;34m0\u001b[0m β”‚ - β”‚\n",
564
+ "β”‚ (\u001b[38;5;33mInputLayer\u001b[0m) β”‚ β”‚ β”‚ β”‚\n",
565
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
566
+ "β”‚ embedding_1 β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m30\u001b[0m, \u001b[38;5;34m28\u001b[0m) β”‚ \u001b[38;5;34m168,000\u001b[0m β”‚ input_layer_1[\u001b[38;5;34m0\u001b[0m]… β”‚\n",
567
+ "β”‚ (\u001b[38;5;33mEmbedding\u001b[0m) β”‚ β”‚ β”‚ β”‚\n",
568
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
569
+ "β”‚ bi_lstm_0 β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m30\u001b[0m, \u001b[38;5;34m64\u001b[0m) β”‚ \u001b[38;5;34m15,616\u001b[0m β”‚ embedding_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] β”‚\n",
570
+ "β”‚ (\u001b[38;5;33mBidirectional\u001b[0m) β”‚ β”‚ β”‚ β”‚\n",
571
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
572
+ "β”‚ bi_lstm_1 β”‚ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m30\u001b[0m, \u001b[38;5;34m64\u001b[0m), β”‚ \u001b[38;5;34m24,832\u001b[0m β”‚ bi_lstm_0[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] β”‚\n",
573
+ "β”‚ (\u001b[38;5;33mBidirectional\u001b[0m) β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m32\u001b[0m), β”‚ β”‚ β”‚\n",
574
+ "β”‚ β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m32\u001b[0m), β”‚ β”‚ β”‚\n",
575
+ "β”‚ β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m32\u001b[0m), β”‚ β”‚ β”‚\n",
576
+ "β”‚ β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m32\u001b[0m)] β”‚ β”‚ β”‚\n",
577
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
578
+ "β”‚ concatenate_2 β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) β”‚ \u001b[38;5;34m0\u001b[0m β”‚ bi_lstm_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m1\u001b[0m], β”‚\n",
579
+ "β”‚ (\u001b[38;5;33mConcatenate\u001b[0m) β”‚ β”‚ β”‚ bi_lstm_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m3\u001b[0m] β”‚\n",
580
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
581
+ "β”‚ attention_1 β”‚ [(\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m), β”‚ \u001b[38;5;34m1,311\u001b[0m β”‚ bi_lstm_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], β”‚\n",
582
+ "β”‚ (\u001b[38;5;33mAttention\u001b[0m) β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m30\u001b[0m, \u001b[38;5;34m1\u001b[0m)] β”‚ β”‚ concatenate_2[\u001b[38;5;34m0\u001b[0m]… β”‚\n",
583
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
584
+ "β”‚ dense_8 (\u001b[38;5;33mDense\u001b[0m) β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m20\u001b[0m) β”‚ \u001b[38;5;34m1,300\u001b[0m β”‚ attention_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] β”‚\n",
585
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
586
+ "β”‚ dropout_1 (\u001b[38;5;33mDropout\u001b[0m) β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m20\u001b[0m) β”‚ \u001b[38;5;34m0\u001b[0m β”‚ dense_8[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] β”‚\n",
587
+ "β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n",
588
+ "β”‚ dense_9 (\u001b[38;5;33mDense\u001b[0m) β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m) β”‚ \u001b[38;5;34m168\u001b[0m β”‚ dropout_1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] β”‚\n",
589
+ "β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜\n"
590
+ ]
591
+ },
592
+ "metadata": {},
593
+ "output_type": "display_data"
594
+ },
595
+ {
596
+ "data": {
597
+ "text/html": [
598
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">211,227</span> (825.11 KB)\n",
599
+ "</pre>\n"
600
+ ],
601
+ "text/plain": [
602
+ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m211,227\u001b[0m (825.11 KB)\n"
603
+ ]
604
+ },
605
+ "metadata": {},
606
+ "output_type": "display_data"
607
+ },
608
+ {
609
+ "data": {
610
+ "text/html": [
611
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">211,227</span> (825.11 KB)\n",
612
+ "</pre>\n"
613
+ ],
614
+ "text/plain": [
615
+ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m211,227\u001b[0m (825.11 KB)\n"
616
+ ]
617
+ },
618
+ "metadata": {},
619
+ "output_type": "display_data"
620
+ },
621
+ {
622
+ "data": {
623
+ "text/html": [
624
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
625
+ "</pre>\n"
626
+ ],
627
+ "text/plain": [
628
+ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
629
+ ]
630
+ },
631
+ "metadata": {},
632
+ "output_type": "display_data"
633
+ },
634
+ {
635
+ "name": "stdout",
636
+ "output_type": "stream",
637
+ "text": [
638
+ "None\n"
639
+ ]
640
+ }
641
+ ],
642
+ "source": [
643
+ "# summarize layers\n",
644
+ "print(model.summary())"
645
+ ]
646
+ },
647
+ {
648
+ "cell_type": "code",
649
+ "execution_count": 39,
650
+ "metadata": {},
651
+ "outputs": [],
652
+ "source": [
653
+ "from keras.callbacks import EarlyStopping\n",
654
+ "from keras import backend \n",
655
+ "\n",
656
+ "es = EarlyStopping(monitor='accuracy', mode='min', verbose=1, patience=5)\n",
657
+ "model.compile(loss='SparseCategoricalCrossentropy', optimizer='adam', metrics=['accuracy'])\n"
658
+ ]
659
+ },
660
+ {
661
+ "cell_type": "code",
662
+ "execution_count": 40,
663
+ "metadata": {},
664
+ "outputs": [],
665
+ "source": [
666
+ "\n",
667
+ "import numpy as np\n",
668
+ "\n",
669
+ "X_train_np = np.array(X_train)\n",
670
+ "y_train_np = np.array(y_train)"
671
+ ]
672
+ },
673
+ {
674
+ "cell_type": "code",
675
+ "execution_count": 42,
676
+ "metadata": {},
677
+ "outputs": [
678
+ {
679
+ "name": "stdout",
680
+ "output_type": "stream",
681
+ "text": [
682
+ "Epoch 1/30\n",
683
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 39ms/step - accuracy: 0.7935 - loss: 0.6349\n",
684
+ "Epoch 2/30\n",
685
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 38ms/step - accuracy: 0.8229 - loss: 0.5661\n",
686
+ "Epoch 3/30\n",
687
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 44ms/step - accuracy: 0.8691 - loss: 0.4346\n",
688
+ "Epoch 4/30\n",
689
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 39ms/step - accuracy: 0.8974 - loss: 0.3836\n",
690
+ "Epoch 5/30\n",
691
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 52ms/step - accuracy: 0.9059 - loss: 0.3363\n",
692
+ "Epoch 6/30\n",
693
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 55ms/step - accuracy: 0.9146 - loss: 0.2993\n",
694
+ "Epoch 7/30\n",
695
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 54ms/step - accuracy: 0.9364 - loss: 0.2439\n",
696
+ "Epoch 8/30\n",
697
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 48ms/step - accuracy: 0.9365 - loss: 0.2423\n",
698
+ "Epoch 9/30\n",
699
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 40ms/step - accuracy: 0.9464 - loss: 0.1978\n",
700
+ "Epoch 10/30\n",
701
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 39ms/step - accuracy: 0.9516 - loss: 0.1880\n",
702
+ "Epoch 11/30\n",
703
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 49ms/step - accuracy: 0.9478 - loss: 0.1854\n",
704
+ "Epoch 12/30\n",
705
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 59ms/step - accuracy: 0.9545 - loss: 0.1586\n",
706
+ "Epoch 13/30\n",
707
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 59ms/step - accuracy: 0.9563 - loss: 0.1485\n",
708
+ "Epoch 14/30\n",
709
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 61ms/step - accuracy: 0.9598 - loss: 0.1378\n",
710
+ "Epoch 15/30\n",
711
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 52ms/step - accuracy: 0.9575 - loss: 0.1429\n",
712
+ "Epoch 16/30\n",
713
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 60ms/step - accuracy: 0.9576 - loss: 0.1285\n",
714
+ "Epoch 17/30\n",
715
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 53ms/step - accuracy: 0.9585 - loss: 0.1384\n",
716
+ "Epoch 18/30\n",
717
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 45ms/step - accuracy: 0.9597 - loss: 0.1333\n",
718
+ "Epoch 19/30\n",
719
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 51ms/step - accuracy: 0.9671 - loss: 0.1189\n",
720
+ "Epoch 20/30\n",
721
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 52ms/step - accuracy: 0.9709 - loss: 0.1102\n",
722
+ "Epoch 21/30\n",
723
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 58ms/step - accuracy: 0.9691 - loss: 0.1136\n",
724
+ "Epoch 22/30\n",
725
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 59ms/step - accuracy: 0.9774 - loss: 0.0918\n",
726
+ "Epoch 23/30\n",
727
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 63ms/step - accuracy: 0.9777 - loss: 0.0876\n",
728
+ "Epoch 24/30\n",
729
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 59ms/step - accuracy: 0.9841 - loss: 0.0615\n",
730
+ "Epoch 25/30\n",
731
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 43ms/step - accuracy: 0.9781 - loss: 0.0804\n",
732
+ "Epoch 26/30\n",
733
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 43ms/step - accuracy: 0.9724 - loss: 0.0936\n",
734
+ "Epoch 27/30\n",
735
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 42ms/step - accuracy: 0.9711 - loss: 0.1026\n",
736
+ "Epoch 28/30\n",
737
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 44ms/step - accuracy: 0.9728 - loss: 0.0933\n",
738
+ "Epoch 29/30\n",
739
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 49ms/step - accuracy: 0.9771 - loss: 0.0772\n",
740
+ "Epoch 30/30\n",
741
+ "\u001b[1m39/39\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 55ms/step - accuracy: 0.9771 - loss: 0.0940\n"
742
+ ]
743
+ }
744
+ ],
745
+ "source": [
746
+ "BATCH_SIZE = 100\n",
747
+ "EPOCHS = 30\n",
748
+ "history = model.fit(X_train_np,y_train_np, shuffle=True,\n",
749
+ " batch_size=BATCH_SIZE, verbose=1,\n",
750
+ " epochs=EPOCHS)#, callbacks=[es])"
751
+ ]
752
+ },
753
+ {
754
+ "cell_type": "code",
755
+ "execution_count": 43,
756
+ "metadata": {},
757
+ "outputs": [],
758
+ "source": [
759
+ "def classifier(input_text,candidate_labels):\n",
760
+ " #PREPROCESS THE INPUT TEXT\n",
761
+ " input_text_cleaned = clean_text(input_text)\n",
762
+ " input_sequence = tokenizer.texts_to_sequences([input_text_cleaned])\n",
763
+ " input_padded = pad_sequences(input_sequence, maxlen = MAX_LEN, padding = 'post')\n",
764
+ " #PREDICTION\n",
765
+ " prediction = np.ravel(model.predict(input_padded))\n",
766
+ " return {'sequence': input_text,'labels': candidate_labels,'scores': list(prediction)}\n"
767
+ ]
768
+ },
769
+ {
770
+ "cell_type": "code",
771
+ "execution_count": 44,
772
+ "metadata": {},
773
+ "outputs": [],
774
+ "source": [
775
+ "candidate_labels = [\n",
776
+ " \"Not related to climate change disinformation\",\n",
777
+ " \"Climate change is not real and not happening\",\n",
778
+ " \"Climate change is not human-induced\",\n",
779
+ " \"Climate change impacts are not that bad\",\n",
780
+ " \"Climate change solutions are harmful and unnecessary\",\n",
781
+ " \"Climate change science is unreliable\",\n",
782
+ " \"Climate change proponents are biased\",\n",
783
+ " \"Fossil fuels are needed to address climate change\"\n",
784
+ "]"
785
+ ]
786
+ },
787
+ {
788
+ "cell_type": "code",
789
+ "execution_count": 48,
790
+ "metadata": {},
791
+ "outputs": [
792
+ {
793
+ "data": {
794
+ "text/plain": [
795
+ "[6, 6, 4, 0, 5, 5, 2, 4, 1, 0]"
796
+ ]
797
+ },
798
+ "execution_count": 48,
799
+ "metadata": {},
800
+ "output_type": "execute_result"
801
+ }
802
+ ],
803
+ "source": [
804
+ "true_labels[:10]"
805
+ ]
806
+ },
807
+ {
808
+ "cell_type": "code",
809
+ "execution_count": 49,
810
+ "metadata": {},
811
+ "outputs": [
812
+ {
813
+ "data": {
814
+ "text/plain": [
815
+ "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
816
+ ]
817
+ },
818
+ "execution_count": 49,
819
+ "metadata": {},
820
+ "output_type": "execute_result"
821
+ }
822
+ ],
823
+ "source": [
824
+ "predictions[:10]"
825
+ ]
826
+ },
827
+ {
828
+ "cell_type": "code",
829
+ "execution_count": null,
830
+ "metadata": {},
831
+ "outputs": [],
832
+ "source": [
833
+ "# Start tracking emissions\n",
834
+ "tracker.start()\n",
835
+ "tracker.start_task(\"inference\")\n"
836
+ ]
837
+ },
838
+ {
839
+ "cell_type": "code",
840
+ "execution_count": 46,
841
+ "metadata": {},
842
+ "outputs": [],
843
+ "source": [
844
+ "%%capture\n",
845
+ "\n",
846
+ "from tqdm.auto import tqdm\n",
847
+ "predictions = []\n",
848
+ "\n",
849
+ "for i, text in tqdm(enumerate(test_dataset[\"quote\"])):\n",
850
+ "\n",
851
+ " result = classifier(text, candidate_labels)\n",
852
+ "\n",
853
+ " # Get index of highest scoring label\n",
854
+ "\n",
855
+ " pred_label = candidate_labels.index(result[\"labels\"][0])\n",
856
+ "\n",
857
+ " predictions.append(pred_label)\n"
858
+ ]
859
+ },
860
+ {
861
+ "cell_type": "code",
862
+ "execution_count": null,
863
+ "metadata": {},
864
+ "outputs": [],
865
+ "source": [
866
+ "# Stop tracking emissions\n",
867
+ "emissions_data = tracker.stop_task()\n",
868
+ "emissions_data"
869
+ ]
870
+ },
871
+ {
872
+ "cell_type": "code",
873
+ "execution_count": 47,
874
+ "metadata": {},
875
+ "outputs": [
876
+ {
877
+ "data": {
878
+ "text/plain": [
879
+ "0.27"
880
+ ]
881
+ },
882
+ "execution_count": 47,
883
+ "metadata": {},
884
+ "output_type": "execute_result"
885
+ }
886
+ ],
887
+ "source": [
888
+ "# Calculate accuracy\n",
889
+ "accuracy = accuracy_score(true_labels[:100], predictions[:100])\n",
890
+ "accuracy"
891
+ ]
892
+ },
893
+ {
894
+ "cell_type": "code",
895
+ "execution_count": null,
896
+ "metadata": {},
897
+ "outputs": [],
898
+ "source": [
899
+ "# Prepare results dictionary\n",
900
+ "results = {\n",
901
+ " \"submission_timestamp\": datetime.now().isoformat(),\n",
902
+ " \"accuracy\": float(accuracy),\n",
903
+ " \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n",
904
+ " \"emissions_gco2eq\": emissions_data.emissions * 1000,\n",
905
+ " \"emissions_data\": clean_emissions_data(emissions_data),\n",
906
+ " \"dataset_config\": {\n",
907
+ " \"dataset_name\": request.dataset_name,\n",
908
+ " \"test_size\": request.test_size,\n",
909
+ " \"test_seed\": request.test_seed\n",
910
+ " }\n",
911
+ "}\n",
912
+ "\n",
913
+ "results"
914
+ ]
915
+ },
916
+ {
917
+ "cell_type": "code",
918
+ "execution_count": null,
919
+ "metadata": {},
920
+ "outputs": [],
921
+ "source": []
922
+ }
923
+ ],
924
+ "metadata": {
925
+ "kernelspec": {
926
+ "display_name": "Python 3 (ipykernel)",
927
+ "language": "python",
928
+ "name": "python3"
929
+ },
930
+ "language_info": {
931
+ "codemirror_mode": {
932
+ "name": "ipython",
933
+ "version": 3
934
+ },
935
+ "file_extension": ".py",
936
+ "mimetype": "text/x-python",
937
+ "name": "python",
938
+ "nbconvert_exporter": "python",
939
+ "pygments_lexer": "ipython3",
940
+ "version": "3.12.8"
941
+ }
942
+ },
943
+ "nbformat": 4,
944
+ "nbformat_minor": 4
945
+ }