File size: 50,851 Bytes
dbe16d4
1
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.11.11","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":2157,"sourceType":"datasetVersion","datasetId":18}],"dockerImageVersionId":31040,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\nplt.style.use('ggplot')\n\nimport nltk","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:09:53.762318Z","iopub.execute_input":"2025-05-16T12:09:53.762680Z","iopub.status.idle":"2025-05-16T12:09:53.768747Z","shell.execute_reply.started":"2025-05-16T12:09:53.762652Z","shell.execute_reply":"2025-05-16T12:09:53.767465Z"}},"outputs":[],"execution_count":142},{"cell_type":"code","source":"# Read in data\ndf = pd.read_csv('/kaggle/input/amazon-fine-food-reviews/Reviews.csv')\nprint(df.shape)\ndf = df.head(500)\nprint(df.shape)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:09:55.047299Z","iopub.execute_input":"2025-05-16T12:09:55.047642Z","iopub.status.idle":"2025-05-16T12:09:59.654257Z","shell.execute_reply.started":"2025-05-16T12:09:55.047615Z","shell.execute_reply":"2025-05-16T12:09:59.653504Z"}},"outputs":[{"name":"stdout","text":"(568454, 10)\n(500, 10)\n","output_type":"stream"}],"execution_count":143},{"cell_type":"code","source":"ax = 
df['Score'].value_counts()\nax","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:03.031026Z","iopub.execute_input":"2025-05-16T12:10:03.031363Z","iopub.status.idle":"2025-05-16T12:10:03.038529Z","shell.execute_reply.started":"2025-05-16T12:10:03.031338Z","shell.execute_reply":"2025-05-16T12:10:03.037726Z"}},"outputs":[{"execution_count":147,"output_type":"execute_result","data":{"text/plain":"Score\n5    339\n4     70\n3     37\n1     36\n2     18\nName: count, dtype: int64"},"metadata":{}}],"execution_count":147},{"cell_type":"code","source":"#BAsic NLTK\nexample = df['Text'][50]\nprint(example)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:03.305406Z","iopub.execute_input":"2025-05-16T12:10:03.305964Z","iopub.status.idle":"2025-05-16T12:10:03.312420Z","shell.execute_reply.started":"2025-05-16T12:10:03.305931Z","shell.execute_reply":"2025-05-16T12:10:03.311186Z"}},"outputs":[{"name":"stdout","text":"This oatmeal is not good. Its mushy, soft, I don't like it. Quaker Oats is the way to go.\n","output_type":"stream"}],"execution_count":148},{"cell_type":"code","source":"df.head()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:03.519750Z","iopub.execute_input":"2025-05-16T12:10:03.520532Z","iopub.status.idle":"2025-05-16T12:10:03.532963Z","shell.execute_reply.started":"2025-05-16T12:10:03.520503Z","shell.execute_reply":"2025-05-16T12:10:03.531976Z"}},"outputs":[{"execution_count":149,"output_type":"execute_result","data":{"text/plain":"   Id   ProductId          UserId                      ProfileName  \\\n0   1  B001E4KFG0  A3SGXH7AUHU8GW                       delmartian   \n1   2  B00813GRG4  A1D87F6ZCVE5NK                           dll pa   \n2   3  B000LQOCH0   ABXLMWJIXXAIN  Natalia Corres \"Natalia Corres\"   \n3   4  B000UA0QIQ  A395BORC6FGVXV                             Karl   \n4   5  B006K2ZZ7K  A1UQRSCLF8GW1T    Michael D. Bigham \"M. 
Wassir\"   \n\n   HelpfulnessNumerator  HelpfulnessDenominator  Score        Time  \\\n0                     1                       1      5  1303862400   \n1                     0                       0      1  1346976000   \n2                     1                       1      4  1219017600   \n3                     3                       3      2  1307923200   \n4                     0                       0      5  1350777600   \n\n                 Summary                                               Text  \n0  Good Quality Dog Food  I have bought several of the Vitality canned d...  \n1      Not as Advertised  Product arrived labeled as Jumbo Salted Peanut...  \n2  \"Delight\" says it all  This is a confection that has been around a fe...  \n3         Cough Medicine  If you are looking for the secret ingredient i...  \n4            Great taffy  Great taffy at a great price.  There was a wid...  ","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Id</th>\n      <th>ProductId</th>\n      <th>UserId</th>\n      <th>ProfileName</th>\n      <th>HelpfulnessNumerator</th>\n      <th>HelpfulnessDenominator</th>\n      <th>Score</th>\n      <th>Time</th>\n      <th>Summary</th>\n      <th>Text</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>B001E4KFG0</td>\n      <td>A3SGXH7AUHU8GW</td>\n      <td>delmartian</td>\n      <td>1</td>\n      <td>1</td>\n      <td>5</td>\n      <td>1303862400</td>\n      <td>Good Quality Dog Food</td>\n      <td>I have bought several of the Vitality canned d...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>B00813GRG4</td>\n      
<td>A1D87F6ZCVE5NK</td>\n      <td>dll pa</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1346976000</td>\n      <td>Not as Advertised</td>\n      <td>Product arrived labeled as Jumbo Salted Peanut...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3</td>\n      <td>B000LQOCH0</td>\n      <td>ABXLMWJIXXAIN</td>\n      <td>Natalia Corres \"Natalia Corres\"</td>\n      <td>1</td>\n      <td>1</td>\n      <td>4</td>\n      <td>1219017600</td>\n      <td>\"Delight\" says it all</td>\n      <td>This is a confection that has been around a fe...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4</td>\n      <td>B000UA0QIQ</td>\n      <td>A395BORC6FGVXV</td>\n      <td>Karl</td>\n      <td>3</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1307923200</td>\n      <td>Cough Medicine</td>\n      <td>If you are looking for the secret ingredient i...</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>5</td>\n      <td>B006K2ZZ7K</td>\n      <td>A1UQRSCLF8GW1T</td>\n      <td>Michael D. Bigham \"M. Wassir\"</td>\n      <td>0</td>\n      <td>0</td>\n      <td>5</td>\n      <td>1350777600</td>\n      <td>Great taffy</td>\n      <td>Great taffy at a great price.  
There was a wid...</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"metadata":{}}],"execution_count":149},{"cell_type":"code","source":"tokens = nltk.word_tokenize(example)\ntokens[:10]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:03.705245Z","iopub.execute_input":"2025-05-16T12:10:03.705540Z","iopub.status.idle":"2025-05-16T12:10:03.712449Z","shell.execute_reply.started":"2025-05-16T12:10:03.705522Z","shell.execute_reply":"2025-05-16T12:10:03.711521Z"}},"outputs":[{"execution_count":150,"output_type":"execute_result","data":{"text/plain":"['This', 'oatmeal', 'is', 'not', 'good', '.', 'Its', 'mushy', ',', 'soft']"},"metadata":{}}],"execution_count":150},{"cell_type":"code","source":"nltk.download('averaged_perceptron_tagger_eng')\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:03.894073Z","iopub.execute_input":"2025-05-16T12:10:03.894379Z","iopub.status.idle":"2025-05-16T12:10:04.033197Z","shell.execute_reply.started":"2025-05-16T12:10:03.894360Z","shell.execute_reply":"2025-05-16T12:10:04.032203Z"}},"outputs":[{"name":"stderr","text":"[nltk_data] Downloading package averaged_perceptron_tagger_eng to\n[nltk_data]     /usr/share/nltk_data...\n[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-\n[nltk_data]       date!\n","output_type":"stream"},{"execution_count":151,"output_type":"execute_result","data":{"text/plain":"True"},"metadata":{}}],"execution_count":151},{"cell_type":"code","source":"tagged = nltk.pos_tag(tokens)\ntagged[:10]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:04.074450Z","iopub.execute_input":"2025-05-16T12:10:04.074777Z","iopub.status.idle":"2025-05-16T12:10:04.082279Z","shell.execute_reply.started":"2025-05-16T12:10:04.074750Z","shell.execute_reply":"2025-05-16T12:10:04.081450Z"}},"outputs":[{"execution_count":152,"output_type":"execute_result","data":{"text/plain":"[('This', 'DT'),\n ('oatmeal', 'NN'),\n ('is', 'VBZ'),\n 
('not', 'RB'),\n ('good', 'JJ'),\n ('.', '.'),\n ('Its', 'PRP$'),\n ('mushy', 'NN'),\n (',', ','),\n ('soft', 'JJ')]"},"metadata":{}}],"execution_count":152},{"cell_type":"code","source":"nltk.download('maxent_ne_chunker_tab')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:04.239437Z","iopub.execute_input":"2025-05-16T12:10:04.239741Z","iopub.status.idle":"2025-05-16T12:10:04.260161Z","shell.execute_reply.started":"2025-05-16T12:10:04.239720Z","shell.execute_reply":"2025-05-16T12:10:04.259463Z"}},"outputs":[{"name":"stderr","text":"[nltk_data] Downloading package maxent_ne_chunker_tab to\n[nltk_data]     /usr/share/nltk_data...\n[nltk_data]   Package maxent_ne_chunker_tab is already up-to-date!\n","output_type":"stream"},{"execution_count":153,"output_type":"execute_result","data":{"text/plain":"True"},"metadata":{}}],"execution_count":153},{"cell_type":"code","source":"entities = nltk.chunk.ne_chunk(tagged)\nentities.pprint()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:04.435074Z","iopub.execute_input":"2025-05-16T12:10:04.435391Z","iopub.status.idle":"2025-05-16T12:10:04.825979Z","shell.execute_reply.started":"2025-05-16T12:10:04.435368Z","shell.execute_reply":"2025-05-16T12:10:04.824862Z"}},"outputs":[{"name":"stdout","text":"(S\n  This/DT\n  oatmeal/NN\n  is/VBZ\n  not/RB\n  good/JJ\n  ./.\n  Its/PRP$\n  mushy/NN\n  ,/,\n  soft/JJ\n  ,/,\n  I/PRP\n  do/VBP\n  n't/RB\n  like/VB\n  it/PRP\n  ./.\n  (ORGANIZATION Quaker/NNP Oats/NNPS)\n  is/VBZ\n  the/DT\n  way/NN\n  to/TO\n  go/VB\n  ./.)\n","output_type":"stream"}],"execution_count":154},{"cell_type":"markdown","source":"**VADER Sentiment Scoring**","metadata":{}},{"cell_type":"code","source":"from nltk.sentiment import SentimentIntensityAnalyzer\n#from tqdm.notebook import tqdm\n\n# `sia` is used again by the combined VADER + RoBERTa scoring loop further down,\n# so it must be created here for a fresh-kernel \"Run All\" to work.\nnltk.download('vader_lexicon', quiet=True)\nsia = 
SentimentIntensityAnalyzer()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:04.827443Z","iopub.execute_input":"2025-05-16T12:10:04.827812Z","iopub.status.idle":"2025-05-16T12:10:04.832293Z","shell.execute_reply.started":"2025-05-16T12:10:04.827756Z","shell.execute_reply":"2025-05-16T12:10:04.831116Z"}},"outputs":[],"execution_count":155},{"cell_type":"code","source":"#sia.polarity_scores('I am so happy!')\n#neg- Negative\n#neu- neutral\n#pos- Positive","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:04.991592Z","iopub.execute_input":"2025-05-16T12:10:04.991954Z","iopub.status.idle":"2025-05-16T12:10:04.996113Z","shell.execute_reply.started":"2025-05-16T12:10:04.991930Z","shell.execute_reply":"2025-05-16T12:10:04.995102Z"}},"outputs":[],"execution_count":156},{"cell_type":"code","source":"#sia.polarity_scores('This is the worst thing ever.')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:05.218122Z","iopub.execute_input":"2025-05-16T12:10:05.218417Z","iopub.status.idle":"2025-05-16T12:10:05.222713Z","shell.execute_reply.started":"2025-05-16T12:10:05.218398Z","shell.execute_reply":"2025-05-16T12:10:05.221903Z"}},"outputs":[],"execution_count":157},{"cell_type":"code","source":"#sia.polarity_scores(example)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:05.440102Z","iopub.execute_input":"2025-05-16T12:10:05.440412Z","iopub.status.idle":"2025-05-16T12:10:05.444950Z","shell.execute_reply.started":"2025-05-16T12:10:05.440388Z","shell.execute_reply":"2025-05-16T12:10:05.443762Z"}},"outputs":[],"execution_count":158},{"cell_type":"code","source":"# Run the polarity score on the entire dataset\n#res = {}\n#for i, row in tqdm(df.iterrows(), total=len(df)):\n    #text = row['Text']\n    #myid = row['Id']\n    #res[myid] = 
sia.polarity_scores(text)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:08.869615Z","iopub.execute_input":"2025-05-16T12:10:08.870022Z","iopub.status.idle":"2025-05-16T12:10:08.874501Z","shell.execute_reply.started":"2025-05-16T12:10:08.869993Z","shell.execute_reply":"2025-05-16T12:10:08.873509Z"}},"outputs":[],"execution_count":159},{"cell_type":"code","source":"#pd.DataFrame(res)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:09.418426Z","iopub.execute_input":"2025-05-16T12:10:09.419198Z","iopub.status.idle":"2025-05-16T12:10:09.422778Z","shell.execute_reply.started":"2025-05-16T12:10:09.419167Z","shell.execute_reply":"2025-05-16T12:10:09.421882Z"}},"outputs":[],"execution_count":160},{"cell_type":"code","source":"#vaders = pd.DataFrame(res).T\n#vaders = vaders.reset_index().rename(columns={'index': 'Id'})\n#vaders = vaders.merge(df, how='left')\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:18.701668Z","iopub.execute_input":"2025-05-16T12:10:18.702024Z","iopub.status.idle":"2025-05-16T12:10:18.705719Z","shell.execute_reply.started":"2025-05-16T12:10:18.702001Z","shell.execute_reply":"2025-05-16T12:10:18.704879Z"}},"outputs":[],"execution_count":161},{"cell_type":"code","source":"#vaders.head()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:19.037033Z","iopub.execute_input":"2025-05-16T12:10:19.037333Z","iopub.status.idle":"2025-05-16T12:10:19.041663Z","shell.execute_reply.started":"2025-05-16T12:10:19.037313Z","shell.execute_reply":"2025-05-16T12:10:19.040761Z"}},"outputs":[],"execution_count":162},{"cell_type":"markdown","source":"**Plot VADER results**","metadata":{}},{"cell_type":"code","source":"#ax = sns.barplot(data=vaders, x='Score', y='compound')\n#ax.set_title('Compund Score by Amazon Star 
Review')\n#plt.show()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:20.819122Z","iopub.execute_input":"2025-05-16T12:10:20.819415Z","iopub.status.idle":"2025-05-16T12:10:20.823941Z","shell.execute_reply.started":"2025-05-16T12:10:20.819396Z","shell.execute_reply":"2025-05-16T12:10:20.822415Z"}},"outputs":[],"execution_count":163},{"cell_type":"code","source":"#fig, axs = plt.subplots(1, 3, figsize=(12, 3))\n#sns.barplot(data=vaders, x='Score', y='pos', ax=axs[0])\n#sns.barplot(data=vaders, x='Score', y='neu', ax=axs[1])\n#sns.barplot(data=vaders, x='Score', y='neg', ax=axs[2])\n#axs[0].set_title('Positive')\n#axs[1].set_title('Neutral')\n#axs[2].set_title('Negative')\n#plt.tight_layout()\n#plt.show()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:21.203904Z","iopub.execute_input":"2025-05-16T12:10:21.204193Z","iopub.status.idle":"2025-05-16T12:10:21.208430Z","shell.execute_reply.started":"2025-05-16T12:10:21.204175Z","shell.execute_reply":"2025-05-16T12:10:21.207452Z"}},"outputs":[],"execution_count":164},{"cell_type":"markdown","source":"**Step 3. 
Roberta Pretrained Model**","metadata":{}},{"cell_type":"code","source":"from transformers import AutoTokenizer\nfrom transformers import AutoModelForSequenceClassification\nfrom scipy.special import softmax","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:25.045296Z","iopub.execute_input":"2025-05-16T12:10:25.045588Z","iopub.status.idle":"2025-05-16T12:10:25.050258Z","shell.execute_reply.started":"2025-05-16T12:10:25.045568Z","shell.execute_reply":"2025-05-16T12:10:25.049400Z"}},"outputs":[],"execution_count":165},{"cell_type":"code","source":"import torch\nfrom torch.utils.data import Dataset, DataLoader, random_split\nfrom torch.optim import AdamW\nfrom transformers import AutoTokenizer, AutoModelForSequenceClassification\nfrom sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\nimport pandas as pd\nfrom tqdm import tqdm\n\n# Seed torch so the train/test split and batch shuffling are repeatable.\nSEED = 42\ntorch.manual_seed(SEED)\n\n# Model and Tokenizer\nMODEL = \"cardiffnlp/twitter-roberta-base-sentiment\"\ntokenizer = AutoTokenizer.from_pretrained(MODEL)\n# The checkpoint ships a 3-class head; request 2 classes and let the\n# mismatched classifier weights be re-initialised.\nmodel = AutoModelForSequenceClassification.from_pretrained(\n    MODEL,\n    num_labels=2,\n    ignore_mismatched_sizes=True  # ✅ Fix size mismatch\n)\n\n# Custom Dataset\nclass ReviewDataset(Dataset):\n    \"\"\"Review texts paired with integer labels, tokenised one item at a time.\"\"\"\n\n    def __init__(self, texts, labels, tokenizer, max_len=128):\n        self.texts = texts\n        self.labels = labels\n        self.tokenizer = tokenizer\n        self.max_len = max_len\n\n    def __len__(self):\n        return len(self.texts)\n\n    def __getitem__(self, idx):\n        # Pad/truncate every review to max_len tokens so items stack into a batch.\n        encoding = self.tokenizer(self.texts[idx], truncation=True, padding='max_length', max_length=self.max_len, return_tensors=\"pt\")\n        return {\n            'input_ids': encoding['input_ids'].flatten(),\n            'attention_mask': encoding['attention_mask'].flatten(),\n            'labels': torch.tensor(self.labels[idx], dtype=torch.long)\n        }\n\n# Data prep: drop 3-star reviews, then label 1-2 stars as 0 and 4-5 stars as 1.\n# .copy() makes the filtered frame independent, so adding a column below does\n# not trigger SettingWithCopyWarning.\ndf = df[df['Score'].isin([1, 2, 4, 5])].copy()\ndf['label'] = df['Score'].apply(lambda x: 0 if x < 3 else 1)\ntexts = 
df['Text'].fillna(\"\").tolist()\nlabels = df['label'].tolist()\n\n# Dataset and DataLoaders: hold out 20% of the reviews so the metrics below\n# are computed on examples the model never saw during training.\ndataset = ReviewDataset(texts, labels, tokenizer)\nn_test = max(1, int(0.2 * len(dataset)))\ntrain_set, test_set = random_split(\n    dataset,\n    [len(dataset) - n_test, n_test],\n    generator=torch.Generator().manual_seed(SEED),\n)\ndataloader = DataLoader(train_set, batch_size=8, shuffle=True)\ntest_loader = DataLoader(test_set, batch_size=8)\n\n# Setup\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel = model.to(device)\noptimizer = AdamW(model.parameters(), lr=2e-5)\n\n# Training Loop (3 epochs)\nmodel.train()\nfor epoch in range(3):\n    total_loss = 0\n    print(f\"\\nEpoch {epoch+1}\")\n    for batch in tqdm(dataloader):\n        optimizer.zero_grad()\n        input_ids = batch['input_ids'].to(device)\n        attention_mask = batch['attention_mask'].to(device)\n        # Separate name: `labels` above still holds the full list of labels.\n        batch_labels = batch['labels'].to(device)\n\n        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=batch_labels)\n        loss = outputs.loss\n        loss.backward()\n        optimizer.step()\n        total_loss += loss.item()\n    \n    # Sum of the per-batch losses over the epoch (not a mean).\n    print(f\"Epoch {epoch+1} Loss: {total_loss:.4f}\")\n\n# Evaluation on the held-out split only\nmodel.eval()\nall_preds = []\nall_labels = []\n\nwith torch.no_grad():\n    for batch in test_loader:\n        input_ids = batch['input_ids'].to(device)\n        attention_mask = batch['attention_mask'].to(device)\n\n        outputs = model(input_ids=input_ids, attention_mask=attention_mask)\n        preds = torch.argmax(outputs.logits, dim=1)\n\n        all_preds.extend(preds.cpu().numpy())\n        all_labels.extend(batch['labels'].numpy())\n\n# Metrics (zero_division=0 keeps the scores defined if a class is never predicted)\nacc = accuracy_score(all_labels, all_preds)\nprec = precision_score(all_labels, all_preds, zero_division=0)\nrec = recall_score(all_labels, all_preds, zero_division=0)\nf1 = f1_score(all_labels, all_preds, zero_division=0)\n\nprint(f\"\\n✅ Evaluation Metrics:\")\nprint(f\"Accuracy:  {acc:.4f}\")\nprint(f\"Precision: {prec:.4f}\")\nprint(f\"Recall:    {rec:.4f}\")\nprint(f\"F1 Score:  
{f1:.4f}\")\n","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"model.save_pretrained('finetuned-model')\ntokenizer.save_pretrained('finetuned-model')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:48:23.780128Z","iopub.execute_input":"2025-05-16T12:48:23.780518Z","iopub.status.idle":"2025-05-16T12:48:25.872318Z","shell.execute_reply.started":"2025-05-16T12:48:23.780493Z","shell.execute_reply":"2025-05-16T12:48:25.871449Z"}},"outputs":[{"execution_count":193,"output_type":"execute_result","data":{"text/plain":"('finetuned-model/tokenizer_config.json',\n 'finetuned-model/special_tokens_map.json',\n 'finetuned-model/vocab.json',\n 'finetuned-model/merges.txt',\n 'finetuned-model/added_tokens.json',\n 'finetuned-model/tokenizer.json')"},"metadata":{}}],"execution_count":193},{"cell_type":"code","source":"# Reload the stock 3-class checkpoint: from here on `model` is the pretrained\n# sentiment model again; the fine-tuned weights were saved to 'finetuned-model'.\nMODEL = \"cardiffnlp/twitter-roberta-base-sentiment\"\ntokenizer = AutoTokenizer.from_pretrained(MODEL)\nmodel = AutoModelForSequenceClassification.from_pretrained(MODEL)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:28:58.049543Z","iopub.execute_input":"2025-05-16T12:28:58.050393Z","iopub.status.idle":"2025-05-16T12:28:59.009301Z","shell.execute_reply.started":"2025-05-16T12:28:58.050362Z","shell.execute_reply":"2025-05-16T12:28:59.008462Z"}},"outputs":[],"execution_count":168},{"cell_type":"code","source":"# Run for Roberta Model\nencoded_text = tokenizer(example, return_tensors='pt')\noutput = model(**encoded_text)\nscores = output[0][0].detach().numpy()\nscores = softmax(scores)\nscores_dict = {\n    'roberta_neg' : scores[0],\n    'roberta_neu' : scores[1],\n    'roberta_pos' : 
scores[2]\n}\nprint(scores_dict)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:29:00.589339Z","iopub.execute_input":"2025-05-16T12:29:00.589638Z","iopub.status.idle":"2025-05-16T12:29:00.710322Z","shell.execute_reply.started":"2025-05-16T12:29:00.589618Z","shell.execute_reply":"2025-05-16T12:29:00.709497Z"}},"outputs":[{"name":"stdout","text":"{'roberta_neg': 0.97635514, 'roberta_neu': 0.020687463, 'roberta_pos': 0.0029573694}\n","output_type":"stream"}],"execution_count":169},{"cell_type":"code","source":"def polarity_scores_roberta(example):\n    \"\"\"Score `example` with the RoBERTa sentiment model.\n\n    Returns a dict with the keys 'roberta_neg', 'roberta_neu' and 'roberta_pos'\n    holding the softmax of the model's three output logits.\n    \"\"\"\n    # Cap the input at 512 tokens: without truncation, long reviews make the\n    # forward pass raise RuntimeError and the review is lost from the results.\n    encoded_text = tokenizer(example, return_tensors='pt', truncation=True, max_length=512)\n    # Inference only, so skip building the autograd graph.\n    with torch.no_grad():\n        output = model(**encoded_text)\n    scores = softmax(output[0][0].numpy())\n    scores_dict = {\n        'roberta_neg' : scores[0],\n        'roberta_neu' : scores[1],\n        'roberta_pos' : scores[2]\n    }\n    return scores_dict","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:29:03.504227Z","iopub.execute_input":"2025-05-16T12:29:03.504516Z","iopub.status.idle":"2025-05-16T12:29:03.510388Z","shell.execute_reply.started":"2025-05-16T12:29:03.504496Z","shell.execute_reply":"2025-05-16T12:29:03.509286Z"}},"outputs":[],"execution_count":170},{"cell_type":"code","source":"# Score every review with both VADER and RoBERTa, keyed by review Id.\nres = {}\nfor i, row in tqdm(df.iterrows(), total=len(df)):\n    # Read the fields before the try block so the message in the except\n    # branch can always name the review that failed.\n    text = row['Text']\n    myid = row['Id']\n    try:\n        vader_result = sia.polarity_scores(text)\n        vader_result_rename = {f\"vader_{key}\": value for key, value in vader_result.items()}\n        roberta_result = polarity_scores_roberta(text)\n        both = {**vader_result_rename, **roberta_result}\n        res[myid] = both\n    except RuntimeError:\n        # Best effort: report the review the model could not score and move on.\n        print(f'Broke for id 
{myid}')","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"results_df = pd.DataFrame(res).T\nresults_df = results_df.reset_index().rename(columns={'index': 'Id'})\n# Name the join key explicitly instead of relying on the shared column names.\nresults_df = results_df.merge(df, how='left', on='Id')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:42.305238Z","iopub.execute_input":"2025-05-16T12:30:42.305576Z","iopub.status.idle":"2025-05-16T12:30:42.331635Z","shell.execute_reply.started":"2025-05-16T12:30:42.305549Z","shell.execute_reply":"2025-05-16T12:30:42.330498Z"}},"outputs":[],"execution_count":172},{"cell_type":"code","source":"from sklearn.metrics import 
precision_score,recall_score,f1_score","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:43.444222Z","iopub.execute_input":"2025-05-16T12:30:43.444545Z","iopub.status.idle":"2025-05-16T12:30:43.449101Z","shell.execute_reply.started":"2025-05-16T12:30:43.444521Z","shell.execute_reply":"2025-05-16T12:30:43.447916Z"}},"outputs":[],"execution_count":173},{"cell_type":"code","source":"results_df","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:44.704279Z","iopub.execute_input":"2025-05-16T12:30:44.704591Z","iopub.status.idle":"2025-05-16T12:30:44.729367Z","shell.execute_reply.started":"2025-05-16T12:30:44.704568Z","shell.execute_reply":"2025-05-16T12:30:44.728394Z"}},"outputs":[{"execution_count":174,"output_type":"execute_result","data":{"text/plain":"      Id  vader_neg  vader_neu  vader_pos  vader_compound  roberta_neg  \\\n0      1      0.000      0.695      0.305          0.9441     0.009624   \n1      2      0.138      0.862      0.000         -0.5664     0.508986   \n2      3      0.091      0.754      0.155          0.8265     0.003229   \n3      4      0.000      1.000      0.000          0.0000     0.002295   \n4      5      0.000      0.552      0.448          0.9468     0.001635   \n..   ...        ...        ...        ...             ...          ...   
\n456  496      0.000      0.554      0.446          0.9725     0.001906   \n457  497      0.059      0.799      0.142          0.7833     0.004415   \n458  498      0.025      0.762      0.212          0.9848     0.006427   \n459  499      0.041      0.904      0.055          0.1280     0.865614   \n460  500      0.000      0.678      0.322          0.9811     0.002440   \n\n     roberta_neu  roberta_pos   ProductId          UserId  \\\n0       0.049980     0.940395  B001E4KFG0  A3SGXH7AUHU8GW   \n1       0.452414     0.038600  B00813GRG4  A1D87F6ZCVE5NK   \n2       0.098067     0.898704  B000LQOCH0   ABXLMWJIXXAIN   \n3       0.090219     0.907486  B000UA0QIQ  A395BORC6FGVXV   \n4       0.010302     0.988063  B006K2ZZ7K  A1UQRSCLF8GW1T   \n..           ...          ...         ...             ...   \n456     0.009862     0.988232  B000G6RYNE   APGAA43E3WPN7   \n457     0.034215     0.961369  B000G6RYNE    ABR7HU5H1KNE   \n458     0.074537     0.919036  B000G6RYNE   AJQD2WWJYOYFQ   \n459     0.119366     0.015020  B000G6RYNE  A16YH487W9ZYO0   \n460     0.011327     0.986233  B000G6RYNE   A83YQC1XOU4CS   \n\n                         ProfileName  HelpfulnessNumerator  \\\n0                         delmartian                     1   \n1                             dll pa                     0   \n2    Natalia Corres \"Natalia Corres\"                     1   \n3                               Karl                     3   \n4      Michael D. Bigham \"M. Wassir\"                     0   \n..                               ...                   ...   \n456                           Darren                     0   \n457                            Keith                     0   \n458                          bubbles                     0   \n459                 Bruce G. Lindsay                     0   \n460                         J. 
Baker                     0   \n\n     HelpfulnessDenominator  Score        Time  \\\n0                         1      5  1303862400   \n1                         0      1  1346976000   \n2                         1      4  1219017600   \n3                         3      2  1307923200   \n4                         0      5  1350777600   \n..                      ...    ...         ...   \n456                       0      5  1201392000   \n457                       0      5  1196726400   \n458                       0      4  1186617600   \n459                       0      4  1184198400   \n460                       0      5  1183420800   \n\n                             Summary  \\\n0              Good Quality Dog Food   \n1                  Not as Advertised   \n2              \"Delight\" says it all   \n3                     Cough Medicine   \n4                        Great taffy   \n..                               ...   \n456                    amazing chips   \n457                   Best Chip Ever   \n458  Tangy, spicy, and sweet- oh my!   \n459        An indulgence with a bite   \n460                The best I've had   \n\n                                                  Text  label  \n0    I have bought several of the Vitality canned d...      1  \n1    Product arrived labeled as Jumbo Salted Peanut...      0  \n2    This is a confection that has been around a fe...      1  \n3    If you are looking for the secret ingredient i...      0  \n4    Great taffy at a great price.  There was a wid...      1  \n..                                                 ...    ...  \n456  i rarely eat chips but i saw these and tried t...      1  \n457  This is easily the best potato chip that I hav...      1  \n458  Kettle Chips Spicy Thai potato chips have the ...      1  \n459  Okay, I should not eat potato chips, nor shoul...      1  \n460  I don't write very many reviews but I have to ...      
1  \n\n[461 rows x 18 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Id</th>\n      <th>vader_neg</th>\n      <th>vader_neu</th>\n      <th>vader_pos</th>\n      <th>vader_compound</th>\n      <th>roberta_neg</th>\n      <th>roberta_neu</th>\n      <th>roberta_pos</th>\n      <th>ProductId</th>\n      <th>UserId</th>\n      <th>ProfileName</th>\n      <th>HelpfulnessNumerator</th>\n      <th>HelpfulnessDenominator</th>\n      <th>Score</th>\n      <th>Time</th>\n      <th>Summary</th>\n      <th>Text</th>\n      <th>label</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>0.000</td>\n      <td>0.695</td>\n      <td>0.305</td>\n      <td>0.9441</td>\n      <td>0.009624</td>\n      <td>0.049980</td>\n      <td>0.940395</td>\n      <td>B001E4KFG0</td>\n      <td>A3SGXH7AUHU8GW</td>\n      <td>delmartian</td>\n      <td>1</td>\n      <td>1</td>\n      <td>5</td>\n      <td>1303862400</td>\n      <td>Good Quality Dog Food</td>\n      <td>I have bought several of the Vitality canned d...</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>0.138</td>\n      <td>0.862</td>\n      <td>0.000</td>\n      <td>-0.5664</td>\n      <td>0.508986</td>\n      <td>0.452414</td>\n      <td>0.038600</td>\n      <td>B00813GRG4</td>\n      <td>A1D87F6ZCVE5NK</td>\n      <td>dll pa</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1346976000</td>\n      <td>Not as Advertised</td>\n      <td>Product arrived labeled as Jumbo Salted Peanut...</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3</td>\n      <td>0.091</td>\n      
<td>0.754</td>\n      <td>0.155</td>\n      <td>0.8265</td>\n      <td>0.003229</td>\n      <td>0.098067</td>\n      <td>0.898704</td>\n      <td>B000LQOCH0</td>\n      <td>ABXLMWJIXXAIN</td>\n      <td>Natalia Corres \"Natalia Corres\"</td>\n      <td>1</td>\n      <td>1</td>\n      <td>4</td>\n      <td>1219017600</td>\n      <td>\"Delight\" says it all</td>\n      <td>This is a confection that has been around a fe...</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4</td>\n      <td>0.000</td>\n      <td>1.000</td>\n      <td>0.000</td>\n      <td>0.0000</td>\n      <td>0.002295</td>\n      <td>0.090219</td>\n      <td>0.907486</td>\n      <td>B000UA0QIQ</td>\n      <td>A395BORC6FGVXV</td>\n      <td>Karl</td>\n      <td>3</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1307923200</td>\n      <td>Cough Medicine</td>\n      <td>If you are looking for the secret ingredient i...</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>5</td>\n      <td>0.000</td>\n      <td>0.552</td>\n      <td>0.448</td>\n      <td>0.9468</td>\n      <td>0.001635</td>\n      <td>0.010302</td>\n      <td>0.988063</td>\n      <td>B006K2ZZ7K</td>\n      <td>A1UQRSCLF8GW1T</td>\n      <td>Michael D. Bigham \"M. Wassir\"</td>\n      <td>0</td>\n      <td>0</td>\n      <td>5</td>\n      <td>1350777600</td>\n      <td>Great taffy</td>\n      <td>Great taffy at a great price.  
There was a wid...</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>456</th>\n      <td>496</td>\n      <td>0.000</td>\n      <td>0.554</td>\n      <td>0.446</td>\n      <td>0.9725</td>\n      <td>0.001906</td>\n      <td>0.009862</td>\n      <td>0.988232</td>\n      <td>B000G6RYNE</td>\n      <td>APGAA43E3WPN7</td>\n      <td>Darren</td>\n      <td>0</td>\n      <td>0</td>\n      <td>5</td>\n      <td>1201392000</td>\n      <td>amazing chips</td>\n      <td>i rarely eat chips but i saw these and tried t...</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>457</th>\n      <td>497</td>\n      <td>0.059</td>\n      <td>0.799</td>\n      <td>0.142</td>\n      <td>0.7833</td>\n      <td>0.004415</td>\n      <td>0.034215</td>\n      <td>0.961369</td>\n      <td>B000G6RYNE</td>\n      <td>ABR7HU5H1KNE</td>\n      <td>Keith</td>\n      <td>0</td>\n      <td>0</td>\n      <td>5</td>\n      <td>1196726400</td>\n      <td>Best Chip Ever</td>\n      <td>This is easily the best potato chip that I hav...</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>458</th>\n      <td>498</td>\n      <td>0.025</td>\n      <td>0.762</td>\n      <td>0.212</td>\n      <td>0.9848</td>\n      <td>0.006427</td>\n      <td>0.074537</td>\n      <td>0.919036</td>\n      <td>B000G6RYNE</td>\n      <td>AJQD2WWJYOYFQ</td>\n      <td>bubbles</td>\n      <td>0</td>\n      <td>0</td>\n      <td>4</td>\n      <td>1186617600</td>\n      <td>Tangy, spicy, and sweet- oh my!</td>\n      <td>Kettle Chips Spicy Thai potato chips have the ...</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      
<th>459</th>\n      <td>499</td>\n      <td>0.041</td>\n      <td>0.904</td>\n      <td>0.055</td>\n      <td>0.1280</td>\n      <td>0.865614</td>\n      <td>0.119366</td>\n      <td>0.015020</td>\n      <td>B000G6RYNE</td>\n      <td>A16YH487W9ZYO0</td>\n      <td>Bruce G. Lindsay</td>\n      <td>0</td>\n      <td>0</td>\n      <td>4</td>\n      <td>1184198400</td>\n      <td>An indulgence with a bite</td>\n      <td>Okay, I should not eat potato chips, nor shoul...</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>460</th>\n      <td>500</td>\n      <td>0.000</td>\n      <td>0.678</td>\n      <td>0.322</td>\n      <td>0.9811</td>\n      <td>0.002440</td>\n      <td>0.011327</td>\n      <td>0.986233</td>\n      <td>B000G6RYNE</td>\n      <td>A83YQC1XOU4CS</td>\n      <td>J. Baker</td>\n      <td>0</td>\n      <td>0</td>\n      <td>5</td>\n      <td>1183420800</td>\n      <td>The best I've had</td>\n      <td>I don't write very many reviews but I have to ...</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n<p>461 rows × 18 columns</p>\n</div>"},"metadata":{}}],"execution_count":174},{"cell_type":"code","source":"results_df.columns","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:49.304348Z","iopub.execute_input":"2025-05-16T12:30:49.304698Z","iopub.status.idle":"2025-05-16T12:30:49.311838Z","shell.execute_reply.started":"2025-05-16T12:30:49.304673Z","shell.execute_reply":"2025-05-16T12:30:49.310838Z"}},"outputs":[{"execution_count":175,"output_type":"execute_result","data":{"text/plain":"Index(['Id', 'vader_neg', 'vader_neu', 'vader_pos', 'vader_compound',\n       'roberta_neg', 'roberta_neu', 'roberta_pos', 'ProductId', 'UserId',\n       'ProfileName', 'HelpfulnessNumerator', 'HelpfulnessDenominator',\n       'Score', 'Time', 'Summary', 'Text', 'label'],\n      dtype='object')"},"metadata":{}}],"execution_count":175},{"cell_type":"code","source":"def top_review_text(frame, score, by):\n    \"\"\"Return the Text of the review rated `score` stars that ranks highest on column `by`.\n\n    Used to surface reviews where a sentiment score disagrees with the star\n    rating, e.g. the most 'positive' 1-star review.\n\n    Args:\n        frame: DataFrame with 'Score' and 'Text' columns plus the column named by `by`.\n        score: Star rating to filter on.\n        by: Name of the column to rank by, highest value first.\n\n    Returns:\n        The 'Text' value of the top-ranked matching row.\n\n    Raises:\n        ValueError: If no row has the requested score.\n    \"\"\"\n    matches = frame.loc[frame['Score'] == score]\n    if matches.empty:\n        raise ValueError(f'no reviews with Score == {score}')\n    return matches.sort_values(by, ascending=False)['Text'].values[0]","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"top_review_text(results_df, score=1, 
by='roberta_pos')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:50.850082Z","iopub.execute_input":"2025-05-16T12:30:50.850454Z","iopub.status.idle":"2025-05-16T12:30:50.865981Z","shell.execute_reply.started":"2025-05-16T12:30:50.850425Z","shell.execute_reply":"2025-05-16T12:30:50.865045Z"}},"outputs":[{"execution_count":176,"output_type":"execute_result","data":{"text/plain":"'I felt energized within five minutes, but it lasted for about 45 minutes. I paid $3.99 for this drink. I could have just drunk a cup of coffee and saved my money.'"},"metadata":{}}],"execution_count":176},{"cell_type":"code","source":"top_review_text(results_df, score=1, by='vader_pos')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:52.729686Z","iopub.execute_input":"2025-05-16T12:30:52.730052Z","iopub.status.idle":"2025-05-16T12:30:52.743034Z","shell.execute_reply.started":"2025-05-16T12:30:52.730029Z","shell.execute_reply":"2025-05-16T12:30:52.741997Z"}},"outputs":[{"execution_count":177,"output_type":"execute_result","data":{"text/plain":"'So we cancelled the order.  It was cancelled without any problem.  That is a positive note...'"},"metadata":{}}],"execution_count":177},{"cell_type":"code","source":"top_review_text(results_df, score=5, by='roberta_neg')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:54.004309Z","iopub.execute_input":"2025-05-16T12:30:54.004653Z","iopub.status.idle":"2025-05-16T12:30:54.017629Z","shell.execute_reply.started":"2025-05-16T12:30:54.004628Z","shell.execute_reply":"2025-05-16T12:30:54.016725Z"}},"outputs":[{"execution_count":178,"output_type":"execute_result","data":{"text/plain":"'this was sooooo deliscious but too bad i ate em too fast and gained 2 pds! 
my fault'"},"metadata":{}}],"execution_count":178},{"cell_type":"code","source":"top_review_text(results_df, score=5, by='vader_neg')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:56.659434Z","iopub.execute_input":"2025-05-16T12:30:56.659812Z","iopub.status.idle":"2025-05-16T12:30:56.672111Z","shell.execute_reply.started":"2025-05-16T12:30:56.659763Z","shell.execute_reply":"2025-05-16T12:30:56.671030Z"}},"outputs":[{"execution_count":179,"output_type":"execute_result","data":{"text/plain":"'this was sooooo deliscious but too bad i ate em too fast and gained 2 pds! my fault'"},"metadata":{}}],"execution_count":179},{"cell_type":"code","source":"from transformers import pipeline\n\n# Pin the checkpoint and revision that the task default resolved to, so a\n# re-run cannot silently switch models if the library default changes.\nsent_pipeline = pipeline(\n    \"sentiment-analysis\",\n    model=\"distilbert/distilbert-base-uncased-finetuned-sst-2-english\",\n    revision=\"714eb0f\",\n)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:01.859224Z","iopub.execute_input":"2025-05-16T12:31:01.859562Z","iopub.status.idle":"2025-05-16T12:31:02.059059Z","shell.execute_reply.started":"2025-05-16T12:31:01.859529Z","shell.execute_reply":"2025-05-16T12:31:02.057850Z"}},"outputs":[{"name":"stderr","text":"No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).\nUsing a pipeline without specifying a model name and revision in production is not recommended.\nDevice set to use cpu\n","output_type":"stream"}],"execution_count":180},{"cell_type":"code","source":"sent_pipeline('I Love sentiment 
analysis!')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:07.224530Z","iopub.execute_input":"2025-05-16T12:31:07.224936Z","iopub.status.idle":"2025-05-16T12:31:07.323207Z","shell.execute_reply.started":"2025-05-16T12:31:07.224910Z","shell.execute_reply":"2025-05-16T12:31:07.322233Z"}},"outputs":[{"execution_count":181,"output_type":"execute_result","data":{"text/plain":"[{'label': 'POSITIVE', 'score': 0.9997853636741638}]"},"metadata":{}}],"execution_count":181},{"cell_type":"code","source":"sent_pipeline('I hate sentiment analysis!')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:09.339165Z","iopub.execute_input":"2025-05-16T12:31:09.339535Z","iopub.status.idle":"2025-05-16T12:31:09.374031Z","shell.execute_reply.started":"2025-05-16T12:31:09.339501Z","shell.execute_reply":"2025-05-16T12:31:09.373182Z"}},"outputs":[{"execution_count":182,"output_type":"execute_result","data":{"text/plain":"[{'label': 'NEGATIVE', 'score': 0.9992958307266235}]"},"metadata":{}}],"execution_count":182},{"cell_type":"code","source":"sent_pipeline('Make sure to like and subscribe!')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:12.169142Z","iopub.execute_input":"2025-05-16T12:31:12.169450Z","iopub.status.idle":"2025-05-16T12:31:12.212549Z","shell.execute_reply.started":"2025-05-16T12:31:12.169427Z","shell.execute_reply":"2025-05-16T12:31:12.211123Z"}},"outputs":[{"execution_count":183,"output_type":"execute_result","data":{"text/plain":"[{'label': 'POSITIVE', 'score': 0.9991742968559265}]"},"metadata":{}}],"execution_count":183},{"cell_type":"code","source":"sent_pipeline('Make sure to not like and 
subscribe!')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:19.564328Z","iopub.execute_input":"2025-05-16T12:31:19.564614Z","iopub.status.idle":"2025-05-16T12:31:19.626677Z","shell.execute_reply.started":"2025-05-16T12:31:19.564591Z","shell.execute_reply":"2025-05-16T12:31:19.625882Z"}},"outputs":[{"execution_count":185,"output_type":"execute_result","data":{"text/plain":"[{'label': 'NEGATIVE', 'score': 0.8641592264175415}]"},"metadata":{}}],"execution_count":185},{"cell_type":"code","source":"sent_pipeline('booo')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:22.404560Z","iopub.execute_input":"2025-05-16T12:31:22.404972Z","iopub.status.idle":"2025-05-16T12:31:22.436025Z","shell.execute_reply.started":"2025-05-16T12:31:22.404945Z","shell.execute_reply":"2025-05-16T12:31:22.434997Z"}},"outputs":[{"execution_count":186,"output_type":"execute_result","data":{"text/plain":"[{'label': 'NEGATIVE', 'score': 0.9936267137527466}]"},"metadata":{}}],"execution_count":186},{"cell_type":"code","source":"sent_pipeline('good')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:23.698936Z","iopub.execute_input":"2025-05-16T12:31:23.699242Z","iopub.status.idle":"2025-05-16T12:31:23.730621Z","shell.execute_reply.started":"2025-05-16T12:31:23.699220Z","shell.execute_reply":"2025-05-16T12:31:23.729815Z"}},"outputs":[{"execution_count":187,"output_type":"execute_result","data":{"text/plain":"[{'label': 'POSITIVE', 'score': 
0.9998161196708679}]"},"metadata":{}}],"execution_count":187},{"cell_type":"code","source":"sent_pipeline('bad')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:26.589571Z","iopub.execute_input":"2025-05-16T12:31:26.589916Z","iopub.status.idle":"2025-05-16T12:31:26.620602Z","shell.execute_reply.started":"2025-05-16T12:31:26.589890Z","shell.execute_reply":"2025-05-16T12:31:26.619886Z"}},"outputs":[{"execution_count":188,"output_type":"execute_result","data":{"text/plain":"[{'label': 'NEGATIVE', 'score': 0.999782383441925}]"},"metadata":{}}],"execution_count":188},{"cell_type":"code","source":"sent_pipeline('i like it')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:36.333938Z","iopub.execute_input":"2025-05-16T12:31:36.334229Z","iopub.status.idle":"2025-05-16T12:31:36.366534Z","shell.execute_reply.started":"2025-05-16T12:31:36.334209Z","shell.execute_reply":"2025-05-16T12:31:36.365299Z"}},"outputs":[{"execution_count":190,"output_type":"execute_result","data":{"text/plain":"[{'label': 'POSITIVE', 'score': 0.9998593330383301}]"},"metadata":{}}],"execution_count":190},{"cell_type":"code","source":"# Dynamic quantization of `model`: torch.nn.Linear layers are targeted with dtype qint8.\n# NOTE(review): `model` is defined outside this section of the notebook - confirm it\n# exists after Restart & Run All.\nquantized_model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)\nprint(\"Quantized model ready\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:45.384499Z","iopub.execute_input":"2025-05-16T12:31:45.384869Z","iopub.status.idle":"2025-05-16T12:31:46.332163Z","shell.execute_reply.started":"2025-05-16T12:31:45.384825Z","shell.execute_reply":"2025-05-16T12:31:46.331197Z"}},"outputs":[{"name":"stdout","text":"Quantized model ready\n","output_type":"stream"}],"execution_count":191},{"cell_type":"code","source":"# Store only the state_dict of the quantized model, not the module object.\n# NOTE(review): the file name says 'sentient' - possibly a typo for 'sentiment';\n# confirm before renaming, other code may depend on this path.\ntorch.save(quantized_model.state_dict(), 
\"sentient_model.pt\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:47.999286Z","iopub.execute_input":"2025-05-16T12:31:47.999611Z","iopub.status.idle":"2025-05-16T12:31:49.408218Z","shell.execute_reply.started":"2025-05-16T12:31:47.999588Z","shell.execute_reply":"2025-05-16T12:31:49.407236Z"}},"outputs":[],"execution_count":192},{"cell_type":"code","source":"# Load the tokenizer and the fine-tuned classifier from the same saved directory.\n# NOTE(review): presumably written by a training step elsewhere in the notebook - confirm.\ntokenizer = AutoTokenizer.from_pretrained('/kaggle/working/finetuned-model')\nfinetuned_model = AutoModelForSequenceClassification.from_pretrained('/kaggle/working/finetuned-model')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:54:13.194195Z","iopub.execute_input":"2025-05-16T12:54:13.194518Z","iopub.status.idle":"2025-05-16T12:54:13.558618Z","shell.execute_reply.started":"2025-05-16T12:54:13.194495Z","shell.execute_reply":"2025-05-16T12:54:13.557691Z"}},"outputs":[],"execution_count":205},{"cell_type":"code","source":"# Use the GPU when PyTorch can see one; otherwise run on the CPU.\nif torch.cuda.is_available():\n    device = 'cuda'\nelse:\n    device = 'cpu'","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:54:15.548687Z","iopub.execute_input":"2025-05-16T12:54:15.549599Z","iopub.status.idle":"2025-05-16T12:54:15.553887Z","shell.execute_reply.started":"2025-05-16T12:54:15.549565Z","shell.execute_reply":"2025-05-16T12:54:15.552866Z"}},"outputs":[],"execution_count":206},{"cell_type":"code","source":"# Cast the weights to float16 and move the model to `device`.\n# NOTE: nn.Module.to() converts the module in place and returns the same object, so\n# `finetuned_model` is float16 as well after this line; this is a half-precision\n# cast, not integer quantization, despite the variable name.\nquantize_model = finetuned_model.to(dtype=torch.float16, 
device=device)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:54:17.678867Z","iopub.execute_input":"2025-05-16T12:54:17.679186Z","iopub.status.idle":"2025-05-16T12:54:17.900814Z","shell.execute_reply.started":"2025-05-16T12:54:17.679164Z","shell.execute_reply":"2025-05-16T12:54:17.900013Z"}},"outputs":[],"execution_count":207},{"cell_type":"code","source":"quantize_model.save_pretrained('quantized-model')\ntokenizer.save_pretrained('quantized-model')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:54:29.124181Z","iopub.execute_input":"2025-05-16T12:54:29.124534Z","iopub.status.idle":"2025-05-16T12:54:30.159407Z","shell.execute_reply.started":"2025-05-16T12:54:29.124509Z","shell.execute_reply":"2025-05-16T12:54:30.158502Z"}},"outputs":[{"execution_count":209,"output_type":"execute_result","data":{"text/plain":"('quantized-model/tokenizer_config.json',\n 'quantized-model/special_tokens_map.json',\n 'quantized-model/vocab.json',\n 'quantized-model/merges.txt',\n 'quantized-model/added_tokens.json',\n 'quantized-model/tokenizer.json')"},"metadata":{}}],"execution_count":209},{"cell_type":"code","source":"import torch.nn.functional as F","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:58:03.703528Z","iopub.execute_input":"2025-05-16T12:58:03.703881Z","iopub.status.idle":"2025-05-16T12:58:03.708330Z","shell.execute_reply.started":"2025-05-16T12:58:03.703854Z","shell.execute_reply":"2025-05-16T12:58:03.707228Z"}},"outputs":[],"execution_count":212},{"cell_type":"code","source":"# Predict\ndef predict(text):\n    \"\"\"Classify the sentiment of one review with the float16 fine-tuned model.\n\n    Args:\n        text: The review text to classify (a single string; one prediction\n            is returned).\n\n    Returns:\n        A string 'Sentiment: <label> (Confidence: <p>)', where <p> is the\n        softmax probability of the predicted class.\n    \"\"\"\n    inputs = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True, max_length=128)\n    # The tokenizer returns CPU tensors; move them to the model's device,\n    # otherwise the forward pass fails whenever the model sits on CUDA.\n    inputs = inputs.to(quantize_model.device)\n    with torch.no_grad():\n        outputs = quantize_model(**inputs)\n    # The model runs in float16: upcast the logits so softmax and the\n    # reported confidence are computed in float32.\n    probs = F.softmax(outputs.logits.float(), dim=1)\n    pred = torch.argmax(probs, dim=1).item()\n    # NOTE(review): assumes the classifier head has exactly these three\n    # classes in this order - confirm against the fine-tuning labels.\n    label_map = {0: \"Negative\", 1: \"Neutral\", 2: \"Positive\"}\n    return f\"Sentiment: {label_map[pred]} 
(Confidence: {probs[0][pred]:.2f})\"","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# Test predictions\nprint(\"\\nTest Predictions:\")\nprint(predict(\"the product quality is just so so\"))","metadata":{"trusted":true},"outputs":[],"execution_count":null}]}