{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.11.11","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":2157,"sourceType":"datasetVersion","datasetId":18}],"dockerImageVersionId":31040,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\nplt.style.use('ggplot')\n\nimport nltk","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:09:53.762318Z","iopub.execute_input":"2025-05-16T12:09:53.762680Z","iopub.status.idle":"2025-05-16T12:09:53.768747Z","shell.execute_reply.started":"2025-05-16T12:09:53.762652Z","shell.execute_reply":"2025-05-16T12:09:53.767465Z"}},"outputs":[],"execution_count":142},{"cell_type":"code","source":"# Read in data\ndf = pd.read_csv('/kaggle/input/amazon-fine-food-reviews/Reviews.csv')\nprint(df.shape)\ndf = df.head(500)\nprint(df.shape)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:09:55.047299Z","iopub.execute_input":"2025-05-16T12:09:55.047642Z","iopub.status.idle":"2025-05-16T12:09:59.654257Z","shell.execute_reply.started":"2025-05-16T12:09:55.047615Z","shell.execute_reply":"2025-05-16T12:09:59.653504Z"}},"outputs":[{"name":"stdout","text":"(568454, 10)\n(500, 10)\n","output_type":"stream"}],"execution_count":143},{"cell_type":"code","source":"ax = df['Score'].value_counts()\nax","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:03.031026Z","iopub.execute_input":"2025-05-16T12:10:03.031363Z","iopub.status.idle":"2025-05-16T12:10:03.038529Z","shell.execute_reply.started":"2025-05-16T12:10:03.031338Z","shell.execute_reply":"2025-05-16T12:10:03.037726Z"}},"outputs":[{"execution_count":147,"output_type":"execute_result","data":{"text/plain":"Score\n5 339\n4 70\n3 37\n1 36\n2 18\nName: count, dtype: int64"},"metadata":{}}],"execution_count":147},{"cell_type":"code","source":"#BAsic NLTK\nexample = df['Text'][50]\nprint(example)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:03.305406Z","iopub.execute_input":"2025-05-16T12:10:03.305964Z","iopub.status.idle":"2025-05-16T12:10:03.312420Z","shell.execute_reply.started":"2025-05-16T12:10:03.305931Z","shell.execute_reply":"2025-05-16T12:10:03.311186Z"}},"outputs":[{"name":"stdout","text":"This oatmeal is not good. Its mushy, soft, I don't like it. Quaker Oats is the way to go.\n","output_type":"stream"}],"execution_count":148},{"cell_type":"code","source":"df.head()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:03.519750Z","iopub.execute_input":"2025-05-16T12:10:03.520532Z","iopub.status.idle":"2025-05-16T12:10:03.532963Z","shell.execute_reply.started":"2025-05-16T12:10:03.520503Z","shell.execute_reply":"2025-05-16T12:10:03.531976Z"}},"outputs":[{"execution_count":149,"output_type":"execute_result","data":{"text/plain":" Id ProductId UserId ProfileName \\\n0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian \n1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa \n2 3 B000LQOCH0 ABXLMWJIXXAIN Natalia Corres \"Natalia Corres\" \n3 4 B000UA0QIQ A395BORC6FGVXV Karl \n4 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham \"M. Wassir\" \n\n HelpfulnessNumerator HelpfulnessDenominator Score Time \\\n0 1 1 5 1303862400 \n1 0 0 1 1346976000 \n2 1 1 4 1219017600 \n3 3 3 2 1307923200 \n4 0 0 5 1350777600 \n\n Summary Text \n0 Good Quality Dog Food I have bought several of the Vitality canned d... \n1 Not as Advertised Product arrived labeled as Jumbo Salted Peanut... \n2 \"Delight\" says it all This is a confection that has been around a fe... \n3 Cough Medicine If you are looking for the secret ingredient i... \n4 Great taffy Great taffy at a great price. There was a wid... ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
IdProductIdUserIdProfileNameHelpfulnessNumeratorHelpfulnessDenominatorScoreTimeSummaryText
01B001E4KFG0A3SGXH7AUHU8GWdelmartian1151303862400Good Quality Dog FoodI have bought several of the Vitality canned d...
12B00813GRG4A1D87F6ZCVE5NKdll pa0011346976000Not as AdvertisedProduct arrived labeled as Jumbo Salted Peanut...
23B000LQOCH0ABXLMWJIXXAINNatalia Corres \"Natalia Corres\"1141219017600\"Delight\" says it allThis is a confection that has been around a fe...
34B000UA0QIQA395BORC6FGVXVKarl3321307923200Cough MedicineIf you are looking for the secret ingredient i...
45B006K2ZZ7KA1UQRSCLF8GW1TMichael D. Bigham \"M. Wassir\"0051350777600Great taffyGreat taffy at a great price. There was a wid...
\n
"},"metadata":{}}],"execution_count":149},{"cell_type":"code","source":"tokens = nltk.word_tokenize(example)\ntokens[:10]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:03.705245Z","iopub.execute_input":"2025-05-16T12:10:03.705540Z","iopub.status.idle":"2025-05-16T12:10:03.712449Z","shell.execute_reply.started":"2025-05-16T12:10:03.705522Z","shell.execute_reply":"2025-05-16T12:10:03.711521Z"}},"outputs":[{"execution_count":150,"output_type":"execute_result","data":{"text/plain":"['This', 'oatmeal', 'is', 'not', 'good', '.', 'Its', 'mushy', ',', 'soft']"},"metadata":{}}],"execution_count":150},{"cell_type":"code","source":"nltk.download('averaged_perceptron_tagger_eng')\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:03.894073Z","iopub.execute_input":"2025-05-16T12:10:03.894379Z","iopub.status.idle":"2025-05-16T12:10:04.033197Z","shell.execute_reply.started":"2025-05-16T12:10:03.894360Z","shell.execute_reply":"2025-05-16T12:10:04.032203Z"}},"outputs":[{"name":"stderr","text":"[nltk_data] Downloading package averaged_perceptron_tagger_eng to\n[nltk_data] /usr/share/nltk_data...\n[nltk_data] Package averaged_perceptron_tagger_eng is already up-to-\n[nltk_data] date!\n","output_type":"stream"},{"execution_count":151,"output_type":"execute_result","data":{"text/plain":"True"},"metadata":{}}],"execution_count":151},{"cell_type":"code","source":"tagged = nltk.pos_tag(tokens)\ntagged[:10]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:04.074450Z","iopub.execute_input":"2025-05-16T12:10:04.074777Z","iopub.status.idle":"2025-05-16T12:10:04.082279Z","shell.execute_reply.started":"2025-05-16T12:10:04.074750Z","shell.execute_reply":"2025-05-16T12:10:04.081450Z"}},"outputs":[{"execution_count":152,"output_type":"execute_result","data":{"text/plain":"[('This', 'DT'),\n ('oatmeal', 'NN'),\n ('is', 'VBZ'),\n ('not', 'RB'),\n ('good', 'JJ'),\n ('.', '.'),\n ('Its', 'PRP$'),\n ('mushy', 'NN'),\n (',', ','),\n ('soft', 'JJ')]"},"metadata":{}}],"execution_count":152},{"cell_type":"code","source":"nltk.download('maxent_ne_chunker_tab')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:04.239437Z","iopub.execute_input":"2025-05-16T12:10:04.239741Z","iopub.status.idle":"2025-05-16T12:10:04.260161Z","shell.execute_reply.started":"2025-05-16T12:10:04.239720Z","shell.execute_reply":"2025-05-16T12:10:04.259463Z"}},"outputs":[{"name":"stderr","text":"[nltk_data] Downloading package maxent_ne_chunker_tab to\n[nltk_data] /usr/share/nltk_data...\n[nltk_data] Package maxent_ne_chunker_tab is already up-to-date!\n","output_type":"stream"},{"execution_count":153,"output_type":"execute_result","data":{"text/plain":"True"},"metadata":{}}],"execution_count":153},{"cell_type":"code","source":"entities = nltk.chunk.ne_chunk(tagged)\nentities.pprint()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:04.435074Z","iopub.execute_input":"2025-05-16T12:10:04.435391Z","iopub.status.idle":"2025-05-16T12:10:04.825979Z","shell.execute_reply.started":"2025-05-16T12:10:04.435368Z","shell.execute_reply":"2025-05-16T12:10:04.824862Z"}},"outputs":[{"name":"stdout","text":"(S\n This/DT\n oatmeal/NN\n is/VBZ\n not/RB\n good/JJ\n ./.\n Its/PRP$\n mushy/NN\n ,/,\n soft/JJ\n ,/,\n I/PRP\n do/VBP\n n't/RB\n like/VB\n it/PRP\n ./.\n (ORGANIZATION Quaker/NNP Oats/NNPS)\n is/VBZ\n the/DT\n way/NN\n to/TO\n go/VB\n ./.)\n","output_type":"stream"}],"execution_count":154},{"cell_type":"markdown","source":"**VADER Seniment Scoring**","metadata":{}},{"cell_type":"code","source":"#from nltk.sentiment import SentimentIntensityAnalyzer\n#from tqdm.notebook import tqdm\n\n#sia = SentimentIntensityAnalyzer()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:04.827443Z","iopub.execute_input":"2025-05-16T12:10:04.827812Z","iopub.status.idle":"2025-05-16T12:10:04.832293Z","shell.execute_reply.started":"2025-05-16T12:10:04.827756Z","shell.execute_reply":"2025-05-16T12:10:04.831116Z"}},"outputs":[],"execution_count":155},{"cell_type":"code","source":"#sia.polarity_scores('I am so happy!')\n#neg- Negative\n#neu- neutral\n#pos- Positivew","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:04.991592Z","iopub.execute_input":"2025-05-16T12:10:04.991954Z","iopub.status.idle":"2025-05-16T12:10:04.996113Z","shell.execute_reply.started":"2025-05-16T12:10:04.991930Z","shell.execute_reply":"2025-05-16T12:10:04.995102Z"}},"outputs":[],"execution_count":156},{"cell_type":"code","source":"#sia.polarity_scores('This is the worst thing ever.')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:05.218122Z","iopub.execute_input":"2025-05-16T12:10:05.218417Z","iopub.status.idle":"2025-05-16T12:10:05.222713Z","shell.execute_reply.started":"2025-05-16T12:10:05.218398Z","shell.execute_reply":"2025-05-16T12:10:05.221903Z"}},"outputs":[],"execution_count":157},{"cell_type":"code","source":"#sia.polarity_scores(example)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:05.440102Z","iopub.execute_input":"2025-05-16T12:10:05.440412Z","iopub.status.idle":"2025-05-16T12:10:05.444950Z","shell.execute_reply.started":"2025-05-16T12:10:05.440388Z","shell.execute_reply":"2025-05-16T12:10:05.443762Z"}},"outputs":[],"execution_count":158},{"cell_type":"code","source":"# Run the polarity score on the entire dataset\n#res = {}\n#for i, row in tqdm(df.iterrows(), total=len(df)):\n #text = row['Text']\n #myid = row['Id']\n #res[myid] = sia.polarity_scores(text)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:08.869615Z","iopub.execute_input":"2025-05-16T12:10:08.870022Z","iopub.status.idle":"2025-05-16T12:10:08.874501Z","shell.execute_reply.started":"2025-05-16T12:10:08.869993Z","shell.execute_reply":"2025-05-16T12:10:08.873509Z"}},"outputs":[],"execution_count":159},{"cell_type":"code","source":"#pd.DataFrame(res)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:09.418426Z","iopub.execute_input":"2025-05-16T12:10:09.419198Z","iopub.status.idle":"2025-05-16T12:10:09.422778Z","shell.execute_reply.started":"2025-05-16T12:10:09.419167Z","shell.execute_reply":"2025-05-16T12:10:09.421882Z"}},"outputs":[],"execution_count":160},{"cell_type":"code","source":"#vaders = pd.DataFrame(res).T\n#vaders = vaders.reset_index().rename(columns={'index': 'Id'})\n#vaders = vaders.merge(df, how='left')\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:18.701668Z","iopub.execute_input":"2025-05-16T12:10:18.702024Z","iopub.status.idle":"2025-05-16T12:10:18.705719Z","shell.execute_reply.started":"2025-05-16T12:10:18.702001Z","shell.execute_reply":"2025-05-16T12:10:18.704879Z"}},"outputs":[],"execution_count":161},{"cell_type":"code","source":"#vaders.head()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:19.037033Z","iopub.execute_input":"2025-05-16T12:10:19.037333Z","iopub.status.idle":"2025-05-16T12:10:19.041663Z","shell.execute_reply.started":"2025-05-16T12:10:19.037313Z","shell.execute_reply":"2025-05-16T12:10:19.040761Z"}},"outputs":[],"execution_count":162},{"cell_type":"markdown","source":"**Plot VADER results**","metadata":{}},{"cell_type":"code","source":"#ax = sns.barplot(data=vaders, x='Score', y='compound')\n#ax.set_title('Compund Score by Amazon Star Review')\n#plt.show()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:20.819122Z","iopub.execute_input":"2025-05-16T12:10:20.819415Z","iopub.status.idle":"2025-05-16T12:10:20.823941Z","shell.execute_reply.started":"2025-05-16T12:10:20.819396Z","shell.execute_reply":"2025-05-16T12:10:20.822415Z"}},"outputs":[],"execution_count":163},{"cell_type":"code","source":"#fig, axs = plt.subplots(1, 3, figsize=(12, 3))\n#sns.barplot(data=vaders, x='Score', y='pos', ax=axs[0])\n#sns.barplot(data=vaders, x='Score', y='neu', ax=axs[1])\n#sns.barplot(data=vaders, x='Score', y='neg', ax=axs[2])\n#axs[0].set_title('Positive')\n#axs[1].set_title('Neutral')\n#axs[2].set_title('Negative')\n#plt.tight_layout()\n#plt.show()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:21.203904Z","iopub.execute_input":"2025-05-16T12:10:21.204193Z","iopub.status.idle":"2025-05-16T12:10:21.208430Z","shell.execute_reply.started":"2025-05-16T12:10:21.204175Z","shell.execute_reply":"2025-05-16T12:10:21.207452Z"}},"outputs":[],"execution_count":164},{"cell_type":"markdown","source":"**Step 3. Roberta Pretrained Model**","metadata":{}},{"cell_type":"code","source":"from transformers import AutoTokenizer\nfrom transformers import AutoModelForSequenceClassification\nfrom scipy.special import softmax","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:25.045296Z","iopub.execute_input":"2025-05-16T12:10:25.045588Z","iopub.status.idle":"2025-05-16T12:10:25.050258Z","shell.execute_reply.started":"2025-05-16T12:10:25.045568Z","shell.execute_reply":"2025-05-16T12:10:25.049400Z"}},"outputs":[],"execution_count":165},{"cell_type":"code","source":"import torch\nfrom torch.utils.data import Dataset, DataLoader\nfrom torch.optim import AdamW\nfrom transformers import AutoTokenizer, AutoModelForSequenceClassification\nfrom sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\nimport pandas as pd\nfrom tqdm import tqdm\n\n# Model and Tokenizer\nMODEL = \"cardiffnlp/twitter-roberta-base-sentiment\"\ntokenizer = AutoTokenizer.from_pretrained(MODEL)\nmodel = AutoModelForSequenceClassification.from_pretrained(\n MODEL,\n num_labels=2,\n ignore_mismatched_sizes=True # ✅ Fix size mismatch\n)\n\n# Custom Dataset\nclass ReviewDataset(Dataset):\n def __init__(self, texts, labels, tokenizer, max_len=128):\n self.texts = texts\n self.labels = labels\n self.tokenizer = tokenizer\n self.max_len = max_len\n\n def __len__(self):\n return len(self.texts)\n\n def __getitem__(self, idx):\n encoding = self.tokenizer(self.texts[idx], truncation=True, padding='max_length', max_length=self.max_len, return_tensors=\"pt\")\n return {\n 'input_ids': encoding['input_ids'].flatten(),\n 'attention_mask': encoding['attention_mask'].flatten(),\n 'labels': torch.tensor(self.labels[idx], dtype=torch.long)\n }\n\n# Data prep\ndf = df[df['Score'].isin([1, 2, 4, 5])]\ndf['label'] = df['Score'].apply(lambda x: 0 if x < 3 else 1)\ntexts = df['Text'].fillna(\"\").tolist()\nlabels = df['label'].tolist()\n\n# Dataset and DataLoader\ndataset = ReviewDataset(texts, labels, tokenizer)\ndataloader = DataLoader(dataset, batch_size=8, shuffle=True)\n\n# Setup\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel = model.to(device)\noptimizer = AdamW(model.parameters(), lr=2e-5)\n\n# Training Loop (3 epochs)\nmodel.train()\nfor epoch in range(3):\n total_loss = 0\n print(f\"\\nEpoch {epoch+1}\")\n for batch in tqdm(dataloader):\n optimizer.zero_grad()\n input_ids = batch['input_ids'].to(device)\n attention_mask = batch['attention_mask'].to(device)\n labels = batch['labels'].to(device)\n\n outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)\n loss = outputs.loss\n loss.backward()\n optimizer.step()\n total_loss += loss.item()\n \n print(f\"Epoch {epoch+1} Loss: {total_loss:.4f}\")\n\n# Evaluation\nmodel.eval()\nall_preds = []\nall_labels = []\n\nwith torch.no_grad():\n for batch in dataloader:\n input_ids = batch['input_ids'].to(device)\n attention_mask = batch['attention_mask'].to(device)\n labels = batch['labels'].to(device)\n\n outputs = model(input_ids=input_ids, attention_mask=attention_mask)\n preds = torch.argmax(outputs.logits, dim=1)\n\n all_preds.extend(preds.cpu().numpy())\n all_labels.extend(labels.cpu().numpy())\n\n# Metrics\nacc = accuracy_score(all_labels, all_preds)\nprec = precision_score(all_labels, all_preds)\nrec = recall_score(all_labels, all_preds)\nf1 = f1_score(all_labels, all_preds)\n\nprint(f\"\\n✅ Evaluation Metrics:\")\nprint(f\"Accuracy: {acc:.4f}\")\nprint(f\"Precision: {prec:.4f}\")\nprint(f\"Recall: {rec:.4f}\")\nprint(f\"F1 Score: {f1:.4f}\")\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:10:29.281840Z","iopub.execute_input":"2025-05-16T12:10:29.282180Z","iopub.status.idle":"2025-05-16T12:26:43.681425Z","shell.execute_reply.started":"2025-05-16T12:10:29.282159Z","shell.execute_reply":"2025-05-16T12:26:43.680060Z"}},"outputs":[{"name":"stderr","text":"Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized because the shapes did not match:\n- classifier.out_proj.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated\n- classifier.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instantiated\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n","output_type":"stream"},{"name":"stdout","text":"\nEpoch 1\n","output_type":"stream"},{"name":"stderr","text":"100%|██████████| 58/58 [05:03<00:00, 5.23s/it]\n","output_type":"stream"},{"name":"stdout","text":"Epoch 1 Loss: 11.8127\n\nEpoch 2\n","output_type":"stream"},{"name":"stderr","text":"100%|██████████| 58/58 [04:50<00:00, 5.02s/it]\n","output_type":"stream"},{"name":"stdout","text":"Epoch 2 Loss: 5.3162\n\nEpoch 3\n","output_type":"stream"},{"name":"stderr","text":"100%|██████████| 58/58 [04:58<00:00, 5.14s/it]\n","output_type":"stream"},{"name":"stdout","text":"Epoch 3 Loss: 4.4434\n\n✅ Evaluation Metrics:\nAccuracy: 0.9719\nPrecision: 0.9692\nRecall: 1.0000\nF1 Score: 0.9844\n","output_type":"stream"}],"execution_count":166},{"cell_type":"code","source":"model.save_pretrained('finetuned-model')\ntokenizer.save_pretrained('finetuned-model')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:48:23.780128Z","iopub.execute_input":"2025-05-16T12:48:23.780518Z","iopub.status.idle":"2025-05-16T12:48:25.872318Z","shell.execute_reply.started":"2025-05-16T12:48:23.780493Z","shell.execute_reply":"2025-05-16T12:48:25.871449Z"}},"outputs":[{"execution_count":193,"output_type":"execute_result","data":{"text/plain":"('finetuned-model/tokenizer_config.json',\n 'finetuned-model/special_tokens_map.json',\n 'finetuned-model/vocab.json',\n 'finetuned-model/merges.txt',\n 'finetuned-model/added_tokens.json',\n 'finetuned-model/tokenizer.json')"},"metadata":{}}],"execution_count":193},{"cell_type":"code","source":"MODEL = f\"cardiffnlp/twitter-roberta-base-sentiment\"\ntokenizer = AutoTokenizer.from_pretrained(MODEL)\nmodel = AutoModelForSequenceClassification.from_pretrained(MODEL)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:28:58.049543Z","iopub.execute_input":"2025-05-16T12:28:58.050393Z","iopub.status.idle":"2025-05-16T12:28:59.009301Z","shell.execute_reply.started":"2025-05-16T12:28:58.050362Z","shell.execute_reply":"2025-05-16T12:28:59.008462Z"}},"outputs":[],"execution_count":168},{"cell_type":"code","source":"# Run for Roberta Model\nencoded_text = tokenizer(example, return_tensors='pt')\noutput = model(**encoded_text)\nscores = output[0][0].detach().numpy()\nscores = softmax(scores)\nscores_dict = {\n 'roberta_neg' : scores[0],\n 'roberta_neu' : scores[1],\n 'roberta_pos' : scores[2]\n}\nprint(scores_dict)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:29:00.589339Z","iopub.execute_input":"2025-05-16T12:29:00.589638Z","iopub.status.idle":"2025-05-16T12:29:00.710322Z","shell.execute_reply.started":"2025-05-16T12:29:00.589618Z","shell.execute_reply":"2025-05-16T12:29:00.709497Z"}},"outputs":[{"name":"stdout","text":"{'roberta_neg': 0.97635514, 'roberta_neu': 0.020687463, 'roberta_pos': 0.0029573694}\n","output_type":"stream"}],"execution_count":169},{"cell_type":"code","source":"def polarity_scores_roberta(example):\n encoded_text = tokenizer(example, return_tensors='pt')\n output = model(**encoded_text)\n scores = output[0][0].detach().numpy()\n scores = softmax(scores)\n scores_dict = {\n 'roberta_neg' : scores[0],\n 'roberta_neu' : scores[1],\n 'roberta_pos' : scores[2]\n }\n return scores_dict","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:29:03.504227Z","iopub.execute_input":"2025-05-16T12:29:03.504516Z","iopub.status.idle":"2025-05-16T12:29:03.510388Z","shell.execute_reply.started":"2025-05-16T12:29:03.504496Z","shell.execute_reply":"2025-05-16T12:29:03.509286Z"}},"outputs":[],"execution_count":170},{"cell_type":"code","source":"res = {}\nfor i, row in tqdm(df.iterrows(), total=len(df)):\n try:\n text = row['Text']\n myid = row['Id']\n vader_result = sia.polarity_scores(text)\n vader_result_rename = {}\n for key, value in vader_result.items():\n vader_result_rename[f\"vader_{key}\"] = value\n roberta_result = polarity_scores_roberta(text)\n both = {**vader_result_rename, **roberta_result}\n res[myid] = both\n except RuntimeError:\n print(f'Broke for id {myid}')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:29:10.870333Z","iopub.execute_input":"2025-05-16T12:29:10.870648Z","iopub.status.idle":"2025-05-16T12:30:30.988843Z","shell.execute_reply.started":"2025-05-16T12:29:10.870623Z","shell.execute_reply":"2025-05-16T12:30:30.987872Z"}},"outputs":[{"name":"stderr","text":" 17%|█▋ | 77/463 [00:13<00:44, 8.73it/s]","output_type":"stream"},{"name":"stdout","text":"Broke for id 83\n","output_type":"stream"},{"name":"stderr","text":" 37%|███▋ | 172/463 [00:29<00:28, 10.09it/s]","output_type":"stream"},{"name":"stdout","text":"Broke for id 187\n","output_type":"stream"},{"name":"stderr","text":"100%|██████████| 463/463 [01:20<00:00, 5.78it/s]\n","output_type":"stream"}],"execution_count":171},{"cell_type":"code","source":"results_df = pd.DataFrame(res).T\nresults_df = results_df.reset_index().rename(columns={'index': 'Id'})\nresults_df = results_df.merge(df, how='left')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:42.305238Z","iopub.execute_input":"2025-05-16T12:30:42.305576Z","iopub.status.idle":"2025-05-16T12:30:42.331635Z","shell.execute_reply.started":"2025-05-16T12:30:42.305549Z","shell.execute_reply":"2025-05-16T12:30:42.330498Z"}},"outputs":[],"execution_count":172},{"cell_type":"code","source":"from sklearn.metrics import precision_score,recall_score,f1_score","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:43.444222Z","iopub.execute_input":"2025-05-16T12:30:43.444545Z","iopub.status.idle":"2025-05-16T12:30:43.449101Z","shell.execute_reply.started":"2025-05-16T12:30:43.444521Z","shell.execute_reply":"2025-05-16T12:30:43.447916Z"}},"outputs":[],"execution_count":173},{"cell_type":"code","source":"results_df","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:44.704279Z","iopub.execute_input":"2025-05-16T12:30:44.704591Z","iopub.status.idle":"2025-05-16T12:30:44.729367Z","shell.execute_reply.started":"2025-05-16T12:30:44.704568Z","shell.execute_reply":"2025-05-16T12:30:44.728394Z"}},"outputs":[{"execution_count":174,"output_type":"execute_result","data":{"text/plain":" Id vader_neg vader_neu vader_pos vader_compound roberta_neg \\\n0 1 0.000 0.695 0.305 0.9441 0.009624 \n1 2 0.138 0.862 0.000 -0.5664 0.508986 \n2 3 0.091 0.754 0.155 0.8265 0.003229 \n3 4 0.000 1.000 0.000 0.0000 0.002295 \n4 5 0.000 0.552 0.448 0.9468 0.001635 \n.. ... ... ... ... ... ... \n456 496 0.000 0.554 0.446 0.9725 0.001906 \n457 497 0.059 0.799 0.142 0.7833 0.004415 \n458 498 0.025 0.762 0.212 0.9848 0.006427 \n459 499 0.041 0.904 0.055 0.1280 0.865614 \n460 500 0.000 0.678 0.322 0.9811 0.002440 \n\n roberta_neu roberta_pos ProductId UserId \\\n0 0.049980 0.940395 B001E4KFG0 A3SGXH7AUHU8GW \n1 0.452414 0.038600 B00813GRG4 A1D87F6ZCVE5NK \n2 0.098067 0.898704 B000LQOCH0 ABXLMWJIXXAIN \n3 0.090219 0.907486 B000UA0QIQ A395BORC6FGVXV \n4 0.010302 0.988063 B006K2ZZ7K A1UQRSCLF8GW1T \n.. ... ... ... ... \n456 0.009862 0.988232 B000G6RYNE APGAA43E3WPN7 \n457 0.034215 0.961369 B000G6RYNE ABR7HU5H1KNE \n458 0.074537 0.919036 B000G6RYNE AJQD2WWJYOYFQ \n459 0.119366 0.015020 B000G6RYNE A16YH487W9ZYO0 \n460 0.011327 0.986233 B000G6RYNE A83YQC1XOU4CS \n\n ProfileName HelpfulnessNumerator \\\n0 delmartian 1 \n1 dll pa 0 \n2 Natalia Corres \"Natalia Corres\" 1 \n3 Karl 3 \n4 Michael D. Bigham \"M. Wassir\" 0 \n.. ... ... \n456 Darren 0 \n457 Keith 0 \n458 bubbles 0 \n459 Bruce G. Lindsay 0 \n460 J. Baker 0 \n\n HelpfulnessDenominator Score Time \\\n0 1 5 1303862400 \n1 0 1 1346976000 \n2 1 4 1219017600 \n3 3 2 1307923200 \n4 0 5 1350777600 \n.. ... ... ... \n456 0 5 1201392000 \n457 0 5 1196726400 \n458 0 4 1186617600 \n459 0 4 1184198400 \n460 0 5 1183420800 \n\n Summary \\\n0 Good Quality Dog Food \n1 Not as Advertised \n2 \"Delight\" says it all \n3 Cough Medicine \n4 Great taffy \n.. ... \n456 amazing chips \n457 Best Chip Ever \n458 Tangy, spicy, and sweet- oh my! \n459 An indulgence with a bite \n460 The best I've had \n\n Text label \n0 I have bought several of the Vitality canned d... 1 \n1 Product arrived labeled as Jumbo Salted Peanut... 0 \n2 This is a confection that has been around a fe... 1 \n3 If you are looking for the secret ingredient i... 0 \n4 Great taffy at a great price. There was a wid... 1 \n.. ... ... \n456 i rarely eat chips but i saw these and tried t... 1 \n457 This is easily the best potato chip that I hav... 1 \n458 Kettle Chips Spicy Thai potato chips have the ... 1 \n459 Okay, I should not eat potato chips, nor shoul... 1 \n460 I don't write very many reviews but I have to ... 1 \n\n[461 rows x 18 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Idvader_negvader_neuvader_posvader_compoundroberta_negroberta_neuroberta_posProductIdUserIdProfileNameHelpfulnessNumeratorHelpfulnessDenominatorScoreTimeSummaryTextlabel
010.0000.6950.3050.94410.0096240.0499800.940395B001E4KFG0A3SGXH7AUHU8GWdelmartian1151303862400Good Quality Dog FoodI have bought several of the Vitality canned d...1
120.1380.8620.000-0.56640.5089860.4524140.038600B00813GRG4A1D87F6ZCVE5NKdll pa0011346976000Not as AdvertisedProduct arrived labeled as Jumbo Salted Peanut...0
230.0910.7540.1550.82650.0032290.0980670.898704B000LQOCH0ABXLMWJIXXAINNatalia Corres \"Natalia Corres\"1141219017600\"Delight\" says it allThis is a confection that has been around a fe...1
340.0001.0000.0000.00000.0022950.0902190.907486B000UA0QIQA395BORC6FGVXVKarl3321307923200Cough MedicineIf you are looking for the secret ingredient i...0
450.0000.5520.4480.94680.0016350.0103020.988063B006K2ZZ7KA1UQRSCLF8GW1TMichael D. Bigham \"M. Wassir\"0051350777600Great taffyGreat taffy at a great price. There was a wid...1
.........................................................
4564960.0000.5540.4460.97250.0019060.0098620.988232B000G6RYNEAPGAA43E3WPN7Darren0051201392000amazing chipsi rarely eat chips but i saw these and tried t...1
4574970.0590.7990.1420.78330.0044150.0342150.961369B000G6RYNEABR7HU5H1KNEKeith0051196726400Best Chip EverThis is easily the best potato chip that I hav...1
4584980.0250.7620.2120.98480.0064270.0745370.919036B000G6RYNEAJQD2WWJYOYFQbubbles0041186617600Tangy, spicy, and sweet- oh my!Kettle Chips Spicy Thai potato chips have the ...1
4594990.0410.9040.0550.12800.8656140.1193660.015020B000G6RYNEA16YH487W9ZYO0Bruce G. Lindsay0041184198400An indulgence with a biteOkay, I should not eat potato chips, nor shoul...1
4605000.0000.6780.3220.98110.0024400.0113270.986233B000G6RYNEA83YQC1XOU4CSJ. Baker0051183420800The best I've hadI don't write very many reviews but I have to ...1
\n

461 rows × 18 columns

\n
"},"metadata":{}}],"execution_count":174},{"cell_type":"code","source":"results_df.columns","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:49.304348Z","iopub.execute_input":"2025-05-16T12:30:49.304698Z","iopub.status.idle":"2025-05-16T12:30:49.311838Z","shell.execute_reply.started":"2025-05-16T12:30:49.304673Z","shell.execute_reply":"2025-05-16T12:30:49.310838Z"}},"outputs":[{"execution_count":175,"output_type":"execute_result","data":{"text/plain":"Index(['Id', 'vader_neg', 'vader_neu', 'vader_pos', 'vader_compound',\n 'roberta_neg', 'roberta_neu', 'roberta_pos', 'ProductId', 'UserId',\n 'ProfileName', 'HelpfulnessNumerator', 'HelpfulnessDenominator',\n 'Score', 'Time', 'Summary', 'Text', 'label'],\n dtype='object')"},"metadata":{}}],"execution_count":175},{"cell_type":"code","source":"results_df.query('Score == 1') \\\n .sort_values('roberta_pos', ascending=False)['Text'].values[0]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:50.850082Z","iopub.execute_input":"2025-05-16T12:30:50.850454Z","iopub.status.idle":"2025-05-16T12:30:50.865981Z","shell.execute_reply.started":"2025-05-16T12:30:50.850425Z","shell.execute_reply":"2025-05-16T12:30:50.865045Z"}},"outputs":[{"execution_count":176,"output_type":"execute_result","data":{"text/plain":"'I felt energized within five minutes, but it lasted for about 45 minutes. I paid $3.99 for this drink. I could have just drunk a cup of coffee and saved my money.'"},"metadata":{}}],"execution_count":176},{"cell_type":"code","source":"results_df.query('Score == 1') \\\n .sort_values('vader_pos', ascending=False)['Text'].values[0]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:52.729686Z","iopub.execute_input":"2025-05-16T12:30:52.730052Z","iopub.status.idle":"2025-05-16T12:30:52.743034Z","shell.execute_reply.started":"2025-05-16T12:30:52.730029Z","shell.execute_reply":"2025-05-16T12:30:52.741997Z"}},"outputs":[{"execution_count":177,"output_type":"execute_result","data":{"text/plain":"'So we cancelled the order. It was cancelled without any problem. That is a positive note...'"},"metadata":{}}],"execution_count":177},{"cell_type":"code","source":"results_df.query('Score == 5') \\\n .sort_values('roberta_neg', ascending=False)['Text'].values[0]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:54.004309Z","iopub.execute_input":"2025-05-16T12:30:54.004653Z","iopub.status.idle":"2025-05-16T12:30:54.017629Z","shell.execute_reply.started":"2025-05-16T12:30:54.004628Z","shell.execute_reply":"2025-05-16T12:30:54.016725Z"}},"outputs":[{"execution_count":178,"output_type":"execute_result","data":{"text/plain":"'this was sooooo deliscious but too bad i ate em too fast and gained 2 pds! my fault'"},"metadata":{}}],"execution_count":178},{"cell_type":"code","source":"results_df.query('Score == 5') \\\n .sort_values('vader_neg', ascending=False)['Text'].values[0]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:30:56.659434Z","iopub.execute_input":"2025-05-16T12:30:56.659812Z","iopub.status.idle":"2025-05-16T12:30:56.672111Z","shell.execute_reply.started":"2025-05-16T12:30:56.659763Z","shell.execute_reply":"2025-05-16T12:30:56.671030Z"}},"outputs":[{"execution_count":179,"output_type":"execute_result","data":{"text/plain":"'this was sooooo deliscious but too bad i ate em too fast and gained 2 pds! my fault'"},"metadata":{}}],"execution_count":179},{"cell_type":"code","source":"from transformers import pipeline\n\nsent_pipeline = pipeline(\"sentiment-analysis\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:01.859224Z","iopub.execute_input":"2025-05-16T12:31:01.859562Z","iopub.status.idle":"2025-05-16T12:31:02.059059Z","shell.execute_reply.started":"2025-05-16T12:31:01.859529Z","shell.execute_reply":"2025-05-16T12:31:02.057850Z"}},"outputs":[{"name":"stderr","text":"No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).\nUsing a pipeline without specifying a model name and revision in production is not recommended.\nDevice set to use cpu\n","output_type":"stream"}],"execution_count":180},{"cell_type":"code","source":"sent_pipeline('I Love sentiment analysis!')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:07.224530Z","iopub.execute_input":"2025-05-16T12:31:07.224936Z","iopub.status.idle":"2025-05-16T12:31:07.323207Z","shell.execute_reply.started":"2025-05-16T12:31:07.224910Z","shell.execute_reply":"2025-05-16T12:31:07.322233Z"}},"outputs":[{"execution_count":181,"output_type":"execute_result","data":{"text/plain":"[{'label': 'POSITIVE', 'score': 0.9997853636741638}]"},"metadata":{}}],"execution_count":181},{"cell_type":"code","source":"sent_pipeline('I hate sentiment analysis!')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:09.339165Z","iopub.execute_input":"2025-05-16T12:31:09.339535Z","iopub.status.idle":"2025-05-16T12:31:09.374031Z","shell.execute_reply.started":"2025-05-16T12:31:09.339501Z","shell.execute_reply":"2025-05-16T12:31:09.373182Z"}},"outputs":[{"execution_count":182,"output_type":"execute_result","data":{"text/plain":"[{'label': 'NEGATIVE', 'score': 0.9992958307266235}]"},"metadata":{}}],"execution_count":182},{"cell_type":"code","source":"sent_pipeline('Make sure to like and subscribe!')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:12.169142Z","iopub.execute_input":"2025-05-16T12:31:12.169450Z","iopub.status.idle":"2025-05-16T12:31:12.212549Z","shell.execute_reply.started":"2025-05-16T12:31:12.169427Z","shell.execute_reply":"2025-05-16T12:31:12.211123Z"}},"outputs":[{"execution_count":183,"output_type":"execute_result","data":{"text/plain":"[{'label': 'POSITIVE', 'score': 0.9991742968559265}]"},"metadata":{}}],"execution_count":183},{"cell_type":"code","source":"sent_pipeline('Make sure to not like and subscribe!')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:19.564328Z","iopub.execute_input":"2025-05-16T12:31:19.564614Z","iopub.status.idle":"2025-05-16T12:31:19.626677Z","shell.execute_reply.started":"2025-05-16T12:31:19.564591Z","shell.execute_reply":"2025-05-16T12:31:19.625882Z"}},"outputs":[{"execution_count":185,"output_type":"execute_result","data":{"text/plain":"[{'label': 'NEGATIVE', 'score': 0.8641592264175415}]"},"metadata":{}}],"execution_count":185},{"cell_type":"code","source":"sent_pipeline('booo')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:22.404560Z","iopub.execute_input":"2025-05-16T12:31:22.404972Z","iopub.status.idle":"2025-05-16T12:31:22.436025Z","shell.execute_reply.started":"2025-05-16T12:31:22.404945Z","shell.execute_reply":"2025-05-16T12:31:22.434997Z"}},"outputs":[{"execution_count":186,"output_type":"execute_result","data":{"text/plain":"[{'label': 'NEGATIVE', 'score': 0.9936267137527466}]"},"metadata":{}}],"execution_count":186},{"cell_type":"code","source":"sent_pipeline('good')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:23.698936Z","iopub.execute_input":"2025-05-16T12:31:23.699242Z","iopub.status.idle":"2025-05-16T12:31:23.730621Z","shell.execute_reply.started":"2025-05-16T12:31:23.699220Z","shell.execute_reply":"2025-05-16T12:31:23.729815Z"}},"outputs":[{"execution_count":187,"output_type":"execute_result","data":{"text/plain":"[{'label': 'POSITIVE', 'score': 0.9998161196708679}]"},"metadata":{}}],"execution_count":187},{"cell_type":"code","source":"sent_pipeline('bad')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:26.589571Z","iopub.execute_input":"2025-05-16T12:31:26.589916Z","iopub.status.idle":"2025-05-16T12:31:26.620602Z","shell.execute_reply.started":"2025-05-16T12:31:26.589890Z","shell.execute_reply":"2025-05-16T12:31:26.619886Z"}},"outputs":[{"execution_count":188,"output_type":"execute_result","data":{"text/plain":"[{'label': 'NEGATIVE', 'score': 0.999782383441925}]"},"metadata":{}}],"execution_count":188},{"cell_type":"code","source":"sent_pipeline('i like it')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:36.333938Z","iopub.execute_input":"2025-05-16T12:31:36.334229Z","iopub.status.idle":"2025-05-16T12:31:36.366534Z","shell.execute_reply.started":"2025-05-16T12:31:36.334209Z","shell.execute_reply":"2025-05-16T12:31:36.365299Z"}},"outputs":[{"execution_count":190,"output_type":"execute_result","data":{"text/plain":"[{'label': 'POSITIVE', 'score': 0.9998593330383301}]"},"metadata":{}}],"execution_count":190},{"cell_type":"code","source":"quantized_model = torch.quantization.quantize_dynamic(\n model, {torch.nn.Linear}, dtype=torch.qint8\n)\nprint(\"Quantized model ready\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:45.384499Z","iopub.execute_input":"2025-05-16T12:31:45.384869Z","iopub.status.idle":"2025-05-16T12:31:46.332163Z","shell.execute_reply.started":"2025-05-16T12:31:45.384825Z","shell.execute_reply":"2025-05-16T12:31:46.331197Z"}},"outputs":[{"name":"stdout","text":"Quantized model ready\n","output_type":"stream"}],"execution_count":191},{"cell_type":"code","source":"torch.save(quantized_model.state_dict(), \"sentient_model.pt\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:31:47.999286Z","iopub.execute_input":"2025-05-16T12:31:47.999611Z","iopub.status.idle":"2025-05-16T12:31:49.408218Z","shell.execute_reply.started":"2025-05-16T12:31:47.999588Z","shell.execute_reply":"2025-05-16T12:31:49.407236Z"}},"outputs":[],"execution_count":192},{"cell_type":"code","source":"finetuned_model = AutoModelForSequenceClassification.from_pretrained('/kaggle/working/finetuned-model')\ntokenizer = AutoTokenizer.from_pretrained('/kaggle/working/finetuned-model')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:54:13.194195Z","iopub.execute_input":"2025-05-16T12:54:13.194518Z","iopub.status.idle":"2025-05-16T12:54:13.558618Z","shell.execute_reply.started":"2025-05-16T12:54:13.194495Z","shell.execute_reply":"2025-05-16T12:54:13.557691Z"}},"outputs":[],"execution_count":205},{"cell_type":"code","source":"device = 'cuda' if torch.cuda.is_available() else 'cpu'","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:54:15.548687Z","iopub.execute_input":"2025-05-16T12:54:15.549599Z","iopub.status.idle":"2025-05-16T12:54:15.553887Z","shell.execute_reply.started":"2025-05-16T12:54:15.549565Z","shell.execute_reply":"2025-05-16T12:54:15.552866Z"}},"outputs":[],"execution_count":206},{"cell_type":"code","source":"quantize_model = finetuned_model.to(dtype=torch.float16, device=device)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:54:17.678867Z","iopub.execute_input":"2025-05-16T12:54:17.679186Z","iopub.status.idle":"2025-05-16T12:54:17.900814Z","shell.execute_reply.started":"2025-05-16T12:54:17.679164Z","shell.execute_reply":"2025-05-16T12:54:17.900013Z"}},"outputs":[],"execution_count":207},{"cell_type":"code","source":"quantize_model.save_pretrained('quantized-model')\ntokenizer.save_pretrained('quantized-model')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:54:29.124181Z","iopub.execute_input":"2025-05-16T12:54:29.124534Z","iopub.status.idle":"2025-05-16T12:54:30.159407Z","shell.execute_reply.started":"2025-05-16T12:54:29.124509Z","shell.execute_reply":"2025-05-16T12:54:30.158502Z"}},"outputs":[{"execution_count":209,"output_type":"execute_result","data":{"text/plain":"('quantized-model/tokenizer_config.json',\n 'quantized-model/special_tokens_map.json',\n 'quantized-model/vocab.json',\n 'quantized-model/merges.txt',\n 'quantized-model/added_tokens.json',\n 'quantized-model/tokenizer.json')"},"metadata":{}}],"execution_count":209},{"cell_type":"code","source":"import torch.nn.functional as F","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-16T12:58:03.703528Z","iopub.execute_input":"2025-05-16T12:58:03.703881Z","iopub.status.idle":"2025-05-16T12:58:03.708330Z","shell.execute_reply.started":"2025-05-16T12:58:03.703854Z","shell.execute_reply":"2025-05-16T12:58:03.707228Z"}},"outputs":[],"execution_count":212},{"cell_type":"code","source":"# Predict\ndef predict(text):\n inputs = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True, max_length=128)\n with torch.no_grad():\n outputs = quantize_model(**inputs)\n probs = F.softmax(outputs.logits, dim=1)\n pred = torch.argmax(probs, dim=1).item()\n label_map = {0: \"Negative\", 1: \"Neutral\", 2: \"Positive\"}\n return f\"Sentiment: {label_map[pred]} (Confidence: {probs[0][pred]:.2f})\"","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# Test predictions\nprint(\"\\nTest Predictions:\")\nprint(predict(\"the product quality is just so so\"))","metadata":{"trusted":true},"outputs":[],"execution_count":null}]}