umang018 committed on
Commit
708ef37
·
verified ·
1 Parent(s): 80bf476

Create RobertaGoEmotionClassifier.py

Browse files
Files changed (1) hide show
  1. RobertaGoEmotionClassifier.py +35 -0
RobertaGoEmotionClassifier.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+ import torch
4
+ from datasets import load_dataset
5
+
6
# Load the parsed Enron e-mail corpus from the Hugging Face Hub and turn its
# "train" split into a DataFrame so the bodies can be processed column-wise.
dataset = load_dataset("Hellisotherpeople/enron_emails_parsed")
enron_data = pd.DataFrame(dataset["train"])

# Load the GoEmotions classifier and its matching tokenizer.
# Hub repository ids have the form "<owner>/<name>". The previous value,
# "modelSamLowe/roberta-base-go_emotions", carried a stray "model" prefix and
# does not name the intended repository.
model_name = "SamLowe/roberta-base-go_emotions"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Emotion names indexed by predicted class id (27 emotions plus "neutral").
# NOTE(review): this order is assumed to match the checkpoint's
# ``model.config.id2label`` mapping -- confirm against the model config.
emotion_labels = [
    "admiration", "amusement", "anger", "annoyance", "approval",
    "caring", "confusion", "curiosity", "desire", "disappointment",
    "disapproval", "disgust", "embarrassment", "excitement", "fear",
    "gratitude", "grief", "joy", "love", "nervousness", "optimism",
    "pride", "realization", "relief", "remorse", "sadness", "surprise",
    "neutral",
]
22
+
23
+ # Function to classify emotion
24
def classify_emotion(text):
    """Return the single highest-scoring emotion label for ``text``.

    The text is tokenized (truncated to the tokenizer's maximum length),
    passed through the module-level ``model``, and the index of the largest
    logit is looked up in the module-level ``emotion_labels`` list.

    Args:
        text: The e-mail body to classify. Values that are not strings
            (for example ``None`` or NaN from a DataFrame column with
            missing bodies) are classified as the empty string instead of
            raising inside the tokenizer.

    Returns:
        The entry of ``emotion_labels`` at the predicted class index.
    """
    # A missing cell must not abort a long ``Series.apply`` run part-way
    # through, so fall back to empty text for anything that is not a string.
    if not isinstance(text, str):
        text = ""

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: skip autograd bookkeeping so no graph is built and
    # retained for every e-mail that is classified.
    with torch.no_grad():
        logits = model(**inputs).logits

    predicted_class_id = torch.argmax(logits, dim=-1).item()
    return emotion_labels[predicted_class_id]
30
+
31
# Label every e-mail body with its predicted emotion; the result is stored
# in a new "emotion" column alongside the original data.
enron_data["emotion"] = enron_data["body"].map(classify_emotion)

# Write the annotated table to disk, leaving out the DataFrame index.
enron_data.to_csv("enron_emails_with_emotions.csv", index=False)