umang018 committed on
Commit
80bf476
·
verified ·
1 Parent(s): f1a79c9

Delete RobertaGoEmotionClassifier

Browse files
Files changed (1) hide show
  1. RobertaGoEmotionClassifier +0 -35
RobertaGoEmotionClassifier DELETED
@@ -1,35 +0,0 @@
1
- import pandas as pd
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
- import torch
4
- from datasets import load_dataset
5
-
6
- # Load the Enron dataset
7
- dataset = load_dataset("Hellisotherpeople/enron_emails_parsed")
8
- enron_data = pd.DataFrame(dataset['train'])
9
-
10
- # Load the model and tokenizer
11
- model_name = "modelSamLowe/roberta-base-go_emotions"
12
- tokenizer = AutoTokenizer.from_pretrained(model_name)
13
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
14
-
15
- # Define the emotion labels (based on the GoEmotions dataset)
16
- emotion_labels = ["admiration", "amusement", "anger", "annoyance", "approval",
17
- "caring", "confusion", "curiosity", "desire", "disappointment",
18
- "disapproval", "disgust", "embarrassment", "excitement", "fear",
19
- "gratitude", "grief", "joy", "love", "nervousness", "optimism",
20
- "pride", "realization", "relief", "remorse", "sadness", "surprise",
21
- "neutral"]
22
-
23
- # Function to classify emotion
24
- def classify_emotion(text):
25
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
26
- outputs = model(**inputs)
27
- logits = outputs.logits
28
- predicted_class_id = torch.argmax(logits, dim=-1).item()
29
- return emotion_labels[predicted_class_id]
30
-
31
- # Apply emotion classification to the email content
32
- enron_data['emotion'] = enron_data['body'].apply(classify_emotion)
33
-
34
- # Save the results to a CSV file
35
- enron_data.to_csv("enron_emails_with_emotions.csv", index=False)