umang018 committed on
Commit
a72bbed
·
verified ·
1 Parent(s): 616d1b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -9
app.py CHANGED
@@ -1,15 +1,13 @@
1
  import pandas as pd
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  from datasets import load_dataset
 
5
 
6
  # Check if GPU is available
7
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8
 
9
- # Load the Enron dataset
10
- dataset = load_dataset("Hellisotherpeople/enron_emails_parsed")
11
- enron_data = pd.DataFrame(dataset['train'])
12
-
13
  # Load the model and tokenizer
14
  model_name = "SamLowe/roberta-base-go_emotions"
15
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -26,6 +24,7 @@ emotion_labels = ["admiration", "amusement", "anger", "annoyance", "approval",
26
  # Function to classify emotions in batches
27
  def classify_emotions_in_batches(texts, batch_size=32):
28
  results = []
 
29
  for i in range(0, len(texts), batch_size):
30
  batch = texts[i:i+batch_size]
31
  inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
@@ -34,11 +33,48 @@ def classify_emotions_in_batches(texts, batch_size=32):
34
  logits = outputs.logits
35
  predicted_class_ids = torch.argmax(logits, dim=-1).tolist()
36
  results.extend(predicted_class_ids)
 
 
 
 
 
37
  return results
38
 
39
- # Apply emotion classification to the email content in batches
40
- email_texts = enron_data['body'].tolist()
41
- enron_data['emotion'] = classify_emotions_in_batches(email_texts, batch_size=32)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- # Save the results to a CSV file
44
- enron_data.to_csv("enron_emails_with_emotions.csv", index=False)
 
1
  import pandas as pd
2
+ import streamlit as st
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import torch
5
  from datasets import load_dataset
6
+ import time
7
 
8
  # Check if GPU is available
9
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
 
 
 
 
 
11
  # Load the model and tokenizer
12
  model_name = "SamLowe/roberta-base-go_emotions"
13
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
24
  # Function to classify emotions in batches
25
  def classify_emotions_in_batches(texts, batch_size=32):
26
  results = []
27
+ start_time = time.time()
28
  for i in range(0, len(texts), batch_size):
29
  batch = texts[i:i+batch_size]
30
  inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
 
33
  logits = outputs.logits
34
  predicted_class_ids = torch.argmax(logits, dim=-1).tolist()
35
  results.extend(predicted_class_ids)
36
+
37
+ # Log progress
38
+ batch_time = time.time() - start_time
39
+ st.write(f"Processed batch {i//batch_size + 1} of {len(texts)//batch_size + 1} in {batch_time:.2f} seconds")
40
+ start_time = time.time()
41
  return results
42
 
43
+ # Streamlit interface
44
+ st.title("Enron Emails Emotion Analysis")
45
+
46
+ # Button to run the inference script
47
+ if st.button("Run Inference"):
48
+ # Load the Enron dataset
49
+ with st.spinner('Loading dataset...'):
50
+ dataset = load_dataset("Hellisotherpeople/enron_emails_parsed")
51
+ enron_data = pd.DataFrame(dataset['train'])
52
+
53
+ # Apply emotion classification to the email content
54
+ with st.spinner('Running inference...'):
55
+ email_texts = enron_data['body'].tolist()
56
+ enron_data['emotion'] = classify_emotions_in_batches(email_texts, batch_size=32)
57
+
58
+ # Save the results to a CSV file
59
+ enron_data.to_csv("enron_emails_with_emotions.csv", index=False)
60
+ st.success("Inference completed and results saved!")
61
+
62
+ # Check if the results file exists and load it
63
+ try:
64
+ enron_data = pd.read_csv("enron_emails_with_emotions.csv")
65
+
66
+ # Dropdown for selecting an emotion
67
+ selected_emotion = st.selectbox("Select Emotion", emotion_labels)
68
+
69
+ # Filter emails based on the selected emotion
70
+ filtered_emails = enron_data[enron_data['emotion'] == selected_emotion].head(10)
71
+
72
+ # Display the filtered emails in a table
73
+ if not filtered_emails.empty:
74
+ st.write("Top 10 emails with emotion:", selected_emotion)
75
+ st.table(filtered_emails[['From', 'To', 'body', 'emotion']])
76
+ else:
77
+ st.write("No emails found with the selected emotion.")
78
+ except FileNotFoundError:
79
+ st.warning("Run inference first by clicking the 'Run Inference' button.")
80