umang018 committed on
Commit
20aca11
·
verified ·
1 Parent(s): e895c07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -27
app.py CHANGED
@@ -21,10 +21,14 @@ emotion_labels = ["admiration", "amusement", "anger", "annoyance", "approval",
21
  "pride", "realization", "relief", "remorse", "sadness", "surprise",
22
  "neutral"]
23
 
24
- # Function to classify emotions in batches
25
- def classify_emotions_in_batches(texts, batch_size=32):
26
  results = []
27
  start_time = time.time()
 
 
 
 
28
  for i in range(0, len(texts), batch_size):
29
  batch = texts[i:i+batch_size]
30
  inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
@@ -33,12 +37,26 @@ def classify_emotions_in_batches(texts, batch_size=32):
33
  logits = outputs.logits
34
  predicted_class_ids = torch.argmax(logits, dim=-1).tolist()
35
  results.extend(predicted_class_ids)
36
-
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  # Log progress
38
  batch_time = time.time() - start_time
39
  st.write(f"Processed batch {i//batch_size + 1} of {len(texts)//batch_size + 1} in {batch_time:.2f} seconds")
40
  start_time = time.time()
41
- return results
 
42
 
43
  # Streamlit interface
44
  st.title("Enron Emails Emotion Analysis")
@@ -53,28 +71,6 @@ if st.button("Run Inference"):
53
  # Apply emotion classification to the email content
54
  with st.spinner('Running inference...'):
55
  email_texts = enron_data['body'].tolist()
56
- enron_data['emotion'] = classify_emotions_in_batches(email_texts, batch_size=32)
57
 
58
- # Save the results to a CSV file
59
- enron_data.to_csv("enron_emails_with_emotions.csv", index=False)
60
  st.success("Inference completed and results saved!")
61
-
62
- # Check if the results file exists and load it
63
- try:
64
- enron_data = pd.read_csv("enron_emails_with_emotions.csv")
65
-
66
- # Dropdown for selecting an emotion
67
- selected_emotion = st.selectbox("Select Emotion", emotion_labels)
68
-
69
- # Filter emails based on the selected emotion
70
- filtered_emails = enron_data[enron_data['emotion'] == selected_emotion].head(10)
71
-
72
- # Display the filtered emails in a table
73
- if not filtered_emails.empty:
74
- st.write("Top 10 emails with emotion:", selected_emotion)
75
- st.table(filtered_emails[['From', 'To', 'body', 'emotion']])
76
- else:
77
- st.write("No emails found with the selected emotion.")
78
- except FileNotFoundError:
79
- st.warning("Run inference first by clicking the 'Run Inference' button.")
80
-
 
21
  "pride", "realization", "relief", "remorse", "sadness", "surprise",
22
  "neutral"]
23
 
24
+ # Function to classify emotions in batches and save results after each batch
25
+ def classify_emotions_in_batches(texts, batch_size=64, output_file="enron_emails_with_emotions.csv"):
26
  results = []
27
  start_time = time.time()
28
+
29
+ # DataFrame to store the results
30
+ result_df = pd.DataFrame(columns=['From', 'To', 'body', 'emotion'])
31
+
32
  for i in range(0, len(texts), batch_size):
33
  batch = texts[i:i+batch_size]
34
  inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
 
37
  logits = outputs.logits
38
  predicted_class_ids = torch.argmax(logits, dim=-1).tolist()
39
  results.extend(predicted_class_ids)
40
+
41
+ # Save the batch results
42
+ batch_results = {
43
+ 'From': enron_data['From'][i:i+batch_size],
44
+ 'To': enron_data['To'][i:i+batch_size],
45
+ 'body': batch,
46
+ 'emotion': [emotion_labels[idx] for idx in predicted_class_ids]
47
+ }
48
+ batch_df = pd.DataFrame(batch_results)
49
+ result_df = pd.concat([result_df, batch_df])
50
+
51
+ # Save to CSV
52
+ result_df.to_csv(output_file, index=False)
53
+
54
  # Log progress
55
  batch_time = time.time() - start_time
56
  st.write(f"Processed batch {i//batch_size + 1} of {len(texts)//batch_size + 1} in {batch_time:.2f} seconds")
57
  start_time = time.time()
58
+
59
+ return result_df
60
 
61
  # Streamlit interface
62
  st.title("Enron Emails Emotion Analysis")
 
71
  # Apply emotion classification to the email content
72
  with st.spinner('Running inference...'):
73
  email_texts = enron_data['body'].tolist()
74
+ classify_emotions_in_batches(email_texts, batch_size=64)
75
 
 
 
76
  st.success("Inference completed and results saved!")