umang018 committed on
Commit
602bb55
·
verified ·
1 Parent(s): 20aca11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -23
app.py CHANGED
@@ -21,14 +21,10 @@ emotion_labels = ["admiration", "amusement", "anger", "annoyance", "approval",
21
  "pride", "realization", "relief", "remorse", "sadness", "surprise",
22
  "neutral"]
23
 
24
- # Function to classify emotions in batches and save results after each batch
25
- def classify_emotions_in_batches(texts, batch_size=64, output_file="enron_emails_with_emotions.csv"):
26
  results = []
27
  start_time = time.time()
28
-
29
- # DataFrame to store the results
30
- result_df = pd.DataFrame(columns=['From', 'To', 'body', 'emotion'])
31
-
32
  for i in range(0, len(texts), batch_size):
33
  batch = texts[i:i+batch_size]
34
  inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
@@ -37,26 +33,12 @@ def classify_emotions_in_batches(texts, batch_size=64, output_file="enron_emails
37
  logits = outputs.logits
38
  predicted_class_ids = torch.argmax(logits, dim=-1).tolist()
39
  results.extend(predicted_class_ids)
40
-
41
- # Save the batch results
42
- batch_results = {
43
- 'From': enron_data['From'][i:i+batch_size],
44
- 'To': enron_data['To'][i:i+batch_size],
45
- 'body': batch,
46
- 'emotion': [emotion_labels[idx] for idx in predicted_class_ids]
47
- }
48
- batch_df = pd.DataFrame(batch_results)
49
- result_df = pd.concat([result_df, batch_df])
50
-
51
- # Save to CSV
52
- result_df.to_csv(output_file, index=False)
53
-
54
  # Log progress
55
  batch_time = time.time() - start_time
56
  st.write(f"Processed batch {i//batch_size + 1} of {len(texts)//batch_size + 1} in {batch_time:.2f} seconds")
57
  start_time = time.time()
58
-
59
- return result_df
60
 
61
  # Streamlit interface
62
  st.title("Enron Emails Emotion Analysis")
@@ -71,6 +53,8 @@ if st.button("Run Inference"):
71
  # Apply emotion classification to the email content
72
  with st.spinner('Running inference...'):
73
  email_texts = enron_data['body'].tolist()
74
- classify_emotions_in_batches(email_texts, batch_size=64)
75
 
 
 
76
  st.success("Inference completed and results saved!")
 
21
  "pride", "realization", "relief", "remorse", "sadness", "surprise",
22
  "neutral"]
23
 
24
+ # Function to classify emotions in batches
25
+ def classify_emotions_in_batches(texts, batch_size=64):
26
  results = []
27
  start_time = time.time()
 
 
 
 
28
  for i in range(0, len(texts), batch_size):
29
  batch = texts[i:i+batch_size]
30
  inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
 
33
  logits = outputs.logits
34
  predicted_class_ids = torch.argmax(logits, dim=-1).tolist()
35
  results.extend(predicted_class_ids)
36
+
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  # Log progress
38
  batch_time = time.time() - start_time
39
  st.write(f"Processed batch {i//batch_size + 1} of {len(texts)//batch_size + 1} in {batch_time:.2f} seconds")
40
  start_time = time.time()
41
+ return results
 
42
 
43
  # Streamlit interface
44
  st.title("Enron Emails Emotion Analysis")
 
53
  # Apply emotion classification to the email content
54
  with st.spinner('Running inference...'):
55
  email_texts = enron_data['body'].tolist()
56
+ enron_data['emotion'] = classify_emotions_in_batches(email_texts, batch_size=64)
57
 
58
+ # Save the results to a CSV file
59
+ enron_data.to_csv("enron_emails_with_emotions.csv", index=False)
60
  st.success("Inference completed and results saved!")