Update app.py
app.py CHANGED
@@ -1,15 +1,13 @@
 import pandas as pd
+import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 from datasets import load_dataset
+import time
 
 # Check if GPU is available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# Load the Enron dataset
-dataset = load_dataset("Hellisotherpeople/enron_emails_parsed")
-enron_data = pd.DataFrame(dataset['train'])
-
 # Load the model and tokenizer
 model_name = "SamLowe/roberta-base-go_emotions"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -26,6 +24,7 @@ emotion_labels = ["admiration", "amusement", "anger", "annoyance", "approval",
 # Function to classify emotions in batches
 def classify_emotions_in_batches(texts, batch_size=32):
     results = []
+    start_time = time.time()
     for i in range(0, len(texts), batch_size):
         batch = texts[i:i+batch_size]
         inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
@@ -34,11 +33,48 @@ def classify_emotions_in_batches(texts, batch_size=32):
         logits = outputs.logits
         predicted_class_ids = torch.argmax(logits, dim=-1).tolist()
         results.extend(predicted_class_ids)
+
+        # Log progress
+        batch_time = time.time() - start_time
+        st.write(f"Processed batch {i//batch_size + 1} of {len(texts)//batch_size + 1} in {batch_time:.2f} seconds")
+        start_time = time.time()
     return results
 
-#
-
-
+# Streamlit interface
+st.title("Enron Emails Emotion Analysis")
+
+# Button to run the inference script
+if st.button("Run Inference"):
+    # Load the Enron dataset
+    with st.spinner('Loading dataset...'):
+        dataset = load_dataset("Hellisotherpeople/enron_emails_parsed")
+        enron_data = pd.DataFrame(dataset['train'])
+
+    # Apply emotion classification to the email content
+    with st.spinner('Running inference...'):
+        email_texts = enron_data['body'].tolist()
+        enron_data['emotion'] = classify_emotions_in_batches(email_texts, batch_size=32)
+
+    # Save the results to a CSV file
+    enron_data.to_csv("enron_emails_with_emotions.csv", index=False)
+    st.success("Inference completed and results saved!")
+
+# Check if the results file exists and load it
+try:
+    enron_data = pd.read_csv("enron_emails_with_emotions.csv")
+
+    # Dropdown for selecting an emotion
+    selected_emotion = st.selectbox("Select Emotion", emotion_labels)
+
+    # Filter emails based on the selected emotion
+    filtered_emails = enron_data[enron_data['emotion'] == selected_emotion].head(10)
+
+    # Display the filtered emails in a table
+    if not filtered_emails.empty:
+        st.write("Top 10 emails with emotion:", selected_emotion)
+        st.table(filtered_emails[['From', 'To', 'body', 'emotion']])
+    else:
+        st.write("No emails found with the selected emotion.")
+except FileNotFoundError:
+    st.warning("Run inference first by clicking the 'Run Inference' button.")
 
-# Save the results to a CSV file
-enron_data.to_csv("enron_emails_with_emotions.csv", index=False)
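A note on the new hunk: classify_emotions_in_batches() returns the raw argmax class IDs as integers, while the selectbox filter compares enron_data['emotion'] against the string labels in emotion_labels, so the two only line up if the IDs are translated to label names at some point. Below is a minimal sketch of that translation, assuming the model object loaded alongside the tokenizer in app.py exposes the checkpoint's usual config.id2label mapping; the snippet reuses names from the committed code but is illustrative and not part of this commit.

    # Map integer class IDs to go_emotions label strings via the model config
    # (assumed to line up with the emotion_labels list defined in app.py).
    id2label = model.config.id2label  # e.g. {0: "admiration", 1: "amusement", ...}

    predicted_ids = classify_emotions_in_batches(email_texts, batch_size=32)
    enron_data['emotion'] = [id2label[class_id] for class_id in predicted_ids]

With string labels stored in the 'emotion' column, the later filter enron_data[enron_data['emotion'] == selected_emotion] can match selections such as "anger" or "admiration" directly, and the saved CSV stays human-readable.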