|
import streamlit as st |
|
import pandas as pd |
|
from datasets import load_dataset |
|
from transformers import pipeline |
|
|
|
|
|
@st.cache_resource
def load_data():
    """Load the parsed Enron e-mail corpus and return its ``train`` split.

    The result is cached with ``st.cache_resource`` so the dataset is loaded
    once per server process instead of on every script rerun.  The legacy
    ``st.cache`` decorator is deprecated and additionally hashes the returned
    object to detect mutation, which is wasteful for a large dataset object;
    ``st.cache_resource`` hands back the same object without copying or
    hashing it.

    Returns:
        The ``'train'`` split of ``Hellisotherpeople/enron_emails_parsed`` as
        returned by ``datasets.load_dataset``.
    """
    dataset = load_dataset("Hellisotherpeople/enron_emails_parsed")
    return dataset['train']
|
|
|
data = load_data()


@st.cache_resource
def _load_models():
    """Build the three Hugging Face pipelines once per server process.

    Streamlit re-executes the whole script on every widget interaction, so
    constructing the pipelines at module level would reload the model weights
    on each rerun.  Caching them as a resource keeps a single shared instance.

    Returns:
        A ``(sentiment, ner, topic)`` tuple of pipeline objects.
    """
    return (
        pipeline('sentiment-analysis'),
        pipeline('ner', aggregation_strategy="simple"),
        pipeline('zero-shot-classification', model='facebook/bart-large-mnli'),
    )


# Module-level names are kept so the rest of the script is unaffected.
sentiment_model, ner_model, topic_model = _load_models()
|
|
|
|
|
st.title('Enron Email Analysis')

st.sidebar.title('Options')

# Without this guard an empty dataset would leave no valid index to select
# and the row lookup below would fail.
email_count = len(data)
if email_count == 0:
    st.error("The dataset contains no emails.")
    st.stop()

# A bounded numeric input replaces a select box over range(len(data)): the
# select box has to materialise one option per row, which scales poorly when
# the dataset is large.
email_id = int(
    st.sidebar.number_input(
        'Select Email ID',
        min_value=0,
        max_value=email_count - 1,
        value=0,
        step=1,
    )
)

# Fall back to an empty string in case a row has no body (None) -- TODO
# confirm whether the dataset can actually contain such rows.
email_text = data[email_id]['body'] or ""

st.write("## Email Content")
# Show the body verbatim: passing raw e-mail text through the Markdown
# renderer would reinterpret characters such as '#', '*' and '_' and
# collapse single line breaks.
st.text(email_text)
|
|
|
|
|
st.write("## Sentiment Analysis")

# truncation=True asks the tokenizer to cut the input at the model's maximum
# sequence length; without it a long e-mail can exceed that limit and make
# the pipeline raise instead of returning a label.
sentiment = sentiment_model(email_text, truncation=True)

st.write(sentiment)
|
|
|
|
|
# Named-entity section: emit the heading first, then run the NER pipeline on
# the selected e-mail and display whatever it returns.
_ner_heading = "## Named Entity Recognition (NER)"
st.write(_ner_heading)

entities = ner_model(email_text)
st.write(entities)
|
|
|
|
|
st.write("## Topic Modeling")

# Candidate topics scored by the zero-shot classifier.
labels = ['business', 'personal', 'financial', 'legal', 'politics']

if email_text:
    topics = topic_model(email_text, candidate_labels=labels)
    st.write(topics)
else:
    # The zero-shot pipeline rejects an empty sequence with a ValueError, so
    # skip the model call and tell the user why there is no result.
    topics = None
    st.info("This email has no body text to classify.")
|
|