Spaces:
Runtime error
Runtime error
File size: 1,699 Bytes
c6b92c7 072885d d8f9678 072885d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import streamlit as st
import pandas as pd
import bertopic
import plotly.express as px
# Set the page title for this app.
st.set_page_config(page_title="Topic Modeling with Bertopic")
# Function to read the uploaded file and return a Pandas DataFrame
def read_file(file):
    """Load an uploaded TXT or CSV file into a DataFrame.

    Args:
        file: The uploaded file object (or ``None`` when nothing has been
            uploaded). It is expected to expose a ``type`` MIME string,
            optionally a ``name``, and a ``read()`` method.

    Returns:
        A DataFrame, or ``None`` when no file was supplied or the format is
        unsupported (an error is shown to the user in both cases).
        TXT input yields a single ``data`` column with one row per
        non-blank line; CSV input is parsed as-is.
    """
    if file is None:
        st.error("No file uploaded. Please upload a TXT or CSV file.")
        return None

    mime = getattr(file, "type", "") or ""
    # Fall back to the extension: the reported MIME type for a CSV is not
    # always 'text/csv' (presumably browser/OS dependent).
    name = (getattr(file, "name", "") or "").lower()

    if mime == 'text/plain' or name.endswith('.txt'):
        # Read line by line instead of using the CSV parser: free text
        # usually contains commas, which read_csv would treat as column
        # separators and either mangle the rows or raise a ParserError.
        raw = file.read()
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8-sig", errors="replace")
        lines = [line.strip() for line in raw.splitlines()]
        return pd.DataFrame({'data': [line for line in lines if line]})

    if mime == 'text/csv' or name.endswith('.csv'):
        return pd.read_csv(file)

    st.error("Unsupported file format. Please upload a TXT or CSV file.")
    return None
# Sidebar to upload the file
st.sidebar.title("Upload File")
file = st.sidebar.file_uploader("Choose a file", type=["txt", "csv"])

# Perform topic modeling when the user clicks the "Visualize" button
if st.sidebar.button("Visualize"):
    # Nothing to do until a file has actually been uploaded.
    if file is None:
        st.error("Please upload a TXT or CSV file first.")
        st.stop()

    # Read the uploaded file
    df = read_file(file)
    if df is None:
        st.stop()

    # The documents are taken from the 'data' column; a CSV without it
    # would otherwise fail with a KeyError.
    if 'data' not in df.columns:
        st.error("The uploaded file must contain a 'data' column with one document per row.")
        st.stop()

    # Pass a plain list of strings to the model, skipping missing values.
    docs = df['data'].dropna().astype(str).tolist()
    if not docs:
        st.error("The uploaded file does not contain any documents.")
        st.stop()

    # Perform topic modeling using BERTopic (the class is `BERTopic`,
    # not `Bertopic`). Only the topic assignments are used below.
    model = bertopic.BERTopic()
    topics, _ = model.fit_transform(docs)

    # Use the topic ids that were actually assigned rather than
    # range(max + 1): the assignments may include -1 (BERTopic's outlier
    # topic, per its documentation), which the range would miss.
    topic_ids = sorted(set(topics))

    # Create a plot of the topic distribution
    fig = px.histogram(
        x=topics,
        nbins=len(topic_ids),
        color_discrete_sequence=px.colors.qualitative.Pastel,
    )
    fig.update_layout(
        title="Distribution of Topics",
        xaxis_title="Topic",
        yaxis_title="Count",
    )
    st.plotly_chart(fig)

    # Display the top words in each topic
    st.write("Top words in each topic:")
    for topic_id in topic_ids:
        st.write(f"Topic {topic_id}: {model.get_topic(topic_id)}")

    # Display the clusters: group the documents by their assigned topic
    # (the model has no `get_clusters` method).
    clusters = {}
    for topic_id, doc in zip(topics, docs):
        clusters.setdefault(topic_id, []).append(doc)

    st.write("Clusters:")
    for cluster_id in topic_ids:
        st.write(f"Cluster {cluster_id}:")
        for doc in clusters[cluster_id]:
            st.write(f"\t{doc}")
|