# Spaces status: Runtime error
import streamlit as st | |
import pandas as pd | |
import bertopic | |
import plotly.express as px | |
# Set the page title for the app.
st.set_page_config(page_title="Topic Modeling with Bertopic")
# Function to read the uploaded file and return a Pandas DataFrame | |
def read_file(file):
    """Load an uploaded TXT or CSV file into a pandas DataFrame.

    Args:
        file: The uploaded file object (or ``None`` when nothing has been
            uploaded). It is expected to be file-like and to expose a
            ``type`` attribute holding the MIME type; a ``name`` attribute,
            when present, is used as a fallback to detect the format.

    Returns:
        A DataFrame, or ``None`` when no file was given or the format is
        unsupported (an error message is shown in the app in both cases).
        TXT files yield a single ``data`` column with one row per non-blank
        line; CSV files are parsed with ``pd.read_csv`` using its defaults.
    """
    if file is None:
        st.error("No file uploaded. Please upload a TXT or CSV file.")
        return None

    mime_type = getattr(file, "type", "") or ""
    file_name = (getattr(file, "name", "") or "").lower()

    # The MIME type is checked first; the extension is only a fallback
    # because browsers do not report CSV files consistently
    # (e.g. "application/vnd.ms-excel").
    if mime_type == 'text/plain':
        file_kind = "txt"
    elif mime_type == 'text/csv':
        file_kind = "csv"
    elif file_name.endswith(".txt"):
        file_kind = "txt"
    elif file_name.endswith(".csv"):
        file_kind = "csv"
    else:
        st.error("Unsupported file format. Please upload a TXT or CSV file.")
        return None

    if file_kind == "csv":
        return pd.read_csv(file)

    # Plain text: treat every non-blank line as one document. Parsing it as
    # CSV would split lines on commas and corrupt ordinary sentences.
    raw = file.read()
    if isinstance(raw, bytes):
        # "utf-8-sig" also drops a leading byte-order mark if present.
        text = raw.decode("utf-8-sig", errors="replace")
    else:
        text = raw
    stripped_lines = (line.strip() for line in text.splitlines())
    return pd.DataFrame({'data': [line for line in stripped_lines if line]})
# Sidebar to upload the file
st.sidebar.title("Upload File")
file = st.sidebar.file_uploader("Choose a file", type=["txt", "csv"])

# Perform topic modeling when the user clicks the "Visualize" button
if st.sidebar.button("Visualize"):
    # The button can be clicked before anything has been uploaded.
    if file is None:
        st.error("Please upload a TXT or CSV file first.")
        st.stop()

    # Read the uploaded file
    df = read_file(file)
    if df is None:
        st.stop()
    if df.empty:
        st.error("The uploaded file contains no data.")
        st.stop()

    # Use the 'data' column when present; CSV uploads may name their text
    # column differently, so fall back to the first column.
    text_column = 'data' if 'data' in df.columns else df.columns[0]
    docs = [
        doc for doc in df[text_column].dropna().astype(str) if doc.strip()
    ]
    if not docs:
        st.error("The uploaded file contains no text to analyze.")
        st.stop()

    # Perform topic modeling using BERTopic (the class is named `BERTopic`).
    model = bertopic.BERTopic()
    topics, _probabilities = model.fit_transform(docs)
    topics = [int(topic_id) for topic_id in topics]

    # Create a plot of the topic distribution. Counting explicitly gives one
    # bar per topic id, including BERTopic's outlier topic -1.
    topic_counts = pd.Series(topics).value_counts().sort_index()
    fig = px.bar(
        x=[str(topic_id) for topic_id in topic_counts.index],
        y=topic_counts.tolist(),
        color_discrete_sequence=px.colors.qualitative.Pastel,
    )
    fig.update_layout(
        title="Distribution of Topics",
        xaxis_title="Topic",
        yaxis_title="Count",
    )
    st.plotly_chart(fig)

    # Display the top words in each topic (the outlier topic -1 is skipped)
    st.write("Top words in each topic:")
    for topic_id in sorted(set(topics) - {-1}):
        st.write(f"Topic {topic_id}: {model.get_topic(topic_id)}")

    # Display the clusters: the documents grouped by their assigned topic
    clusters = {}
    for doc, topic_id in zip(docs, topics):
        clusters.setdefault(topic_id, []).append(doc)

    st.write("Clusters:")
    for cluster_id in sorted(clusters):
        st.write(f"Cluster {cluster_id}:")
        for doc in clusters[cluster_id]:
            st.write(f"\t{doc}")