Spaces:

awacke1
/

Topic-Wizard-SKlearn

Runtime error

Topic-Wizard-SKlearn / app.py

Update app.py

072885d over 2 years ago

1.7 kB

	import streamlit as st
	import pandas as pd
	import bertopic
	import plotly.express as px

	# Set the page title used for this Streamlit app.
	st.set_page_config(page_title="Topic Modeling with Bertopic")

	def read_file(file):
	    """Read the uploaded file and return its contents as a pandas DataFrame.

	    Args:
	        file: The uploaded file object, or ``None`` when nothing has been
	            uploaded. It must expose a ``type`` attribute holding the MIME
	            type and be readable as a file-like object.

	    Returns:
	        For ``text/plain`` input, a DataFrame with a single ``data`` column
	        holding one row per non-blank line. For ``text/csv`` input, the
	        DataFrame produced by ``pd.read_csv``. ``None`` (after reporting an
	        error in the app) when no file was given or the type is unsupported.
	    """
	    if file is None:
	        st.error("No file uploaded. Please upload a TXT or CSV file.")
	        return None

	    if file.type == 'text/plain':
	        # Plain text is treated as one document per line. Parsing it with
	        # pd.read_csv would split lines on commas and can fail on stray
	        # quote characters, so the lines are read directly instead.
	        raw = file.read()
	        if isinstance(raw, bytes):
	            raw = raw.decode('utf-8', errors='replace')
	        lines = [line for line in raw.splitlines() if line.strip()]
	        return pd.DataFrame({'data': lines})

	    if file.type == 'text/csv':
	        return pd.read_csv(file)

	    st.error("Unsupported file format. Please upload a TXT or CSV file.")
	    return None

	# Sidebar to upload the file
	st.sidebar.title("Upload File")
	file = st.sidebar.file_uploader("Choose a file", type=["txt", "csv"])

	# Perform topic modeling when the user clicks the "Visualize" button
	if st.sidebar.button("Visualize"):

	    # The uploader yields None until a file has been chosen; stop early
	    # instead of failing on attribute access further down.
	    if file is None:
	        st.error("Please upload a TXT or CSV file first.")
	        st.stop()

	    # Read the uploaded file
	    df = read_file(file)
	    if df is None:
	        st.stop()

	    # The documents are taken from the 'data' column; a CSV without it
	    # cannot be modeled, so report that instead of raising KeyError.
	    if 'data' not in df.columns:
	        st.error("The uploaded file must contain a 'data' column.")
	        st.stop()

	    # BERTopic expects a list of strings; drop missing values first.
	    docs = df['data'].dropna().astype(str).tolist()
	    if not docs:
	        st.error("The uploaded file contains no text to analyze.")
	        st.stop()

	    # Perform topic modeling using Bertopic (the class is spelled BERTopic)
	    model = bertopic.BERTopic()
	    topics, _probabilities = model.fit_transform(docs)

	    # Create a plot of the topic distribution. Topic ids may include -1
	    # (BERTopic's outlier topic), so size the bins from the lowest id
	    # rather than assuming the ids start at 0.
	    lowest_topic, highest_topic = min(topics), max(topics)
	    fig = px.histogram(
	        x=topics,
	        nbins=highest_topic - lowest_topic + 1,
	        color_discrete_sequence=px.colors.qualitative.Pastel,
	    )
	    fig.update_layout(
	        title="Distribution of Topics",
	        xaxis_title="Topic",
	        yaxis_title="Count",
	    )
	    st.plotly_chart(fig)

	    # Display the top words in each topic
	    st.write("Top words in each topic:")
	    for topic_id in range(highest_topic + 1):
	        st.write(f"Topic {topic_id}: {model.get_topic(topic_id)}")

	    # Display the clusters: group each document under the topic id it was
	    # assigned by fit_transform (topics[i] belongs to docs[i]).
	    clusters = {}
	    for doc, topic_id in zip(docs, topics):
	        clusters.setdefault(topic_id, []).append(doc)

	    st.write("Clusters:")
	    for cluster_id in sorted(clusters):
	        st.write(f"Cluster {cluster_id}:")
	        for doc in clusters[cluster_id]:
	            st.write(f"\t{doc}")