File size: 1,312 Bytes
1b0a5d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import streamlit as st
import pandas as pd
import streamlit.components.v1 as components

# Sidebar: logo, tagline, and links to the package and the source dataset.
st.sidebar.image("images/logo.png", use_column_width=True)
st.sidebar.write("Bunka Summarizes & Visualizes Information as Maps using LLMs.")
st.sidebar.title("Github Page")
st.sidebar.write(
    "Have a look at the following package on GitHub: https://github.com/charlesdedampierre/BunkaTopics"
)
st.sidebar.title("Dataset")
# The linked dataset is OpenAssistant's OASST2 (the one named in the
# "Inside the OASST2 dataset" section of this page), not Wikipedia, so the
# caption names it accordingly.
st.sidebar.write(
    "We used a subset of the OASST2 dataset: https://huggingface.co/datasets/OpenAssistant/oasst2"
)

# Page headline, followed by a preview of the raw messages.
st.title("How to understand large textual datasets?")

# Load the sample CSV (first column is the index), keep only the message id
# and text columns, and show the first 300 rows.
sample_columns = ["message_id", "text"]
df = pd.read_csv("data/data_sample.csv", index_col=[0])[sample_columns].head(300)
st.dataframe(data=df, use_container_width=True)
st.title("Inside the OASST2 dataset")

# Read the HTML file inside a `with` block so the file handle is closed as
# soon as its contents are loaded, instead of staying open after each run of
# the script.
with open("images/map_prompt.html", "r", encoding="utf-8") as element:
    map_html = element.read()

# Embed the loaded HTML in a fixed 900x900 frame.
components.html(map_html, height=900, width=900)

st.title("Some insights by territory")

# Load the per-topic summary (first CSV column is the index) and keep only
# the columns that are displayed.
info_columns = ["name", "size", "percent"]
df_info = pd.read_csv("data/topics_info.csv", index_col=[0])[info_columns]

# Render the percentage as a truncated integer followed by a percent sign.
df_info["percent"] = df_info["percent"].apply(lambda pct: f"{int(pct)}%")

# Drop the CSV index so the table is numbered from 0.
df_info = df_info.reset_index(drop=True)

st.dataframe(data=df_info, use_container_width=True)

# Closing section: the pipeline diagram, scaled to the column width.
st.title("Bunka Exploration Engine")
st.image("images/pipeline.png", use_column_width=True)