File size: 1,276 Bytes
ac9d2b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import pandas as pd
import streamlit as st
import streamlit.components.v1 as components
from pandas_profiling import ProfileReport

# Streamlit app: upload a CSV, show a pandas-profiling report for it, then
# chart per-column word-count and value-count distributions.
st.set_page_config(page_title="File Upload and Profiling", layout="wide")

st.title("File Upload and Profiling")

uploaded_file = st.file_uploader("Upload a CSV file", type="csv")

if uploaded_file is not None:
    # Load the data using pandas; report unreadable uploads in the page
    # instead of crashing the app with a traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()

    # Generate the pandas profiling report
    profile = ProfileReport(df, explorative=True)

    # The report is a complete HTML document with its own scripts and
    # styles, so embed it as an HTML component rather than passing it
    # through st.write's Markdown rendering.
    st.header("Data Profiling Report")
    components.html(profile.to_html(), height=1000, scrolling=True)

    # Display word statistics for each categorical string column
    cat_cols = df.select_dtypes(include="object").columns
    st.header("Word Statistics for Categorical Columns")
    for col in cat_cols:
        st.subheader(col)
        # Drop missing cells and coerce the rest to str: .str.split() yields
        # NaN for missing/non-string values, and taking len() of that NaN
        # raises TypeError.
        words_per_value = df[col].dropna().astype(str).str.split().str.len()
        # Index = number of words, value = how many cells have that many.
        word_count = words_per_value.value_counts().sort_index()
        st.bar_chart(word_count)

    # Grouped count by each feature
    # "number" selects every numeric dtype, not just the platform-default
    # int/float widths.
    num_cols = df.select_dtypes(include="number").columns
    st.header("Grouped Count by Each Feature")
    for col in num_cols:
        st.subheader(col)
        # Keep the feature values as the index so the chart shows one bar
        # per distinct value with the count as its height; resetting the
        # index would make the feature itself a second plotted series.
        count_by_feature = df.groupby(col).size().rename("count")
        st.bar_chart(count_by_feature)