File size: 2,337 Bytes
292e395
 
 
 
 
 
 
 
 
 
 
e0c2b63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292e395
 
 
 
 
 
 
 
 
 
 
c22dcd0
 
292e395
c22dcd0
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
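# NOTE(security): a plaintext credential was committed on this line and has been removed.
# Rotate that password, and load secrets from st.secrets or an environment variable instead.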

import streamlit as st
import pandas as pd
import plotly.express as px
import cx_Oracle as ora
import pandas as pd
from pandas_profiling import ProfileReport

# Heading rendered at the top of the Streamlit page.
PAGE_TITLE = "File Upload and Profiling"
st.title(PAGE_TITLE)

# uploaded_file = st.file_uploader("Upload a CSV file", type="csv")

# RunProfiler=False
# if uploaded_file is not None:
#     if RunProfiler:

#         # Load the data using pandas
#         df = pd.read_csv(uploaded_file)

#         # Generate the pandas profiling report
#         profile = ProfileReport(df, explorative=True)

#         # Display the pandas profiling report using streamlit
#         st.header("Data Profiling Report")
#         st.write(profile.to_html(), unsafe_allow_html=True)

#         # Display word statistics for each categorical string column
#         cat_cols = df.select_dtypes(include='object').columns
#         st.header("Word Statistics for Categorical Columns")
#         for col in cat_cols:
#             st.subheader(col)
#             word_count = df[col].str.split().apply(len).value_counts().sort_index()
#             st.bar_chart(word_count)

#     # Grouped count by each feature
#     num_cols = df.select_dtypes(include=['float', 'int']).columns
#     st.header("Grouped Count by Each Feature")
#     for col in num_cols:
#         st.subheader(col)
#         count_by_feature = df.groupby(col).size().reset_index(name='count')
#         st.bar_chart(count_by_feature)

# Upload a CSV dataset, preview it, and draw a treemap whose hierarchy levels
# are the dataset's object-dtype (text) columns, in column order.
uploaded_file = st.file_uploader("Upload your dataset", type=["csv"])
if uploaded_file is not None:
    # An empty, malformed or wrongly-encoded upload should produce a readable
    # message on the page instead of an unhandled traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        df = None
        st.error(f"Could not read the uploaded CSV file: {exc}")

    if df is not None:
        # Show the first 5 rows so the user can confirm the right file loaded.
        st.dataframe(df.head())

        # Split columns by dtype. Only categorical_cols feeds the chart below;
        # numerical_cols is kept for the disabled chart selection at the end.
        numerical_cols = df.select_dtypes(include=["float", "int"]).columns
        categorical_cols = df.select_dtypes(include=["object"]).columns

        if len(categorical_cols) == 0:
            # A treemap needs at least one hierarchy level in `path`.
            st.warning(
                "The dataset has no text columns, so a treemap cannot be drawn."
            )
        else:
            # Plotly raises ValueError for hierarchies it cannot build (for
            # example a missing value in a parent column whose child column
            # is filled in); report that instead of crashing the app.
            try:
                fig = px.treemap(df, path=categorical_cols)
            except ValueError as exc:
                st.error(f"Could not build the treemap: {exc}")
            else:
                st.plotly_chart(fig)

        # Disabled alternative chart selection, kept for reference:
        # if len(numerical_cols) >= 2:
        #     fig = px.scatter_matrix(df, dimensions=numerical_cols)
        #     st.plotly_chart(fig)
        # elif len(categorical_cols) >= 2:
        #     fig = px.treemap(df, path=categorical_cols)
        #     st.plotly_chart(fig)
        # else:
        #     fig = px.sunburst(df, path=categorical_cols + numerical_cols)
        #     st.plotly_chart(fig)