File size: 4,370 Bytes
ff27984
68fece7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff27984
68fece7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm, skew
import platform

# Korean font setup (selected automatically for Windows, Mac, or Linux)
# Pick the font family by host OS. Anything that is neither Windows nor
# macOS (i.e. the Linux branch) falls back to NanumGothic, which has to be
# installed separately, e.g. `sudo apt-get install fonts-nanum*`.
plt.rc(
    'font',
    family={'Windows': 'Malgun Gothic', 'Darwin': 'AppleGothic'}.get(
        platform.system(), 'NanumGothic'
    ),
)

# Keep minus signs from rendering as broken glyphs with the fonts above.
plt.rcParams['axes.unicode_minus'] = False

def main():
    """Run the Streamlit app that analyzes a student score distribution.

    The page shows a title, a short description and a CSV uploader. Once a
    file is uploaded the function:

    1. reads it with ``pd.read_csv(..., encoding='utf-8-sig')`` and previews
       the first rows,
    2. lets the user pick one column and drops its missing values,
    3. if the column is numeric, shows ``describe()`` statistics, a density
       histogram with KDE overlaid by a normal curve fitted with
       ``norm.fit``, and the sample skewness with an interpretation
       (``> 0.5`` right-tailed, ``< -0.5`` left-tailed, otherwise roughly
       symmetric),
    4. if the column is not numeric, shows an error message instead.

    Any exception raised after the upload is caught and reported through
    ``st.error`` / ``st.warning`` rather than propagated.

    Returns:
        None
    """
    st.title("ํ•™์ƒ ์ ์ˆ˜ ๋ถ„ํฌ ๋ถ„์„ ๋„๊ตฌ ๐Ÿ“Š")
    st.write("CSV ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์—ฌ ํ•™์ƒ๋“ค์˜ ์ ์ˆ˜ ๋ถ„ํฌ๋ฅผ ํ™•์ธํ•˜๊ณ , ์ •๊ทœ๋ถ„ํฌ์™€์˜ ์ฐจ์ด ๋ฐ ์™œ๋„(skewness)๋ฅผ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")
    st.write("---")

    # File upload widget.
    uploaded_file = st.file_uploader("์ ์ˆ˜ ๋ฐ์ดํ„ฐ๊ฐ€ ํฌํ•จ๋œ CSV ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”.", type="csv")

    # Nothing to analyze until the user provides a file.
    if uploaded_file is None:
        return

    try:
        # Read the CSV with utf-8-sig so a leading BOM is stripped.
        df = pd.read_csv(uploaded_file, encoding='utf-8-sig')

        st.subheader("์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
        st.dataframe(df.head())

        # Choose the score column to analyze.
        score_column = st.selectbox("๋ถ„์„ํ•  ์ ์ˆ˜ ์—ด(column)์„ ์„ ํƒํ•˜์„ธ์š”:", df.columns)
        if not score_column:
            return

        # Extract the selected column with missing values removed.
        scores = df[score_column].dropna()

        if not pd.api.types.is_numeric_dtype(scores):
            st.error(f"์˜ค๋ฅ˜: ์„ ํƒํ•˜์‹  '{score_column}' ์—ด์€ ์ˆซ์ž ๋ฐ์ดํ„ฐ๊ฐ€ ์•„๋‹™๋‹ˆ๋‹ค. ์ˆซ์ž ๋ฐ์ดํ„ฐ๋กœ ๊ตฌ์„ฑ๋œ ์—ด์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”.")
            return

        st.subheader(f"'{score_column}' ์ ์ˆ˜ ๋ถ„ํฌ ๋ถ„์„ ๊ฒฐ๊ณผ")

        # 1. Descriptive statistics.
        st.write("#### ๐Ÿ“ˆ ๊ธฐ์ˆ  ํ†ต๊ณ„๋Ÿ‰")
        st.table(scores.describe())

        # 2. Distribution plot compared against a fitted normal curve.
        st.write("#### ๐ŸŽจ ์ ์ˆ˜ ๋ถ„ํฌ ์‹œ๊ฐํ™”")
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            # Histogram with KDE overlay.
            sns.histplot(scores, kde=True, stat='density', label='ํ•™์ƒ ์ ์ˆ˜ ๋ถ„ํฌ', ax=ax)

            # Normal curve over the histogram's x-range. The limits are read
            # from `ax` itself rather than pyplot's global "current axes",
            # which another session/thread may have changed.
            mu, std = norm.fit(scores)
            xmin, xmax = ax.get_xlim()
            x = np.linspace(xmin, xmax, 100)
            p = norm.pdf(x, mu, std)
            ax.plot(x, p, 'k', linewidth=2, label='์ •๊ทœ๋ถ„ํฌ ๊ณก์„ ')

            title = f"'{score_column}' ์ ์ˆ˜ ๋ถ„ํฌ (ํ‰๊ท : {mu:.2f}, ํ‘œ์ค€ํŽธ์ฐจ: {std:.2f})"
            ax.set_title(title)
            ax.set_xlabel('์ ์ˆ˜')
            ax.set_ylabel('๋ฐ€๋„')
            ax.legend()
            st.pyplot(fig)
        finally:
            # pyplot keeps every figure it created registered until it is
            # closed; without this each rerun would leave one more behind.
            plt.close(fig)

        # 3. Skewness and its interpretation.
        st.write("#### ๐Ÿ“ ์™œ๋„ (Skewness) ๋ถ„์„")
        skewness = skew(scores)
        st.metric(label="์™œ๋„ (Skewness)", value=f"{skewness:.4f}")

        if skewness > 0.5:
            st.info("๊ผฌ๋ฆฌ๊ฐ€ ์˜ค๋ฅธ์ชฝ์œผ๋กœ ๊ธด ๋ถ„ํฌ (Positive Skew): ๋Œ€๋ถ€๋ถ„์˜ ํ•™์ƒ๋“ค์ด ํ‰๊ท ๋ณด๋‹ค ๋‚ฎ์€ ์ ์ˆ˜์— ๋ชฐ๋ ค์žˆ๊ณ , ์ผ๋ถ€ ํ•™์ƒ๋“ค์ด ๋งค์šฐ ๋†’์€ ์ ์ˆ˜๋ฅผ ๋ฐ›์•˜์Šต๋‹ˆ๋‹ค.")
        elif skewness < -0.5:
            st.info("๊ผฌ๋ฆฌ๊ฐ€ ์™ผ์ชฝ์œผ๋กœ ๊ธด ๋ถ„ํฌ (Negative Skew): ๋Œ€๋ถ€๋ถ„์˜ ํ•™์ƒ๋“ค์ด ํ‰๊ท ๋ณด๋‹ค ๋†’์€ ์ ์ˆ˜์— ๋ชฐ๋ ค์žˆ๊ณ , ์ผ๋ถ€ ํ•™์ƒ๋“ค์ด ๋งค์šฐ ๋‚ฎ์€ ์ ์ˆ˜๋ฅผ ๋ฐ›์•˜์Šต๋‹ˆ๋‹ค.")
        else:
            st.info("๋Œ€์นญ์— ๊ฐ€๊นŒ์šด ๋ถ„ํฌ: ์ ์ˆ˜๊ฐ€ ํ‰๊ท ์„ ์ค‘์‹ฌ์œผ๋กœ ๋น„๊ต์  ๊ณ ๋ฅด๊ฒŒ ๋ถ„ํฌ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.")

    except Exception as e:
        # NOTE(review): this handler also receives failures from the analysis
        # and plotting steps above, while its message text appears to talk
        # only about reading the file / its encoding -- consider narrowing.
        st.error(f"ํŒŒ์ผ์„ ์ฝ๋Š” ๋„์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}")
        st.warning("CSV ํŒŒ์ผ์ด 'utf-8-sig' ๋˜๋Š” 'utf-8' ์ธ์ฝ”๋”ฉ ํ˜•์‹์ธ์ง€ ํ™•์ธํ•ด์ฃผ์„ธ์š”.")

# Entry point: run the app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()