File size: 4,722 Bytes
ff27984
68fece7
 
 
 
 
 
 
a63a0bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68fece7
 
a63a0bc
68fece7
a63a0bc
68fece7
 
a63a0bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff27984
68fece7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm, skew
import platform

# Korean font setup (supports multiple OS environments)
def set_korean_font():
    """Configure matplotlib to use a font family chosen per operating system.

    Windows uses 'Malgun Gothic' and macOS uses 'AppleGothic'. On any other
    platform 'NanumGothic' is selected only when matplotlib's font registry
    actually lists it; otherwise a Streamlit warning with install
    instructions is shown and matplotlib's default font is left in place.

    ``axes.unicode_minus`` is always disabled so that minus signs on axes
    are drawn with an ASCII hyphen instead of a glyph the font may lack.
    """
    system_name = platform.system()
    if system_name == 'Windows':
        plt.rc('font', family='Malgun Gothic')
    elif system_name == 'Darwin':  # macOS
        plt.rc('font', family='AppleGothic')
    else:  # Linux and everything else
        # `plt.rc` never raises for an unknown family (matplotlib only falls
        # back at render time), so availability has to be checked explicitly.
        # The font can be installed with `sudo apt-get install -y fonts-nanum*`.
        from matplotlib import font_manager

        installed_families = {
            entry.name for entry in font_manager.fontManager.ttflist
        }
        if 'NanumGothic' in installed_families:
            plt.rc('font', family='NanumGothic')
        else:
            st.warning("๋‚˜๋ˆ”๊ณ ๋”• ํฐํŠธ๊ฐ€ ์„ค์น˜๋˜์–ด ์žˆ์ง€ ์•Š์•„ ํ•œ๊ธ€์ด ๊นจ์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 'sudo apt-get install -y fonts-nanum*' ๋ช…๋ น์–ด๋กœ ํฐํŠธ๋ฅผ ์„ค์น˜ํ•ด์ฃผ์„ธ์š”.")
    # Keep minus signs from rendering as broken glyphs.
    plt.rcParams['axes.unicode_minus'] = False

def analyze_scores(df):
    """Render a distribution analysis of one user-selected column of ``df``.

    Shows a preview of the first rows, lets the user pick a column, and for
    a numeric column displays descriptive statistics, a density histogram
    with a fitted normal curve, and the skewness with an interpretation.
    Missing values in the chosen column are dropped before analysis.

    Args:
        df: pandas DataFrame holding the loaded data.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.subheader("๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ (์ƒ์œ„ 5๊ฐœ)")
    st.dataframe(df.head())

    # Let the user choose which column to analyse.
    score_column = st.selectbox("๋ถ„์„ํ•  ์ ์ˆ˜ ์—ด(column)์„ ์„ ํƒํ•˜์„ธ์š”:", df.columns)
    if not score_column:
        return

    scores = df[score_column].dropna()

    if not pd.api.types.is_numeric_dtype(scores):
        st.error(f"์˜ค๋ฅ˜: ์„ ํƒํ•˜์‹  '{score_column}' ์—ด์€ ์ˆซ์ž ๋ฐ์ดํ„ฐ๊ฐ€ ์•„๋‹™๋‹ˆ๋‹ค. ์ˆซ์ž ํ˜•์‹์˜ ์—ด์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”.")
        return

    if scores.empty:
        # Nothing left after dropping missing values: fitting a normal
        # distribution or computing skewness would only produce NaN output.
        st.warning(f"Column '{score_column}' has no non-missing values to analyze.")
        return

    st.subheader(f"'{score_column}' ์ ์ˆ˜ ๋ถ„ํฌ ๋ถ„์„ ๊ฒฐ๊ณผ")

    # 1. Descriptive statistics
    st.write("#### ๐Ÿ“ˆ ๊ธฐ์ˆ  ํ†ต๊ณ„๋Ÿ‰")
    st.table(scores.describe())

    # 2. Distribution plot
    st.write("#### ๐ŸŽจ ์ ์ˆ˜ ๋ถ„ํฌ ์‹œ๊ฐํ™”")
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.histplot(scores, kde=True, stat='density', label='ํ•™์ƒ ์ ์ˆ˜ ๋ถ„ํฌ', ax=ax)
        mu, std = norm.fit(scores)
        # Read the limits from this figure's axes rather than pyplot's
        # global "current axes", which may not be the one drawn on here.
        xmin, xmax = ax.get_xlim()
        x = np.linspace(xmin, xmax, 100)
        p = norm.pdf(x, mu, std)
        ax.plot(x, p, 'k', linewidth=2, label='์ •๊ทœ๋ถ„ํฌ ๊ณก์„ ')
        ax.set_title(f"'{score_column}' ์ ์ˆ˜ ๋ถ„ํฌ (ํ‰๊ท : {mu:.2f}, ํ‘œ์ค€ํŽธ์ฐจ: {std:.2f})")
        ax.set_xlabel('์ ์ˆ˜')
        ax.set_ylabel('๋ฐ€๋„')
        ax.legend()
        st.pyplot(fig)
    finally:
        # Streamlit reruns the script on every interaction; close the figure
        # so pyplot does not keep accumulating open figures in memory.
        plt.close(fig)

    # 3. Skewness analysis
    st.write("#### ๐Ÿ“ ์™œ๋„ (Skewness) ๋ถ„์„")
    skewness = skew(scores)
    st.metric(label="์™œ๋„ (Skewness)", value=f"{skewness:.4f}")
    if skewness > 0.5:
        st.info("๊ผฌ๋ฆฌ๊ฐ€ ์˜ค๋ฅธ์ชฝ์œผ๋กœ ๊ธด ๋ถ„ํฌ (Positive Skew): ๋Œ€๋ถ€๋ถ„์˜ ํ•™์ƒ๋“ค์ด ํ‰๊ท ๋ณด๋‹ค ๋‚ฎ์€ ์ ์ˆ˜์— ๋ชฐ๋ ค์žˆ๊ณ , ์ผ๋ถ€ ๊ณ ๋“์ ์ž๋“ค์ด ํ‰๊ท ์„ ๋†’์ด๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.")
    elif skewness < -0.5:
        st.info("๊ผฌ๋ฆฌ๊ฐ€ ์™ผ์ชฝ์œผ๋กœ ๊ธด ๋ถ„ํฌ (Negative Skew): ๋Œ€๋ถ€๋ถ„์˜ ํ•™์ƒ๋“ค์ด ํ‰๊ท ๋ณด๋‹ค ๋†’์€ ์ ์ˆ˜์— ๋ชฐ๋ ค์žˆ๊ณ , ์ผ๋ถ€ ์ €๋“์ ์ž๋“ค์ด ํ‰๊ท ์„ ๋‚ฎ์ถ”๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.")
    else:
        st.info("๋Œ€์นญ์— ๊ฐ€๊นŒ์šด ๋ถ„ํฌ: ์ ์ˆ˜๊ฐ€ ํ‰๊ท ์„ ์ค‘์‹ฌ์œผ๋กœ ๋น„๊ต์  ๊ณ ๋ฅด๊ฒŒ ๋ถ„ํฌ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.")

def main():
    """Build the Streamlit page: choose a data source, load it, analyse it.

    The sidebar offers two sources: a published Google Sheets CSV URL or an
    uploaded CSV file. When a DataFrame is loaded successfully it is handed
    to ``analyze_scores``; otherwise an informational prompt is shown.
    Read errors are reported on the page instead of being raised.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlparse

    set_korean_font()
    st.title("ํ•™์ƒ ์ ์ˆ˜ ๋ถ„ํฌ ๋ถ„์„ ๋„๊ตฌ ๐Ÿ“Š")
    st.write("CSV ํŒŒ์ผ์„ ์ง์ ‘ ์—…๋กœ๋“œํ•˜๊ฑฐ๋‚˜ Google Sheets URL์„ ๋ถ™์—ฌ๋„ฃ์–ด ํ•™์ƒ ์ ์ˆ˜ ๋ถ„ํฌ๋ฅผ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")
    st.write("---")

    # Keep each option label in one place so the radio widget and the
    # comparisons below can never drift apart.
    url_option = "Google Sheets URL"
    upload_option = "CSV ํŒŒ์ผ ์—…๋กœ๋“œ"
    url_hint = "์˜ฌ๋ฐ”๋ฅธ Google Sheets '์›น ๊ฒŒ์‹œ' CSV URL์ธ์ง€ ํ™•์ธํ•ด์ฃผ์„ธ์š”."

    st.sidebar.title("๋ฐ์ดํ„ฐ ๊ฐ€์ ธ์˜ค๊ธฐ")
    source_option = st.sidebar.radio("๋ฐ์ดํ„ฐ ์†Œ์Šค๋ฅผ ์„ ํƒํ•˜์„ธ์š”:", (url_option, upload_option))

    df = None

    if source_option == url_option:
        url = st.sidebar.text_input("์›น์— ๊ฒŒ์‹œ๋œ Google Sheets CSV URL์„ ์ž…๋ ฅํ•˜์„ธ์š”.")
        url = url.strip() if url else url
        if url:
            # pd.read_csv also accepts local paths and other schemes; only
            # allow web URLs so a visitor cannot read files on the server.
            if urlparse(url).scheme not in ("http", "https"):
                st.warning(url_hint)
            else:
                try:
                    df = pd.read_csv(url)
                except Exception as e:
                    st.error(f"URL๋กœ๋ถ€ํ„ฐ ๋ฐ์ดํ„ฐ๋ฅผ ์ฝ๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}")
                    st.warning(url_hint)

    elif source_option == upload_option:
        uploaded_file = st.sidebar.file_uploader("CSV ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”.", type="csv")
        if uploaded_file:
            try:
                # 'utf-8-sig' also strips a leading byte-order mark if present.
                df = pd.read_csv(uploaded_file, encoding='utf-8-sig')
            except Exception as e:
                st.error(f"ํŒŒ์ผ์„ ์ฝ๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}")

    if df is not None:
        analyze_scores(df)
    else:
        st.info("์‚ฌ์ด๋“œ๋ฐ”์—์„œ ๋ฐ์ดํ„ฐ ์†Œ์Šค๋ฅผ ์„ ํƒํ•˜๊ณ  ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์™€์ฃผ์„ธ์š”.")

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()