Spaces:
Running
Running
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +285 -180
src/streamlit_app.py
CHANGED
@@ -7,274 +7,379 @@ from scipy.stats import norm, skew
|
|
7 |
import platform
|
8 |
import os
|
9 |
import matplotlib.font_manager as fm
|
|
|
|
|
10 |
|
11 |
-
|
12 |
-
|
|
|
13 |
|
14 |
-
# matplotlib
|
|
|
|
|
|
|
15 |
plt.rcdefaults()
|
16 |
|
17 |
-
#
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
# ํฐํธ ๋งค๋์ ์บ์ ํด๋ฆฌ์ด
|
25 |
-
fm.fontManager.__init__()
|
26 |
-
# ํฐํธ ํ์ผ์ ์์คํ
์ ๋ฑ๋ก
|
27 |
-
fm.fontManager.addfont(font_path)
|
28 |
-
font_prop = fm.FontProperties(fname=font_path)
|
29 |
-
selected_font = font_prop.get_name()
|
30 |
-
st.sidebar.success(f"์ฌ์ฉ์ ํฐํธ '{selected_font}' ๋ก๋ฉ ์ฑ๊ณต!")
|
31 |
-
except Exception as e:
|
32 |
-
st.sidebar.warning(f"์ฌ์ฉ์ ํฐํธ ๋ก๋ฉ ์คํจ: {e}")
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
try:
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
48 |
continue
|
49 |
-
|
50 |
-
if korean_font_candidates:
|
51 |
-
selected_font = korean_font_candidates[0]
|
52 |
-
st.sidebar.info(f"์์คํ
ํ๊ธ ํฐํธ '{selected_font}' ๋ฐ๊ฒฌ!")
|
53 |
|
54 |
-
#
|
55 |
-
if not selected_font:
|
56 |
-
if platform.system() == 'Windows':
|
57 |
-
selected_font = 'Malgun Gothic'
|
58 |
-
elif platform.system() == 'Darwin': # macOS
|
59 |
-
selected_font = 'AppleGothic'
|
60 |
-
else: # Linux - ๋๋ํฐํธ๊ฐ ์์ผ๋ฉด ๊ธฐ๋ณธ Sans ์ฌ์ฉ
|
61 |
-
selected_font = 'sans-serif'
|
62 |
-
|
63 |
-
st.sidebar.warning(f"๊ธฐ๋ณธ ํฐํธ '{selected_font}' ์ฌ์ฉ")
|
64 |
-
|
65 |
-
# 4. matplotlib ์ค์ ๊ฐ์ ์ ์ฉ
|
66 |
plt.rcParams.update({
|
67 |
-
'font.family':
|
68 |
-
'font.sans-serif': [
|
69 |
'axes.unicode_minus': False,
|
70 |
-
'font.size':
|
|
|
|
|
|
|
|
|
71 |
})
|
72 |
|
73 |
-
#
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
st.sidebar.text(f"์ ์ฉ๋ ํฐํธ: {current_font}")
|
80 |
|
81 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
def analyze_scores(df):
|
84 |
"""๋ฐ์ดํฐํ๋ ์์ ๋ฐ์ ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ํ์ํ๋ ํจ์"""
|
85 |
-
st.subheader("๋ฐ์ดํฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ (์์ 5๊ฐ)")
|
86 |
st.dataframe(df.head())
|
87 |
|
88 |
-
# ์ซ์ ํ์์ ์ด๋ง ์ ํ์ง๋ก
|
89 |
numeric_columns = df.select_dtypes(include=np.number).columns.tolist()
|
90 |
if not numeric_columns:
|
91 |
-
st.error("๋ฐ์ดํฐ์์ ๋ถ์ ๊ฐ๋ฅํ ์ซ์ ํ์์ ์ด์ ์ฐพ์ ์ ์์ต๋๋ค.")
|
92 |
return
|
93 |
|
94 |
-
score_column = st.selectbox("๋ถ์ํ ์ ์ ์ด(column)์ ์ ํํ์ธ์:", numeric_columns)
|
95 |
|
96 |
if score_column:
|
97 |
scores = df[score_column].dropna()
|
98 |
|
99 |
-
# ์ ํจํ ๋ฐ์ดํฐ๊ฐ ์๋์ง ํ์ธ
|
100 |
if len(scores) == 0:
|
101 |
-
st.error("์ ํํ ์ด์ ์ ํจํ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")
|
102 |
return
|
103 |
|
104 |
-
st.subheader(f"'{score_column}' ์ ์ ๋ถํฌ ๋ถ์ ๊ฒฐ๊ณผ")
|
105 |
|
106 |
# 1. ๊ธฐ์ ํต๊ณ๋
|
107 |
-
st.write("####
|
108 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
-
# 2. ๋ถํฌ ์๊ฐํ
|
111 |
st.write("#### ๐จ ์ ์ ๋ถํฌ ์๊ฐํ")
|
112 |
|
113 |
-
# ๋งค๋ฒ ์๋ก์ด figure ์์ฑ ์ ํฐํธ ์ค์ ์ฌ์ ์ฉ
|
114 |
-
plt.rcParams.update({
|
115 |
-
'font.family': plt.rcParams.get('font.family', 'sans-serif'),
|
116 |
-
'axes.unicode_minus': False
|
117 |
-
})
|
118 |
-
|
119 |
-
fig, ax = plt.subplots(figsize=(12, 7))
|
120 |
-
|
121 |
try:
|
122 |
-
#
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
# ์ ๊ท๋ถํฌ ๊ณก์ ์ถ๊ฐ
|
126 |
mu, std = norm.fit(scores)
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
|
|
|
|
|
|
131 |
|
132 |
-
# ์ ๋ชฉ๊ณผ ๋ผ๋ฒจ
|
133 |
-
ax.set_title(f'{score_column} ์ ์ ๋ถํฌ ๋ถ์', fontsize=
|
134 |
-
ax.set_xlabel('์ ์', fontsize=
|
135 |
-
ax.set_ylabel('๋ฐ๋', fontsize=
|
136 |
-
ax.legend(fontsize=10)
|
137 |
-
ax.grid(True, alpha=0.3)
|
138 |
|
139 |
-
#
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
ax.text(0.02, 0.98, stats_text, transform=ax.transAxes,
|
142 |
-
fontsize=10, verticalalignment='top',
|
143 |
-
bbox=dict(boxstyle='round', facecolor='
|
144 |
|
145 |
plt.tight_layout()
|
146 |
st.pyplot(fig)
|
147 |
|
148 |
except Exception as e:
|
149 |
-
st.error(f"๊ทธ๋ํ ์์ฑ ์ค
|
150 |
-
|
|
|
|
|
151 |
fig2, ax2 = plt.subplots(figsize=(10, 6))
|
152 |
-
ax2.hist(scores, bins=
|
153 |
-
ax2.set_title('
|
154 |
ax2.set_xlabel('Score')
|
155 |
ax2.set_ylabel('Frequency')
|
|
|
156 |
st.pyplot(fig2)
|
157 |
plt.close(fig2)
|
158 |
finally:
|
159 |
-
|
|
|
160 |
|
161 |
-
# 3. ์๋
|
162 |
-
st.write("#### ๐
|
163 |
try:
|
164 |
skewness = skew(scores)
|
165 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
except Exception as e:
|
174 |
-
st.
|
175 |
|
176 |
def main():
|
177 |
st.set_page_config(
|
178 |
-
page_title="ํ์ ์ ์ ๋ถ์",
|
179 |
page_icon="๐",
|
180 |
-
layout="wide"
|
|
|
181 |
)
|
182 |
|
183 |
-
#
|
184 |
-
|
185 |
-
|
186 |
-
st.title("ํ์ ์ ์ ๋ถํฌ ๋ถ์ ๋๊ตฌ ๐")
|
187 |
-
st.write("CSV ํ์ผ์ ์ง์ ์
๋ก๋ํ๊ฑฐ๋ Google Sheets URL์ ๋ถ์ฌ๋ฃ์ด ํ์ ์ ์ ๋ถํฌ๋ฅผ ๋ถ์ํฉ๋๋ค.")
|
188 |
|
189 |
-
# ํฐํธ
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
|
204 |
-
st.
|
205 |
|
|
|
206 |
st.sidebar.title("๐ ๋ฐ์ดํฐ ๊ฐ์ ธ์ค๊ธฐ")
|
207 |
source_option = st.sidebar.radio(
|
208 |
-
"๋ฐ์ดํฐ
|
209 |
-
("CSV ํ์ผ ์
๋ก๋", "Google Sheets URL")
|
210 |
)
|
211 |
|
212 |
df = None
|
213 |
|
214 |
-
if source_option == "CSV ํ์ผ ์
๋ก๋":
|
215 |
uploaded_file = st.sidebar.file_uploader(
|
216 |
-
"CSV ํ์ผ์
|
217 |
type=["csv"],
|
218 |
-
help="UTF-8
|
219 |
)
|
220 |
if uploaded_file:
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
except Exception as e:
|
236 |
-
st.error(f"ํ์ผ์ ์ฝ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}")
|
237 |
|
238 |
-
elif source_option == "Google Sheets URL":
|
239 |
-
st.sidebar.info("Google Sheets๋ฅผ '์น์ ๊ฒ์'
|
240 |
-
sample_url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQ2Z8kzJq2sM7w2_9gXo-jZ-mO5o-BvC-w5p2nJ6oJ7oJ9xL-w3kZ9j5Z3kX7vN1aQ4mB1cW8jB7fR/pub?gid=0&single=true&output=csv"
|
241 |
url = st.sidebar.text_input(
|
242 |
"Google Sheets CSV URL",
|
243 |
-
value="",
|
244 |
placeholder="https://docs.google.com/spreadsheets/d/..."
|
245 |
)
|
246 |
|
247 |
-
if url:
|
248 |
-
|
249 |
-
st.
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
st.error(f"URL๋ก๋ถํฐ ๋ฐ์ดํฐ๋ฅผ ์ฝ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}")
|
257 |
-
st.warning("์ฌ๋ฐ๋ฅธ Google Sheets '์น ๊ฒ์' CSV URL์ธ์ง ํ์ธํด์ฃผ์ธ์.")
|
258 |
|
259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
if df is not None and not df.empty:
|
261 |
-
st.success(f"๋ฐ์ดํฐ ๋ก๋ฉ ์๋ฃ!
|
262 |
analyze_scores(df)
|
263 |
else:
|
264 |
-
st.info("๐
|
265 |
|
266 |
-
#
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
|
|
278 |
|
279 |
if __name__ == '__main__':
|
280 |
main()
|
|
|
7 |
import platform
|
8 |
import os
|
9 |
import matplotlib.font_manager as fm
|
10 |
+
import warnings
|
11 |
+
warnings.filterwarnings('ignore')
|
12 |
|
13 |
+
# ์ ์ญ ํฐํธ ์ค์ - ์ฑ ์์ ์ฆ์ ์คํ
|
14 |
+
def configure_matplotlib_korean():
|
15 |
+
"""matplotlib ํ๊ธ ํฐํธ ๊ฐ์ ์ค์ """
|
16 |
|
17 |
+
# matplotlib ๋ฐฑ์๋ ์ค์
|
18 |
+
plt.switch_backend('Agg')
|
19 |
+
|
20 |
+
# ๊ธฐ์กด ์ค์ ์์ ์ด๊ธฐํ
|
21 |
plt.rcdefaults()
|
22 |
|
23 |
+
# ํฐํธ ์บ์ ์์ ์ญ์ ๋ฐ ์ฌ๊ตฌ์ฑ
|
24 |
+
try:
|
25 |
+
fm._load_fontmanager(try_read_cache=False)
|
26 |
+
except:
|
27 |
+
pass
|
28 |
|
29 |
+
# ์ด์์ฒด์ ๋ณ ํ๊ธ ํฐํธ ๊ฒฝ๋ก ์ง์ ์ง์
|
30 |
+
korean_font_paths = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
+
if platform.system() == 'Windows':
|
33 |
+
korean_font_paths = [
|
34 |
+
'C:/Windows/Fonts/malgun.ttf',
|
35 |
+
'C:/Windows/Fonts/gulim.ttc',
|
36 |
+
'C:/Windows/Fonts/batang.ttc',
|
37 |
+
]
|
38 |
+
fallback_font = 'Malgun Gothic'
|
39 |
+
elif platform.system() == 'Darwin': # macOS
|
40 |
+
korean_font_paths = [
|
41 |
+
'/System/Library/Fonts/AppleGothic.ttf',
|
42 |
+
'/System/Library/Fonts/Helvetica.ttc',
|
43 |
+
]
|
44 |
+
fallback_font = 'AppleGothic'
|
45 |
+
else: # Linux
|
46 |
+
korean_font_paths = [
|
47 |
+
'/usr/share/fonts/truetype/nanum/NanumGothic.ttf',
|
48 |
+
'/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf',
|
49 |
+
]
|
50 |
+
fallback_font = 'DejaVu Sans'
|
51 |
+
|
52 |
+
# ์ฌ์ฉ์ ์ ์ ํฐํธ ํ์ผ ๊ฒฝ๋ก๋ ์ถ๊ฐ
|
53 |
+
user_font = os.path.join(os.getcwd(), "NanumGaRamYeonGgoc.ttf")
|
54 |
+
if os.path.exists(user_font):
|
55 |
+
korean_font_paths.insert(0, user_font)
|
56 |
+
|
57 |
+
selected_font_path = None
|
58 |
+
selected_font_name = fallback_font
|
59 |
+
|
60 |
+
# ์ค์ ์กด์ฌํ๋ ํฐํธ ํ์ผ ์ฐพ๊ธฐ
|
61 |
+
for font_path in korean_font_paths:
|
62 |
+
if os.path.exists(font_path):
|
63 |
try:
|
64 |
+
font_prop = fm.FontProperties(fname=font_path)
|
65 |
+
selected_font_name = font_prop.get_name()
|
66 |
+
selected_font_path = font_path
|
67 |
+
|
68 |
+
# ํฐํธ ๋งค๋์ ์ ๊ฐ์ ๋ฑ๋ก
|
69 |
+
fm.fontManager.addfont(font_path)
|
70 |
+
break
|
71 |
+
except Exception as e:
|
72 |
continue
|
|
|
|
|
|
|
|
|
73 |
|
74 |
+
# matplotlib rcParams ๊ฐ์ ์ค์
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
plt.rcParams.update({
|
76 |
+
'font.family': 'sans-serif',
|
77 |
+
'font.sans-serif': [selected_font_name, 'DejaVu Sans', 'Arial', 'sans-serif'],
|
78 |
'axes.unicode_minus': False,
|
79 |
+
'font.size': 12,
|
80 |
+
'figure.dpi': 100,
|
81 |
+
'savefig.dpi': 100,
|
82 |
+
'figure.facecolor': 'white',
|
83 |
+
'axes.facecolor': 'white'
|
84 |
})
|
85 |
|
86 |
+
# ์ ์ญ ํฐํธ ์์ฑ ๊ฐ์ฒด ์์ฑ
|
87 |
+
if selected_font_path:
|
88 |
+
global KOREAN_FONT_PROP
|
89 |
+
KOREAN_FONT_PROP = fm.FontProperties(fname=selected_font_path)
|
90 |
+
else:
|
91 |
+
KOREAN_FONT_PROP = fm.FontProperties(family=selected_font_name)
|
|
|
92 |
|
93 |
+
return selected_font_name, selected_font_path
|
94 |
+
|
95 |
+
# ์ฑ ์์ ์ ์ฆ์ ํฐํธ ์ค์
|
96 |
+
FONT_NAME, FONT_PATH = configure_matplotlib_korean()
|
97 |
+
KOREAN_FONT_PROP = None
|
98 |
+
|
99 |
+
def apply_korean_font_to_plot():
|
100 |
+
"""๊ฐ๋ณ ํ๋กฏ์ ํ๊ธ ํฐํธ ์ง์ ์ ์ฉ"""
|
101 |
+
if FONT_PATH and os.path.exists(FONT_PATH):
|
102 |
+
font_prop = fm.FontProperties(fname=FONT_PATH)
|
103 |
+
return font_prop
|
104 |
+
else:
|
105 |
+
return fm.FontProperties(family=FONT_NAME)
|
106 |
|
107 |
def analyze_scores(df):
|
108 |
"""๋ฐ์ดํฐํ๋ ์์ ๋ฐ์ ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ํ์ํ๋ ํจ์"""
|
109 |
+
st.subheader("๐ ๋ฐ์ดํฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ (์์ 5๊ฐ)")
|
110 |
st.dataframe(df.head())
|
111 |
|
112 |
+
# ์ซ์ ํ์์ ์ด๋ง ์ ํ์ง๋ก ์ ๊ณต
|
113 |
numeric_columns = df.select_dtypes(include=np.number).columns.tolist()
|
114 |
if not numeric_columns:
|
115 |
+
st.error("โ ๋ฐ์ดํฐ์์ ๋ถ์ ๊ฐ๋ฅํ ์ซ์ ํ์์ ์ด์ ์ฐพ์ ์ ์์ต๋๋ค.")
|
116 |
return
|
117 |
|
118 |
+
score_column = st.selectbox("๐ ๋ถ์ํ ์ ์ ์ด(column)์ ์ ํํ์ธ์:", numeric_columns)
|
119 |
|
120 |
if score_column:
|
121 |
scores = df[score_column].dropna()
|
122 |
|
|
|
123 |
if len(scores) == 0:
|
124 |
+
st.error("โ ์ ํํ ์ด์ ์ ํจํ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")
|
125 |
return
|
126 |
|
127 |
+
st.subheader(f"๐ '{score_column}' ์ ์ ๋ถํฌ ๋ถ์ ๊ฒฐ๊ณผ")
|
128 |
|
129 |
# 1. ๊ธฐ์ ํต๊ณ๋
|
130 |
+
st.write("#### ๐ ๊ธฐ๋ณธ ํต๊ณ๋")
|
131 |
+
col1, col2, col3, col4 = st.columns(4)
|
132 |
+
with col1:
|
133 |
+
st.metric("ํ๊ท ", f"{scores.mean():.2f}")
|
134 |
+
with col2:
|
135 |
+
st.metric("ํ์คํธ์ฐจ", f"{scores.std():.2f}")
|
136 |
+
with col3:
|
137 |
+
st.metric("์ต์๊ฐ", f"{scores.min():.2f}")
|
138 |
+
with col4:
|
139 |
+
st.metric("์ต๋๊ฐ", f"{scores.max():.2f}")
|
140 |
+
|
141 |
+
# ์์ธ ํต๊ณ
|
142 |
+
st.write("#### ๐ ์์ธ ํต๊ณ๋")
|
143 |
+
st.dataframe(scores.describe().to_frame().T)
|
144 |
|
145 |
+
# 2. ๋ถํฌ ์๊ฐํ - ๊ฐํ๋ ํ๊ธ ํฐํธ ์ ์ฉ
|
146 |
st.write("#### ๐จ ์ ์ ๋ถํฌ ์๊ฐํ")
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
try:
|
149 |
+
# ํฐํธ ์์ฑ ๊ฐ์ฒด ์์ฑ
|
150 |
+
korean_font = apply_korean_font_to_plot()
|
151 |
+
|
152 |
+
# Figure ์์ฑ ๋ฐ ์ค์
|
153 |
+
fig, ax = plt.subplots(figsize=(14, 8))
|
154 |
+
fig.patch.set_facecolor('white')
|
155 |
+
|
156 |
+
# ํ์คํ ๊ทธ๋จ ์์ฑ
|
157 |
+
n, bins, patches = ax.hist(scores, bins=20, density=True, alpha=0.7,
|
158 |
+
color='skyblue', edgecolor='navy', linewidth=0.8)
|
159 |
+
|
160 |
+
# KDE ๊ณก์ ์ถ๊ฐ
|
161 |
+
try:
|
162 |
+
from scipy.stats import gaussian_kde
|
163 |
+
kde = gaussian_kde(scores)
|
164 |
+
x_range = np.linspace(scores.min(), scores.max(), 200)
|
165 |
+
ax.plot(x_range, kde(x_range), 'orange', linewidth=3, label='์ค์ ๋ถํฌ ๊ณก์ ')
|
166 |
+
except:
|
167 |
+
pass
|
168 |
|
169 |
# ์ ๊ท๋ถํฌ ๊ณก์ ์ถ๊ฐ
|
170 |
mu, std = norm.fit(scores)
|
171 |
+
x_norm = np.linspace(scores.min(), scores.max(), 100)
|
172 |
+
y_norm = norm.pdf(x_norm, mu, std)
|
173 |
+
ax.plot(x_norm, y_norm, 'red', linewidth=2, linestyle='--',
|
174 |
+
label=f'์ ๊ท๋ถํฌ (ํ๊ท ={mu:.1f}, ํ์คํธ์ฐจ={std:.1f})')
|
175 |
+
|
176 |
+
# ํ๊ท ์ ์ถ๊ฐ
|
177 |
+
ax.axvline(mu, color='red', linestyle=':', linewidth=2, alpha=0.8, label=f'ํ๊ท : {mu:.1f}')
|
178 |
|
179 |
+
# ์ ๋ชฉ๊ณผ ๋ผ๋ฒจ - ํ๊ธ ํฐํธ ์ง์ ์ ์ฉ
|
180 |
+
ax.set_title(f'{score_column} ์ ์ ๋ถํฌ ๋ถ์', fontproperties=korean_font, fontsize=18, pad=20)
|
181 |
+
ax.set_xlabel('์ ์', fontproperties=korean_font, fontsize=14)
|
182 |
+
ax.set_ylabel('๋ฐ๋', fontproperties=korean_font, fontsize=14)
|
|
|
|
|
183 |
|
184 |
+
# ๋ฒ๋ก ์ค์
|
185 |
+
legend = ax.legend(prop=korean_font, fontsize=11, loc='upper right')
|
186 |
+
legend.get_frame().set_alpha(0.9)
|
187 |
+
|
188 |
+
# ๊ฒฉ์ ์ถ๊ฐ
|
189 |
+
ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)
|
190 |
+
|
191 |
+
# ํต๊ณ ์ ๋ณด ๋ฐ์ค
|
192 |
+
stats_text = f'์ํ ์: {len(scores)}\nํ๊ท : {mu:.2f}\nํ์คํธ์ฐจ: {std:.2f}\n์ต์๊ฐ: {scores.min():.1f}\n์ต๋๊ฐ: {scores.max():.1f}'
|
193 |
ax.text(0.02, 0.98, stats_text, transform=ax.transAxes,
|
194 |
+
fontproperties=korean_font, fontsize=10, verticalalignment='top',
|
195 |
+
bbox=dict(boxstyle='round,pad=0.5', facecolor='lightblue', alpha=0.8))
|
196 |
|
197 |
plt.tight_layout()
|
198 |
st.pyplot(fig)
|
199 |
|
200 |
except Exception as e:
|
201 |
+
st.error(f"โ ์์ธ ๊ทธ๋ํ ์์ฑ ์ค ์ค๋ฅ: {e}")
|
202 |
+
|
203 |
+
# ๋์ฒด ๊ทธ๋ํ (์์ด๋ง ์ฌ์ฉ)
|
204 |
+
st.write("**Alternative Chart (English only):**")
|
205 |
fig2, ax2 = plt.subplots(figsize=(10, 6))
|
206 |
+
ax2.hist(scores, bins=15, alpha=0.7, color='lightcoral', edgecolor='black')
|
207 |
+
ax2.set_title(f'Distribution of {score_column}', fontsize=14)
|
208 |
ax2.set_xlabel('Score')
|
209 |
ax2.set_ylabel('Frequency')
|
210 |
+
ax2.grid(True, alpha=0.3)
|
211 |
st.pyplot(fig2)
|
212 |
plt.close(fig2)
|
213 |
finally:
|
214 |
+
if 'fig' in locals():
|
215 |
+
plt.close(fig)
|
216 |
|
217 |
+
# 3. ์๋ ๋ถ์
|
218 |
+
st.write("#### ๐ ๋ถํฌ ํํ ๋ถ์ (์๋)")
|
219 |
try:
|
220 |
skewness = skew(scores)
|
221 |
+
col1, col2 = st.columns([1, 2])
|
222 |
+
|
223 |
+
with col1:
|
224 |
+
st.metric("์๋ (Skewness)", f"{skewness:.4f}")
|
225 |
+
|
226 |
+
with col2:
|
227 |
+
if skewness > 0.5:
|
228 |
+
st.success("๐ด **์์ ์๋ (Right Skewed)**: ๋๋ถ๋ถ ํ์์ด ๋ฎ์ ์ ์๋์ ๋ถํฌํ๊ณ , ์์์ ๊ณ ๋์ ์๊ฐ ์กด์ฌํฉ๋๋ค.")
|
229 |
+
elif skewness < -0.5:
|
230 |
+
st.success("๐ต **์์ ์๋ (Left Skewed)**: ๋๋ถ๋ถ ํ์์ด ๋์ ์ ์๋์ ๋ถํฌํ๊ณ , ์์์ ์ ๋์ ์๊ฐ ์กด์ฌํฉ๋๋ค.")
|
231 |
+
else:
|
232 |
+
st.success("๐ข **๋์นญ ๋ถํฌ**: ์ ์๊ฐ ํ๊ท ์ ์ค์ฌ์ผ๋ก ๊ณ ๋ฅด๊ฒ ๋ถํฌ๋์ด ์์ต๋๋ค.")
|
233 |
+
|
234 |
+
except Exception as e:
|
235 |
+
st.error(f"์๋ ๊ณ์ฐ ์ค ์ค๋ฅ: {e}")
|
236 |
|
237 |
+
# 4. ์ถ๊ฐ ๋ถ์
|
238 |
+
st.write("#### ๐ ๊ตฌ๊ฐ๋ณ ๋ถํฌ")
|
239 |
+
|
240 |
+
# ์ ์ ๊ตฌ๊ฐ ๋๋๊ธฐ
|
241 |
+
if scores.max() <= 100: # 100์ ๋ง์ ๊ฐ์
|
242 |
+
bins_labels = ['0-60', '61-70', '71-80', '81-90', '91-100']
|
243 |
+
bins_edges = [0, 60, 70, 80, 90, 100]
|
244 |
+
else:
|
245 |
+
# ๋์ ๊ตฌ๊ฐ ์์ฑ
|
246 |
+
min_score, max_score = scores.min(), scores.max()
|
247 |
+
interval = (max_score - min_score) / 5
|
248 |
+
bins_edges = [min_score + i * interval for i in range(6)]
|
249 |
+
bins_labels = [f'{bins_edges[i]:.0f}-{bins_edges[i+1]:.0f}' for i in range(5)]
|
250 |
+
|
251 |
+
try:
|
252 |
+
score_counts = pd.cut(scores, bins=bins_edges, labels=bins_labels, include_lowest=True).value_counts().sort_index()
|
253 |
+
score_percentages = (score_counts / len(scores) * 100).round(1)
|
254 |
+
|
255 |
+
result_df = pd.DataFrame({
|
256 |
+
'๊ตฌ๊ฐ': score_counts.index,
|
257 |
+
'ํ์ ์': score_counts.values,
|
258 |
+
'๋น์จ (%)': score_percentages.values
|
259 |
+
})
|
260 |
+
st.dataframe(result_df)
|
261 |
+
|
262 |
except Exception as e:
|
263 |
+
st.warning(f"๊ตฌ๊ฐ ๋ถ์ ์ค ์ค๋ฅ: {e}")
|
264 |
|
265 |
def main():
|
266 |
st.set_page_config(
|
267 |
+
page_title="ํ์ ์ ์ ๋ถ์ ๋๊ตฌ",
|
268 |
page_icon="๐",
|
269 |
+
layout="wide",
|
270 |
+
initial_sidebar_state="expanded"
|
271 |
)
|
272 |
|
273 |
+
# ์ ๋ชฉ
|
274 |
+
st.title("๐ ํ์ ์ ์ ๋ถํฌ ๋ถ์ ๋๊ตฌ")
|
275 |
+
st.markdown("**CSV ํ์ผ์ ์
๋ก๋ํ๊ฑฐ๋ Google Sheets URL์ ์
๋ ฅํ์ฌ ์ ์ ๋ถํฌ๋ฅผ ๋ถ์ํ์ธ์**")
|
|
|
|
|
276 |
|
277 |
+
# ํฐํธ ์ ๋ณด ํ์
|
278 |
+
with st.expander("๐ง ํฐํธ ์ค์ ์ ๋ณด"):
|
279 |
+
st.write(f"**ํ์ฌ ํฐํธ**: {FONT_NAME}")
|
280 |
+
st.write(f"**ํฐํธ ๊ฒฝ๋ก**: {FONT_PATH if FONT_PATH else '์์คํ
๊ธฐ๋ณธ'}")
|
281 |
+
|
282 |
+
# ํฐํธ ํ
์คํธ
|
283 |
+
if st.button("ํฐํธ ํ
์คํธ"):
|
284 |
+
try:
|
285 |
+
test_fig, test_ax = plt.subplots(figsize=(8, 3))
|
286 |
+
korean_font = apply_korean_font_to_plot()
|
287 |
+
test_ax.text(0.5, 0.7, 'ํ๊ธ ํฐํธ ํ
์คํธ', ha='center', va='center',
|
288 |
+
fontproperties=korean_font, fontsize=16)
|
289 |
+
test_ax.text(0.5, 0.3, '์ ์ ๋ถํฌ ๋ถ์ ๊ทธ๋ํ', ha='center', va='center',
|
290 |
+
fontproperties=korean_font, fontsize=14)
|
291 |
+
test_ax.set_xlim(0, 1)
|
292 |
+
test_ax.set_ylim(0, 1)
|
293 |
+
test_ax.axis('off')
|
294 |
+
st.pyplot(test_fig)
|
295 |
+
plt.close(test_fig)
|
296 |
+
except Exception as e:
|
297 |
+
st.error(f"ํฐํธ ํ
์คํธ ์คํจ: {e}")
|
298 |
|
299 |
+
st.markdown("---")
|
300 |
|
301 |
+
# ์ฌ์ด๋๋ฐ - ๋ฐ์ดํฐ ์
๋ ฅ
|
302 |
st.sidebar.title("๐ ๋ฐ์ดํฐ ๊ฐ์ ธ์ค๊ธฐ")
|
303 |
source_option = st.sidebar.radio(
|
304 |
+
"๋ฐ์ดํฐ ์์ค ์ ํ:",
|
305 |
+
("๐ค CSV ํ์ผ ์
๋ก๋", "๐ Google Sheets URL", "๐ฒ ์ํ ๋ฐ์ดํฐ")
|
306 |
)
|
307 |
|
308 |
df = None
|
309 |
|
310 |
+
if source_option == "๐ค CSV ํ์ผ ์
๋ก๋":
|
311 |
uploaded_file = st.sidebar.file_uploader(
|
312 |
+
"CSV ํ์ผ์ ์ ํํ์ธ์",
|
313 |
type=["csv"],
|
314 |
+
help="UTF-8, CP949 ๋ฑ ๋ค์ํ ์ธ์ฝ๋ฉ์ ์๋์ผ๋ก ๊ฐ์งํฉ๋๋ค"
|
315 |
)
|
316 |
if uploaded_file:
|
317 |
+
encodings = ['utf-8-sig', 'utf-8', 'cp949', 'euc-kr', 'latin1']
|
318 |
+
for encoding in encodings:
|
319 |
+
try:
|
320 |
+
df = pd.read_csv(uploaded_file, encoding=encoding)
|
321 |
+
st.sidebar.success(f"โ
ํ์ผ ๋ก๋ฉ ์ฑ๊ณต! (์ธ์ฝ๋ฉ: {encoding})")
|
322 |
+
break
|
323 |
+
except UnicodeDecodeError:
|
324 |
+
continue
|
325 |
+
except Exception as e:
|
326 |
+
st.sidebar.error(f"ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ: {e}")
|
327 |
+
break
|
328 |
+
|
329 |
+
if df is None:
|
330 |
+
st.sidebar.error("โ ํ์ผ ์ธ์ฝ๋ฉ์ ์ธ์ํ ์ ์์ต๋๋ค.")
|
|
|
|
|
331 |
|
332 |
+
elif source_option == "๐ Google Sheets URL":
|
333 |
+
st.sidebar.info("๐ก Google Sheets๋ฅผ '์น์ ๊ฒ์'ํ ํ CSV URL์ ์
๋ ฅํ์ธ์")
|
|
|
334 |
url = st.sidebar.text_input(
|
335 |
"Google Sheets CSV URL",
|
|
|
336 |
placeholder="https://docs.google.com/spreadsheets/d/..."
|
337 |
)
|
338 |
|
339 |
+
if url and "docs.google.com" in url:
|
340 |
+
try:
|
341 |
+
with st.spinner("๐ฅ ๋ฐ์ดํฐ ๋ก๋ฉ ์ค..."):
|
342 |
+
df = pd.read_csv(url)
|
343 |
+
st.sidebar.success("โ
Google Sheets ๋ก๋ฉ ์ฑ๊ณต!")
|
344 |
+
except Exception as e:
|
345 |
+
st.sidebar.error(f"โ URL ๋ก๋ฉ ์คํจ: {e}")
|
346 |
+
elif url:
|
347 |
+
st.sidebar.warning("โ ๏ธ ์ฌ๋ฐ๋ฅธ Google Sheets URL์ ์
๋ ฅํ์ธ์")
|
|
|
|
|
348 |
|
349 |
+
elif source_option == "๐ฒ ์ํ ๋ฐ์ดํฐ":
|
350 |
+
if st.sidebar.button("์ํ ๋ฐ์ดํฐ ์์ฑ"):
|
351 |
+
np.random.seed(42)
|
352 |
+
sample_size = st.sidebar.slider("์ํ ํฌ๊ธฐ", 50, 500, 100)
|
353 |
+
|
354 |
+
df = pd.DataFrame({
|
355 |
+
'ํ์๋ฒํธ': range(1, sample_size + 1),
|
356 |
+
'์ํ์ ์': np.random.normal(75, 15, sample_size).clip(0, 100).round(1),
|
357 |
+
'์์ด์ ์': np.random.normal(80, 12, sample_size).clip(0, 100).round(1),
|
358 |
+
'๊ณผํ์ ์': np.random.normal(70, 18, sample_size).clip(0, 100).round(1),
|
359 |
+
'๊ตญ์ด์ ์': np.random.normal(77, 14, sample_size).clip(0, 100).round(1)
|
360 |
+
})
|
361 |
+
st.sidebar.success(f"โ
{sample_size}๋ช
์ ์ํ ๋ฐ์ดํฐ ์์ฑ!")
|
362 |
+
|
363 |
+
# ๋ฉ์ธ ๋ถ์
|
364 |
if df is not None and not df.empty:
|
365 |
+
st.success(f"๐ ๋ฐ์ดํฐ ๋ก๋ฉ ์๋ฃ! **{len(df)}๊ฐ ํ, {len(df.columns)}๊ฐ ์ด**")
|
366 |
analyze_scores(df)
|
367 |
else:
|
368 |
+
st.info("๐ **์ฌ์ด๋๋ฐ์์ ๋ฐ์ดํฐ๋ฅผ ์ ํํ์ธ์**")
|
369 |
|
370 |
+
# ๊ธฐ๋ฅ ์๋ด
|
371 |
+
st.markdown("""
|
372 |
+
### ๐ ์ฃผ์ ๊ธฐ๋ฅ
|
373 |
+
- **๐ ๊ธฐ๋ณธ ํต๊ณ**: ํ๊ท , ํ์คํธ์ฐจ, ์ต์๊ฐ, ์ต๋๊ฐ ๋ฑ
|
374 |
+
- **๐ ๋ถํฌ ์๊ฐํ**: ํ์คํ ๊ทธ๋จ, KDE ๊ณก์ , ์ ๊ท๋ถํฌ ๋น๊ต
|
375 |
+
- **๐ ์๋ ๋ถ์**: ๋ถํฌ์ ๋น๋์นญ์ฑ ์ธก์
|
376 |
+
- **๐ ๊ตฌ๊ฐ๋ณ ๋ถํฌ**: ์ ์ ๊ตฌ๊ฐ๋ณ ํ์ ์ ๋ฐ ๋น์จ
|
377 |
+
|
378 |
+
### ๐ ์ง์ ํ์
|
379 |
+
- **CSV ํ์ผ**: UTF-8, CP949, EUC-KR ๋ฑ ์๋ ์ธ์ฝ๋ฉ ๊ฐ์ง
|
380 |
+
- **Google Sheets**: ์น์ ๊ฒ์๋ ์ํธ์ CSV URL
|
381 |
+
- **์ํ ๋ฐ์ดํฐ**: ํ
์คํธ์ฉ ๊ฐ์ ์ ์ ๋ฐ์ดํฐ
|
382 |
+
""")
|
383 |
|
384 |
if __name__ == '__main__':
|
385 |
main()
|