Spaces:

JUNGU
/

streamlit-example-vibe

Running

App Files Files Community

streamlit-example-vibe / src /streamlit_app.py

JUNGU

Update src/streamlit_app.py

43f1594 verified 20 days ago

raw

history blame

15.5 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from scipy.stats import norm, skew
	import platform
	import os
	import matplotlib.font_manager as fm
	import warnings
	warnings.filterwarnings('ignore')

	# Global font setup - executed as soon as the app starts
	def configure_matplotlib_korean():
	    """Force matplotlib to use a font that can render Korean text.

	    Switches to the non-interactive ``Agg`` backend, resets rcParams to
	    their defaults, then walks a per-OS list of candidate font files (a
	    ``NanumGaRamYeonGgoc.ttf`` in the current working directory is tried
	    first when present). The first candidate that both parses and
	    registers with matplotlib's font manager is selected and placed at the
	    front of ``font.sans-serif``.

	    Side effects:
	        Mutates ``plt.rcParams`` and assigns the module-level
	        ``KOREAN_FONT_PROP`` a ``FontProperties`` for the selected font.

	    Returns:
	        tuple: ``(font_name, font_path)``. ``font_path`` is ``None`` when no
	        candidate file could be loaded, in which case ``font_name`` is the
	        per-OS fallback family name.
	    """
	    # Declared up front: both branches at the end of the function assign it.
	    global KOREAN_FONT_PROP

	    # Non-interactive backend
	    plt.switch_backend('Agg')

	    # Start from pristine rcParams
	    plt.rcdefaults()

	    # Best-effort rebuild of the font cache. This is a private matplotlib
	    # helper and may not exist in every version, so failures are ignored.
	    # NOTE(review): the returned manager is discarded here - confirm this
	    # call has the intended effect on the in-process font list.
	    try:
	        fm._load_fontmanager(try_read_cache=False)
	    except Exception:
	        pass

	    # Candidate font files per operating system
	    system_name = platform.system()
	    if system_name == 'Windows':
	        korean_font_paths = [
	            'C:/Windows/Fonts/malgun.ttf',
	            'C:/Windows/Fonts/gulim.ttc',
	            'C:/Windows/Fonts/batang.ttc',
	        ]
	        fallback_font = 'Malgun Gothic'
	    elif system_name == 'Darwin':  # macOS
	        korean_font_paths = [
	            '/System/Library/Fonts/AppleGothic.ttf',
	            '/System/Library/Fonts/Helvetica.ttc',
	        ]
	        fallback_font = 'AppleGothic'
	    else:  # Linux
	        korean_font_paths = [
	            '/usr/share/fonts/truetype/nanum/NanumGothic.ttf',
	            '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf',
	        ]
	        fallback_font = 'DejaVu Sans'

	    # A user-supplied font in the working directory takes priority
	    user_font = os.path.join(os.getcwd(), "NanumGaRamYeonGgoc.ttf")
	    if os.path.exists(user_font):
	        korean_font_paths.insert(0, user_font)

	    selected_font_path = None
	    selected_font_name = fallback_font

	    # Pick the first font file that exists AND registers successfully
	    for font_path in korean_font_paths:
	        if not os.path.exists(font_path):
	            continue
	        try:
	            candidate_name = fm.FontProperties(fname=font_path).get_name()
	            fm.fontManager.addfont(font_path)
	        except Exception:
	            # Unreadable or unsupported font file: try the next candidate
	            continue
	        # Commit the selection only after registration succeeded, so a
	        # font that failed in addfont() is never reported as selected.
	        selected_font_name = candidate_name
	        selected_font_path = font_path
	        break

	    # Force the rcParams that affect text rendering
	    plt.rcParams.update({
	        'font.family': 'sans-serif',
	        'font.sans-serif': [selected_font_name, 'DejaVu Sans', 'Arial', 'sans-serif'],
	        'axes.unicode_minus': False,
	        'font.size': 12,
	        'figure.dpi': 100,
	        'savefig.dpi': 100,
	        'figure.facecolor': 'white',
	        'axes.facecolor': 'white',
	    })

	    # Module-wide font properties object
	    if selected_font_path:
	        KOREAN_FONT_PROP = fm.FontProperties(fname=selected_font_path)
	    else:
	        KOREAN_FONT_PROP = fm.FontProperties(family=selected_font_name)

	    return selected_font_name, selected_font_path

	# Placeholder so the name always exists. It must be defined BEFORE the
	# call below: configure_matplotlib_korean() assigns this module-level
	# name, and resetting it afterwards would discard that value.
	KOREAN_FONT_PROP = None

	# Configure fonts immediately at app start
	FONT_NAME, FONT_PATH = configure_matplotlib_korean()

	def apply_korean_font_to_plot():
	    """Build the font properties used to label an individual plot.

	    Returns a file-backed ``FontProperties`` when ``FONT_PATH`` is set and
	    the file still exists on disk; otherwise one that selects
	    ``FONT_NAME`` by family name.
	    """
	    has_font_file = bool(FONT_PATH) and os.path.exists(FONT_PATH)
	    if not has_font_file:
	        return fm.FontProperties(family=FONT_NAME)
	    return fm.FontProperties(fname=FONT_PATH)

	def _show_summary_statistics(scores):
	    """Render the four headline metrics and the full ``describe()`` table."""
	    st.write("#### 📊 기본 통계량")
	    metrics = (
	        ("평균", scores.mean()),
	        ("표준편차", scores.std()),
	        ("최솟값", scores.min()),
	        ("최댓값", scores.max()),
	    )
	    for column, (label, value) in zip(st.columns(4), metrics):
	        with column:
	            st.metric(label, f"{value:.2f}")

	    st.write("#### 📋 상세 통계량")
	    st.dataframe(scores.describe().to_frame().T)


	def _show_distribution_plot(scores, score_column):
	    """Plot histogram, KDE and fitted normal curve with Korean labels.

	    If anything in the detailed chart fails, an error is shown and a
	    simpler English-only histogram is rendered instead. Every figure
	    created here is closed before returning.
	    """
	    from scipy.stats import gaussian_kde

	    st.write("#### 🎨 점수 분포 시각화")

	    fig = None
	    try:
	        korean_font = apply_korean_font_to_plot()

	        fig, ax = plt.subplots(figsize=(14, 8))
	        fig.patch.set_facecolor('white')

	        # Histogram (normalised so it is comparable with the density curves)
	        ax.hist(scores, bins=20, density=True, alpha=0.7,
	                color='skyblue', edgecolor='navy', linewidth=0.8)

	        # KDE curve is optional: it cannot be estimated for e.g. constant
	        # data, in which case the rest of the chart is still drawn.
	        try:
	            kde = gaussian_kde(scores)
	            x_range = np.linspace(scores.min(), scores.max(), 200)
	            ax.plot(x_range, kde(x_range), 'orange', linewidth=3, label='실제 분포 곡선')
	        except Exception:
	            pass

	        # Fitted normal distribution
	        mu, std = norm.fit(scores)
	        x_norm = np.linspace(scores.min(), scores.max(), 100)
	        y_norm = norm.pdf(x_norm, mu, std)
	        ax.plot(x_norm, y_norm, 'red', linewidth=2, linestyle='--',
	                label=f'정규분포 (평균={mu:.1f}, 표준편차={std:.1f})')

	        # Mean marker
	        ax.axvline(mu, color='red', linestyle=':', linewidth=2, alpha=0.8, label=f'평균: {mu:.1f}')

	        # Title and axis labels with the Korean font applied explicitly
	        ax.set_title(f'{score_column} 점수 분포 분석', fontproperties=korean_font, fontsize=18, pad=20)
	        ax.set_xlabel('점수', fontproperties=korean_font, fontsize=14)
	        ax.set_ylabel('밀도', fontproperties=korean_font, fontsize=14)

	        # Legend: `fontsize` is ignored when `prop` is passed, so the size
	        # is set on a copy of the font properties instead.
	        legend_font = korean_font.copy()
	        legend_font.set_size(11)
	        legend = ax.legend(prop=legend_font, loc='upper right')
	        legend.get_frame().set_alpha(0.9)

	        ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)

	        # Statistics info box
	        stats_text = f'샘플 수: {len(scores)}\n평균: {mu:.2f}\n표준편차: {std:.2f}\n최솟값: {scores.min():.1f}\n최댓값: {scores.max():.1f}'
	        ax.text(0.02, 0.98, stats_text, transform=ax.transAxes,
	                fontproperties=korean_font, fontsize=10, verticalalignment='top',
	                bbox=dict(boxstyle='round,pad=0.5', facecolor='lightblue', alpha=0.8))

	        fig.tight_layout()
	        st.pyplot(fig)

	    except Exception as e:
	        st.error(f"❌ 상세 그래프 생성 중 오류: {e}")

	        # Fallback chart (English only)
	        st.write("Alternative Chart (English only):")
	        fig2, ax2 = plt.subplots(figsize=(10, 6))
	        try:
	            ax2.hist(scores, bins=15, alpha=0.7, color='lightcoral', edgecolor='black')
	            ax2.set_title(f'Distribution of {score_column}', fontsize=14)
	            ax2.set_xlabel('Score')
	            ax2.set_ylabel('Frequency')
	            ax2.grid(True, alpha=0.3)
	            st.pyplot(fig2)
	        finally:
	            plt.close(fig2)
	    finally:
	        if fig is not None:
	            plt.close(fig)


	def _show_skewness(scores):
	    """Show the sample skewness and a plain-language interpretation."""
	    st.write("#### 📐 분포 형태 분석 (왜도)")
	    try:
	        skewness = skew(scores)
	        col1, col2 = st.columns([1, 2])

	        with col1:
	            st.metric("왜도 (Skewness)", f"{skewness:.4f}")

	        with col2:
	            if skewness > 0.5:
	                st.success("🔴 양의 왜도 (Right Skewed): 대부분 학생이 낮은 점수대에 분포하고, 소수의 고득점자가 존재합니다.")
	            elif skewness < -0.5:
	                st.success("🔵 음의 왜도 (Left Skewed): 대부분 학생이 높은 점수대에 분포하고, 소수의 저득점자가 존재합니다.")
	            else:
	                st.success("🟢 대칭 분포: 점수가 평균을 중심으로 고르게 분포되어 있습니다.")

	    except Exception as e:
	        st.error(f"왜도 계산 중 오류: {e}")


	def _build_score_bins(scores):
	    """Return ``(bin_edges, bin_labels)`` for the binned distribution table.

	    Fixed grade bands are used only when every score lies in 0-100;
	    otherwise five equal-width bins spanning the observed range are built.
	    """
	    min_score, max_score = scores.min(), scores.max()

	    # Checking the minimum too keeps negative values from silently falling
	    # outside the fixed 0-100 bands.
	    if min_score >= 0 and max_score <= 100:
	        return [0, 60, 70, 80, 90, 100], ['0-60', '61-70', '71-80', '81-90', '91-100']

	    # linspace pins the last edge to exactly max_score, so the top value
	    # cannot drop out of the last bin through float rounding.
	    edges = np.linspace(min_score, max_score, 6)
	    labels = [f'{low:.0f}-{high:.0f}' for low, high in zip(edges[:-1], edges[1:])]
	    return edges, labels


	def _show_binned_distribution(scores):
	    """Show the number and share of students per score band."""
	    st.write("#### 📋 구간별 분포")

	    bins_edges, bins_labels = _build_score_bins(scores)

	    # pd.cut rejects degenerate edges/labels (e.g. all scores identical);
	    # that is reported as a warning rather than aborting the page.
	    try:
	        score_counts = pd.cut(scores, bins=bins_edges, labels=bins_labels, include_lowest=True).value_counts().sort_index()
	        score_percentages = (score_counts / len(scores) * 100).round(1)

	        result_df = pd.DataFrame({
	            '구간': score_counts.index,
	            '학생 수': score_counts.values,
	            '비율 (%)': score_percentages.values
	        })
	        st.dataframe(result_df)

	    except Exception as e:
	        st.warning(f"구간 분석 중 오류: {e}")


	def analyze_scores(df):
	    """Render the full score analysis for a data frame in the Streamlit page.

	    Shows a preview of ``df``, lets the user choose one numeric column and
	    then displays summary statistics, a distribution chart, a skewness
	    assessment and a per-band breakdown for the non-null values of that
	    column.

	    Args:
	        df: pandas DataFrame holding the uploaded or generated data.

	    Returns:
	        None. All output goes to the Streamlit page; the function returns
	        early when there is no numeric column or the chosen column holds
	        no non-null values.
	    """
	    st.subheader("📋 데이터 미리보기 (상위 5개)")
	    st.dataframe(df.head())

	    # Only numeric columns are offered for analysis
	    numeric_columns = df.select_dtypes(include=np.number).columns.tolist()
	    if not numeric_columns:
	        st.error("❌ 데이터에서 분석 가능한 숫자 형식의 열을 찾을 수 없습니다.")
	        return

	    score_column = st.selectbox("📊 분석할 점수 열(column)을 선택하세요:", numeric_columns)

	    # Compare against None rather than truthiness so a falsy column label
	    # (for example the integer 0) is still analysed.
	    if score_column is None:
	        return

	    scores = df[score_column].dropna()
	    if scores.empty:
	        st.error("❌ 선택한 열에 유효한 데이터가 없습니다.")
	        return

	    st.subheader(f"📈 '{score_column}' 점수 분포 분석 결과")

	    _show_summary_statistics(scores)               # 1. descriptive statistics
	    _show_distribution_plot(scores, score_column)  # 2. distribution chart
	    _show_skewness(scores)                         # 3. skewness
	    _show_binned_distribution(scores)              # 4. per-band breakdown

	def main():
	    """Streamlit entry point: load score data and run the analysis.

	    Configures the page, shows font diagnostics, lets the user choose a
	    data source in the sidebar (CSV upload, published Google Sheets CSV URL
	    or generated sample data) and passes the resulting data frame to
	    ``analyze_scores``. When no data is available a short feature overview
	    is shown instead.
	    """
	    from urllib.parse import urlparse

	    st.set_page_config(
	        page_title="학생 점수 분석 도구",
	        page_icon="📊",
	        layout="wide",
	        initial_sidebar_state="expanded"
	    )

	    # Title
	    st.title("📊 학생 점수 분포 분석 도구")
	    st.markdown("CSV 파일을 업로드하거나 Google Sheets URL을 입력하여 점수 분포를 분석하세요")

	    # Font diagnostics
	    with st.expander("🔧 폰트 설정 정보"):
	        st.write(f"현재 폰트: {FONT_NAME}")
	        st.write(f"폰트 경로: {FONT_PATH if FONT_PATH else '시스템 기본'}")

	        # Font rendering test
	        if st.button("폰트 테스트"):
	            test_fig = None
	            try:
	                test_fig, test_ax = plt.subplots(figsize=(8, 3))
	                korean_font = apply_korean_font_to_plot()
	                test_ax.text(0.5, 0.7, '한글 폰트 테스트', ha='center', va='center',
	                             fontproperties=korean_font, fontsize=16)
	                test_ax.text(0.5, 0.3, '점수 분포 분석 그래프', ha='center', va='center',
	                             fontproperties=korean_font, fontsize=14)
	                test_ax.set_xlim(0, 1)
	                test_ax.set_ylim(0, 1)
	                test_ax.axis('off')
	                st.pyplot(test_fig)
	            except Exception as e:
	                st.error(f"폰트 테스트 실패: {e}")
	            finally:
	                # Close the figure even when rendering failed
	                if test_fig is not None:
	                    plt.close(test_fig)

	    st.markdown("---")

	    # Sidebar - data input
	    st.sidebar.title("📁 데이터 가져오기")
	    source_option = st.sidebar.radio(
	        "데이터 소스 선택:",
	        ("📤 CSV 파일 업로드", "🔗 Google Sheets URL", "🎲 샘플 데이터")
	    )

	    df = None

	    if source_option == "📤 CSV 파일 업로드":
	        uploaded_file = st.sidebar.file_uploader(
	            "CSV 파일을 선택하세요",
	            type=["csv"],
	            help="UTF-8, CP949 등 다양한 인코딩을 자동으로 감지합니다"
	        )
	        if uploaded_file:
	            encodings = ['utf-8-sig', 'utf-8', 'cp949', 'euc-kr', 'latin1']
	            for encoding in encodings:
	                try:
	                    # Rewind first: a failed attempt leaves the stream
	                    # partially consumed, which would make the next
	                    # encoding read from the wrong position.
	                    uploaded_file.seek(0)
	                    df = pd.read_csv(uploaded_file, encoding=encoding)
	                except UnicodeDecodeError:
	                    continue
	                except Exception as e:
	                    st.sidebar.error(f"파일 읽기 오류: {e}")
	                    break
	                else:
	                    st.sidebar.success(f"✅ 파일 로딩 성공! (인코딩: {encoding})")
	                    break
	            else:
	                # Reached only when every encoding failed to decode, so a
	                # non-encoding read error is not reported twice.
	                st.sidebar.error("❌ 파일 인코딩을 인식할 수 없습니다.")

	    elif source_option == "🔗 Google Sheets URL":
	        st.sidebar.info("💡 Google Sheets를 '웹에 게시'한 후 CSV URL을 입력하세요")
	        url = st.sidebar.text_input(
	            "Google Sheets CSV URL",
	            placeholder="https://docs.google.com/spreadsheets/d/..."
	        )
	        url = url.strip() if url else url

	        if url:
	            # The URL is untrusted input that the server will fetch, so
	            # validate the parsed host instead of doing a substring match
	            # (which any URL merely containing "docs.google.com" passes).
	            try:
	                parsed = urlparse(url)
	                is_google_sheet = (
	                    parsed.scheme in ("http", "https")
	                    and parsed.hostname == "docs.google.com"
	                )
	            except ValueError:
	                is_google_sheet = False

	            if is_google_sheet:
	                try:
	                    with st.spinner("📥 데이터 로딩 중..."):
	                        df = pd.read_csv(url)
	                    st.sidebar.success("✅ Google Sheets 로딩 성공!")
	                except Exception as e:
	                    st.sidebar.error(f"❌ URL 로딩 실패: {e}")
	            else:
	                st.sidebar.warning("⚠️ 올바른 Google Sheets URL을 입력하세요")

	    elif source_option == "🎲 샘플 데이터":
	        # The slider is created on every run (not only in the run where
	        # the button was clicked) so it stays visible and usable.
	        sample_size = st.sidebar.slider("샘플 크기", 50, 500, 100)

	        if st.sidebar.button("샘플 데이터 생성"):
	            # Local generator: same values as seeding the global RNG with
	            # 42, without changing global random state.
	            rng = np.random.RandomState(42)
	            st.session_state["sample_df"] = pd.DataFrame({
	                '학생번호': range(1, sample_size + 1),
	                '수학점수': rng.normal(75, 15, sample_size).clip(0, 100).round(1),
	                '영어점수': rng.normal(80, 12, sample_size).clip(0, 100).round(1),
	                '과학점수': rng.normal(70, 18, sample_size).clip(0, 100).round(1),
	                '국어점수': rng.normal(77, 14, sample_size).clip(0, 100).round(1)
	            })
	            st.sidebar.success(f"✅ {sample_size}명의 샘플 데이터 생성!")

	        # A button is True only for the rerun triggered by its click. The
	        # data is kept in session state so it survives later reruns (for
	        # example when the user picks another column to analyse).
	        df = st.session_state.get("sample_df")

	    # Main analysis
	    if df is not None and not df.empty:
	        st.success(f"🎉 데이터 로딩 완료! {len(df)}개 행, {len(df.columns)}개 열")
	        analyze_scores(df)
	    else:
	        st.info("👈 사이드바에서 데이터를 선택하세요")

	        # Feature overview
	        st.markdown("""
	### 🔍 주요 기능
	- 📊 기본 통계: 평균, 표준편차, 최솟값, 최댓값 등
	- 📈 분포 시각화: 히스토그램, KDE 곡선, 정규분포 비교
	- 📐 왜도 분석: 분포의 비대칭성 측정
	- 📋 구간별 분포: 점수 구간별 학생 수 및 비율

	### 📝 지원 형식
	- CSV 파일: UTF-8, CP949, EUC-KR 등 자동 인코딩 감지
	- Google Sheets: 웹에 게시된 시트의 CSV URL
	- 샘플 데이터: 테스트용 가상 점수 데이터
	""")

	# Launch the app only when this file is run as a script, not on import
	if __name__ == "__main__":
	    main()