# app.py — last commit "Update app.py" (1935af6, verified), author Rooobert, 22.6 kB.
# (Header reconstructed from the file-viewer page this source was copied from.)
import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np
from datetime import datetime
from dataclasses import dataclass, field
from typing import Dict, List, Tuple, Any
# Loader for a Google Sheets worksheet
def read_google_sheet(sheet_id, sheet_number=0):
    """Download one worksheet of a Google Sheets document as a DataFrame.

    The worksheet is fetched through the document's CSV export URL, with
    ``sheet_number`` supplied as the ``gid`` query parameter.

    Returns:
        The parsed ``DataFrame``, or ``None`` when reading fails; in that
        case the error is shown in the Streamlit page via ``st.error``.
    """
    export_url = f'https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv&gid={sheet_number}'
    try:
        frame = pd.read_csv(export_url)
    except Exception as e:
        # Any failure (network, permissions, parsing) is reported in the UI
        # and signalled to the caller with None instead of propagating.
        reason = str(e)
        st.error(f"❌ 讀取失敗:{reason}")
        return None
    else:
        return frame
# Google Sheets document ID and the worksheet gid that main() passes to
# read_google_sheet() to build the CSV export URL.
sheet_id = "1Wc15DZWq48MxL7nXAsROJ6sRvH5njSa1ea0aaOGUOVk"
gid = "1168424766"
def _ordinal_codes(*labels: str):
    """Return a zero-argument factory building ``{label: 1-based position}``."""
    def build() -> Dict[str, int]:
        return {label: code for code, label in enumerate(labels, start=1)}
    return build


@dataclass
class SurveyMappings:
    """Lookup tables from survey answer labels to integer codes.

    Every instance gets its own fresh dictionaries; codes follow the order
    in which the labels are listed, starting at 1.
    """

    # Gender label -> code
    gender: Dict[str, int] = field(
        default_factory=_ordinal_codes('男性', '女性'))
    # Education level label -> code
    education: Dict[str, int] = field(
        default_factory=_ordinal_codes(
            '國小(含)以下', '國/初中', '高中/職', '專科', '大學', '研究所(含)以上'))
    # Visit-frequency label -> code
    frequency: Dict[str, int] = field(
        default_factory=_ordinal_codes(
            '第1次', '2-3次', '4-6次', '6次以上', '經常來學習,忘記次數了'))
class SurveyAnalyzer:
    """Summarise survey responses and draw the dashboard charts.

    The statistics methods (``calculate_age``, ``generate_report``) only
    depend on pandas. The ``plot_*`` methods build a Plotly figure and
    render it straight into the current Streamlit page.
    """

    # Sheet column headers read by this class.
    _BIRTH_YEAR_COL = '2.出生年(民國__年)'
    _GENDER_COL = '1. 性別'
    _EDUCATION_COL = '3.教育程度'
    _VENUE_COL = '場域名稱'
    _MONTH_COL = '月份'
    # Sentinel option meaning "no restriction" in the venue/month selectors.
    _ALL = '全部'

    def __init__(self):
        self.mappings = SurveyMappings()
        # Full question headers of the six satisfaction items ...
        self.satisfaction_columns = [
            '1. 示範場域提供多元的數位課程與活動',
            '2.示範場域的數位課程與活動對我的生活應用有幫助',
            '3. 示範場域的服務人員親切有禮貌',
            '4.示範場域的服務空間與數位設備友善方便',
            '5.在示範場域可以獲得需要的協助',
            '6.對於示範場域的服務感到滿意'
        ]
        # ... and the short display label for each, in the same order.
        self.satisfaction_short_names = [
            '多元課程與活動',
            '生活應用有幫助',
            '服務人員親切',
            '空間設備友善',
            '獲得需要協助',
            '整體服務滿意'
        ]

    def calculate_age(self, birth_year_column):
        """Convert Republic-of-China birth years to ages in the current year.

        Values that cannot be parsed as numbers become NaN, and so do their
        ages. The result has the same index as ``birth_year_column``.
        """
        current_year = datetime.now().year
        birth_years = pd.to_numeric(birth_year_column, errors='coerce')
        # ROC year + 1911 = Gregorian year.
        western_years = birth_years + 1911
        return current_year - western_years

    def generate_report(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Build the nested summary dictionary shown on the report page.

        Returns a dict with two sections: basic statistics (respondent
        count, gender and education distributions, mean age) and
        satisfaction statistics (overall mean plus per-item mean and
        standard deviation). All numbers except the respondent count are
        pre-formatted strings.
        """
        ages = self.calculate_age(df[self._BIRTH_YEAR_COL])

        # Distributions are rendered with the "人" (persons) unit attached.
        education_counts = df[self._EDUCATION_COL].value_counts().to_dict()
        education_with_counts = {k: f"{v}人" for k, v in education_counts.items()}
        gender_counts = df[self._GENDER_COL].value_counts().to_dict()
        gender_with_counts = {k: f"{v}人" for k, v in gender_counts.items()}

        satisfaction_stats = {}
        for short_name, col in zip(self.satisfaction_short_names, self.satisfaction_columns):
            satisfaction_stats[short_name] = {
                '平均分數': f"{df[col].mean():.2f}",
                '標準差': f"{df[col].std():.2f}"
            }

        return {
            '基本統計': {
                '總受訪人數': len(df),
                '性別分布': gender_with_counts,
                '教育程度分布': education_with_counts,
                '平均年齡': f"{ages.mean():.1f}歲"
            },
            '滿意度統計': {
                # Mean of the six per-item means.
                '整體平均滿意度': f"{df[self.satisfaction_columns].mean().mean():.2f}",
                '各項滿意度': satisfaction_stats
            }
        }

    def _apply_filters(self, df: pd.DataFrame, venues=None, month=None, age_range=None) -> pd.DataFrame:
        """Return a copy of ``df`` restricted by venue, month and age range.

        A venue or month restriction is skipped when its column is absent
        from ``df``, so selections made from placeholder options cannot
        raise ``KeyError``. Rows whose age is NaN never satisfy an age range.
        """
        filtered = df.copy()
        if venues and self._ALL not in venues and self._VENUE_COL in filtered.columns:
            filtered = filtered[filtered[self._VENUE_COL].isin(venues)]
        if month and month != self._ALL and self._MONTH_COL in filtered.columns:
            filtered = filtered[filtered[self._MONTH_COL] == month]
        if age_range:
            ages = self.calculate_age(filtered[self._BIRTH_YEAR_COL])
            filtered = filtered[(ages >= age_range[0]) & (ages <= age_range[1])]
        return filtered

    def _age_range_is_narrowed(self, df: pd.DataFrame, age_range) -> bool:
        """Tell whether ``age_range`` differs from the full age span of ``df``.

        Uses the NaN-skipping pandas reductions; when no age is available at
        all the range is reported as narrowed.
        """
        ages = self.calculate_age(df[self._BIRTH_YEAR_COL])
        lowest, highest = ages.min(), ages.max()
        if pd.isna(lowest) or pd.isna(highest):
            return True
        return age_range[0] != int(lowest) or age_range[1] != int(highest)

    def plot_satisfaction_scores(self, df: pd.DataFrame, venues=None, month=None, age_range=None):
        """Render a bar chart of the mean score (with std-dev error bars) per item.

        ``venues``, ``month`` and ``age_range`` optionally restrict the rows
        before the statistics are computed. The chart is drawn with
        ``st.plotly_chart``; nothing is returned.
        """
        filtered_df = self._apply_filters(df, venues, month, age_range)

        satisfaction_df = pd.DataFrame({
            '滿意度項目': self.satisfaction_short_names,
            '平均分數': [filtered_df[col].mean() for col in self.satisfaction_columns],
            '標準差': [filtered_df[col].std() for col in self.satisfaction_columns]
        })
        # Highest-rated item first.
        satisfaction_df = satisfaction_df.sort_values(by='平均分數', ascending=False)

        # Light-to-dark blue gradient keyed on the mean score.
        color_scale = [
            [0, '#90CAF9'],
            [0.5, '#2196F3'],
            [1, '#1565C0']
        ]

        fig = px.bar(
            satisfaction_df,
            x='滿意度項目',
            y='平均分數',
            error_y='標準差',
            title='📊 各項滿意度平均分數與標準差分析',
            color='平均分數',
            color_continuous_scale=color_scale,
            text='平均分數',
            hover_data={
                '滿意度項目': True,
                '平均分數': ':.2f',
                '標準差': ':.2f'
            }
        )

        fig.update_layout(
            font=dict(family="Arial", size=16),
            title_font=dict(family="Arial Black", size=24),
            title_x=0.5,  # centre the title
            xaxis_title="滿意度項目",
            yaxis_title="平均分數",
            yaxis_range=[0, 5],  # y axis fixed to 0-5
            plot_bgcolor='rgba(240,240,240,0.8)',
            paper_bgcolor='white',
            xaxis_tickangle=-25,  # slanted labels to avoid overlap
            margin=dict(l=40, r=40, t=80, b=60),
            legend_title_text="平均分數",
            shapes=[
                # Dashed reference line at a score of 4.
                dict(
                    type='line',
                    yref='y', y0=4, y1=4,
                    xref='paper', x0=0, x1=1,
                    line=dict(color='rgba(220,20,60,0.5)', width=2, dash='dash')
                )
            ],
            annotations=[
                # Label for the reference line.
                dict(
                    x=0.02, y=4.1,
                    xref='paper', yref='y',
                    text='優良標準 (4分)',
                    showarrow=False,
                    font=dict(size=14, color='rgba(220,20,60,0.8)')
                )
            ]
        )

        fig.update_traces(
            texttemplate='%{y:.2f}',
            textposition='outside',
            marker_line_color='rgb(8,48,107)',
            marker_line_width=1.5,
            opacity=0.85
        )

        # Respondent count placed below the plot area. ``y`` is given once:
        # a repeated keyword argument is a SyntaxError.
        num_respondents = len(filtered_df)
        fig.add_annotation(
            x=0.5, y=-0.2,
            xref='paper', yref='paper',
            text=f'受訪人數: {num_respondents}人',
            showarrow=False,
            font=dict(size=16),
            bgcolor='rgba(255,255,255,0.8)',
            bordercolor='rgba(0,0,0,0.2)',
            borderwidth=1,
            borderpad=4
        )

        st.plotly_chart(fig, use_container_width=True)

    def plot_gender_distribution(self, df: pd.DataFrame, venues=None, month=None, age_range=None):
        """Render the gender pie chart followed by a short textual summary.

        ``venues``, ``month`` and ``age_range`` optionally restrict the rows;
        active restrictions are echoed in the chart title. Output goes to
        the Streamlit page; nothing is returned.
        """
        filtered_df = self._apply_filters(df, venues, month, age_range)

        gender_counts = filtered_df[self._GENDER_COL].value_counts().reset_index()
        gender_counts.columns = ['性別', '人數']

        total = int(gender_counts['人數'].sum())
        gender_counts['百分比'] = (gender_counts['人數'] / total * 100).round(1)
        # Built with zip rather than a row-wise apply, which returns a
        # DataFrame (not a Series) when there are no rows.
        gender_counts['標籤'] = [
            f"{gender}: {count}人 ({pct}%)"
            for gender, count, pct in zip(
                gender_counts['性別'], gender_counts['人數'], gender_counts['百分比'])
        ]

        # Describe the active filters for the chart title.
        filter_description = []
        if venues and self._ALL not in venues:
            filter_description.append(f"場域: {', '.join(venues)}")
        if month and month != self._ALL:
            filter_description.append(f"月份: {month}")
        if age_range and self._age_range_is_narrowed(df, age_range):
            filter_description.append(f"年齡: {age_range[0]}-{age_range[1]}歲")
        filter_text = "(" + ", ".join(filter_description) + ")" if filter_description else ""

        # Blue for male, red for female.
        color_map = {'男性': '#1976D2', '女性': '#D32F2F'}

        fig = px.pie(
            gender_counts,
            names='性別',
            values='人數',
            title=f'👥 受訪者性別分布{filter_text}',
            color='性別',
            color_discrete_map=color_map,
            hover_data=['人數', '百分比'],
            labels={'人數': '人數', '百分比': '百分比'},
            custom_data=['標籤']
        )

        fig.update_layout(
            font=dict(family="Arial", size=16),
            title_font=dict(family="Arial Black", size=24),
            title_x=0.5,  # centre the title
            legend_title_text="性別",
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=-0.2,
                xanchor="center",
                x=0.5,
                font=dict(size=16),
                bordercolor="#E0E0E0",
                borderwidth=2
            ),
            margin=dict(l=20, r=20, t=80, b=100),
            paper_bgcolor='white',
            annotations=[
                dict(
                    text=f"總受訪人數: {total}人",
                    x=0.5, y=-0.3,
                    xref="paper",
                    yref="paper",
                    showarrow=False,
                    font=dict(size=16, color="#616161")
                )
            ]
        )

        counts_by_gender = dict(zip(gender_counts['性別'], gender_counts['人數']))
        male_count = int(counts_by_gender.get('男性', 0))
        female_count = int(counts_by_gender.get('女性', 0))

        # Male-to-female ratio caption.
        if male_count > 0 and female_count > 0:
            ratio = round(male_count / female_count, 2)
            ratio_text = f"男女比例 = {ratio}:1"
        elif male_count > 0:
            ratio_text = "僅有男性"
        elif female_count > 0:
            ratio_text = "僅有女性"
        else:
            ratio_text = "無性別數據"

        fig.add_annotation(
            text=ratio_text,
            x=0.5, y=-0.15,
            xref="paper",
            yref="paper",
            showarrow=False,
            font=dict(size=16, color="#424242", family="Arial Bold")
        )

        fig.update_traces(
            textinfo='percent+label',
            hovertemplate='%{customdata[0]}',  # show the prebuilt label on hover
            textfont_size=16,
            marker=dict(line=dict(color='#FFFFFF', width=2)),
            pull=[0.03, 0.03],  # pull the first two slices slightly apart
            rotation=45
        )

        st.plotly_chart(fig, use_container_width=True)

        # Short textual analysis underneath the pie chart.
        st.markdown("""
        <div style="background-color:#F5F5F5; padding:15px; border-radius:10px; margin-top:10px; border-left:5px solid #1976D2;">
        <h4 style="color:#1976D2;">📊 性別分佈簡易分析</h4>
        """, unsafe_allow_html=True)

        if total > 0:
            if male_count != female_count:
                majority_gender = '男性' if male_count > female_count else '女性'
                majority_pct = max(male_count, female_count) / total * 100
                st.markdown(f"""
                <p>本次調查中,<strong>{majority_gender}</strong>佔多數,約佔總體的<strong>{majority_pct:.1f}%</strong>。</p>
                """, unsafe_allow_html=True)
            else:
                st.markdown("<p>本次調查中,男女比例相等,各佔50%。</p>", unsafe_allow_html=True)
        else:
            st.markdown("<p>目前沒有足夠的性別數據進行分析。</p>", unsafe_allow_html=True)

        st.markdown("</div>", unsafe_allow_html=True)
# Streamlit UI entry point
def main():
    """Render the survey dashboard: page chrome, sidebar filters, selected analysis.

    The sheet is loaded with ``read_google_sheet(sheet_id, gid)``. When
    loading fails the function returns after drawing only the page header
    (the loader has already shown the error). Otherwise the sidebar offers
    venue, month and age-range filters plus a choice of three analyses, and
    the chosen analysis is rendered for the filtered rows.
    """
    # Local import: sheet-derived text is escaped before being placed in
    # markup rendered with unsafe_allow_html=True.
    from html import escape

    birth_year_col = '2.出生年(民國__年)'

    st.set_page_config(
        page_title="數位示範場域問卷調查分析",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    # Custom CSS used by the header, unit banner and cards.
    st.markdown("""
    <style>
    .main-header {
    font-size: 42px;
    font-weight: bold;
    color: #1E88E5;
    text-align: center;
    margin-bottom: 10px;
    padding-bottom: 15px;
    border-bottom: 2px solid #e0e0e0;
    }
    .sub-header {
    font-size: 24px;
    color: #424242;
    text-align: center;
    margin-bottom: 30px;
    }
    .unit-name {
    font-size: 28px;
    font-weight: bold;
    color: #1565C0;
    text-align: center;
    padding: 10px;
    background-color: #E3F2FD;
    border-radius: 8px;
    margin: 20px 0;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .card {
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    margin-bottom: 20px;
    background-color: white;
    }
    </style>
    """, unsafe_allow_html=True)

    # Title and subtitle.
    st.markdown('<div class="main-header">📊 數位示範場域問卷調查分析報告</div>', unsafe_allow_html=True)
    st.markdown('<div class="sub-header">本報告提供全面的問卷調查分析與視覺化圖表,協助了解民眾滿意度與使用者特性</div>', unsafe_allow_html=True)

    df = read_google_sheet(sheet_id, gid)
    if df is None:
        return

    analyzer = SurveyAnalyzer()

    # Unit banner: use the sheet's unit name when it is unambiguous,
    # otherwise fall back to the default caption.
    unit_name = "數位示範場域滿意度調查中心"
    if '單位名稱' in df.columns:
        unit_names = df['單位名稱'].dropna().unique()
        if len(unit_names) == 1:
            unit_name = str(unit_names[0])
    st.markdown(f'<div class="unit-name">{escape(unit_name)}</div>', unsafe_allow_html=True)

    st.sidebar.markdown("### 🔍 **數據篩選**")
    st.sidebar.markdown("---")

    # Venue selector. Missing values are dropped before sorting because
    # NaN and str cannot be ordered against each other.
    if '場域名稱' in df.columns:
        venues = ['全部'] + sorted(df['場域名稱'].dropna().unique().tolist())
    else:
        # No venue column: offer ten placeholder venues. Selecting one has
        # no filtering effect because there is nothing to filter on.
        venue_names = [
            "臺北數位樂學園", "新北創新學院", "桃園智慧中心",
            "臺中數位學苑", "臺南創客基地", "高雄創新園區",
            "宜蘭數位中心", "花蓮創新基地", "臺東學習中心", "金門數位樂園"
        ]
        venues = ['全部'] + venue_names
    selected_venues = st.sidebar.multiselect(
        "📍 **選擇場域**",
        venues,
        default=['全部'],
        help="可選擇多個場域進行數據分析比較"
    )

    # Month selector.
    if '月份' in df.columns:
        months = ['全部'] + sorted(df['月份'].dropna().unique().tolist())
    else:
        # No month column: offer placeholder "<year>年<n>月" options.
        current_year = datetime.now().year
        months = ['全部'] + [f'{current_year}年{i + 1}月' for i in range(12)]
    selected_month = st.sidebar.selectbox(
        "📅 **選擇月份**",
        months,
        help="選擇特定月份查看數據趨勢"
    )

    # Age-range slider. It is only offered when at least two distinct ages
    # exist; otherwise no age filter is applied.
    st.sidebar.markdown("### 📊 **年齡區間篩選**")
    valid_ages = analyzer.calculate_age(df[birth_year_col]).dropna()
    age_range = None
    full_age_range = None
    if not valid_ages.empty:
        min_age, max_age = int(valid_ages.min()), int(valid_ages.max())
        full_age_range = (min_age, max_age)
        if min_age < max_age:
            age_range = st.sidebar.slider(
                "選擇年齡範圍",
                min_age,
                max_age,
                (min_age, max_age),
                help="拖曳調整以篩選特定年齡區間的受訪者"
            )

    st.sidebar.header("📌 選擇數據分析")
    selected_analysis = st.sidebar.radio(
        "選擇要查看的分析",
        ["📋 問卷統計報告", "📊 滿意度統計", "🟠 性別分佈"])

    venue_filter_active = bool(selected_venues) and '全部' not in selected_venues
    month_filter_active = bool(selected_month) and selected_month != '全部'
    # Compared against the slider bounds computed above rather than
    # re-deriving them with builtin min()/max(), which mis-handle NaN.
    age_filter_active = age_range is not None and tuple(age_range) != full_age_range

    # Apply every filter whose column is present.
    filtered_df = df.copy()
    if venue_filter_active and '場域名稱' in filtered_df.columns:
        filtered_df = filtered_df[filtered_df['場域名稱'].isin(selected_venues)]
    if month_filter_active and '月份' in filtered_df.columns:
        filtered_df = filtered_df[filtered_df['月份'] == selected_month]
    if age_range:
        ages = analyzer.calculate_age(filtered_df[birth_year_col])
        age_mask = (ages >= age_range[0]) & (ages <= age_range[1])
        filtered_df = filtered_df[age_mask]

    # Banner summarising the active filters.
    filter_status = []
    if venue_filter_active:
        filter_status.append(f"📍 場域: {', '.join(str(v) for v in selected_venues)}")
    if month_filter_active:
        filter_status.append(f"📅 月份: {selected_month}")
    if age_filter_active:
        filter_status.append(f"👥 年齡: {age_range[0]}-{age_range[1]}歲")

    if filter_status:
        st.markdown("""
        <div style="background-color:#E3F2FD; padding:10px; border-radius:8px; margin-bottom:20px; border-left:4px solid #1976D2;">
        <h4 style="margin-bottom:10px; color:#1565C0;">🔍 當前篩選條件</h4>
        """, unsafe_allow_html=True)
        for status in filter_status:
            st.markdown(f"<p style='margin:5px 0;'>{escape(status)}</p>", unsafe_allow_html=True)
        # Sample size after filtering.
        st.markdown(f"""
        <p style='margin-top:10px; font-weight:bold;'>📊 篩選後樣本數: {len(filtered_df)}人</p>
        </div>
        """, unsafe_allow_html=True)

    def render_stats(stats):
        """Write one report section: scalars inline, dicts as indented items."""
        for key, value in stats.items():
            if not isinstance(value, dict):
                st.markdown(f"<p><strong>{key}:</strong> {escape(str(value))}</p>", unsafe_allow_html=True)
                continue
            st.markdown(f"<p><strong>{key}:</strong></p>", unsafe_allow_html=True)
            for k, v in value.items():
                if isinstance(v, dict):
                    # Per-item statistics: flatten to "name value、name value".
                    v = "、".join(f"{name} {score}" for name, score in v.items())
                st.markdown(
                    f"<p style='margin-left:20px;'>- {escape(str(k))}: {escape(str(v))}</p>",
                    unsafe_allow_html=True)

    if selected_analysis == "📋 問卷統計報告":
        st.markdown('<h2 style="color:#1976D2;">📋 問卷統計報告</h2>', unsafe_allow_html=True)
        report = analyzer.generate_report(filtered_df)
        col1, col2 = st.columns(2)
        with col1:
            st.markdown('<div class="card">', unsafe_allow_html=True)
            st.markdown('<h3 style="color:#1976D2; border-bottom:1px solid #e0e0e0; padding-bottom:10px;">📊 基本統計數據</h3>', unsafe_allow_html=True)
            render_stats(report['基本統計'])
            st.markdown('</div>', unsafe_allow_html=True)
        with col2:
            st.markdown('<div class="card">', unsafe_allow_html=True)
            st.markdown('<h3 style="color:#1976D2; border-bottom:1px solid #e0e0e0; padding-bottom:10px;">📊 滿意度統計</h3>', unsafe_allow_html=True)
            render_stats(report['滿意度統計'])
            st.markdown('</div>', unsafe_allow_html=True)
    elif selected_analysis == "📊 滿意度統計":
        # Rows are already filtered above, so no filter arguments are passed.
        analyzer.plot_satisfaction_scores(filtered_df)
    elif selected_analysis == "🟠 性別分佈":
        analyzer.plot_gender_distribution(filtered_df)
# Run the dashboard only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()