Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -44,12 +44,9 @@ def scrape_booking_hotel():
|
|
44 |
|
45 |
for hotel in hotel_cards:
|
46 |
try:
|
47 |
-
name_elem = hotel.find('div', {'data-testid': 'title'
|
48 |
name = name_elem.text.strip() if name_elem else "無資料"
|
49 |
-
price_elem = hotel.find('span', {
|
50 |
-
'data-testid': 'price-and-discounted-price',
|
51 |
-
'class': 'f6431b446c'
|
52 |
-
})
|
53 |
price = price_elem.text.strip() if price_elem else "無資料"
|
54 |
price = price.replace('TWD', '').replace(' ', '').replace(',', '').strip()
|
55 |
rating_container = hotel.find('div', {'class': 'a3b8729ab1'})
|
@@ -57,9 +54,9 @@ def scrape_booking_hotel():
|
|
57 |
rating = rating_elem.text.strip() if rating_elem else "無評分"
|
58 |
description_elem = hotel.find('div', {'data-testid': 'recommended-units'})
|
59 |
if description_elem:
|
60 |
-
room_type = description_elem.find('h4'
|
61 |
room_type = room_type.text.strip() if room_type else ""
|
62 |
-
bed_info = description_elem.find('div'
|
63 |
bed_info = bed_info.text.strip() if bed_info else ""
|
64 |
cancellation = description_elem.find('strong', text='可免費取消')
|
65 |
cancellation = "可免費取消" if cancellation else ""
|
@@ -89,15 +86,14 @@ def clean_rating(x):
|
|
89 |
|
90 |
def create_price_rating_scatter(df):
|
91 |
if df.empty:
|
92 |
-
st.warning("
|
93 |
-
return None
|
94 |
-
|
95 |
fig = px.scatter(
|
96 |
df,
|
97 |
x='價格',
|
98 |
y='評分',
|
99 |
text='飯店名稱',
|
100 |
-
size='價格', #
|
101 |
color='評分',
|
102 |
title='台南飯店價格與評分關係圖',
|
103 |
labels={'價格': '房價 (TWD)', '評分': '評分 (0-10)'}
|
@@ -106,6 +102,9 @@ def create_price_rating_scatter(df):
|
|
106 |
return fig
|
107 |
|
108 |
def create_price_distribution(df):
|
|
|
|
|
|
|
109 |
fig = go.Figure()
|
110 |
fig.add_trace(go.Histogram(
|
111 |
x=df['價格'],
|
@@ -135,7 +134,7 @@ if mode == "資料爬取":
|
|
135 |
if st.button("開始爬取"):
|
136 |
df = scrape_booking_hotel()
|
137 |
if not df.empty:
|
138 |
-
st.dataframe(df)
|
139 |
df.to_csv('booking_hotels_tainan.csv', index=False, encoding='utf-8-sig')
|
140 |
st.success("資料爬取成功,已儲存至 booking_hotels_tainan.csv")
|
141 |
else:
|
@@ -145,10 +144,8 @@ elif mode == "資料視覺化":
|
|
145 |
st.header("分析與視覺化")
|
146 |
try:
|
147 |
df = pd.read_csv('booking_hotels_tainan.csv', encoding='utf-8-sig')
|
148 |
-
|
149 |
-
# 清理數據
|
150 |
df['價格'] = pd.to_numeric(df['價格'], errors='coerce') # 無效值轉為 NaN
|
151 |
-
df['評分'] = df['評分'].apply(clean_rating) #
|
152 |
df = df.dropna(subset=['價格']) # 移除價格為 NaN 的行
|
153 |
|
154 |
# 顯示數據摘要
|
@@ -156,8 +153,12 @@ elif mode == "資料視覺化":
|
|
156 |
st.write(f"價格缺失值數量:{df['價格'].isna().sum()}")
|
157 |
|
158 |
# 繪製圖表
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
|
|
161 |
except Exception as e:
|
162 |
st.error(f"讀取或分析資料時發生錯誤:{e}")
|
163 |
|
|
|
44 |
|
45 |
for hotel in hotel_cards:
|
46 |
try:
|
47 |
+
name_elem = hotel.find('div', {'data-testid': 'title'})
|
48 |
name = name_elem.text.strip() if name_elem else "無資料"
|
49 |
+
price_elem = hotel.find('span', {'data-testid': 'price-and-discounted-price'})
|
|
|
|
|
|
|
50 |
price = price_elem.text.strip() if price_elem else "無資料"
|
51 |
price = price.replace('TWD', '').replace(' ', '').replace(',', '').strip()
|
52 |
rating_container = hotel.find('div', {'class': 'a3b8729ab1'})
|
|
|
54 |
rating = rating_elem.text.strip() if rating_elem else "無評分"
|
55 |
description_elem = hotel.find('div', {'data-testid': 'recommended-units'})
|
56 |
if description_elem:
|
57 |
+
room_type = description_elem.find('h4')
|
58 |
room_type = room_type.text.strip() if room_type else ""
|
59 |
+
bed_info = description_elem.find('div')
|
60 |
bed_info = bed_info.text.strip() if bed_info else ""
|
61 |
cancellation = description_elem.find('strong', text='可免費取消')
|
62 |
cancellation = "可免費取消" if cancellation else ""
|
|
|
86 |
|
87 |
def create_price_rating_scatter(df):
|
88 |
if df.empty:
|
89 |
+
st.warning("數據為空,無法生成圖表。")
|
90 |
+
return None
|
|
|
91 |
fig = px.scatter(
|
92 |
df,
|
93 |
x='價格',
|
94 |
y='評分',
|
95 |
text='飯店名稱',
|
96 |
+
size='價格', # 點大小基於價格
|
97 |
color='評分',
|
98 |
title='台南飯店價格與評分關係圖',
|
99 |
labels={'價格': '房價 (TWD)', '評分': '評分 (0-10)'}
|
|
|
102 |
return fig
|
103 |
|
104 |
def create_price_distribution(df):
|
105 |
+
if df.empty:
|
106 |
+
st.warning("數據為空,無法生成圖表。")
|
107 |
+
return None
|
108 |
fig = go.Figure()
|
109 |
fig.add_trace(go.Histogram(
|
110 |
x=df['價格'],
|
|
|
134 |
if st.button("開始爬取"):
|
135 |
df = scrape_booking_hotel()
|
136 |
if not df.empty:
|
137 |
+
st.dataframe(df) # 顯示數據
|
138 |
df.to_csv('booking_hotels_tainan.csv', index=False, encoding='utf-8-sig')
|
139 |
st.success("資料爬取成功,已儲存至 booking_hotels_tainan.csv")
|
140 |
else:
|
|
|
144 |
st.header("分析與視覺化")
|
145 |
try:
|
146 |
df = pd.read_csv('booking_hotels_tainan.csv', encoding='utf-8-sig')
|
|
|
|
|
147 |
df['價格'] = pd.to_numeric(df['價格'], errors='coerce') # 無效值轉為 NaN
|
148 |
+
df['評分'] = df['評分'].apply(clean_rating) # 清理評分
|
149 |
df = df.dropna(subset=['價格']) # 移除價格為 NaN 的行
|
150 |
|
151 |
# 顯示數據摘要
|
|
|
153 |
st.write(f"價格缺失值數量:{df['價格'].isna().sum()}")
|
154 |
|
155 |
# 繪製圖表
|
156 |
+
scatter_fig = create_price_rating_scatter(df)
|
157 |
+
if scatter_fig:
|
158 |
+
st.plotly_chart(scatter_fig)
|
159 |
+
dist_fig = create_price_distribution(df)
|
160 |
+
if dist_fig:
|
161 |
+
st.plotly_chart(dist_fig)
|
162 |
except Exception as e:
|
163 |
st.error(f"讀取或分析資料時發生錯誤:{e}")
|
164 |
|