Rooobert committed on
Commit
f937ce4
·
verified ·
1 Parent(s): dd5d47e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -17
app.py CHANGED
@@ -44,12 +44,9 @@ def scrape_booking_hotel():
44
 
45
  for hotel in hotel_cards:
46
  try:
47
- name_elem = hotel.find('div', {'data-testid': 'title', 'class': 'f6431b446c'})
48
  name = name_elem.text.strip() if name_elem else "無資料"
49
- price_elem = hotel.find('span', {
50
- 'data-testid': 'price-and-discounted-price',
51
- 'class': 'f6431b446c'
52
- })
53
  price = price_elem.text.strip() if price_elem else "無資料"
54
  price = price.replace('TWD', '').replace(' ', '').replace(',', '').strip()
55
  rating_container = hotel.find('div', {'class': 'a3b8729ab1'})
@@ -57,9 +54,9 @@ def scrape_booking_hotel():
57
  rating = rating_elem.text.strip() if rating_elem else "無評分"
58
  description_elem = hotel.find('div', {'data-testid': 'recommended-units'})
59
  if description_elem:
60
- room_type = description_elem.find('h4', {'class': 'abf093bdfe'})
61
  room_type = room_type.text.strip() if room_type else ""
62
- bed_info = description_elem.find('div', {'class': 'abf093bdfe'})
63
  bed_info = bed_info.text.strip() if bed_info else ""
64
  cancellation = description_elem.find('strong', text='可免費取消')
65
  cancellation = "可免費取消" if cancellation else ""
@@ -89,15 +86,14 @@ def clean_rating(x):
89
 
90
  def create_price_rating_scatter(df):
91
  if df.empty:
92
- st.warning("沒有可視化的有效數據。請檢查輸入資料。")
93
- return None # 無數據時返回空
94
-
95
  fig = px.scatter(
96
  df,
97
  x='價格',
98
  y='評分',
99
  text='飯店名稱',
100
- size='價格', # 點的大小根據價格變化
101
  color='評分',
102
  title='台南飯店價格與評分關係圖',
103
  labels={'價格': '房價 (TWD)', '評分': '評分 (0-10)'}
@@ -106,6 +102,9 @@ def create_price_rating_scatter(df):
106
  return fig
107
 
108
  def create_price_distribution(df):
 
 
 
109
  fig = go.Figure()
110
  fig.add_trace(go.Histogram(
111
  x=df['價格'],
@@ -135,7 +134,7 @@ if mode == "資料爬取":
135
  if st.button("開始爬取"):
136
  df = scrape_booking_hotel()
137
  if not df.empty:
138
- st.dataframe(df)
139
  df.to_csv('booking_hotels_tainan.csv', index=False, encoding='utf-8-sig')
140
  st.success("資料爬取成功,已儲存至 booking_hotels_tainan.csv")
141
  else:
@@ -145,10 +144,8 @@ elif mode == "資料視覺化":
145
  st.header("分析與視覺化")
146
  try:
147
  df = pd.read_csv('booking_hotels_tainan.csv', encoding='utf-8-sig')
148
-
149
- # 清理數據
150
  df['價格'] = pd.to_numeric(df['價格'], errors='coerce') # 無效值轉為 NaN
151
- df['評分'] = df['評分'].apply(clean_rating) # 清理評分數據
152
  df = df.dropna(subset=['價格']) # 移除價格為 NaN 的行
153
 
154
  # 顯示數據摘要
@@ -156,8 +153,12 @@ elif mode == "資料視覺化":
156
  st.write(f"價格缺失值數量:{df['價格'].isna().sum()}")
157
 
158
  # 繪製圖表
159
- st.plotly_chart(create_price_rating_scatter(df))
160
- st.plotly_chart(create_price_distribution(df))
 
 
 
 
161
  except Exception as e:
162
  st.error(f"讀取或分析資料時發生錯誤:{e}")
163
 
 
44
 
45
  for hotel in hotel_cards:
46
  try:
47
+ name_elem = hotel.find('div', {'data-testid': 'title'})
48
  name = name_elem.text.strip() if name_elem else "無資料"
49
+ price_elem = hotel.find('span', {'data-testid': 'price-and-discounted-price'})
 
 
 
50
  price = price_elem.text.strip() if price_elem else "無資料"
51
  price = price.replace('TWD', '').replace(' ', '').replace(',', '').strip()
52
  rating_container = hotel.find('div', {'class': 'a3b8729ab1'})
 
54
  rating = rating_elem.text.strip() if rating_elem else "無評分"
55
  description_elem = hotel.find('div', {'data-testid': 'recommended-units'})
56
  if description_elem:
57
+ room_type = description_elem.find('h4')
58
  room_type = room_type.text.strip() if room_type else ""
59
+ bed_info = description_elem.find('div')
60
  bed_info = bed_info.text.strip() if bed_info else ""
61
  cancellation = description_elem.find('strong', text='可免費取消')
62
  cancellation = "可免費取消" if cancellation else ""
 
86
 
87
  def create_price_rating_scatter(df):
88
  if df.empty:
89
+ st.warning("數據為空,無法生成圖表。")
90
+ return None
 
91
  fig = px.scatter(
92
  df,
93
  x='價格',
94
  y='評分',
95
  text='飯店名稱',
96
+ size='價格', # 點大小基於價格
97
  color='評分',
98
  title='台南飯店價格與評分關係圖',
99
  labels={'價格': '房價 (TWD)', '評分': '評分 (0-10)'}
 
102
  return fig
103
 
104
  def create_price_distribution(df):
105
+ if df.empty:
106
+ st.warning("數據為空,無法生成圖表。")
107
+ return None
108
  fig = go.Figure()
109
  fig.add_trace(go.Histogram(
110
  x=df['價格'],
 
134
  if st.button("開始爬取"):
135
  df = scrape_booking_hotel()
136
  if not df.empty:
137
+ st.dataframe(df) # 顯示數據
138
  df.to_csv('booking_hotels_tainan.csv', index=False, encoding='utf-8-sig')
139
  st.success("資料爬取成功,已儲存至 booking_hotels_tainan.csv")
140
  else:
 
144
  st.header("分析與視覺化")
145
  try:
146
  df = pd.read_csv('booking_hotels_tainan.csv', encoding='utf-8-sig')
 
 
147
  df['價格'] = pd.to_numeric(df['價格'], errors='coerce') # 無效值轉為 NaN
148
+ df['評分'] = df['評分'].apply(clean_rating) # 清理評分
149
  df = df.dropna(subset=['價格']) # 移除價格為 NaN 的行
150
 
151
  # 顯示數據摘要
 
153
  st.write(f"價格缺失值數量:{df['價格'].isna().sum()}")
154
 
155
  # 繪製圖表
156
+ scatter_fig = create_price_rating_scatter(df)
157
+ if scatter_fig:
158
+ st.plotly_chart(scatter_fig)
159
+ dist_fig = create_price_distribution(df)
160
+ if dist_fig:
161
+ st.plotly_chart(dist_fig)
162
  except Exception as e:
163
  st.error(f"讀取或分析資料時發生錯誤:{e}")
164