Rooobert committed on
Commit
36d76bc
·
verified ·
1 Parent(s): 974dc1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -26
app.py CHANGED
@@ -8,6 +8,7 @@ from plotly.subplots import make_subplots
8
  from google.oauth2.service_account import Credentials
9
  import gspread
10
 
 
11
  def scrape_booking_hotel():
12
  url = "https://www.booking.com/searchresults.zh-tw.html"
13
  headers = {
@@ -36,18 +37,21 @@ def scrape_booking_hotel():
36
  for hotel in hotel_cards:
37
  try:
38
  name = hotel.find('div', {'data-testid': 'title', 'class': 'f6431b446c'}).text.strip() or "無資料"
 
39
  price = hotel.find('span', {'data-testid': 'price-and-discounted-price', 'class': 'f6431b446c'}).text.strip() or "無資料"
40
  price = price.replace('TWD', '').replace(' ', '').replace(',', '').strip()
 
41
 
42
  rating_container = hotel.find('div', {'class': 'a3b8729ab1'})
43
  rating = rating_container.find('div', {'class': 'ac4a7896c7'}).text.strip() if rating_container else "無評分"
 
44
 
45
  description_elem = hotel.find('div', {'data-testid': 'recommended-units'})
46
  if description_elem:
47
  room_type = description_elem.find('h4', {'class': 'abf093bdfe'}).text.strip() if description_elem.find('h4', {'class': 'abf093bdfe'}) else ""
48
  bed_info = description_elem.find('div', {'class': 'abf093bdfe'}).text.strip() if description_elem.find('div', {'class': 'abf093bdfe'}) else ""
49
- cancellation = "可免費取消" if description_elem.find('strong', text='可免費取消') else ""
50
- payment = "無需訂金" if description_elem.find('strong', text='無需訂金') else ""
51
  description = f"{room_type} | {bed_info} | {cancellation} | {payment}".strip(' |')
52
  else:
53
  description = "無說明"
@@ -71,6 +75,7 @@ def scrape_booking_hotel():
71
  print(f"請求發生錯誤: {e}")
72
  return pd.DataFrame()
73
 
 
74
  def create_price_rating_scatter(df):
75
  fig = px.scatter(
76
  df,
@@ -86,6 +91,7 @@ def create_price_rating_scatter(df):
86
  fig.update_layout(height=600, showlegend=True, title_x=0.5, title_font_size=20)
87
  return fig
88
 
 
89
  def create_price_distribution(df):
90
  fig = go.Figure()
91
  fig.add_trace(go.Histogram(x=df['價格'], name='價格分布', nbinsx=10, marker_color='rgb(55, 83, 109)'))
@@ -93,21 +99,7 @@ def create_price_distribution(df):
93
  fig.update_layout(title_text='台南飯店價格分布', title_x=0.5, title_font_size=20, xaxis_title='價格 (TWD)', yaxis_title='數量', height=500, bargap=0.2, showlegend=True)
94
  return fig
95
 
96
- def create_rating_box_by_price_range(df):
97
- fig = px.box(df, x='價格區間', y='評分', title='不同價格區間的評分分布', labels={'價格區間': '價格類型', '評分': '評分 (0-10)'}, color='價格區間')
98
- fig.update_layout(title_x=0.5, title_font_size=20, height=500, showlegend=False)
99
- return fig
100
-
101
- def create_hotel_comparison(df):
102
- fig = make_subplots(specs=[[{"secondary_y": True}]])
103
- df_sorted = df.sort_values('評分', ascending=True)
104
- fig.add_trace(go.Bar(x=df_sorted['飯店名稱'], y=df_sorted['評分'], name="評分", marker_color='rgb(55, 83, 109)'))
105
- fig.add_trace(go.Scatter(x=df_sorted['飯店名稱'], y=df_sorted['價格'], name="價格", marker_color='rgb(26, 118, 255)'), secondary_y=True)
106
- fig.update_layout(title_text='台南飯店評分與價格比較', title_x=0.5, title_font_size=20, height=700, showlegend=True, xaxis_tickangle=45)
107
- fig.update_yaxes(title_text="評分", secondary_y=False)
108
- fig.update_yaxes(title_text="價格 (TWD)", secondary_y=True)
109
- return fig
110
-
111
  def update_google_sheet(df):
112
  scope = ['https://www.googleapis.com/auth/spreadsheets']
113
  creds = Credentials.from_service_account_file("realtime-441511-f5708eabdf26.json", scopes=scope)
@@ -117,33 +109,30 @@ def update_google_sheet(df):
117
  worksheet.update([df.columns.values.tolist()] + df.astype(str).values.tolist())
118
  st.success("Data updated to Google Sheet successfully!")
119
 
 
120
  def main():
121
  st.set_page_config(page_title="Booking.com Hotel Analysis")
122
  st.title("Booking.com Hotel Analysis")
123
 
124
  df = scrape_booking_hotel()
125
 
 
126
  st.subheader("Hotel Data")
127
  st.dataframe(df)
128
 
 
129
  st.subheader("Price vs Rating Scatter Plot")
130
  scatter_fig = create_price_rating_scatter(df)
131
  st.plotly_chart(scatter_fig)
132
 
 
133
  st.subheader("Price Distribution")
134
  dist_fig = create_price_distribution(df)
135
  st.plotly_chart(dist_fig)
136
 
137
- st.subheader("Rating by Price Range")
138
- box_fig = create_rating_box_by_price_range(df)
139
- st.plotly_chart(box_fig)
140
-
141
- st.subheader("Hotel Comparison")
142
- comparison_fig = create_hotel_comparison(df)
143
- st.plotly_chart(comparison_fig)
144
-
145
  if st.button("Update Google Sheet"):
146
  update_google_sheet(df)
147
 
148
  if __name__ == "__main__":
149
- main()
 
8
  from google.oauth2.service_account import Credentials
9
  import gspread
10
 
11
+ # 爬取 Booking.com 台南飯店數據
12
  def scrape_booking_hotel():
13
  url = "https://www.booking.com/searchresults.zh-tw.html"
14
  headers = {
 
37
  for hotel in hotel_cards:
38
  try:
39
  name = hotel.find('div', {'data-testid': 'title', 'class': 'f6431b446c'}).text.strip() or "無資料"
40
+
41
  price = hotel.find('span', {'data-testid': 'price-and-discounted-price', 'class': 'f6431b446c'}).text.strip() or "無資料"
42
  price = price.replace('TWD', '').replace(' ', '').replace(',', '').strip()
43
+ price = float(price) if price.replace('.', '', 1).isdigit() else None # 價格轉為浮點數
44
 
45
  rating_container = hotel.find('div', {'class': 'a3b8729ab1'})
46
  rating = rating_container.find('div', {'class': 'ac4a7896c7'}).text.strip() if rating_container else "無評分"
47
+ rating = float(rating) if rating.replace('.', '', 1).isdigit() else None # 評分轉為浮點數
48
 
49
  description_elem = hotel.find('div', {'data-testid': 'recommended-units'})
50
  if description_elem:
51
  room_type = description_elem.find('h4', {'class': 'abf093bdfe'}).text.strip() if description_elem.find('h4', {'class': 'abf093bdfe'}) else ""
52
  bed_info = description_elem.find('div', {'class': 'abf093bdfe'}).text.strip() if description_elem.find('div', {'class': 'abf093bdfe'}) else ""
53
+ cancellation = "可免費取消" if description_elem.find('strong', string='可免費取消') else ""
54
+ payment = "無需訂金" if description_elem.find('strong', string='無需訂金') else ""
55
  description = f"{room_type} | {bed_info} | {cancellation} | {payment}".strip(' |')
56
  else:
57
  description = "無說明"
 
75
  print(f"請求發生錯誤: {e}")
76
  return pd.DataFrame()
77
 
78
+ # 散佈圖
79
  def create_price_rating_scatter(df):
80
  fig = px.scatter(
81
  df,
 
91
  fig.update_layout(height=600, showlegend=True, title_x=0.5, title_font_size=20)
92
  return fig
93
 
94
+ # 價格分佈圖
95
  def create_price_distribution(df):
96
  fig = go.Figure()
97
  fig.add_trace(go.Histogram(x=df['價格'], name='價格分布', nbinsx=10, marker_color='rgb(55, 83, 109)'))
 
99
  fig.update_layout(title_text='台南飯店價格分布', title_x=0.5, title_font_size=20, xaxis_title='價格 (TWD)', yaxis_title='數量', height=500, bargap=0.2, showlegend=True)
100
  return fig
101
 
102
+ # 更新 Google Sheet
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  def update_google_sheet(df):
104
  scope = ['https://www.googleapis.com/auth/spreadsheets']
105
  creds = Credentials.from_service_account_file("realtime-441511-f5708eabdf26.json", scopes=scope)
 
109
  worksheet.update([df.columns.values.tolist()] + df.astype(str).values.tolist())
110
  st.success("Data updated to Google Sheet successfully!")
111
 
112
+ # 主函數
113
  def main():
114
  st.set_page_config(page_title="Booking.com Hotel Analysis")
115
  st.title("Booking.com Hotel Analysis")
116
 
117
  df = scrape_booking_hotel()
118
 
119
+ # 顯示資料表格
120
  st.subheader("Hotel Data")
121
  st.dataframe(df)
122
 
123
+ # 顯示散佈圖
124
  st.subheader("Price vs Rating Scatter Plot")
125
  scatter_fig = create_price_rating_scatter(df)
126
  st.plotly_chart(scatter_fig)
127
 
128
+ # 顯示價格分布圖
129
  st.subheader("Price Distribution")
130
  dist_fig = create_price_distribution(df)
131
  st.plotly_chart(dist_fig)
132
 
133
+ # Google Sheet 更新
 
 
 
 
 
 
 
134
  if st.button("Update Google Sheet"):
135
  update_google_sheet(df)
136
 
137
  if __name__ == "__main__":
138
+ main()