Spaces:
Sleeping
Sleeping
File size: 5,685 Bytes
f39d20c 65e4f78 cd5f134 bb81aa7 65e4f78 36d76bc cd5f134 f39d20c cd5f134 f39d20c cd5f134 f39d20c 974dc1d f39d20c 974dc1d f39d20c 974dc1d 36d76bc 974dc1d 36d76bc 65e4f78 974dc1d 36d76bc cd5f134 974dc1d 36d76bc 974dc1d bb81aa7 36d76bc cd5f134 974dc1d cd5f134 36d76bc cd5f134 bb81aa7 974dc1d cd5f134 36d76bc 974dc1d 36d76bc 974dc1d 36d76bc 974dc1d 36d76bc 974dc1d 36d76bc 974dc1d 36d76bc 974dc1d 36d76bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from google.oauth2.service_account import Credentials
import gspread
# 爬取 Booking.com 台南飯店數據
def scrape_booking_hotel(checkin='2024-11-16', checkout='2024-11-17', timeout=10):
    """Scrape the Booking.com search-result page for hotels in Tainan.

    Args:
        checkin: Check-in date sent to Booking.com, formatted ``YYYY-MM-DD``.
        checkout: Check-out date sent to Booking.com, formatted ``YYYY-MM-DD``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A ``pandas.DataFrame`` with the columns ``飯店名稱`` (hotel name),
        ``價格`` (price as float, missing when unparsable), ``評分`` (rating
        as float, missing when absent or unparsable) and ``說明``
        (description). Duplicate rows are removed. When the HTTP request
        fails, or no card can be parsed, the frame is empty but still
        carries these four columns.

    NOTE(review): the default dates are the literals that used to be
    hard-coded in the request; they look like past dates -- confirm callers
    pass current dates when live results are needed.
    """
    columns = ['飯店名稱', '價格', '評分', '說明']
    url = "https://www.booking.com/searchresults.zh-tw.html"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    }
    params = {
        'ss': '台南',
        'checkin': checkin,
        'checkout': checkout,
        'group_adults': '2',
        'no_rooms': '1',
        'group_children': '0',
        'dest_id': '-2637868',
        'dest_type': 'city',
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"請求發生錯誤: {e}")
        # Keep the schema so callers can still index the expected columns.
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.text, 'html.parser')
    hotels_data = []
    for card in soup.find_all('div', {'data-testid': 'property-card'}):
        try:
            hotels_data.append(_parse_hotel_card(card))
        except AttributeError as e:
            # A card lacking its name or price element is skipped.
            print(f"解析飯店資訊時發生錯誤: {e}")

    return pd.DataFrame(hotels_data, columns=columns).drop_duplicates()


def _to_float(text):
    """Return ``text`` as a float, or None when it is not a plain decimal number."""
    if not text.replace('.', '', 1).isdigit():
        return None
    try:
        return float(text)
    except ValueError:
        # str.isdigit() accepts characters (e.g. superscript digits) that
        # float() rejects; treat those as "no value" instead of crashing.
        return None


def _build_description(card):
    """Return the room/bed/cancellation/payment summary for one property card."""
    unit = card.find('div', {'data-testid': 'recommended-units'})
    if unit is None:
        return "無說明"

    room_elem = unit.find('h4', {'class': 'abf093bdfe'})
    bed_elem = unit.find('div', {'class': 'abf093bdfe'})
    parts = [
        room_elem.text.strip() if room_elem is not None else "",
        bed_elem.text.strip() if bed_elem is not None else "",
        "可免費取消" if unit.find('strong', string='可免費取消') else "",
        "無需訂金" if unit.find('strong', string='無需訂金') else "",
    ]
    # Join only the pieces that exist so no empty " |  | " segments remain.
    return " | ".join(part for part in parts if part)


def _parse_hotel_card(card):
    """Turn one property-card element into a result-row dict.

    Raises:
        AttributeError: if the name or price element is missing from the card.
    """
    name = card.find('div', {'data-testid': 'title', 'class': 'f6431b446c'}).text.strip() or "無資料"

    price_text = card.find(
        'span', {'data-testid': 'price-and-discounted-price', 'class': 'f6431b446c'}
    ).text.strip() or "無資料"
    price_text = price_text.replace('TWD', '').replace(' ', '').replace(',', '').strip()

    # A missing rating (container or inner score element) yields an empty
    # rating rather than discarding the whole hotel.
    rating_text = "無評分"
    rating_container = card.find('div', {'class': 'a3b8729ab1'})
    if rating_container is not None:
        rating_elem = rating_container.find('div', {'class': 'ac4a7896c7'})
        if rating_elem is not None:
            rating_text = rating_elem.text.strip()

    return {
        '飯店名稱': name,
        '價格': _to_float(price_text),
        '評分': _to_float(rating_text),
        '說明': _build_description(card),
    }
# 散佈圖
def create_price_rating_scatter(df):
    """Build a price-vs-rating scatter plot of the scraped hotels.

    Rows without a price or a rating are left out of the plot: the price is
    also used as the marker size, and Plotly rejects missing values there.

    Args:
        df: DataFrame expected to hold the columns ``飯店名稱``, ``價格``
            and ``評分``.

    Returns:
        A Plotly figure. When ``df`` lacks the required columns or has no
        plottable row, an empty figure carrying the same title and axis
        labels is returned instead of raising.
    """
    title = '台南飯店價格與評分關係圖'
    required = ('飯店名稱', '價格', '評分')

    if all(column in df.columns for column in required):
        plot_df = df.dropna(subset=['價格', '評分'])
    else:
        plot_df = df.iloc[0:0]

    if plot_df.empty:
        # Nothing to draw: return a titled placeholder instead of crashing.
        fig = go.Figure()
        fig.update_layout(
            title_text=title,
            xaxis_title='房價 (TWD)',
            yaxis_title='評分 (0-10)',
        )
    else:
        fig = px.scatter(
            plot_df,
            x='價格',
            y='評分',
            text='飯店名稱',
            size='價格',
            color='評分',
            title=title,
            labels={'價格': '房價 (TWD)', '評分': '評分 (0-10)'},
        )
        fig.update_traces(textposition='top center')
        max_price = plot_df['價格'].max()
        if max_price > 0:
            # Scale markers so the most expensive hotel is about 40 px wide.
            fig.update_traces(marker=dict(sizeref=2. * max_price / (40. ** 2)))

    fig.update_layout(height=600, showlegend=True, title_x=0.5, title_font_size=20)
    return fig
# 價格分佈圖
def create_price_distribution(df):
    """Build a figure overlaying a histogram and a box plot of hotel prices.

    Args:
        df: DataFrame whose ``價格`` column holds the prices to plot.

    Returns:
        A Plotly figure containing the histogram trace followed by the box
        trace, both drawn from the ``價格`` column.
    """
    prices = df['價格']
    traces = (
        go.Histogram(x=prices, name='價格分布', nbinsx=10, marker_color='rgb(55, 83, 109)'),
        go.Box(x=prices, name='價格箱型圖', marker_color='rgb(26, 118, 255)'),
    )

    fig = go.Figure()
    for trace in traces:
        fig.add_trace(trace)

    layout_options = dict(
        title_text='台南飯店價格分布',
        title_x=0.5,
        title_font_size=20,
        xaxis_title='價格 (TWD)',
        yaxis_title='數量',
        height=500,
        bargap=0.2,
        showlegend=True,
    )
    fig.update_layout(**layout_options)
    return fig
# 更新 Google Sheet
def update_google_sheet(
    df,
    credentials_path="realtime-441511-f5708eabdf26.json",
    sheet_url='https://docs.google.com/spreadsheets/d/1tIsXCbB8P6ZxdnZNnv7S7BBWbbT7lrSjW990zG-vQAA/edit?gid=0#gid=0',
):
    """Replace the first worksheet of a Google Sheet with the contents of ``df``.

    Args:
        df: DataFrame to upload; its column names become the header row.
        credentials_path: Path to the service-account JSON key file.
        sheet_url: URL of the target spreadsheet.

    Side effects:
        Clears the first worksheet and writes the new table, then shows a
        Streamlit success message. When ``df`` is empty nothing is written
        and a Streamlit warning is shown instead, so a failed scrape cannot
        wipe previously uploaded data.
    """
    if df.empty:
        st.warning("No data to upload; Google Sheet left unchanged.")
        return

    scope = ['https://www.googleapis.com/auth/spreadsheets']
    creds = Credentials.from_service_account_file(credentials_path, scopes=scope)
    client = gspread.authorize(creds)
    worksheet = client.open_by_url(sheet_url).get_worksheet(0)

    header = df.columns.values.tolist()
    # Upload missing values as blank cells rather than the text "nan"/"None".
    printable = df.astype(object).where(df.notna(), '')
    rows = printable.astype(str).values.tolist()

    # Clear first so rows from a previous, longer upload do not linger
    # below the new table.
    worksheet.clear()
    worksheet.update([header] + rows)
    st.success("Data updated to Google Sheet successfully!")
# 主函數
def main():
    """Render the Streamlit page: scraped table, two charts and the upload button.

    Scrapes the hotel data, and when nothing was retrieved shows a warning
    and stops instead of letting the chart builders fail on an empty frame.
    """
    st.set_page_config(page_title="Booking.com Hotel Analysis")
    st.title("Booking.com Hotel Analysis")

    df = scrape_booking_hotel()
    if df.empty:
        # The charts index price/rating columns; bail out early when the
        # scrape produced no rows.
        st.warning("No hotel data could be retrieved from Booking.com. Please try again later.")
        return

    # Show the raw data table.
    st.subheader("Hotel Data")
    st.dataframe(df)

    # Show the price-vs-rating scatter plot.
    st.subheader("Price vs Rating Scatter Plot")
    scatter_fig = create_price_rating_scatter(df)
    st.plotly_chart(scatter_fig)

    # Show the price distribution chart.
    st.subheader("Price Distribution")
    dist_fig = create_price_distribution(df)
    st.plotly_chart(dist_fig)

    # Push the data to Google Sheets on demand.
    if st.button("Update Google Sheet"):
        update_google_sheet(df)


# Run the app when the file is executed as a script.
if __name__ == "__main__":
    main()
|