File size: 5,685 Bytes
f39d20c
 
 
65e4f78
cd5f134
 
bb81aa7
 
 
65e4f78
36d76bc
cd5f134
f39d20c
 
 
 
 
 
cd5f134
 
 
f39d20c
 
 
cd5f134
f39d20c
 
974dc1d
f39d20c
 
 
 
974dc1d
f39d20c
 
 
 
974dc1d
 
36d76bc
974dc1d
 
36d76bc
65e4f78
974dc1d
 
36d76bc
cd5f134
974dc1d
 
 
 
36d76bc
 
974dc1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb81aa7
36d76bc
cd5f134
974dc1d
 
 
 
 
 
 
 
 
 
 
 
cd5f134
 
36d76bc
cd5f134
 
bb81aa7
 
974dc1d
cd5f134
 
36d76bc
974dc1d
 
 
 
 
 
 
 
 
36d76bc
974dc1d
 
 
 
 
 
36d76bc
974dc1d
 
 
36d76bc
974dc1d
 
 
 
36d76bc
974dc1d
 
 
 
36d76bc
974dc1d
 
 
 
36d76bc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from google.oauth2.service_account import Credentials
import gspread

# 爬取 Booking.com 台南飯店數據
def _to_float(text):
    """Return ``text`` as a float if it is a plain non-negative decimal, else None.

    At most one decimal point is allowed. ``str.isdecimal`` is used rather
    than ``str.isdigit`` because ``isdigit`` also accepts characters such as
    superscript digits that ``float()`` rejects with ``ValueError``.
    """
    return float(text) if text.replace('.', '', 1).isdecimal() else None


def _parse_hotel_card(card):
    """Build one hotel record from a Booking.com ``property-card`` element.

    Args:
        card: The BeautifulSoup tag for a single search-result card.

    Returns:
        A dict with the keys ``飯店名稱``, ``價格``, ``評分`` and ``說明``.

    Raises:
        AttributeError: If the title or price element is missing from the card.
    """
    # NOTE(review): the hashed class names below look tied to one Booking.com
    # page build and may stop matching when the site is redeployed -- confirm.
    name = hotel_name = card.find(
        'div', {'data-testid': 'title', 'class': 'f6431b446c'}
    ).text.strip() or "無資料"

    price_text = card.find(
        'span', {'data-testid': 'price-and-discounted-price', 'class': 'f6431b446c'}
    ).text.strip()
    # Strip the currency code and thousands separators before converting.
    price_text = price_text.replace('TWD', '').replace(' ', '').replace(',', '').strip()
    price = _to_float(price_text)

    # The rating is optional: a card without a (parsable) score keeps its
    # other fields and gets a rating of None instead of being dropped.
    rating = None
    rating_container = card.find('div', {'class': 'a3b8729ab1'})
    if rating_container is not None:
        score_elem = rating_container.find('div', {'class': 'ac4a7896c7'})
        if score_elem is not None:
            rating = _to_float(score_elem.text.strip())

    units = card.find('div', {'data-testid': 'recommended-units'})
    if units is None:
        description = "無說明"
    else:
        room_elem = units.find('h4', {'class': 'abf093bdfe'})
        bed_elem = units.find('div', {'class': 'abf093bdfe'})
        parts = [
            room_elem.text.strip() if room_elem is not None else "",
            bed_elem.text.strip() if bed_elem is not None else "",
            "可免費取消" if units.find('strong', string='可免費取消') else "",
            "無需訂金" if units.find('strong', string='無需訂金') else "",
        ]
        # Join only the parts that are present so no dangling " | " separators
        # are left behind when a middle field is missing.
        description = " | ".join(part for part in parts if part)

    return {
        '飯店名稱': hotel_name,
        '價格': price,
        '評分': rating,
        '說明': description,
    }


def scrape_booking_hotel(
    destination='台南',
    checkin='2024-11-16',
    checkout='2024-11-17',
    dest_id='-2637868',
    timeout=15,
):
    """Scrape one page of Booking.com search results into a DataFrame.

    With no arguments this performs the same search as before: Tainan, two
    adults, one room, no children, check-in 2024-11-16, check-out 2024-11-17.

    Args:
        destination: Search text sent as the ``ss`` query parameter.
        checkin: Check-in date string sent as the ``checkin`` parameter.
        checkout: Check-out date string sent as the ``checkout`` parameter.
        dest_id: Value for the ``dest_id`` query parameter; it should identify
            the same city as ``destination``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A de-duplicated ``pandas.DataFrame`` with the columns ``飯店名稱``,
        ``價格`` (float or missing), ``評分`` (float or missing) and ``說明``.
        The columns are present even when no hotel was parsed or the request
        failed, so callers can always index them.
    """
    columns = ['飯店名稱', '價格', '評分', '說明']
    url = "https://www.booking.com/searchresults.zh-tw.html"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    }
    params = {
        'ss': destination,
        'checkin': checkin,
        'checkout': checkout,
        'group_adults': '2',
        'no_rooms': '1',
        'group_children': '0',
        'dest_id': dest_id,
        'dest_type': 'city',
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"請求發生錯誤: {e}")
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.text, 'html.parser')

    hotels_data = []
    for card in soup.find_all('div', {'data-testid': 'property-card'}):
        try:
            hotels_data.append(_parse_hotel_card(card))
        except AttributeError as e:
            # A card without a title or price is skipped, not fatal.
            print(f"解析飯店資訊時發生錯誤: {e}")

    # Passing the column list keeps the schema stable when nothing was parsed.
    return pd.DataFrame(hotels_data, columns=columns).drop_duplicates()

# 散佈圖
def create_price_rating_scatter(df):
    """Build a price-versus-rating scatter plot of the scraped hotels.

    Rows whose price or rating is missing or non-numeric are left out,
    because the price is also used as the marker size and the markers cannot
    be sized from a missing value.

    Args:
        df: DataFrame expected to hold the columns ``價格``, ``評分`` and
            ``飯店名稱``.

    Returns:
        A Plotly figure. When the required columns are absent or no row has
        both a price and a rating, an empty figure with the same title and
        axis labels is returned instead of raising.
    """
    title = '台南飯店價格與評分關係圖'
    price_label = '房價 (TWD)'
    rating_label = '評分 (0-10)'

    required = ['價格', '評分', '飯店名稱']
    if all(column in df.columns for column in required):
        plot_df = df.copy()
        plot_df['價格'] = pd.to_numeric(plot_df['價格'], errors='coerce')
        plot_df['評分'] = pd.to_numeric(plot_df['評分'], errors='coerce')
        plot_df = plot_df.dropna(subset=['價格', '評分'])
    else:
        plot_df = pd.DataFrame(columns=required)

    if plot_df.empty:
        # Nothing to plot: return a labelled placeholder rather than crashing
        # on max() of an empty column.
        fig = go.Figure()
        fig.update_layout(
            title_text=title,
            xaxis_title=price_label,
            yaxis_title=rating_label,
            height=600,
            title_x=0.5,
            title_font_size=20,
        )
        return fig

    fig = px.scatter(
        plot_df,
        x='價格',
        y='評分',
        text='飯店名稱',
        size='價格',
        color='評分',
        title=title,
        labels={'價格': price_label, '評分': rating_label},
    )
    fig.update_traces(textposition='top center')

    max_price = plot_df['價格'].max()
    if max_price > 0:
        # Scale the markers so the most expensive hotel is drawn at about 40px.
        fig.update_traces(marker=dict(sizeref=2. * max_price / (40. ** 2)))

    fig.update_layout(height=600, showlegend=True, title_x=0.5, title_font_size=20)
    return fig

# 價格分佈圖
def create_price_distribution(df):
    """Build a figure showing how hotel prices are distributed.

    The figure overlays a 10-bin histogram and a horizontal box plot of the
    ``價格`` column on the same axes.

    Args:
        df: DataFrame expected to hold a ``價格`` column.

    Returns:
        A Plotly figure. Missing or non-numeric prices are ignored; if the
        column is absent altogether, both traces are simply empty.
    """
    if '價格' in df.columns:
        prices = pd.to_numeric(df['價格'], errors='coerce').dropna()
    else:
        # A failed scrape may hand over a frame without the expected columns.
        prices = pd.Series(dtype=float)

    fig = go.Figure()
    fig.add_trace(
        go.Histogram(
            x=prices,
            name='價格分布',
            nbinsx=10,
            marker_color='rgb(55, 83, 109)',
        )
    )
    fig.add_trace(
        go.Box(
            x=prices,
            name='價格箱型圖',
            marker_color='rgb(26, 118, 255)',
        )
    )
    fig.update_layout(
        title_text='台南飯店價格分布',
        title_x=0.5,
        title_font_size=20,
        xaxis_title='價格 (TWD)',
        yaxis_title='數量',
        height=500,
        bargap=0.2,
        showlegend=True,
    )
    return fig

# 更新 Google Sheet
def update_google_sheet(
    df,
    *,
    credentials_file="realtime-441511-f5708eabdf26.json",
    sheet_url='https://docs.google.com/spreadsheets/d/1tIsXCbB8P6ZxdnZNnv7S7BBWbbT7lrSjW990zG-vQAA/edit?gid=0#gid=0',
):
    """Write the DataFrame (header row plus data rows) to a Google Sheet.

    The data is written to the first worksheet of the spreadsheet, with every
    value converted to text. Missing values become empty cells instead of the
    literal strings ``nan`` / ``None``.

    Args:
        df: The data to upload.
        credentials_file: Path to the service-account JSON key file.
        sheet_url: URL of the target spreadsheet.

    Returns:
        None. A Streamlit success message is shown after the upload, or a
        warning (and no upload) when ``df`` has no rows.
    """
    if df.empty:
        # Uploading only a header row would report success while doing
        # nothing useful, so tell the user instead.
        st.warning("No hotel data to upload; the Google Sheet was left unchanged.")
        return

    # NOTE(review): the key file name and sheet URL defaults are hard-coded in
    # source; consider loading them from configuration or Streamlit secrets.
    scope = ['https://www.googleapis.com/auth/spreadsheets']
    creds = Credentials.from_service_account_file(credentials_file, scopes=scope)
    gs = gspread.authorize(creds)
    worksheet = gs.open_by_url(sheet_url).get_worksheet(0)

    # Blank out missing values before stringifying the cells.
    cells = df.astype(object).where(df.notna(), "").astype(str)
    rows = [df.columns.tolist()] + cells.values.tolist()
    worksheet.update(rows)
    st.success("Data updated to Google Sheet successfully!")

# 主函數
def main():
    """Render the Streamlit page: data table, two charts and the upload button.

    The hotel data is scraped each time this function runs. When the scrape
    yields no rows, a warning is shown and the charts and upload button are
    skipped, because there is nothing to plot or upload.
    """
    st.set_page_config(page_title="Booking.com Hotel Analysis")
    st.title("Booking.com Hotel Analysis")

    # NOTE(review): Streamlit re-executes the script on every widget
    # interaction, so clicking the button below presumably triggers a fresh
    # scrape first -- consider caching the result if that is not intended.
    df = scrape_booking_hotel()

    if df.empty:
        st.warning("No hotel data could be retrieved from Booking.com. Please try again later.")
        return

    # Raw data table
    st.subheader("Hotel Data")
    st.dataframe(df)

    # Price vs. rating scatter plot
    st.subheader("Price vs Rating Scatter Plot")
    scatter_fig = create_price_rating_scatter(df)
    st.plotly_chart(scatter_fig)

    # Price distribution chart
    st.subheader("Price Distribution")
    dist_fig = create_price_distribution(df)
    st.plotly_chart(dist_fig)

    # Upload to Google Sheets on demand
    if st.button("Update Google Sheet"):
        update_google_sheet(df)

# Call main() only when this module is run as the entry script, not when it is imported.
if __name__ == "__main__":
    main()