File size: 5,209 Bytes
f39d20c
 
 
65e4f78
f39d20c
 
65e4f78
f39d20c
 
 
 
 
 
 
 
e1af865
f39d20c
65e4f78
e1af865
f39d20c
 
 
 
 
 
e1af865
 
 
f39d20c
 
 
 
 
 
 
 
 
 
 
 
 
 
f937ce4
f39d20c
f937ce4
f39d20c
ec055df
e1af865
ec055df
 
 
 
 
 
 
 
f39d20c
 
 
 
ec055df
f39d20c
f937ce4
f39d20c
f937ce4
f39d20c
 
 
 
 
 
 
 
ec055df
f39d20c
 
 
 
 
 
 
 
65e4f78
f39d20c
 
 
 
65e4f78
f39d20c
 
 
 
 
 
 
 
65e4f78
083a08a
f39d20c
e1af865
 
 
 
 
f39d20c
e1af865
f39d20c
083a08a
e1af865
 
f39d20c
 
65e4f78
f39d20c
 
 
e1af865
f39d20c
 
 
e1af865
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
from google.oauth2.service_account import Credentials
import gspread

# Google Sheets credentials
# SCOPE is the OAuth scope list handed to the service-account credentials.
SCOPE = ['https://www.googleapis.com/auth/spreadsheets']
# Path to the service-account JSON key file, read when uploading.
SERVICE_ACCOUNT_FILE = "realtime-441511-f5708eabdf26.json"
# URL of the spreadsheet whose first worksheet receives the uploaded data.
SPREADSHEET_URL = "https://docs.google.com/spreadsheets/d/1tIsXCbB8P6ZxdnZNnv7S7BBWbbT7lrSjW990zG-vQAA/edit?gid=0#gid=0"

# Streamlit app
# Page title, sidebar header, and the sidebar selector whose value (`mode`)
# decides which section of the app is rendered further down in the script.
st.title("Booking.com ε°ε—ι£―εΊ—θ³‡ζ–™ηˆ¬ε–θˆ‡εˆ†ζž")
st.sidebar.header("εŠŸθƒ½ιΈζ“‡")
mode = st.sidebar.selectbox("選擇樑式", ["θ³‡ζ–™ηˆ¬ε–", "δΈŠε‚³θ‡³ Google Sheet"])

def _clean_price(raw_price):
    """Convert a displayed price such as ``"TWD 3,200"`` to an int, or None.

    Returns None when the remaining text is not a plain decimal number.
    """
    # str.split() with no argument splits on every Unicode whitespace
    # character, so non-breaking spaces between the currency and the amount
    # are removed as well as ordinary spaces.
    digits = "".join(raw_price.replace('TWD', '').replace(',', '').split())
    return int(digits) if digits.isdecimal() else None


def _parse_hotel_card(hotel):
    """Extract name, price, rating and description from one property card.

    Args:
        hotel: The BeautifulSoup tag of a single search-result card.

    Returns:
        A dict with one entry per output column.
    """
    name_elem = hotel.find('div', {'data-testid': 'title'})
    name = name_elem.text.strip() if name_elem else "焑資料"

    price_elem = hotel.find('span', {'data-testid': 'price-and-discounted-price'})
    price = _clean_price(price_elem.text) if price_elem else None

    rating_container = hotel.find('div', {'class': 'a3b8729ab1'})
    rating_elem = (
        rating_container.find('div', {'class': 'ac4a7896c7'})
        if rating_container else None
    )
    rating = rating_elem.text.strip() if rating_elem else "η„‘θ©•εˆ†"

    description_elem = hotel.find('div', {'data-testid': 'recommended-units'})
    if description_elem:
        room_type = description_elem.find('h4')
        room_type = room_type.text.strip() if room_type else ""
        bed_info = description_elem.find('div')
        bed_info = bed_info.text.strip() if bed_info else ""
        # `string=` is the current name of the deprecated `text=` filter.
        cancellation = description_elem.find('strong', string='ε―ε…θ²»ε–ζΆˆ')
        cancellation = "ε―ε…θ²»ε–ζΆˆ" if cancellation else ""
        payment = description_elem.find('strong', string='η„‘ιœ€θ¨‚ι‡‘')
        payment = "η„‘ιœ€θ¨‚ι‡‘" if payment else ""
        # Trim the separators left behind by empty leading/trailing parts.
        description = f"{room_type} | {bed_info} | {cancellation} | {payment}".strip(' |')
    else:
        description = "η„‘θͺͺ明"

    return {
        '飯店名稱': name,
        'εƒΉζ Ό': price,
        'θ©•εˆ†': rating,
        'θͺͺ明': description
    }


@st.cache_data
def _scrape_booking_hotel_cached(location, checkin_date, checkout_date):
    """Fetch and parse the search results; raises on any request failure.

    Kept separate from the public wrapper so that a failed request raises
    out of the cached function instead of returning (and caching) an empty
    result for these arguments.
    """
    url = "https://www.booking.com/searchresults.zh-tw.html"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    }
    params = {
        'ss': location,
        'checkin': checkin_date,
        'checkout': checkout_date,
        'group_adults': '2',
        'no_rooms': '1',
        'group_children': '0',
        'dest_type': 'city'
    }
    # Without a timeout a stalled connection would block the app forever.
    response = requests.get(url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    hotels_data = []
    for hotel in soup.find_all('div', {'data-testid': 'property-card'}):
        try:
            hotels_data.append(_parse_hotel_card(hotel))
        except AttributeError:
            # Best effort: skip a card whose markup is not as expected.
            continue

    return pd.DataFrame(hotels_data).drop_duplicates()


def scrape_booking_hotel(location, checkin_date, checkout_date):
    """Scrape Booking.com search results for a location and date range.

    Args:
        location: Search text sent as the ``ss`` query parameter.
        checkin_date: Check-in date string sent as ``checkin``.
        checkout_date: Check-out date string sent as ``checkout``.

    Returns:
        A de-duplicated DataFrame with one row per property card, or an
        empty DataFrame when the HTTP request fails or times out.
    """
    try:
        return _scrape_booking_hotel_cached(location, checkin_date, checkout_date)
    except requests.RequestException:
        return pd.DataFrame()


# Keep the cache-clearing handle available under the public name.
scrape_booking_hotel.clear = _scrape_booking_hotel_cached.clear

def upload_to_google_sheets(df):
    """Write a DataFrame to the first worksheet of the configured spreadsheet.

    Every cell is converted to a string, and the column names are sent as
    the first row followed by the data rows.

    Args:
        df: The DataFrame to upload.

    Returns:
        A success message string.
    """
    credentials = Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPE
    )
    client = gspread.authorize(credentials)
    first_sheet = client.open_by_url(SPREADSHEET_URL).get_worksheet(0)

    text_frame = df.astype(str)
    header_row = text_frame.columns.values.tolist()
    data_rows = text_frame.values.tolist()
    first_sheet.update([header_row] + data_rows)
    return "θ³‡ζ–™ε·²ζˆεŠŸδΈŠε‚³εˆ° Google Sheet!"

# Streamlit app implementation
# Render the section matching the mode chosen in the sidebar.
if mode == "θ³‡ζ–™ηˆ¬ε–":
    st.header("ηˆ¬ε–ι£―εΊ—θ³‡ζ–™")
    location = st.text_input("θΌΈε…₯ζŸ₯詒地點", "台南")
    checkin_date = st.date_input("選擇ε…₯住ζ—₯期")
    checkout_date = st.date_input("ιΈζ“‡ι€€ζˆΏζ—₯期")

    if st.button("ι–‹ε§‹ηˆ¬ε–"):
        # Both date inputs default to the same day, so reject a stay whose
        # check-out is not strictly after check-in before sending a request.
        if checkout_date <= checkin_date:
            st.error("退房日期必須晚於入住日期")
        else:
            df = scrape_booking_hotel(
                location,
                checkin_date.strftime('%Y-%m-%d'),
                checkout_date.strftime('%Y-%m-%d'),
            )
            if not df.empty:
                st.dataframe(df)
                # Persist the result so the upload mode can read it back.
                df.to_csv('booking_hotels.csv', index=False, encoding='utf-8-sig')
                st.success("θ³‡ζ–™ηˆ¬ε–ζˆεŠŸοΌŒε·²ε„²ε­˜θ‡³ booking_hotels.csv")
            else:
                st.error("ζœͺθƒ½ζˆεŠŸηˆ¬ε–θ³‡ζ–™")

elif mode == "δΈŠε‚³θ‡³ Google Sheet":
    st.header("δΈŠε‚³θ³‡ζ–™θ‡³ Google Sheet")
    try:
        df = pd.read_csv('booking_hotels.csv', encoding='utf-8-sig')
        result = upload_to_google_sheets(df)
        st.success(result)
    except Exception as e:
        # Top-level UI boundary: show any failure (missing CSV, auth, API)
        # to the user instead of crashing the page.
        st.error(f"δΈŠε‚³θ³‡ζ–™ζ™‚η™Όη”ŸιŒ―θͺ€οΌš{e}")