Spaces:
Build error
Build error
File size: 5,209 Bytes
f39d20c 65e4f78 f39d20c 65e4f78 f39d20c e1af865 f39d20c 65e4f78 e1af865 f39d20c e1af865 f39d20c f937ce4 f39d20c f937ce4 f39d20c ec055df e1af865 ec055df f39d20c ec055df f39d20c f937ce4 f39d20c f937ce4 f39d20c ec055df f39d20c 65e4f78 f39d20c 65e4f78 f39d20c 65e4f78 083a08a f39d20c e1af865 f39d20c e1af865 f39d20c 083a08a e1af865 f39d20c 65e4f78 f39d20c e1af865 f39d20c e1af865 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
from google.oauth2.service_account import Credentials
import gspread
# Google Sheets access: OAuth scope, service-account key file, and the
# spreadsheet that receives the uploaded rows.
SCOPE = ["https://www.googleapis.com/auth/spreadsheets"]
SERVICE_ACCOUNT_FILE = "realtime-441511-f5708eabdf26.json"
SPREADSHEET_URL = (
    "https://docs.google.com/spreadsheets/d/"
    "1tIsXCbB8P6ZxdnZNnv7S7BBWbbT7lrSjW990zG-vQAA/edit?gid=0#gid=0"
)

# Page chrome: the title, then a sidebar selector whose value (`mode`)
# decides which workflow the rest of the script runs.
# NOTE(review): the UI strings look like UTF-8 text that was decoded with
# the wrong charset; restore them from the original file if it is available.
st.title("Booking.com ε°ει£―εΊθ³ζη¬εθεζ")
sidebar = st.sidebar
sidebar.header("εθ½ιΈζ")
mode = sidebar.selectbox(
    "ιΈζ樑εΌ",
    ["θ³ζη¬ε", "δΈε³θ³ Google Sheet"],
)
# Upper bound, in seconds, on how long the search request may take.
_REQUEST_TIMEOUT_SECONDS = 30


def _text_or(element, fallback):
    """Return the stripped text of *element*, or *fallback* when it is None."""
    return element.text.strip() if element is not None else fallback


def _parse_price(raw):
    """Convert a displayed price such as 'TWD 3,500' to an int, else None."""
    # str.split() with no argument splits on every Unicode whitespace
    # character, so non-breaking spaces are removed as well as ASCII ones.
    compact = "".join(raw.split()).replace('TWD', '').replace(',', '')
    # isascii() rules out digit-like characters that int() would reject.
    if compact.isascii() and compact.isdigit():
        return int(compact)
    return None


def _describe_units(card):
    """Summarise a card's 'recommended-units' section as one ' | '-joined line."""
    units = card.find('div', {'data-testid': 'recommended-units'})
    if units is None:
        return "η‘θͺͺζ"

    room_type = _text_or(units.find('h4'), "")
    bed_info = _text_or(units.find('div'), "")

    # Each marker is reported only when a <strong> with exactly that text
    # exists. `string=` is the current name of the old `text=` argument.
    free_cancel_label = 'ε―εθ²»εζΆ'
    no_prepay_label = 'η‘ιθ¨ι'
    cancellation = free_cancel_label if units.find('strong', string=free_cancel_label) else ""
    payment = no_prepay_label if units.find('strong', string=no_prepay_label) else ""

    # strip(' |') trims separators left dangling by empty leading/trailing parts.
    return f"{room_type} | {bed_info} | {cancellation} | {payment}".strip(' |')


def _parse_hotel_card(card):
    """Extract name, price, rating and description from one property card."""
    name = _text_or(card.find('div', {'data-testid': 'title'}), "η‘θ³ζ")

    price_text = _text_or(
        card.find('span', {'data-testid': 'price-and-discounted-price'}),
        "η‘θ³ζ",
    )
    price = _parse_price(price_text)

    # NOTE(review): these class names look build-generated and may change
    # whenever the site is redeployed - confirm they are still current.
    rating_container = card.find('div', {'class': 'a3b8729ab1'})
    rating_elem = (
        rating_container.find('div', {'class': 'ac4a7896c7'})
        if rating_container is not None
        else None
    )
    rating = _text_or(rating_elem, "η‘θ©ε")

    return {
        'ι£―εΊε稱': name,
        'εΉζ Ό': price,
        'θ©ε': rating,
        'θͺͺζ': _describe_units(card),
    }


@st.cache_data
def scrape_booking_hotel(location, checkin_date, checkout_date):
    """Fetch one Booking.com search-results page and tabulate its hotel cards.

    Args:
        location: Search text, sent as the ``ss`` query parameter.
        checkin_date: Check-in date string, sent as ``checkin``.
        checkout_date: Check-out date string, sent as ``checkout``.

    Returns:
        A de-duplicated ``pandas.DataFrame`` with one row per property card
        (name, price as int or None, rating text, description). An empty
        DataFrame is returned when the HTTP request fails or times out, or
        when the page contains no property cards.
    """
    url = "https://www.booking.com/searchresults.zh-tw.html"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    }
    params = {
        'ss': location,
        'checkin': checkin_date,
        'checkout': checkout_date,
        'group_adults': '2',
        'no_rooms': '1',
        'group_children': '0',
        'dest_type': 'city',
    }

    try:
        # Without a timeout a stalled connection would block the app
        # indefinitely; a timeout raises a RequestException subclass and is
        # therefore handled the same way as any other request failure.
        response = requests.get(
            url,
            headers=headers,
            params=params,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
    except requests.RequestException:
        return pd.DataFrame()

    soup = BeautifulSoup(response.text, 'html.parser')
    hotels_data = []
    for hotel in soup.find_all('div', {'data-testid': 'property-card'}):
        try:
            hotels_data.append(_parse_hotel_card(hotel))
        except AttributeError:
            # Best effort: skip a card with unexpected markup rather than
            # abandoning the whole page.
            continue

    return pd.DataFrame(hotels_data).drop_duplicates()
def upload_to_google_sheets(df):
    """Write *df* (header row plus data rows) to the first worksheet.

    Authorises with the service-account key file, opens the spreadsheet at
    ``SPREADSHEET_URL`` and sends every value as a string.

    Args:
        df: The ``pandas.DataFrame`` to upload.

    Returns:
        A confirmation message for display to the user.
    """
    credentials = Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE,
        scopes=SCOPE,
    )
    client = gspread.authorize(credentials)
    first_tab = client.open_by_url(SPREADSHEET_URL).get_worksheet(0)

    # Every cell is stringified before it is sent to the sheet.
    as_text = df.astype(str)
    header_row = as_text.columns.values.tolist()
    data_rows = as_text.values.tolist()
    first_tab.update([header_row] + data_rows)

    return "θ³ζε·²ζεδΈε³ε° Google SheetοΌ"
# Streamlit app implementation: run the workflow chosen in the sidebar.
# NOTE(review): the UI strings look like UTF-8 text that was decoded with
# the wrong charset; restore them from the original file if it is available.
if mode == "θ³ζη¬ε":
    # Scrape workflow: collect the search inputs, then fetch on demand.
    st.header("η¬ει£―εΊθ³ζ")
    # These two labels were split across physical lines, which is an
    # unterminated string literal; each is rejoined onto a single line.
    location = st.text_input("θΌΈε₯ζ₯θ©’ε°ι»", "ε°ε")
    checkin_date = st.date_input("ιΈζε₯δ½ζ₯ζ")
    checkout_date = st.date_input("ιΈζιζΏζ₯ζ")
    if st.button("ιε§η¬ε"):
        df = scrape_booking_hotel(
            location,
            checkin_date.strftime('%Y-%m-%d'),
            checkout_date.strftime('%Y-%m-%d'),
        )
        if not df.empty:
            st.dataframe(df)
            # utf-8-sig writes a BOM; the upload branch reads the file back
            # with the same encoding.
            df.to_csv('booking_hotels.csv', index=False, encoding='utf-8-sig')
            st.success("θ³ζη¬εζεοΌε·²ε²εθ³ booking_hotels.csv")
        else:
            st.error("ζͺθ½ζεη¬εθ³ζ")
elif mode == "δΈε³θ³ Google Sheet":
    # Upload workflow: push the CSV saved by the scrape workflow.
    st.header("δΈε³θ³ζθ³ Google Sheet")
    try:
        df = pd.read_csv('booking_hotels.csv', encoding='utf-8-sig')
        result = upload_to_google_sheets(df)
        st.success(result)
    except Exception as e:
        # Top-level UI boundary: show any failure (missing CSV, auth or API
        # error) to the user instead of crashing the page.
        st.error(f"δΈε³θ³ζζηΌηι―θͺ€οΌ{e}")
|