# Spaces: Build error  (appears to be page-header residue captured with this file; not part of the program)
import logging
from datetime import timedelta

import gspread
import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
from google.oauth2.service_account import Credentials
# Google Sheets credentials
# OAuth scope requested when the service-account credentials are built.
SCOPE = ['https://www.googleapis.com/auth/spreadsheets']
# Key file handed to Credentials.from_service_account_file.
# NOTE(review): the path is relative, so it presumably has to sit in the
# process working directory - confirm for the deployment.
SERVICE_ACCOUNT_FILE = "realtime-441511-f5708eabdf26.json"
# Spreadsheet opened by upload_to_google_sheets (its first worksheet is written).
SPREADSHEET_URL = "https://docs.google.com/spreadsheets/d/1tIsXCbB8P6ZxdnZNnv7S7BBWbbT7lrSjW990zG-vQAA/edit?gid=0#gid=0"

# Streamlit app
# Page title plus a sidebar selector; `mode` decides which branch of the
# script at the bottom of the file runs.
# NOTE(review): the non-ASCII literals look mis-encoded in this copy of the
# file; they are reproduced unchanged - confirm against the original text.
st.title("Booking.com ε°ει£―εΊθ³ζη¬εθεζ")
st.sidebar.header("εθ½ιΈζ")
mode = st.sidebar.selectbox("ιΈζ樑εΌ", ["θ³ζη¬ε", "δΈε³θ³ Google Sheet"])
def _parse_price(raw_price):
    """Return the integer amount in *raw_price*, or None if it is not purely numeric."""
    cleaned = (
        raw_price.replace('TWD', '')
        .replace(' ', '')
        .replace(',', '')
        .strip()
    )
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscripts that int() rejects with ValueError.
    return int(cleaned) if cleaned.isdecimal() else None


def scrape_booking_hotel(location, checkin_date, checkout_date, timeout=15):
    """Scrape one Booking.com search-results request into a DataFrame.

    Args:
        location: Search text sent as the ``ss`` query parameter.
        checkin_date: Check-in date string sent as ``checkin``
            (the caller in this file passes ``YYYY-MM-DD``).
        checkout_date: Check-out date string sent as ``checkout``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A de-duplicated DataFrame with one row per property card and the
        name, price, rating and description columns. An empty DataFrame is
        returned when the HTTP request fails.
    """
    url = "https://www.booking.com/searchresults.zh-tw.html"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    }
    params = {
        'ss': location,
        'checkin': checkin_date,
        'checkout': checkout_date,
        'group_adults': '2',
        'no_rooms': '1',
        'group_children': '0',
        'dest_type': 'city',
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Still best-effort (callers get an empty frame), but leave a trace.
        logging.getLogger(__name__).warning("Booking.com request failed: %s", exc)
        return pd.DataFrame()

    soup = BeautifulSoup(response.text, 'html.parser')
    hotels_data = []

    # NOTE(review): the non-ASCII literals below look mis-encoded in this copy
    # of the file; they are reproduced unchanged - confirm against the original.
    for hotel in soup.find_all('div', {'data-testid': 'property-card'}):
        try:
            name_elem = hotel.find('div', {'data-testid': 'title'})
            name = name_elem.text.strip() if name_elem else "η‘θ³ζ"

            price_elem = hotel.find('span', {'data-testid': 'price-and-discounted-price'})
            # A missing price element yields None, same as a non-numeric price.
            price = _parse_price(price_elem.text) if price_elem else None

            # The rating is located through generated CSS class names.
            rating_container = hotel.find('div', {'class': 'a3b8729ab1'})
            rating_elem = rating_container.find('div', {'class': 'ac4a7896c7'}) if rating_container else None
            rating = rating_elem.text.strip() if rating_elem else "η‘θ©ε"

            description_elem = hotel.find('div', {'data-testid': 'recommended-units'})
            if description_elem:
                room_type = description_elem.find('h4')
                room_type = room_type.text.strip() if room_type else ""
                bed_info = description_elem.find('div')
                bed_info = bed_info.text.strip() if bed_info else ""
                # `string=` is the current name of the deprecated `text=` argument.
                cancellation = description_elem.find('strong', string='ε―ε θ²»εζΆ')
                cancellation = "ε―ε θ²»εζΆ" if cancellation else ""
                payment = description_elem.find('strong', string='η‘ιθ¨ι')
                payment = "η‘ιθ¨ι" if payment else ""
                description = f"{room_type} | {bed_info} | {cancellation} | {payment}".strip(' |')
            else:
                description = "η‘θͺͺζ"

            hotels_data.append({
                'ι£―εΊε稱': name,
                'εΉζ Ό': price,
                'θ©ε': rating,
                'θͺͺζ': description
            })
        except AttributeError:
            # Skip a card whose markup does not match the expected structure.
            continue

    return pd.DataFrame(hotels_data).drop_duplicates()
def upload_to_google_sheets(df):
    """Write *df* (header row plus data rows) to the first worksheet of SPREADSHEET_URL.

    Args:
        df: DataFrame to upload; every cell is sent as a string.

    Returns:
        The success message string shown to the user.

    Exceptions raised while loading credentials or calling the Sheets API are
    not handled here and propagate to the caller.
    """
    creds = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPE)
    gs = gspread.authorize(creds)
    sheet = gs.open_by_url(SPREADSHEET_URL)
    worksheet = sheet.get_worksheet(0)
    # Blank out missing values first: astype(str) alone would upload them as
    # the literal text "nan" (e.g. prices that were None before the CSV
    # round-trip).
    df1 = df.astype(object).where(df.notna(), "").astype(str)
    # NOTE(review): rows already in the sheet below the new data do not appear
    # to be cleared by update() - confirm whether a clear() is wanted first.
    worksheet.update([df1.columns.values.tolist()] + df1.values.tolist())
    return "θ³ζε·²ζεδΈε³ε° Google SheetοΌ"
# Streamlit app implementation
# One branch runs per script execution, selected by the sidebar `mode`.
if mode == "θ³ζη¬ε":
    st.header("η¬ει£―εΊθ³ζ")
    location = st.text_input("θΌΈε ₯ζ₯θ©’ε°ι»", "ε°ε")
    checkin_date = st.date_input("ιΈζε ₯δ½ζ₯ζ")
    # Both pickers used to default to today, which produced a zero-night
    # search. Default the check-out to the day after check-in and do not
    # allow anything earlier.
    earliest_checkout = checkin_date + timedelta(days=1)
    checkout_date = st.date_input(
        "ιΈζιζΏζ₯ζ",
        value=earliest_checkout,
        min_value=earliest_checkout,
    )
    if st.button("ιε§η¬ε"):
        df = scrape_booking_hotel(
            location,
            checkin_date.strftime('%Y-%m-%d'),
            checkout_date.strftime('%Y-%m-%d'),
        )
        if not df.empty:
            st.dataframe(df)
            # utf-8-sig matches the encoding the upload branch reads back.
            df.to_csv('booking_hotels.csv', index=False, encoding='utf-8-sig')
            st.success("θ³ζη¬εζεοΌε·²ε²εθ³ booking_hotels.csv")
        else:
            st.error("ζͺθ½ζεη¬εθ³ζ")
elif mode == "δΈε³θ³ Google Sheet":
    st.header("δΈε³θ³ζθ³ Google Sheet")
    # Top-level boundary: any failure (missing CSV, credentials, Sheets API)
    # is reported to the user instead of crashing the page.
    try:
        df = pd.read_csv('booking_hotels.csv', encoding='utf-8-sig')
        result = upload_to_google_sheets(df)
        st.success(result)
    except Exception as e:
        st.error(f"δΈε³θ³ζζηΌηι―θͺ€οΌ{e}")