import datetime
import logging
import re
from typing import List, Optional

from fastapi import FastAPI, HTTPException, Query
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeout
from pydantic import BaseModel
|
|
|
# Configure the root logger so INFO-level records from this module are emitted.
logging.basicConfig(level=logging.INFO)

# Application object; the route handlers in this file register on it.
app = FastAPI(title="RealEstateSnap")
|
|
|
class Listing(BaseModel):
    """A single real-estate listing as returned by the API.

    Optional fields carry an explicit ``None`` default so the model accepts
    the same input under pydantic v1 (where a bare ``Optional`` annotation
    implies a ``None`` default) and pydantic v2 (where it would otherwise be
    a required field).
    """

    # Headline text of the listing.
    title: str
    # Price as free text, not a parsed number.
    price: Optional[str] = None
    # Street address or neighbourhood, when known.
    address: Optional[str] = None
    # Bedroom count as free text, when known.
    bedrooms: Optional[str] = None
    # Bathroom count as free text, when known.
    bathrooms: Optional[str] = None
    # Link to the listing.
    listing_url: str
    # Link to a listing image, when known.
    image_url: Optional[str] = None
    # Name of the source site, e.g. "craigslist".
    platform: str
    # Time the record was created, as a string.
    timestamp: str
|
|
|
async def scrape_craigslist(location: str, limit: int = 10) -> List[Listing]:
    """Scrape apartment listings from a city's Craigslist search page.

    Args:
        location: City name used as the Craigslist subdomain. Spaces are
            removed and the value is lower-cased ("New York" -> "newyork").
        limit: Maximum number of result rows to read from the page.

    Returns:
        One ``Listing`` per parsed ``.result-row`` element, at most ``limit``.
        Rows without a title link or without an ``href`` are skipped; a row
        without a price element gets ``price=None``.

    Raises:
        ValueError: If ``location`` does not reduce to a single valid
            hostname label.
    """
    subdomain = location.replace(" ", "").lower()
    # ``location`` is caller-supplied and ends up inside the hostname. Reject
    # anything that is not a plain DNS label so characters such as "." or "/"
    # cannot redirect the request to a different host.
    if not re.fullmatch(r"[a-z0-9]+(?:-[a-z0-9]+)*", subdomain):
        raise ValueError(f"Invalid Craigslist location: {location!r}")
    url = f"https://{subdomain}.craigslist.org/search/apa"

    listings: List[Listing] = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            logging.info("➡️ Going to %s", url)
            await page.goto(url)

            # The await must be parenthesised: subscripting binds tighter
            # than ``await``, so the unparenthesised form slices the
            # coroutine object and raises TypeError.
            rows = (await page.query_selector_all(".result-row"))[:limit]

            for row in rows:
                try:
                    # ElementHandle.inner_text()/get_attribute() take no
                    # selector, so resolve the child elements first.
                    link = await row.query_selector(".result-title")
                    if link is None:
                        logging.warning("Skipped a troublesome item")
                        continue
                    title = await link.inner_text()
                    href = await link.get_attribute("href")
                    price_el = await row.query_selector(".result-price")
                    price = None
                    if price_el is not None:
                        price = (await price_el.inner_text()).strip()
                except PlaywrightTimeout:
                    logging.warning("Skipped a troublesome item")
                    continue

                if not href:
                    # listing_url is a required string on the model.
                    logging.warning("Skipped a troublesome item")
                    continue

                listings.append(Listing(
                    title=title.strip(),
                    price=price,
                    address="",
                    bedrooms="",
                    bathrooms="",
                    listing_url=href,
                    image_url=None,
                    platform="craigslist",
                    timestamp=datetime.datetime.utcnow().isoformat(),
                ))
        finally:
            # Close the browser even when navigation or parsing fails.
            await browser.close()
    return listings
|
|
|
@app.get("/realestate", response_model=List[Listing])
async def get_listings(
    location: str = Query(...),
    platform: Optional[List[str]] = Query(["craigslist"])
):
    """Return listings for ``location`` from the requested platforms.

    Args:
        location: City name forwarded to the platform scrapers.
        platform: Platform names to query, matched case-insensitively.
            ``None`` is treated as the declared default, ``["craigslist"]``.

    Returns:
        The combined list of listings from every requested platform.

    Raises:
        HTTPException: 500 with the error text when a scraper fails, or 404
            when no listings were collected.
    """
    # The annotation allows None (e.g. on a direct call), so guard before
    # iterating instead of failing with a TypeError.
    names = platform if platform is not None else ["craigslist"]
    requested = {name.lower() for name in names}

    results: List[Listing] = []
    if "craigslist" in requested:
        try:
            results += await scrape_craigslist(location)
        except Exception as e:
            # Record the traceback server-side; the client only gets the message.
            logging.exception("Craigslist scrape failed for location %r", location)
            raise HTTPException(status_code=500, detail=str(e)) from e

    if not results:
        raise HTTPException(status_code=404, detail="No listings found")
    return results
|
|