apexherbert200 committed on
Commit
2e1ba41
·
1 Parent(s): 7dc760d

Working /experimenting

Browse files
Files changed (1) hide show
  1. real_estate.py +65 -18
real_estate.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from fastapi import FastAPI, HTTPException, Query
2
  from pydantic import BaseModel
3
  from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeout
@@ -6,7 +7,7 @@ import datetime
6
  import logging
7
 
8
  logging.basicConfig(level=logging.INFO)
9
- app = FastAPI(title="RealEstateSnap")
10
 
11
  class Listing(BaseModel):
12
  title: str
@@ -24,44 +25,90 @@ async def scrape_craigslist(location: str, limit: int = 10) -> List[Listing]:
24
  async with async_playwright() as p:
25
  browser = await p.chromium.launch(headless=True)
26
  page = await browser.new_page()
27
- url = f"https://{location.replace(' ', '').lower()}.craigslist.org/search/apa"
28
- logging.info(f"➡️ Going to {url}")
 
29
  await page.goto(url)
30
- items = await page.query_selector_all(".result-row")[:limit]
31
- for item in items:
32
  try:
33
  title = await item.inner_text(".result-title")
34
  href = await item.get_attribute(".result-title", "href")
35
- price = await item.inner_text(".result-price")
36
  listings.append(Listing(
37
  title=title.strip(),
38
- price=price.strip(),
39
- address="",
40
- bedrooms="",
41
- bathrooms="",
42
  listing_url=href,
43
  image_url=None,
44
  platform="craigslist",
45
  timestamp=datetime.datetime.utcnow().isoformat()
46
  ))
47
  except PlaywrightTimeout:
48
- logging.warning("Skipped a troublesome item")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  await browser.close()
50
  return listings
51
 
52
  @app.get("/realestate", response_model=List[Listing])
53
  async def get_listings(
54
- location: str = Query(...),
55
- platform: Optional[List[str]] = Query(["craigslist"])
 
 
 
56
  ):
57
- platform = [p.lower() for p in platform]
58
- results = []
59
- if "craigslist" in platform:
 
 
 
60
  try:
61
  results += await scrape_craigslist(location)
62
  except Exception as e:
63
- raise HTTPException(status_code=500, detail=str(e))
64
- # TODO: Add Zillow and Realtor here...
 
 
 
 
 
 
 
 
65
  if not results:
66
  raise HTTPException(status_code=404, detail="No listings found")
67
  return results
 
1
+ # main.py
2
  from fastapi import FastAPI, HTTPException, Query
3
  from pydantic import BaseModel
4
  from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeout
 
7
  import logging
8
 
9
  logging.basicConfig(level=logging.INFO)
10
+ app = FastAPI(title="RealEstateSnap", version="0.3.0")
11
 
12
  class Listing(BaseModel):
13
  title: str
 
25
  async with async_playwright() as p:
26
  browser = await p.chromium.launch(headless=True)
27
  page = await browser.new_page()
28
+ site = location.replace(' ', '').lower()
29
+ url = f"https://{site}.craigslist.org/search/apa"
30
+ logging.info(f"📦 Scraping Craigslist: {url}")
31
  await page.goto(url)
32
+ items = await page.query_selector_all(".result-row")
33
+ for item in items[:limit]:
34
  try:
35
  title = await item.inner_text(".result-title")
36
  href = await item.get_attribute(".result-title", "href")
37
+ price = (await item.inner_text(".result-price")).strip()
38
  listings.append(Listing(
39
  title=title.strip(),
40
+ price=price,
41
+ address=None,
42
+ bedrooms=None,
43
+ bathrooms=None,
44
  listing_url=href,
45
  image_url=None,
46
  platform="craigslist",
47
  timestamp=datetime.datetime.utcnow().isoformat()
48
  ))
49
  except PlaywrightTimeout:
50
+ logging.warning(" Timeout — skipping a Craigslist item")
51
+ await browser.close()
52
+ return listings
53
+
async def scrape_kijiji(location: str, limit: int = 10) -> List[Listing]:
    """Scrape apartment/condo listings from Kijiji for ``location``.

    Opens a headless Chromium page on the Kijiji apartments/condos search
    URL built from ``location`` and converts up to ``limit`` result cards
    (elements matching ``.search-item``) into ``Listing`` objects.

    Args:
        location: City name; spaces become hyphens and the text is
            lower-cased and percent-encoded before being placed in the URL.
        limit: Maximum number of result cards to read. Values below zero
            are treated as zero.

    Returns:
        The listings that could be read. Cards that time out or lack a
        title, price or link are skipped, so the list may be shorter than
        ``limit`` or empty.

    Raises:
        Any Playwright error raised while launching the browser or loading
        the search page propagates to the caller.
    """
    # Function-scope import: the file's top-level import block is not
    # guaranteed to provide urllib.parse.
    from urllib.parse import quote, urljoin

    base_url = "https://www.kijiji.ca"
    # Percent-encode so characters such as "/" or "?" in user input cannot
    # alter the path or query of the request URL.
    city = quote(location.strip().replace(' ', '-').lower(), safe="")
    # NOTE(review): the trailing location id "l1700271" is hard-coded and
    # does not vary with ``city`` — confirm this is intended.
    url = f"{base_url}/b-apartments-condos/{city}/c37l1700271"

    listings: List[Listing] = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            logging.info(f"📦 Scraping Kijiji: {url}")
            await page.goto(url)
            cards = await page.query_selector_all(".search-item")
            # Clamp so a negative limit cannot slice from the wrong end.
            for card in cards[:max(limit, 0)]:
                try:
                    # ElementHandle.inner_text()/get_attribute() take no
                    # selector argument, so resolve each child element first.
                    title_el = await card.query_selector(".title")
                    price_el = await card.query_selector(".price")
                    link_el = await card.query_selector("a.title")
                    href = await link_el.get_attribute("href") if link_el else None
                    if title_el is None or price_el is None or not href:
                        # Skip rather than emit a listing with a broken URL
                        # (e.g. "https://www.kijiji.caNone") or empty fields.
                        logging.warning("Incomplete Kijiji card — skipping")
                        continue
                    title = await title_el.inner_text()
                    price = (await price_el.inner_text()).strip()
                    listings.append(Listing(
                        title=title.strip(),
                        price=price,
                        address=None,
                        bedrooms=None,
                        bathrooms=None,
                        # urljoin handles both relative and absolute hrefs.
                        listing_url=urljoin(base_url, href),
                        image_url=None,
                        platform="kijiji",
                        timestamp=datetime.datetime.utcnow().isoformat()
                    ))
                except PlaywrightTimeout:
                    logging.warning("⏱ Timeout — skipping a Kijiji item")
        finally:
            # Close the browser even when navigation or parsing fails.
            await browser.close()
    return listings
84
 
@app.get("/realestate", response_model=List[Listing])
async def get_listings(
    location: str = Query(..., description="City name or ZIP/postal code"),
    platform: Optional[List[str]] = Query(
        None,
        description="Platforms to scrape: craigslist, kijiji. Defaults to all."
    )
):
    """Return rental listings for ``location`` from the selected platforms.

    Args:
        location: Passed unchanged to each platform scraper.
        platform: Platform names to scrape (case-insensitive). When omitted
            or empty, every supported platform is scraped.

    Returns:
        The combined listings, Craigslist results first, then Kijiji.

    Raises:
        HTTPException: 400 when none of the requested platforms is
            supported; 500 when a selected scraper raises; 404 when every
            selected scraper succeeds but no listing is found.
    """
    # Insertion order fixes the scrape order and therefore the result order.
    scrapers = {
        "craigslist": ("Craigslist", scrape_craigslist),
        "kijiji": ("Kijiji", scrape_kijiji),
    }

    if platform:
        selected = [name.strip().lower() for name in platform]
    else:
        selected = list(scrapers)
    logging.info(f"🧭 Platforms selected: {selected}")

    unknown = sorted({name for name in selected if name not in scrapers})
    if unknown:
        logging.warning(f"Ignoring unsupported platforms: {unknown}")
    if not any(name in scrapers for name in selected):
        # Without this check the request would fall through to a misleading
        # 404 "No listings found" although nothing was scraped.
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported platform(s): {', '.join(unknown)}"
        )

    results: List[Listing] = []
    for key, (label, scraper) in scrapers.items():
        if key not in selected:
            continue
        try:
            results += await scraper(location)
        except Exception as e:
            # logging.exception keeps the traceback; the client only sees a
            # generic message so internal details are not leaked.
            logging.exception(f"{label} scrape failed: {e}")
            raise HTTPException(
                status_code=500, detail=f"{label} scrape failed"
            ) from e

    if not results:
        raise HTTPException(status_code=404, detail="No listings found")
    return results