apexherbert200 committed on
Commit
a8c57d0
·
1 Parent(s): a6f3e76

Added test1

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -1
  2. scrape.py +69 -0
  3. test1.py +2 -7
Dockerfile CHANGED
@@ -53,4 +53,4 @@ RUN python -m playwright install chromium
53
  EXPOSE 7860
54
 
55
  # Run the FastAPI application
56
- CMD ["python", "-m", "uvicorn", "test1:app", "--host", "0.0.0.0", "--port", "7860"]
 
53
  EXPOSE 7860
54
 
55
  # Run the FastAPI application
56
+ CMD ["python", "-m", "uvicorn", "scrape:app", "--host", "0.0.0.0", "--port", "7860"]
scrape.py CHANGED
@@ -286,6 +286,75 @@ async def scrape_page(
286
 
287
 
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
290
 
291
 
 
286
 
287
 
288
 
289
@app.get("/search_leads")
async def search_leads(
    query: str = Query(..., description="Search term for business leads")
):
    """Search Google Maps for ``query`` and return the visible listings.

    A headless Chromium instance opens Google Maps, submits the query through
    the search box, waits for the first result cards to appear, and extracts
    name, address, phone and website from each ``div[role="article"]`` card
    currently in the DOM. No scrolling is performed, so only the initially
    rendered listings are returned.

    Args:
        query: Search term typed into the Google Maps search box.

    Returns:
        A dict with the keys ``query`` (the search term), ``results_count``
        (number of listings kept) and ``results`` (list of dicts with the
        keys ``name``, ``address``, ``phone`` and ``website``; listings
        without a name are dropped).

    Raises:
        HTTPException: With status 500 when any step of the scrape fails
            (navigation, selector timeout, evaluation, ...).
    """
    logger.info(f"Searching Google Maps for: {query}")

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)

        try:
            # Created inside the try so that a failure here still closes the
            # browser and is reported as an HTTP 500 like every other step.
            page = await browser.new_page()

            # Go to Google Maps
            await page.goto("https://www.google.com/maps", wait_until="networkidle")

            # Accept cookies if present (optional, depends on region).
            # Best effort: catch Exception rather than using a bare except so
            # that task cancellation and KeyboardInterrupt still propagate.
            try:
                await page.click('button[aria-label="Accept all"]', timeout=3000)
            except Exception:
                logger.debug("No cookie consent button was clicked; continuing")

            # Type the query in the search box and submit it
            await page.fill('input#searchboxinput', query)
            await page.click('button#searchbox-searchbutton')

            # Wait for search results to load - selector for listing cards
            await page.wait_for_selector('div[role="article"]', timeout=10000)

            # Only the listings that are already rendered are scraped; the
            # results container is not scrolled to load more items.
            results = await page.evaluate("""
                () => {
                    const listings = [];
                    const elements = document.querySelectorAll('div[role="article"]');
                    elements.forEach(el => {
                        const nameEl = el.querySelector('h3 span');
                        const name = nameEl ? nameEl.innerText : null;

                        const addressEl = el.querySelector('[data-tooltip="Address"]');
                        const address = addressEl ? addressEl.innerText : null;

                        const phoneEl = el.querySelector('button[data-tooltip="Copy phone number"]');
                        const phone = phoneEl ? phoneEl.getAttribute('aria-label')?.replace('Copy phone number ', '') : null;

                        const websiteEl = el.querySelector('a[aria-label*="Website"]');
                        const website = websiteEl ? websiteEl.href : null;

                        listings.push({name, address, phone, website});
                    });
                    return listings;
                }
            """)

            # Filter out entries for which no name could be extracted
            filtered = [r for r in results if r['name']]

            return {"query": query, "results_count": len(filtered), "results": filtered}

        except Exception as e:
            logger.error(f"Error during Google Maps search scraping: {str(e)}")
            raise HTTPException(
                status_code=500, detail=f"Search scraping error: {str(e)}"
            ) from e

        finally:
            # Single cleanup point: runs exactly once on both the success and
            # the error path.
            await browser.close()
354
+
355
+
356
+
357
+
358
 
359
 
360
 
test1.py CHANGED
@@ -1,10 +1,5 @@
1
- from fastapi import FastAPI, HTTPException, Query
2
- from pydantic import BaseModel
3
- from playwright.async_api import async_playwright
4
- import asyncio
5
- import base64
6
- import logging
7
- from typing import List, Optional
8
 
9
 
10
  app = FastAPI(title="LeadGen Scraper", description="A Lead generation scraper")
 
1
+ from fastapi import Query, FastAPI
2
+
 
 
 
 
 
3
 
4
 
5
  app = FastAPI(title="LeadGen Scraper", description="A Lead generation scraper")