"""Configuration constants for the LLM-based web scraper."""

import os

# LLM model to use. Any model supported by LiteLLM can be chosen,
# e.g. "gpt-4o", "claude", "deepseek-chat".
# Other values previously tried here: "groq/llama3-8b-8192",
# "gemini-2.5-pro-exp-03-25".
# For a full list of supported models, refer to:
# https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
LLM_MODEL = "openai/gpt-4o-mini"

# API token for authentication with the LLM provider.
# It is read from the environment variable "OPENAI_API_KEY" and is None
# when that variable is not set.
# SECURITY: never paste provider keys into this file, not even inside
# comments. Any key that was ever committed must be treated as compromised
# and rotated with the provider.
API_TOKEN = os.getenv("OPENAI_API_KEY")

# Base URL of the website to scrape.
# Change this value to target a different site. Examples of paginated
# Yellow Pages search URLs (note the "{page_number}" placeholder):
# - Plumbers in Vancouver:
#   "https://www.yellowpages.ca/search/si/{page_number}/Plumbers/Vancouver+BC"
# - Restaurants in Montreal:
#   "https://www.yellowpages.ca/search/si/{page_number}/Restaurants/Montreal+QC"
# NOTE(review): the current value has no "{page_number}" placeholder --
# confirm how the crawler paginates this site.
BASE_URL = "https://gentledental.ai/"

# CSS selector targeting the main HTML element that contains the business
# information, so that only the relevant content is sent to the LLM instead
# of the entire HTML page.
# Example for Yellow Pages listings: "[class^='listing_right_section']"
# NOTE(review): an empty selector presumably means "no filtering" --
# verify against the crawler code.
CSS_SELECTOR = ""

# Maximum number of pages to crawl. Adjust this value based on how much data
# you want to scrape. Example: set to 5 to scrape 5 pages.
MAX_PAGES = 3

# Instructions for the LLM on what information to extract from the scraped content.
# The LLM will extract the following details for each business:
# - Name
# - Address
# - Website
# - Phone number
# - A one-sentence description
# The two adjacent string literals below are concatenated into one prompt.
SCRAPER_INSTRUCTIONS = (
    "Extract all business information: 'name', 'address', 'website'"
    ", 'phone number' and a one-sentence 'description' from the following content."
)