from buffalo_rag.scraper.scraper import BuffaloScraper
from buffalo_rag.embeddings.chunker import DocumentChunker
from buffalo_rag.vector_store.db import VectorStore
from buffalo_rag.model.rag import BuffaloRAG

# Module-level handles shared with the rest of the app; they are reassigned
# after every re-index so callers always query the freshest index.
# (Initializing them here is an assumption; in the full module they may be
# created at startup instead.)
vector_store = VectorStore()
rag = BuffaloRAG(vector_store=vector_store)


def run_scraper(seed_url: str, max_pages: int):
    """Run the web scraper in the background."""
    scraper = BuffaloScraper(seed_url=seed_url)
    scraper.scrape(max_pages=max_pages)

    # After scraping, re-chunk, re-embed, and rebuild the index. This was
    # previously duplicated inline; delegating to refresh_index() keeps the
    # two code paths identical.
    refresh_index()


def refresh_index():
    """Refresh the vector index in the background."""
    # Re-chunk the scraped documents and rebuild their embeddings.
    chunker = DocumentChunker()
    chunks = chunker.create_chunks()
    chunker.create_embeddings(chunks)

    # Reload the vector store and rebuild the RAG model on top of it.
    global vector_store, rag
    vector_store = VectorStore()
    rag = BuffaloRAG(vector_store=vector_store)
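

# --- Usage sketch (not from the original module; an assumption) ---
# The docstrings above say these helpers run "in the background". A minimal
# sketch of scheduling them via FastAPI's BackgroundTasks follows; the app
# object, route paths, and the max_pages default are hypothetical.
from fastapi import BackgroundTasks, FastAPI

app = FastAPI()


@app.post("/api/scrape")
def trigger_scrape(background_tasks: BackgroundTasks,
                   seed_url: str,
                   max_pages: int = 100):
    # Schedule the crawl so the HTTP request returns immediately;
    # run_scraper rebuilds the index once the crawl finishes.
    background_tasks.add_task(run_scraper, seed_url, max_pages)
    return {"status": "scraping started"}


@app.post("/api/refresh")
def trigger_refresh(background_tasks: BackgroundTasks):
    # Re-chunk, re-embed, and reload the vector store without blocking.
    background_tasks.add_task(refresh_index)
    return {"status": "refresh started"}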