SakshamLak committed on
Commit
867fb31
·
verified ·
1 Parent(s): 2b03f76

Upload 2 files

Browse files
buffalo_rag/api/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # API Directory
2
+
3
+ This directory contains the FastAPI backend for the UB-VSA project. It includes routes, schemas, and static assets for serving the API endpoints.
buffalo_rag/api/background_tasks.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from buffalo_rag.scraper.scraper import BuffaloScraper
2
+ from buffalo_rag.embeddings.chunker import DocumentChunker
3
+ from buffalo_rag.vector_store.db import VectorStore
4
+ from buffalo_rag.model.rag import BuffaloRAG
5
+
6
def run_scraper(seed_url: str, max_pages: int) -> None:
    """Run the web scraper, then rebuild the index from what it collected.

    Args:
        seed_url: URL handed to ``BuffaloScraper`` as its ``seed_url``.
        max_pages: Value passed through to ``BuffaloScraper.scrape`` as
            ``max_pages``.

    Side effects:
        Once scraping finishes, ``refresh_index()`` re-creates the chunks and
        embeddings and rebinds this module's ``vector_store`` and ``rag``
        globals to freshly constructed objects.
    """
    scraper = BuffaloScraper(seed_url=seed_url)
    scraper.scrape(max_pages=max_pages)

    # The post-scrape steps are exactly the manual refresh path, so delegate
    # to it instead of keeping a second copy that could drift out of sync.
    refresh_index()
23
+
24
def refresh_index():
    """Rebuild the embeddings, then swap in a new vector store and RAG model.

    Takes no arguments and returns nothing; the results are published by
    rebinding the module-level ``vector_store`` and ``rag`` names.
    """
    # NOTE(review): these rebind names in this module's namespace only. Code
    # that imported the objects by name from elsewhere would keep the old
    # instances -- confirm how callers read ``vector_store`` / ``rag``.
    global vector_store, rag

    # Regenerate chunks and their embeddings before anything is reloaded.
    document_chunker = DocumentChunker()
    document_chunker.create_embeddings(document_chunker.create_chunks())

    # A new store instance is created after the embeddings were rebuilt.
    vector_store = VectorStore()

    # Point the RAG model at the store created just above.
    rag = BuffaloRAG(vector_store=vector_store)