Spaces:
Running
Running
import os

from loguru import logger

from .repository import VectorRepository
from .transform import clean, embed, load
class VectorService(VectorRepository):
    """Service that loads a file, embeds its chunks and stores them.

    Relies on ``VectorRepository`` for ``create_collection`` and ``create``.
    """

    def __init__(self) -> None:
        """Initialise the service by delegating to the parent initialiser."""
        super().__init__()

    async def store_file_content_in_db(
        self,
        filepath: str,
        chunk_size: int = 512,
        collection_name: str = "knowledgebase",
        collection_size: int = 768,
    ) -> None:
        """Embed the content of *filepath* chunk by chunk and store it.

        The collection is created first, then every chunk yielded by
        ``load`` is cleaned, embedded and inserted together with the
        original chunk text and the file's base name.

        Args:
            filepath: Path of the file to ingest; forwarded to ``load``.
            chunk_size: Chunk size forwarded to ``load``.
            collection_name: Collection name passed to
                ``create_collection`` and ``create``.
            collection_size: Size passed to ``create_collection``
                (presumably the embedding dimensionality -- TODO confirm
                against ``VectorRepository``).
        """
        await self.create_collection(collection_name, collection_size)
        logger.debug(f"Inserting {filepath} content into database")

        # The base name is identical for every chunk of the same file, so
        # compute it once rather than on each loop iteration.
        filename = os.path.basename(filepath)

        async for chunk in load(filepath, chunk_size):
            logger.debug(f"Inserting '{chunk[0:20]}...' into database")
            # NOTE(review): clean/embed are called synchronously inside this
            # coroutine; if they are CPU-heavy they may stall the event
            # loop -- confirm against the transform module.
            embedding_vector = embed(clean(chunk))
            await self.create(
                collection_name, embedding_vector, chunk, filename
            )
# Module-level VectorService instance, constructed when this module is
# imported.
vector_service = VectorService()