Spaces:
Sleeping
Sleeping
File size: 937 Bytes
0040dff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import os
from loguru import logger
from .repository import VectorRepository
from .transform import clean, embed, load
class VectorService(VectorRepository):
    """Vector repository extended with a file-ingestion helper.

    The persistence calls used below (``create_collection`` and ``create``)
    are not defined in this class; presumably they are inherited from
    ``VectorRepository``. No ``__init__`` override is needed because
    construction is fully delegated to the base class.
    """

    async def store_file_content_in_db(
        self,
        filepath: str,
        chunk_size: int = 512,
        collection_name: str = "knowledgebase",
        collection_size: int = 768,
    ) -> None:
        """Chunk a file, embed each chunk, and store the results.

        The collection is created first, then every chunk yielded by
        ``load`` is cleaned, embedded, and written with ``self.create``
        together with the original chunk text and the file's base name.

        Args:
            filepath: Path of the file to ingest. Only its base name is
                stored alongside each chunk.
            chunk_size: Forwarded to ``load`` as its chunking parameter
                (unit is not visible from here -- TODO confirm).
            collection_name: Name of the collection that receives the
                records.
            collection_size: Forwarded to ``create_collection``; presumably
                the embedding vector dimension -- confirm against the
                repository implementation.
        """
        await self.create_collection(collection_name, collection_size)
        logger.debug(f"Inserting {filepath} content into database")

        # The base name is identical for every chunk, so compute it once
        # instead of on each loop iteration.
        filename = os.path.basename(filepath)

        async for chunk in load(filepath, chunk_size):
            logger.debug(f"Inserting '{chunk[0:20]}...' into database")
            embedding_vector = embed(clean(chunk))
            await self.create(
                collection_name, embedding_vector, chunk, filename
            )
# Module-level instance, created once when this module is imported.
vector_service = VectorService()