|
import os

from fastapi import FastAPI, File, HTTPException, UploadFile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
|
|
|
# FastAPI application instance; the /upload route below is registered on it.
app = FastAPI()
|
|
|
@app.post("/upload")
async def upload_pdf(file: UploadFile = File(...)):
    """Save an uploaded PDF under ``uploads/`` and index it into Chroma.

    The file is written to disk, loaded with ``PyMuPDFLoader``, split into
    chunks (``chunk_size=1000``, ``chunk_overlap=200``), embedded with the
    ``all-MiniLM-L6-v2`` HuggingFace model and stored in a Chroma vector
    store persisted in the ``chroma_db`` directory.

    Args:
        file: The uploaded file from the multipart form field ``file``.

    Returns:
        A dict with a single ``"message"`` key confirming the upload.

    Raises:
        HTTPException: 400 if the upload has no usable filename.
    """
    # The filename is supplied by the client and must not be trusted: keep
    # only its final path component so values such as "../../etc/passwd"
    # (or Windows-style "..\\..\\x") cannot escape the uploads directory.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(raw_name)
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Invalid or missing filename.")

    # Create the target directory on demand so the first upload does not
    # fail with FileNotFoundError.
    os.makedirs("uploads", exist_ok=True)
    file_location = f"uploads/{safe_name}"

    with open(file_location, "wb") as f:
        f.write(await file.read())

    loader = PyMuPDFLoader(file_location)
    docs = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)

    # NOTE(review): the embedding model is instantiated on every request and
    # this synchronous work runs inside an async handler; consider creating
    # the model once at module level if upload latency matters.
    embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectordb = Chroma.from_documents(chunks, embedding, persist_directory="chroma_db")
    vectordb.persist()

    return {"message": f"✅ {file.filename} uploaded and processed into vector DB."}
|
|