"""FastAPI service that ingests uploaded PDFs into a persisted Chroma store."""

import os
from functools import lru_cache
from typing import Optional

from fastapi import FastAPI, File, HTTPException, UploadFile
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Underscore-prefixed so the module's public surface stays `app` + `upload_pdf`.
_UPLOAD_DIR = "uploads"
_PERSIST_DIR = "chroma_db"
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

app = FastAPI()


def _safe_filename(raw_name: Optional[str]) -> str:
    """Return the base name of a client-supplied filename.

    The filename comes from the multipart request and is untrusted: directory
    components (``../``, absolute paths, Windows-style backslashes) are
    stripped so the upload cannot be written outside the upload directory.

    Raises:
        HTTPException: 400 if nothing usable remains after stripping.
    """
    # Normalise backslashes first so "..\\..\\x" is also reduced on POSIX.
    candidate = os.path.basename((raw_name or "").replace("\\", "/"))
    if candidate in ("", ".", "..") or "\x00" in candidate:
        raise HTTPException(status_code=400, detail="Invalid or missing filename.")
    return candidate


@lru_cache(maxsize=1)
def _get_embedding():
    """Build the embedding model once and reuse it across requests."""
    return HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)


@app.post("/upload")
async def upload_pdf(file: UploadFile = File(...)):
    """Store an uploaded PDF on disk and index its contents in Chroma.

    The upload is written under ``uploads/``, loaded with ``PyMuPDFLoader``,
    split into 1000-character chunks with a 200-character overlap, embedded
    with the ``all-MiniLM-L6-v2`` model, and added to the Chroma collection
    persisted in ``chroma_db``.

    Args:
        file: The multipart file sent by the client.

    Returns:
        A dict with a single ``"message"`` key confirming the upload.

    Raises:
        HTTPException: 400 if the supplied filename is empty or unusable.
    """
    safe_name = _safe_filename(file.filename)

    # The directory may not exist on a fresh deployment.
    os.makedirs(_UPLOAD_DIR, exist_ok=True)
    # Joined with "/" so the path string is unchanged for ordinary filenames.
    file_location = f"{_UPLOAD_DIR}/{safe_name}"

    with open(file_location, "wb") as f:
        f.write(await file.read())

    # NOTE(review): loading, splitting and embedding run synchronously inside
    # this coroutine; consider offloading to a worker if uploads are large.
    loader = PyMuPDFLoader(file_location)
    docs = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)

    vectordb = Chroma.from_documents(
        chunks,
        _get_embedding(),
        persist_directory=_PERSIST_DIR,
    )
    vectordb.persist()

    return {"message": f"✅ {file.filename} uploaded and processed into vector DB."}