meshsl committed on
Commit
5e7e592
·
verified ·
1 Parent(s): ec9a43f

Delete upload_pdf.py

Browse files
Files changed (1) hide show
  1. upload_pdf.py +0 -26
upload_pdf.py DELETED
@@ -1,26 +0,0 @@
1
- import os
2
- from fastapi import FastAPI, File, UploadFile
3
- from langchain_community.document_loaders import PyMuPDFLoader
4
- from langchain.text_splitter import RecursiveCharacterTextSplitter
5
- from langchain_community.embeddings import HuggingFaceEmbeddings
6
- from langchain_community.vectorstores import Chroma
7
-
8
- app = FastAPI()
9
-
10
@app.post("/upload")
async def upload_pdf(file: UploadFile = File(...)):
    """Save an uploaded PDF under ``uploads/`` and index it into Chroma.

    The file is written to disk, loaded with ``PyMuPDFLoader``, split into
    1000-character chunks with a 200-character overlap, embedded with the
    ``all-MiniLM-L6-v2`` model, and stored in the ``chroma_db`` directory.

    Args:
        file: The multipart upload supplied by the client.

    Returns:
        A dict with a single ``"message"`` key confirming the upload.

    Raises:
        HTTPException: 400 if the upload carries no usable filename.
    """
    # Local import: HTTPException is not among the file's top-level imports.
    from fastapi import HTTPException

    # The filename is client-controlled. Keep only its last path component so
    # names such as "../../etc/passwd" or "C:\\x\\y.pdf" cannot escape uploads/.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(raw_name)
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Invalid or missing filename.")

    # The target directory may not exist on a fresh deployment.
    os.makedirs("uploads", exist_ok=True)
    file_location = f"uploads/{safe_name}"
    with open(file_location, "wb") as f:
        f.write(await file.read())

    loader = PyMuPDFLoader(file_location)
    docs = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)

    embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectordb = Chroma.from_documents(chunks, embedding, persist_directory="chroma_db")
    vectordb.persist()

    return {"message": f"✅ {file.filename} uploaded and processed into vector DB."}