merasabkuch committed
Commit b7bc9eb · verified · 1 Parent(s): 3bd6085

Upload 6 files

Files changed (7)
  1. .gitattributes +1 -0
  2. Dockerfile +13 -0
  3. data/Data.pdf +3 -0
  4. main.py +121 -0
  5. requirements.txt +13 -0
  6. vectors_db/index.faiss +0 -0
  7. vectors_db/index.pkl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ data/Data.pdf filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
data/Data.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ef945caf75b8219067ce06bd625f8581c60c54d58d071ef8355d9cba9294d84
+ size 1378767
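
The tracked PDF is checked in as a Git LFS pointer: a three-line stub recording the spec version, the SHA-256 of the real blob, and its byte size; the .gitattributes rule above is what routes data/Data.pdf through LFS. A minimal sketch of reading such a pointer (not the git-lfs tool itself):

# Parse a Git LFS pointer file into its three "key value" fields (illustrative sketch)
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

info = parse_lfs_pointer("data/Data.pdf")   # the checked-in pointer, not the PDF itself
print(info["oid"], info["size"])            # sha256:..., size in bytes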
main.py ADDED
@@ -0,0 +1,90 @@
+ import os
+ import time
+
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from dotenv import load_dotenv
+
+ from langchain_groq import ChatGroq
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains.combine_documents import create_stuff_documents_chain
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain.chains import create_retrieval_chain
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
+
+ load_dotenv()
+ app = FastAPI()
+
+
+ @app.get("/")
+ def read_root():
+     return {"Hello": "World"}
+
+
+ class Query(BaseModel):
+     query_text: str
+
+
+ ## Load the Groq and Google API keys from the environment
+ groq_api_key = os.getenv("GROQ_API_KEY")
+ os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
+
+ llm = ChatGroq(groq_api_key=groq_api_key,
+                model_name="Llama3-8b-8192")
+
+ prompt = ChatPromptTemplate.from_template(
+     """
+     Answer the questions based on the provided context only.
+     Please provide the most accurate response based on the question.
+     <context>
+     {context}
+     </context>
+     Questions: {input}
+     """
+ )
+
+
+ def vector_embedding():
+     ## Data ingestion: load every PDF under ./data
+     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+     loader = PyPDFDirectoryLoader("./data")
+     docs = loader.load()
+     ## Chunk creation: split the first 20 pages into overlapping chunks
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+     final_documents = text_splitter.split_documents(docs[:20])
+     ## Embed the chunks and persist the FAISS index to disk
+     vectors = FAISS.from_documents(final_documents, embeddings)
+     vectors.save_local("vectors_db")
+
+
+ @app.post("/groq")
+ def read_item(query: Query):
+     try:
+         embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+         vectors = FAISS.load_local("vectors_db", embeddings, allow_dangerous_deserialization=True)
+     except Exception:
+         print("Vector store not found; run /setup first")
+         return {"response": "Vector store not found; run /setup first"}
+     prompt1 = query.query_text
+     if prompt1:
+         start = time.process_time()
+         document_chain = create_stuff_documents_chain(llm, prompt)
+         retriever = vectors.as_retriever()
+         retrieval_chain = create_retrieval_chain(retriever, document_chain)
+         response = retrieval_chain.invoke({"input": prompt1})
+         print("Response time:", time.process_time() - start)
+         return response["answer"]
+     return {"response": "No Query Found"}
+
+
+ @app.get("/setup")
+ def setup():
+     vector_embedding()
+     return {"response": "Vector Store DB Is Ready"}
+
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
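
In use, the API is driven by two calls: GET /setup builds and persists the FAISS index from ./data, then POST /groq answers questions with a JSON body matching the Query model. A client sketch (the base URL is a placeholder for wherever the app is running):

import requests

BASE = "http://localhost:8000"  # placeholder; use the Space URL once deployed

# One-time: ingest ./data, embed, and save the index to ./vectors_db
print(requests.get(f"{BASE}/setup").json())

# Ask a question; the body must match the Query model (query_text: str)
payload = {"query_text": "Summarise the document."}
print(requests.post(f"{BASE}/groq", json=payload).json())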
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ faiss-cpu
+ groq
+ langchain-groq
+ PyPDF2
+ langchain_google_genai
+ langchain
+ # streamlit
+ langchain_community
+ python-dotenv
+ pypdf
+ google-cloud-aiplatform>=1.38
+ fastapi
+ uvicorn[standard]
vectors_db/index.faiss ADDED
Binary file (230 kB)
vectors_db/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e30048f3de2b8bbb4f14bee30bda4e80e2b558bb112aa27fe78e4ba4db61eedb
+ size 74109
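
Because the commit ships the prebuilt index (index.faiss plus the pickled docstore in index.pkl), /groq works without calling /setup first. The same store can be inspected offline; a sketch assuming GOOGLE_API_KEY is set, since queries must be embedded with the same model used at build time:

from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# allow_dangerous_deserialization is required because index.pkl is a pickle
vectors = FAISS.load_local("vectors_db", embeddings, allow_dangerous_deserialization=True)

# Query the store directly, bypassing the LLM
for doc in vectors.similarity_search("example query", k=3):
    print(doc.page_content[:200])
    print("---")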