lozanopastor committed on
Commit 74390ec · verified · 1 Parent(s): 6498416

Update app.py

Files changed (1)
  1. app.py +119 -115
app.py CHANGED
@@ -1,115 +1,119 @@
-import streamlit as st
-from PyPDF2 import PdfReader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-import os
-from langchain_community.embeddings import HuggingFaceEmbeddings  # Using Hugging Face embeddings
-from langchain.vectorstores import FAISS
-from langchain_groq import ChatGroq
-from langchain.chains.question_answering import load_qa_chain
-from langchain.prompts import PromptTemplate
-from dotenv import load_dotenv
-
-# Load environment variables
-load_dotenv()
-os.getenv("GROQ_API_KEY")
-
-def get_pdf_text(pdf_docs):
-    """Extracts text from uploaded PDF files."""
-    text = ""
-    for pdf in pdf_docs:
-        pdf_reader = PdfReader(pdf)
-        for page in pdf_reader.pages:
-            text += page.extract_text()
-    return text
-
-def get_text_chunks(text):
-    """Splits extracted text into manageable chunks."""
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
-    chunks = text_splitter.split_text(text)
-    return chunks
-
-def get_vector_store(text_chunks):
-    """Creates and saves a FAISS vector store from text chunks."""
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")  # Using Hugging Face embeddings
-    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
-    vector_store.save_local("faiss_index")
-
-def get_conversational_chain():
-    """Sets up a conversational chain using Groq LLM."""
-    prompt_template = """
-    Answer the question as detailed as possible from the provided context. If the answer is not in
-    the provided context, just say, "answer is not available in the context." Do not provide incorrect answers.
-
-    Context:
-    {context}?
-
-    Question:
-    {question}
-
-    Answer:
-    """
-
-    model = ChatGroq(
-        temperature=0.3,
-        model_name="deepseek-r1-distill-llama-70b",  # Using Mixtral model through Groq
-        groq_api_key=os.getenv("GROQ_API_KEY")
-    )
-    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
-    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
-    return chain
-
-def user_input(user_question):
-    """Handles user queries by retrieving answers from the vector store."""
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")  # Using Hugging Face embeddings
-
-    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
-    docs = new_db.similarity_search(user_question)
-
-    chain = get_conversational_chain()
-
-    response = chain(
-        {"input_documents": docs, "question": user_question},
-        return_only_outputs=True
-    )
-
-    st.markdown(f"### Reply:\n{response['output_text']}")
-
-def main():
-    """Main function to run the Streamlit app."""
-    st.set_page_config(page_title="Chat PDF", page_icon=":books:", layout="wide")
-    st.title("Chat with PDF using DeepSeek Ai")
-
-    st.sidebar.header("Upload & Process PDF Files")
-    st.sidebar.markdown(
-        "Using DeepSeek R1 model for advanced conversational capabilities.")
-
-    with st.sidebar:
-        pdf_docs = st.file_uploader(
-            "Upload your PDF files:",
-            accept_multiple_files=True,
-            type=["pdf"]
-        )
-        if st.button("Submit & Process"):
-            with st.spinner("Processing your files..."):
-                raw_text = get_pdf_text(pdf_docs)
-                text_chunks = get_text_chunks(raw_text)
-                get_vector_store(text_chunks)
-                st.success("PDFs processed and indexed successfully!")
-
-    st.markdown(
-        "### Ask Questions from Your PDF Files :mag:\n"
-        "Once you upload and process your PDFs, type your questions below."
-    )
-
-    user_question = st.text_input("Enter your question:", placeholder="What do you want to know?")
-
-    if user_question:
-        with st.spinner("Fetching your answer..."):
-            user_input(user_question)
-
-    st.sidebar.info(
-        "**Note:** This app uses DeepSeek R1 model for answering questions accurately."
-    )
-
-if __name__ == "__main__":
-    main()

+import streamlit as st
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+import os
+from langchain_community.embeddings import HuggingFaceEmbeddings  # Using Hugging Face embeddings
+from langchain.vectorstores import FAISS
+from langchain_groq import ChatGroq
+from langchain.chains.question_answering import load_qa_chain
+from langchain.prompts import PromptTemplate
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+os.getenv("GROQ_API_KEY")
+
+def get_pdf_text(pdf_docs):
+    """Extracts text from uploaded PDF files."""
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            text += page.extract_text() or ""  # extract_text() can return None on image-only pages
+    return text
+
+def get_text_chunks(text):
+    """Splits extracted text into manageable chunks."""
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+    chunks = text_splitter.split_text(text)
+    return chunks
+
+def get_vector_store(text_chunks):
+    """Creates and saves a FAISS vector store from text chunks."""
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")  # Using Hugging Face embeddings
+    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+    vector_store.save_local("faiss_index")
+
+def get_conversational_chain():
+    """Sets up a conversational chain using Groq LLM."""
+    prompt_template = """
+    Answer the question as detailed as possible from the provided context. If the answer is not in
+    the provided context, just say, "answer is not available in the context." Do not provide incorrect answers.
+
+    Context:
+    {context}
+
+    Question:
+    {question}
+
+    Answer:
+    """
+
+    model = ChatGroq(
+        temperature=0.3,
+        model_name="deepseek-r1-distill-llama-70b",  # DeepSeek R1 distill model served through Groq
+        groq_api_key=os.getenv("GROQ_API_KEY")
+    )
+    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
+    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
+    return chain
+
+def user_input(user_question):
+    """Handles user queries by retrieving answers from the vector store."""
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")  # Using Hugging Face embeddings
+
+    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
+    docs = new_db.similarity_search(user_question)
+
+    chain = get_conversational_chain()
+
+    response = chain(
+        {"input_documents": docs, "question": user_question},
+        return_only_outputs=True
+    )
+
+    # DeepSeek R1 wraps its reasoning in <think>...</think>; split it out for display
+    thought, sep, answer = response['output_text'].partition("</think>")
+    with st.expander("Model Thought Process"):
+        st.markdown(thought.replace("<think>", "").strip() if sep else "_No reasoning trace returned._")
+    st.markdown(f"### Reply:\n{(answer if sep else thought).strip()}")
+
+def main():
+    """Main function to run the Streamlit app."""
+    st.set_page_config(page_title="Chat PDF", page_icon=":books:", layout="wide")
+    st.title("Chat with PDF using DeepSeek AI")
+
+    st.sidebar.header("Upload & Process PDF Files")
+    st.sidebar.markdown(
+        "Using the DeepSeek R1 model for advanced conversational capabilities.")
+
+    with st.sidebar:
+        pdf_docs = st.file_uploader(
+            "Upload your PDF files:",
+            accept_multiple_files=True,
+            type=["pdf"]
+        )
+        if st.button("Submit & Process"):
+            with st.spinner("Processing your files..."):
+                raw_text = get_pdf_text(pdf_docs)
+                text_chunks = get_text_chunks(raw_text)
+                get_vector_store(text_chunks)
+                st.success("PDFs processed and indexed successfully!")
+
+    st.markdown(
+        "### Ask Questions from Your PDF Files :mag:\n"
+        "Once you upload and process your PDFs, type your questions below."
+    )
+
+    user_question = st.text_input("Enter your question:", placeholder="What do you want to know?")
+
+    if user_question:
+        with st.spinner("Fetching your answer..."):
+            user_input(user_question)
+
+    st.sidebar.info(
+        "**Note:** This app uses the DeepSeek R1 model to answer questions accurately."
+    )
+
+if __name__ == "__main__":
+    main()
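
To sanity-check the indexing and retrieval path outside Streamlit, here is a minimal sketch built from the same calls the app makes in get_vector_store() and user_input(); the sample chunks and question are placeholders, not part of this commit:

# Offline smoke test for the FAISS round trip in app.py (illustrative sketch; texts are placeholders).
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Build a tiny index from hard-coded chunks instead of extracted PDF text.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = FAISS.from_texts(
    ["Invoices are due within 30 days.", "Refunds are issued in 5 business days."],
    embedding=embeddings,
)
store.save_local("faiss_index")

# Reload the index the same way user_input() does and run a similarity search.
db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
for doc in db.similarity_search("When are invoices due?"):
    print(doc.page_content)

If the search prints the invoice chunk, the saved faiss_index is usable by the Streamlit flow; no GROQ_API_KEY is needed for this retrieval-only check.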