Update app.py
app.py CHANGED
@@ -1,10 +1,4 @@
-"""
-Streamlit application for PDF-based Retrieval-Augmented Generation (RAG) using Ollama + LangChain.
-
-This application allows users to upload a PDF, process it,
-and then ask questions about the content using a selected language model.
-"""
-
+!pip install langchain-community # Install the missing module
 import streamlit as st
 import logging
 import os
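Note on the added line: `!pip install ...` is notebook (IPython) syntax, not Python, so at the top of a plain `app.py` it raises a `SyntaxError` the moment the module loads. On a Hugging Face Space the idiomatic fix is to pin `langchain-community` in `requirements.txt`. If runtime installation is really wanted, a script-safe sketch (the PyPI name `langchain-community` is taken from the line above):

```python
import importlib.util
import subprocess
import sys

# Install langchain-community at runtime only if it is missing; prefer
# declaring it in requirements.txt so the Space installs it at build time.
if importlib.util.find_spec("langchain_community") is None:
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "langchain-community"]
    )
```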
@@ -46,15 +40,7 @@ logger = logging.getLogger(__name__)
 def extract_model_names(
     models_info: Dict[str, List[Dict[str, Any]]],
 ) -> Tuple[str, ...]:
-    """
-    Extract model names from the provided models information.
-
-    Args:
-        models_info (Dict[str, List[Dict[str, Any]]]): Dictionary containing information about available models.
-
-    Returns:
-        Tuple[str, ...]: A tuple of model names.
-    """
+    """Extract model names from the provided models information."""
     logger.info("Extracting model names from models_info")
     model_names = tuple(model["name"] for model in models_info["models"])
     logger.info(f"Extracted model names: {model_names}")
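This function assumes `ollama.list()` returns a plain dict whose `"models"` entries are dicts keyed by `"name"`. Newer releases of the `ollama` Python client return typed response objects whose field is `model`, which makes this exact indexing fail (a version assumption worth verifying, not something this diff shows). A defensive sketch that tolerates both shapes:

```python
from typing import Any, Tuple

def extract_model_names_safe(models_info: Any) -> Tuple[str, ...]:
    # Accept the legacy dict shape ({"models": [{"name": ...}]}) as well as
    # newer typed responses (models_info.models[i].model).
    models = (
        models_info.get("models", [])
        if isinstance(models_info, dict)
        else getattr(models_info, "models", [])
    )
    names = []
    for m in models:
        if isinstance(m, dict):
            names.append(m.get("name") or m.get("model"))
        else:
            names.append(getattr(m, "model", None) or getattr(m, "name", None))
    return tuple(n for n in names if n)
```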
@@ -62,15 +48,7 @@ def extract_model_names(
 
 
 def create_vector_db(file_upload) -> Chroma:
-    """
-    Create a vector database from an uploaded PDF file.
-
-    Args:
-        file_upload (st.UploadedFile): Streamlit file upload object containing the PDF.
-
-    Returns:
-        Chroma: A vector store containing the processed document chunks.
-    """
+    """Create a vector database from an uploaded PDF file."""
     logger.info(f"Creating vector DB from file upload: {file_upload.name}")
     temp_dir = tempfile.mkdtemp()
 
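The hunk cuts off right after `temp_dir = tempfile.mkdtemp()`. For context, a minimal sketch of how the body of such a function typically continues; the loader, splitter settings, collection name, and embedding model (`nomic-embed-text`) are assumptions, not visible in this diff:

```python
import os
import shutil
import tempfile

from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

def create_vector_db_sketch(file_upload) -> Chroma:
    temp_dir = tempfile.mkdtemp()
    try:
        # Persist the uploaded bytes so the PDF loader can read from disk.
        path = os.path.join(temp_dir, file_upload.name)
        with open(path, "wb") as f:
            f.write(file_upload.getvalue())

        # Load, chunk, embed, and index the document.
        data = UnstructuredPDFLoader(path).load()
        chunks = RecursiveCharacterTextSplitter(
            chunk_size=7500, chunk_overlap=100
        ).split_documents(data)
        return Chroma.from_documents(
            documents=chunks,
            embedding=OllamaEmbeddings(model="nomic-embed-text"),
            collection_name="myRAG",
        )
    finally:
        shutil.rmtree(temp_dir)  # clean up the temp copy either way
```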
@@ -97,19 +75,8 @@ def create_vector_db(file_upload) -> Chroma:
 
 
 def process_question(question: str, vector_db: Chroma, selected_model: str) -> str:
-    """
-    Process a user question using the vector database and selected language model.
-
-    Args:
-        question (str): The user's question.
-        vector_db (Chroma): The vector database containing document embeddings.
-        selected_model (str): The name of the selected language model.
-
-    Returns:
-        str: The generated response to the user's question.
-    """
-    logger.info(f"""Processing question: {
-        question} using model: {selected_model}""")
+    """Process a user question using the vector database and selected language model."""
+    logger.info(f"Processing question: {question} using model: {selected_model}")
     llm = ChatOllama(model=selected_model, temperature=0)
     QUERY_PROMPT = PromptTemplate(
         input_variables=["question"],
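A `QUERY_PROMPT` with `input_variables=["question"]` is the usual setup for multi-query retrieval: the LLM rewrites the user's question into several variants before searching the vector store. A sketch of how such a chain is typically wired up after this point, continuing inside `process_question` and reusing `llm`, `vector_db`, and `QUERY_PROMPT` from above (the answer template and import paths are assumptions; the rest of the function is not shown in this hunk):

```python
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Generate multiple query variants with the LLM, then retrieve for each.
retriever = MultiQueryRetriever.from_llm(
    vector_db.as_retriever(), llm, prompt=QUERY_PROMPT
)

answer_prompt = ChatPromptTemplate.from_template(
    "Answer the question based ONLY on the following context:\n"
    "{context}\nQuestion: {question}"
)

# Retrieved documents feed {context}; the raw question passes through.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | answer_prompt
    | llm
    | StrOutputParser()
)
response = chain.invoke(question)  # the function would return this string
```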
@@ -149,17 +116,8 @@ def process_question(question: str, vector_db: Chroma, selected_model: str) -> str:
 
 @st.cache_data
 def extract_all_pages_as_images(file_upload) -> List[Any]:
-    """
-    Extract all pages from a PDF file as images.
-
-    Args:
-        file_upload (st.UploadedFile): Streamlit file upload object containing the PDF.
-
-    Returns:
-        List[Any]: A list of image objects representing each page of the PDF.
-    """
-    logger.info(f"""Extracting all pages as images from file: {
-        file_upload.name}""")
+    """Extract all pages from a PDF file as images."""
+    logger.info(f"Extracting all pages as images from file: {file_upload.name}")
     pdf_pages = []
     with pdfplumber.open(file_upload) as pdf:
         pdf_pages = [page.to_image().original for page in pdf.pages]
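`@st.cache_data` caches the rendered pages, so Streamlit re-runs do not re-rasterize the PDF on every interaction. A sketch of how these images are typically shown in the viewer column (the slider and container dimensions are assumptions, not part of this diff):

```python
# Let the user pick a render width, then paint every cached page image.
zoom_level = st.slider(
    "Zoom level", min_value=100, max_value=1000, value=700, step=50
)
with st.container(height=410, border=True):
    for page_image in pdf_pages:
        st.image(page_image, width=zoom_level)
```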
@@ -168,12 +126,7 @@ def extract_all_pages_as_images(file_upload) -> List[Any]:
 
 
 def delete_vector_db(vector_db: Optional[Chroma]) -> None:
-    """
-    Delete the vector database and clear related session state.
-
-    Args:
-        vector_db (Optional[Chroma]): The vector database to be deleted.
-    """
+    """Delete the vector database and clear related session state."""
     logger.info("Deleting vector DB")
     if vector_db is not None:
         vector_db.delete_collection()
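After `delete_collection()`, the related Streamlit session keys need to be reset or the UI keeps referencing a dead store. A minimal sketch of that cleanup; `"vector_db"` is used elsewhere in this file, while the other key names are assumptions:

```python
# Clear everything the UI derived from the deleted collection.
st.session_state.pop("pdf_pages", None)
st.session_state.pop("file_upload", None)
st.session_state["vector_db"] = None
st.success("Collection and temporary files deleted successfully.")
```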
@@ -189,12 +142,7 @@ def delete_vector_db(vector_db: Optional[Chroma]) -> None:
 
 
 def main() -> None:
-    """
-    Main function to run the Streamlit application.
-
-    This function sets up the user interface, handles file uploads,
-    processes user queries, and displays results.
-    """
+    """Main function to run the Streamlit application."""
     st.subheader("🧠 Ollama PDF RAG playground", divider="gray", anchor=False)
 
     models_info = ollama.list()
@@ -246,33 +194,4 @@ def main() -> None:
             with message_container.chat_message(message["role"], avatar=avatar):
                 st.markdown(message["content"])
 
-        if prompt := st.chat_input("Enter a prompt here..."):
-            try:
-                st.session_state["messages"].append({"role": "user", "content": prompt})
-                message_container.chat_message("user", avatar="😎").markdown(prompt)
-
-                with message_container.chat_message("assistant", avatar="🤖"):
-                    with st.spinner(":green[processing...]"):
-                        if st.session_state["vector_db"] is not None:
-                            response = process_question(
-                                prompt, st.session_state["vector_db"], selected_model
-                            )
-                            st.markdown(response)
-                        else:
-                            st.warning("Please upload a PDF file first.")
-
-                if st.session_state["vector_db"] is not None:
-                    st.session_state["messages"].append(
-                        {"role": "assistant", "content": response}
-                    )
-
-            except Exception as e:
-                st.error(e, icon="⛔️")
-                logger.error(f"Error processing prompt: {e}")
-        else:
-            if st.session_state["vector_db"] is None:
-                st.warning("Upload a PDF file to begin chat...")
-
-
-if __name__ == "__main__":
-    main()
+
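Beyond trimming docstrings, this hunk deletes the entire chat-input handler and the `if __name__ == "__main__":` guard, while the first hunk adds a notebook-only `!pip install` line. The net effect is that the module no longer parses and, even once that is fixed, `main()` is never invoked. At minimum, the entry point has to come back:

```python
# Without this guard nothing calls main(), so the Space renders a blank page
# even after the SyntaxError from "!pip install" is resolved.
if __name__ == "__main__":
    main()
```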