shallou committed
Commit 427863b · verified · 1 Parent(s): dcf7dc4

Update app.py

Files changed (1)
  1. app.py +10 -91
app.py CHANGED
@@ -1,10 +1,4 @@
-"""
-Streamlit application for PDF-based Retrieval-Augmented Generation (RAG) using Ollama + LangChain.
-
-This application allows users to upload a PDF, process it,
-and then ask questions about the content using a selected language model.
-"""
-
+!pip install langchain-community # Install the missing module
 import streamlit as st
 import logging
 import os
@@ -46,15 +40,7 @@ logger = logging.getLogger(__name__)
 def extract_model_names(
     models_info: Dict[str, List[Dict[str, Any]]],
 ) -> Tuple[str, ...]:
-    """
-    Extract model names from the provided models information.
-
-    Args:
-        models_info (Dict[str, List[Dict[str, Any]]]): Dictionary containing information about available models.
-
-    Returns:
-        Tuple[str, ...]: A tuple of model names.
-    """
+    """Extract model names from the provided models information."""
     logger.info("Extracting model names from models_info")
     model_names = tuple(model["name"] for model in models_info["models"])
     logger.info(f"Extracted model names: {model_names}")
@@ -62,15 +48,7 @@ def extract_model_names(
 
 
 def create_vector_db(file_upload) -> Chroma:
-    """
-    Create a vector database from an uploaded PDF file.
-
-    Args:
-        file_upload (st.UploadedFile): Streamlit file upload object containing the PDF.
-
-    Returns:
-        Chroma: A vector store containing the processed document chunks.
-    """
+    """Create a vector database from an uploaded PDF file."""
     logger.info(f"Creating vector DB from file upload: {file_upload.name}")
     temp_dir = tempfile.mkdtemp()
 
@@ -97,19 +75,8 @@ def create_vector_db(file_upload) -> Chroma:
 
 
 def process_question(question: str, vector_db: Chroma, selected_model: str) -> str:
-    """
-    Process a user question using the vector database and selected language model.
-
-    Args:
-        question (str): The user's question.
-        vector_db (Chroma): The vector database containing document embeddings.
-        selected_model (str): The name of the selected language model.
-
-    Returns:
-        str: The generated response to the user's question.
-    """
-    logger.info(f"""Processing question: {
-        question} using model: {selected_model}""")
+    """Process a user question using the vector database and selected language model."""
+    logger.info(f"Processing question: {question} using model: {selected_model}")
     llm = ChatOllama(model=selected_model, temperature=0)
     QUERY_PROMPT = PromptTemplate(
         input_variables=["question"],
@@ -149,17 +116,8 @@ def process_question(question: str, vector_db: Chroma, selected_model: str) -> str:
 
 @st.cache_data
 def extract_all_pages_as_images(file_upload) -> List[Any]:
-    """
-    Extract all pages from a PDF file as images.
-
-    Args:
-        file_upload (st.UploadedFile): Streamlit file upload object containing the PDF.
-
-    Returns:
-        List[Any]: A list of image objects representing each page of the PDF.
-    """
-    logger.info(f"""Extracting all pages as images from file: {
-        file_upload.name}""")
+    """Extract all pages from a PDF file as images."""
+    logger.info(f"Extracting all pages as images from file: {file_upload.name}")
     pdf_pages = []
     with pdfplumber.open(file_upload) as pdf:
         pdf_pages = [page.to_image().original for page in pdf.pages]
@@ -168,12 +126,7 @@ def extract_all_pages_as_images(file_upload) -> List[Any]:
 
 
 def delete_vector_db(vector_db: Optional[Chroma]) -> None:
-    """
-    Delete the vector database and clear related session state.
-
-    Args:
-        vector_db (Optional[Chroma]): The vector database to be deleted.
-    """
+    """Delete the vector database and clear related session state."""
     logger.info("Deleting vector DB")
     if vector_db is not None:
         vector_db.delete_collection()
@@ -189,12 +142,7 @@ def delete_vector_db(vector_db: Optional[Chroma]) -> None:
 
 
 def main() -> None:
-    """
-    Main function to run the Streamlit application.
-
-    This function sets up the user interface, handles file uploads,
-    processes user queries, and displays results.
-    """
+    """Main function to run the Streamlit application."""
     st.subheader("🧠 Ollama PDF RAG playground", divider="gray", anchor=False)
 
     models_info = ollama.list()
@@ -246,33 +194,4 @@ def main() -> None:
         with message_container.chat_message(message["role"], avatar=avatar):
             st.markdown(message["content"])
 
-        if prompt := st.chat_input("Enter a prompt here..."):
-            try:
-                st.session_state["messages"].append({"role": "user", "content": prompt})
-                message_container.chat_message("user", avatar="😎").markdown(prompt)
-
-                with message_container.chat_message("assistant", avatar="🤖"):
-                    with st.spinner(":green[processing...]"):
-                        if st.session_state["vector_db"] is not None:
-                            response = process_question(
-                                prompt, st.session_state["vector_db"], selected_model
-                            )
-                            st.markdown(response)
-                        else:
-                            st.warning("Please upload a PDF file first.")
-
-                if st.session_state["vector_db"] is not None:
-                    st.session_state["messages"].append(
-                        {"role": "assistant", "content": response}
-                    )
-
-            except Exception as e:
-                st.error(e, icon="⛔️")
-                logger.error(f"Error processing prompt: {e}")
-        else:
-            if st.session_state["vector_db"] is None:
-                st.warning("Upload a PDF file to begin chat...")
-
-
-if __name__ == "__main__":
-    main()
+
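Review note on the first hunk: the added line `!pip install langchain-community` is IPython/Jupyter shell-escape syntax, not Python, so a plain app.py executed with `streamlit run app.py` fails at parse time with a SyntaxError. The usual convention for a Streamlit app is to declare the dependency in requirements.txt and keep the file import-only. Below is a minimal sketch of a plain-Python alternative; the guarded runtime install is an assumption about what the commit intended, not part of the original code.

# Sketch: replace "!pip install langchain-community" with plain Python.
# Preferred: add "langchain-community" to requirements.txt instead.
import importlib.util
import subprocess
import sys

if importlib.util.find_spec("langchain_community") is None:
    # Hypothetical fallback: install into the interpreter running this
    # script, mirroring what the "!pip install" line presumably intended.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "langchain-community"]
    )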
 
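A second note on the final hunk: along with the chat-input handler, it deletes the `if __name__ == "__main__":` guard, and the replacement side adds only a blank line. Since nothing else calls `main()`, running the updated script defines the UI but never renders it. A minimal restoration, assuming `main()` is still meant to be the entry point:

# Sketch: restore the entry point removed by the last hunk; without it,
# "streamlit run app.py" executes the module but never builds the page.
if __name__ == "__main__":
    main()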