Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,8 @@ import os
|
|
| 10 |
import streamlit as st
|
| 11 |
from langchain_groq import ChatGroq
|
| 12 |
from langchain_community.document_loaders import WebBaseLoader
|
|
|
|
|
|
|
| 13 |
from langchain_community.embeddings import OllamaEmbeddings
|
| 14 |
|
| 15 |
# JB:
|
|
@@ -39,8 +41,14 @@ if "vector" not in st.session_state:
|
|
| 39 |
st.session_state.embeddings = FastEmbedEmbeddings() # JB
|
| 40 |
|
| 41 |
|
| 42 |
-
st.session_state.loader = WebBaseLoader("https://paulgraham.com/greatwork.html")
|
| 43 |
-
st.session_state.docs = st.session_state.loader.load()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
| 46 |
st.session_state.documents = st.session_state.text_splitter.split_documents( st.session_state.docs)
|
|
|
|
| 10 |
import streamlit as st
|
| 11 |
from langchain_groq import ChatGroq
|
| 12 |
from langchain_community.document_loaders import WebBaseLoader
|
| 13 |
+
# JB:
|
| 14 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 15 |
from langchain_community.embeddings import OllamaEmbeddings
|
| 16 |
|
| 17 |
# JB:
|
|
|
|
| 41 |
st.session_state.embeddings = FastEmbedEmbeddings() # JB
|
| 42 |
|
| 43 |
|
| 44 |
+
# st.session_state.loader = WebBaseLoader("https://paulgraham.com/greatwork.html") # ORIGINAL
|
| 45 |
+
# st.session_state.docs = st.session_state.loader.load() # ORIGINAL
|
| 46 |
+
pdf_file_path = "" # JB: TODO - set this to the PDF to index; PyPDFLoader cannot load an empty path
|
| 47 |
+
st.session_state.docs = PyPDFLoader(file_path=pdf_file_path).load() # JB: store the loaded pages in .docs, which split_documents() reads below
|
| 48 |
+
# chunks = self.text_splitter.split_documents(docs)
|
| 49 |
+
# chunks = filter_complex_metadata(chunks)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
|
| 53 |
st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
| 54 |
st.session_state.documents = st.session_state.text_splitter.split_documents( st.session_state.docs)
|