Update app.py: list PDF files instead of Word documents and comment out the Unstructured Word loader
Browse files
app.py
CHANGED
|
@@ -55,10 +55,10 @@ st.write("---------------------------------")
|
|
| 55 |
|
| 56 |
st.write("LIST OF ALL THE LOADED DOCUMENTS: ")
|
| 57 |
st.write("")
|
| 58 |
-
|
| 59 |
-
word_files = glob.glob("*.docx")
|
| 60 |
-
|
| 61 |
-
for file in word_files:
|
| 62 |
st.write(file)
|
| 63 |
|
| 64 |
st.write("---------------------------------")
|
|
@@ -153,10 +153,10 @@ if "vector" not in st.session_state:
|
|
| 153 |
# Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': 'fake.docx', 'filename': 'fake.docx', 'category': 'Title'}, lookup_index=0)
|
| 154 |
#
|
| 155 |
# 2A - Using Unstructured
|
| 156 |
-
from langchain_community.document_loaders import UnstructuredWordDocumentLoader
|
| 157 |
-
loader = UnstructuredWordDocumentLoader(path, glob="**/*.docx")
|
| 158 |
-
docs = loader.load()
|
| 159 |
-
st.session_state.docs = docs
|
| 160 |
|
| 161 |
|
| 162 |
|
|
|
|
| 55 |
|
| 56 |
st.write("LIST OF ALL THE LOADED DOCUMENTS: ")
|
| 57 |
st.write("")
|
| 58 |
+
pdf_files = glob.glob("*.pdf")
|
| 59 |
+
# word_files = glob.glob("*.docx")
|
| 60 |
+
for file in pdf_files:
|
| 61 |
+
# for file in word_files:
|
| 62 |
st.write(file)
|
| 63 |
|
| 64 |
st.write("---------------------------------")
|
|
|
|
| 153 |
# Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': 'fake.docx', 'filename': 'fake.docx', 'category': 'Title'}, lookup_index=0)
|
| 154 |
#
|
| 155 |
# 2A - Using Unstructured
|
| 156 |
+
# from langchain_community.document_loaders import UnstructuredWordDocumentLoader
|
| 157 |
+
# loader = UnstructuredWordDocumentLoader(path, glob="**/*.docx")
|
| 158 |
+
# docs = loader.load()
|
| 159 |
+
# st.session_state.docs = docs
|
| 160 |
|
| 161 |
|
| 162 |
|