Spaces:
Runtime error
Runtime error
qorgh346
committed on
Commit
·
dd9ce97
1
Parent(s):
7af777c
update app.py
Browse files
app.py
CHANGED
|
@@ -10,16 +10,26 @@ from langchain.memory import ConversationBufferMemory
|
|
| 10 |
from langchain.chains import ConversationalRetrievalChain
|
| 11 |
from htmlTemplates import css, bot_template, user_template
|
| 12 |
from langchain.llms import HuggingFaceHub, LlamaCpp,CTransformers # For loading transformer models.
|
| 13 |
-
|
|
|
|
| 14 |
def get_pdf_text(pdf_docs):
|
| 15 |
-
text = ''
|
| 16 |
# pdf_file_ = open(pdf_docs,'rb')
|
| 17 |
# text = "example hofjin"
|
| 18 |
-
pdf_reader = PdfReader(pdf_docs)
|
| 19 |
-
for page in pdf_reader.pages:
|
| 20 |
-
text += page.extract_text()
|
| 21 |
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def get_text_chunks(text):
|
|
@@ -151,7 +161,7 @@ def main():
|
|
| 151 |
if st.button("Process"):
|
| 152 |
with st.spinner("Processing"):
|
| 153 |
# get pdf text
|
| 154 |
-
|
| 155 |
|
| 156 |
for file in docs:
|
| 157 |
print('file - type : ', file.type)
|
|
@@ -160,7 +170,7 @@ def main():
|
|
| 160 |
raw_text += get_text_file(file)
|
| 161 |
elif file.type in ['application/octet-stream', 'application/pdf']:
|
| 162 |
#file is .pdf
|
| 163 |
-
|
| 164 |
elif file.type == 'text/csv':
|
| 165 |
#file is .csv
|
| 166 |
raw_text += get_csv_file(file)
|
|
|
|
| 10 |
from langchain.chains import ConversationalRetrievalChain
|
| 11 |
from htmlTemplates import css, bot_template, user_template
|
| 12 |
from langchain.llms import HuggingFaceHub, LlamaCpp,CTransformers # For loading transformer models.
|
| 13 |
+
from langchain.document_loaders import PyPDFLoader
|
| 14 |
+
from tempfile import NamedTemporaryFile
|
| 15 |
def get_pdf_text(pdf_docs):
    """Load one uploaded PDF and return the documents parsed from it.

    ``PyPDFLoader`` is constructed from a filesystem path, so the in-memory
    upload is first written out to a temporary ``.pdf`` file, parsed, and
    then removed again.

    Args:
        pdf_docs: A single uploaded file object exposing ``getvalue()``,
            which must return the raw PDF bytes (presumably a Streamlit
            ``UploadedFile`` -- confirm against the caller).

    Returns:
        Whatever ``PyPDFLoader.load()`` returns for the file (per the
        LangChain documentation, a list of ``Document`` objects). Despite
        the function's name this is not a plain string.
    """
    import os  # local import: only needed for cleaning up the temp file

    # delete=False plus an explicit unlink: the file has to be closed before
    # the loader reopens it by name, because a NamedTemporaryFile that is
    # still open cannot be opened a second time on Windows.
    with NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
        temp_file.write(pdf_docs.getvalue())
        temp_path = temp_file.name

    try:
        return PyPDFLoader(temp_path).load()
    finally:
        # Always remove the temp file, even if parsing raised.
        os.unlink(temp_path)
|
| 33 |
|
| 34 |
|
| 35 |
def get_text_chunks(text):
|
|
|
|
| 161 |
if st.button("Process"):
|
| 162 |
with st.spinner("Processing"):
|
| 163 |
# get pdf text
|
| 164 |
+
doc_list = []
|
| 165 |
|
| 166 |
for file in docs:
|
| 167 |
print('file - type : ', file.type)
|
|
|
|
| 170 |
raw_text += get_text_file(file)
|
| 171 |
elif file.type in ['application/octet-stream', 'application/pdf']:
|
| 172 |
#file is .pdf
|
| 173 |
+
doc_list.append(get_pdf_text(file))
|
| 174 |
elif file.type == 'text/csv':
|
| 175 |
#file is .csv
|
| 176 |
raw_text += get_csv_file(file)
|