Spaces:
Runtime error
qorgh346 committed on
Commit 92aef63 · 1 Parent(s): a5173f3
update loader module
app.py CHANGED
@@ -10,7 +10,7 @@ from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
 from langchain.llms import HuggingFaceHub, LlamaCpp,CTransformers # For loading transformer models.
-from langchain.document_loaders import PyPDFLoader
+from langchain.document_loaders import PyPDFLoader,TextLoader, JSONLoader, CSVLoader
 from tempfile import NamedTemporaryFile
 def get_pdf_text(pdf_docs):
     # text = ''
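The new import pulls in three more path-based loaders alongside PyPDFLoader. For orientation, this is roughly how they are invoked in langchain 0.0.x-era code; the file names below are placeholders and the exact signatures are assumptions about that release line, not code from this Space.

# Illustrative sketch, not part of app.py: typical document_loaders usage.
from langchain.document_loaders import PyPDFLoader, TextLoader, CSVLoader, JSONLoader

pdf_docs = PyPDFLoader("sample.pdf").load()    # one Document per page (needs pypdf)
txt_docs = TextLoader("notes.txt").load()      # whole file as a single Document
csv_docs = CSVLoader("table.csv").load()       # one Document per CSV row
# JSONLoader usually also expects a jq_schema selecting what to extract (needs the jq package):
json_docs = JSONLoader("data.json", jq_schema=".[]").load()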
@@ -32,6 +32,52 @@ def get_pdf_text(pdf_docs):
     return pdf_doc


+def get_text_file(docs):
+
+    with NamedTemporaryFile() as temp_file:
+        temp_file.write(pdf_docs.getvalue())
+        temp_file.seek(0)
+        text_loader = TextLoader(temp_file.name)
+        text_doc = text_loader.load()
+
+        return text_doc
+
+
+
+def get_csv_file(docs):
+    import pandas as pd
+    text = ''
+
+    data = pd.read_csv(docs)
+
+    for index, row in data.iterrows():
+        item_name = row[0]
+        row_text = item_name
+        for col_name in data.columns[1:]:
+            row_text += '{} is {} '.format(col_name, row[col_name])
+        text += row_text + '\n'
+
+    return text
+
+def get_json_file(docs):
+    with NamedTemporaryFile() as temp_file:
+        temp_file.write(docs.getvalue())
+        temp_file.seek(0)
+        json_loader = JSONLoader(temp_file.name)
+        json_doc = json_loader.load()
+
+        return json_doc
+
+def get_hwp_file(docs):
+    pass
+
+def get_docs_file(docs):
+    pass
+
+
+
+
+
 def get_text_chunks(documents):

     text_splitter = RecursiveCharacterTextSplitter(
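The new get_text_file and get_json_file bridge Streamlit uploads and these path-based loaders by spilling the upload to a temporary file first. A minimal self-contained sketch of that pattern, assuming the input is an UploadedFile from st.file_uploader; the function name and the .txt suffix are illustrative, not taken from the commit:

# Sketch of the temp-file pattern, not code from app.py.
from tempfile import NamedTemporaryFile
from langchain.document_loaders import TextLoader

def load_uploaded_text(uploaded_file):
    # The upload lives in memory; path-based loaders want a file on disk,
    # so write the bytes out and point the loader at the temporary path.
    with NamedTemporaryFile(suffix=".txt") as temp_file:
        temp_file.write(uploaded_file.getvalue())
        temp_file.flush()
        return TextLoader(temp_file.name).load()  # list of Document objects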
@@ -99,45 +145,6 @@ def handle_userinput(user_question):
             st.write(bot_template.replace(
                 "{{MSG}}", message.content), unsafe_allow_html=True)

-def get_text_file(docs):
-    text = docs.read().decode("utf-8")
-    return text
-
-def get_csv_file(docs):
-    import pandas as pd
-    text = ''
-
-    data = pd.read_csv(docs)
-
-    for index, row in data.iterrows():
-        item_name = row[0]
-        row_text = item_name
-        for col_name in data.columns[1:]:
-            row_text += '{} is {} '.format(col_name, row[col_name])
-        text += row_text + '\n'
-
-    return text
-
-def get_json_file(docs):
-    import json
-    text = ''
-    # with open(docs, 'r') as f:
-    json_data = json.load(docs)
-
-    for f_key, f_value in json_data.items():
-        for s_value in f_value:
-            text += str(f_key) + str(s_value)
-            text += '\n'
-    #print(text)
-    return text
-
-def get_hwp_file(docs):
-    pass
-
-def get_docs_file(docs):
-    pass
-
-
 def main():
     load_dotenv()
     st.set_page_config(page_title="Chat with multiple PDFs",
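get_csv_file, which this commit moves earlier in the file without changing its body, flattens each row into a short "column is value" string. A quick worked example with a made-up two-row CSV shows what it builds (the data and file-like object here are hypothetical):

# Same loop as get_csv_file, run on an in-memory stand-in for an upload.
import io
import pandas as pd

data = pd.read_csv(io.StringIO("name,color,price\napple,red,3\npear,green,2\n"))

text = ''
for index, row in data.iterrows():
    row_text = row.iloc[0]                     # first column value: "apple", then "pear"
    for col_name in data.columns[1:]:
        row_text += '{} is {} '.format(col_name, row[col_name])
    text += row_text + '\n'

print(text)
# applecolor is red price is 3
# pearcolor is green price is 2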
@@ -170,7 +177,7 @@ def main():
                 raw_text += get_text_file(file)
             elif file.type in ['application/octet-stream', 'application/pdf']:
                 #file is .pdf
-                doc_list.
+                doc_list.extend(get_pdf_text(file))
             elif file.type == 'text/csv':
                 #file is .csv
                 raw_text += get_csv_file(file)
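In this dispatch, file.type is the MIME type Streamlit reports for each upload; .txt and .csv content is accumulated into the raw_text string while .pdf uploads go into doc_list as Documents. Purely as an illustration of one uniform alternative, assuming every helper returned a list of langchain Documents (an assumption, not what the hunk above shows for .txt and .csv), the branches could all feed a single list:

# Sketch only: uniform Document-based dispatch. Assumes get_text_file,
# get_pdf_text and get_csv_file each return a list of Documents and that
# `files` comes from st.file_uploader(..., accept_multiple_files=True).
doc_list = []
for file in files:
    if file.type == 'text/plain':
        doc_list.extend(get_text_file(file))
    elif file.type in ['application/octet-stream', 'application/pdf']:
        doc_list.extend(get_pdf_text(file))
    elif file.type == 'text/csv':
        doc_list.extend(get_csv_file(file))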