Spaces:

HanLee
/

Demo

Sleeping

App Files Files Community

HanLee committed on Nov 26, 2023

Commit

adb96e6

1 Parent(s): 2acbc36

feat: foo

Browse files

Files changed (1) hide show

app/app.py +8 -25

app/app.py CHANGED Viewed

@@ -29,35 +29,20 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
         List[Document]: List of Document(s). Each individual document has two
         fields: page_content(string) and metadata(dict).
     """
-    # We only support PDF as input.
     if file.type != "application/pdf":
         raise TypeError("Only PDF files are supported")
     with NamedTemporaryFile() as tempfile:
         tempfile.write(file.content)
-        ######################################################################
-        # Exercise 1a:
-        # We have the input PDF file saved as a temporary file. The name of
-        # the file is 'tempfile.name'. Please use one of the PDF loaders in
-        # Langchain to load the file.
-        ######################################################################
         loader = PDFPlumberLoader(tempfile.name)
         documents = loader.load()
-        ######################################################################
-        ######################################################################
-        # Exercise 1b:
-        # We can now chunk the documents now it is loaded. Langchain provides
-        # a list of helpful text splitters. Please use one of the splitters
-        # to chunk the file.
-        ######################################################################
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=3000,
             chunk_overlap=100
         )
         docs = text_splitter.split_documents(documents)
-        ######################################################################
         # We are adding source_id into the metadata here to denote which
         # source document it is.
@@ -72,14 +57,13 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
 @cl.on_chat_start
 async def on_chat_start():
-    ######################################################################
-    # Exercise 1c:
-    # At the start of our Chat with PDF app, we will first ask users to
-    # upload the PDF file they want to ask questions against.
-    #
-    # Please use Chainlit's AskFileMessage and get the file from users.
-    # Note for this course, we only want to deal with one single file.
-    ######################################################################
     files = None
     while files is None:
         files = await cl.AskFileMessage(
@@ -92,7 +76,6 @@ async def on_chat_start():
     # Send message to user to let them know we are processing the file
     msg = cl.Message(content=f"Processing `{file.name}`...")
     await msg.send()
-    ######################################################################
     model = ChatOpenAI(
         model="gpt-3.5-turbo-16k-0613",

         List[Document]: List of Document(s). Each individual document has two
         fields: page_content(string) and metadata(dict).
     """
     if file.type != "application/pdf":
         raise TypeError("Only PDF files are supported")
     with NamedTemporaryFile() as tempfile:
         tempfile.write(file.content)
         loader = PDFPlumberLoader(tempfile.name)
         documents = loader.load()
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=3000,
             chunk_overlap=100
         )
         docs = text_splitter.split_documents(documents)
         # We are adding source_id into the metadata here to denote which
         # source document it is.
 @cl.on_chat_start
 async def on_chat_start():
+    """Prepare the environment for the Chat with PDF application.
+    This function should be decorated with cl.on_chat_start.
+    Returns:
+        None
+    """
     files = None
     while files is None:
         files = await cl.AskFileMessage(
     # Send message to user to let them know we are processing the file
     msg = cl.Message(content=f"Processing `{file.name}`...")
     await msg.send()
     model = ChatOpenAI(
         model="gpt-3.5-turbo-16k-0613",