HanLee committed on
Commit
adb96e6
·
1 Parent(s): 2acbc36
Files changed (1) hide show
  1. app/app.py +8 -25
app/app.py CHANGED
@@ -29,35 +29,20 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
29
  List[Document]: List of Document(s). Each individual document has two
30
  fields: page_content(string) and metadata(dict).
31
  """
32
- # We only support PDF as input.
33
  if file.type != "application/pdf":
34
  raise TypeError("Only PDF files are supported")
35
 
36
  with NamedTemporaryFile() as tempfile:
37
  tempfile.write(file.content)
38
 
39
- ######################################################################
40
- # Exercise 1a:
41
- # We have the input PDF file saved as a temporary file. The name of
42
- # the file is 'tempfile.name'. Please use one of the PDF loaders in
43
- # Langchain to load the file.
44
- ######################################################################
45
  loader = PDFPlumberLoader(tempfile.name)
46
  documents = loader.load()
47
- ######################################################################
48
-
49
- ######################################################################
50
- # Exercise 1b:
51
- # We can now chunk the documents now it is loaded. Langchain provides
52
- # a list of helpful text splitters. Please use one of the splitters
53
- # to chunk the file.
54
- ######################################################################
55
  text_splitter = RecursiveCharacterTextSplitter(
56
  chunk_size=3000,
57
  chunk_overlap=100
58
  )
59
  docs = text_splitter.split_documents(documents)
60
- ######################################################################
61
 
62
  # We are adding source_id into the metadata here to denote which
63
  # source document it is.
@@ -72,14 +57,13 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
72
 
73
  @cl.on_chat_start
74
  async def on_chat_start():
75
- ######################################################################
76
- # Exercise 1c:
77
- # At the start of our Chat with PDF app, we will first ask users to
78
- # upload the PDF file they want to ask questions against.
79
- #
80
- # Please use Chainlit's AskFileMessage and get the file from users.
81
- # Note for this course, we only want to deal with one single file.
82
- ######################################################################
83
  files = None
84
  while files is None:
85
  files = await cl.AskFileMessage(
@@ -92,7 +76,6 @@ async def on_chat_start():
92
  # Send message to user to let them know we are processing the file
93
  msg = cl.Message(content=f"Processing `{file.name}`...")
94
  await msg.send()
95
- ######################################################################
96
 
97
  model = ChatOpenAI(
98
  model="gpt-3.5-turbo-16k-0613",
 
29
  List[Document]: List of Document(s). Each individual document has two
30
  fields: page_content(string) and metadata(dict).
31
  """
 
32
  if file.type != "application/pdf":
33
  raise TypeError("Only PDF files are supported")
34
 
35
  with NamedTemporaryFile() as tempfile:
36
  tempfile.write(file.content)
37
 
 
 
 
 
 
 
38
  loader = PDFPlumberLoader(tempfile.name)
39
  documents = loader.load()
40
+
 
 
 
 
 
 
 
41
  text_splitter = RecursiveCharacterTextSplitter(
42
  chunk_size=3000,
43
  chunk_overlap=100
44
  )
45
  docs = text_splitter.split_documents(documents)
 
46
 
47
  # We are adding source_id into the metadata here to denote which
48
  # source document it is.
 
57
 
58
  @cl.on_chat_start
59
  async def on_chat_start():
60
+ """This function is written to prepare the environments for the chat
61
+ with PDF application. It should be decorated with cl.on_chat_start.
62
+
63
+ Returns:
64
+ None
65
+ """
66
+
 
67
  files = None
68
  while files is None:
69
  files = await cl.AskFileMessage(
 
76
  # Send message to user to let them know we are processing the file
77
  msg = cl.Message(content=f"Processing `{file.name}`...")
78
  await msg.send()
 
79
 
80
  model = ChatOpenAI(
81
  model="gpt-3.5-turbo-16k-0613",