feat: foo
Browse files- app/app.py +8 -25
app/app.py
CHANGED
@@ -29,35 +29,20 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
|
|
29 |
List[Document]: List of Document(s). Each individual document has two
|
30 |
fields: page_content(string) and metadata(dict).
|
31 |
"""
|
32 |
-
# We only support PDF as input.
|
33 |
if file.type != "application/pdf":
|
34 |
raise TypeError("Only PDF files are supported")
|
35 |
|
36 |
with NamedTemporaryFile() as tempfile:
|
37 |
tempfile.write(file.content)
|
38 |
|
39 |
-
######################################################################
|
40 |
-
# Exercise 1a:
|
41 |
-
# We have the input PDF file saved as a temporary file. The name of
|
42 |
-
# the file is 'tempfile.name'. Please use one of the PDF loaders in
|
43 |
-
# Langchain to load the file.
|
44 |
-
######################################################################
|
45 |
loader = PDFPlumberLoader(tempfile.name)
|
46 |
documents = loader.load()
|
47 |
-
|
48 |
-
|
49 |
-
######################################################################
|
50 |
-
# Exercise 1b:
|
51 |
-
# We can now chunk the documents now that they are loaded. LangChain provides
|
52 |
-
# a list of helpful text splitters. Please use one of the splitters
|
53 |
-
# to chunk the file.
|
54 |
-
######################################################################
|
55 |
text_splitter = RecursiveCharacterTextSplitter(
|
56 |
chunk_size=3000,
|
57 |
chunk_overlap=100
|
58 |
)
|
59 |
docs = text_splitter.split_documents(documents)
|
60 |
-
######################################################################
|
61 |
|
62 |
# We are adding source_id into the metadata here to denote which
|
63 |
# source document it is.
|
@@ -72,14 +57,13 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
|
|
72 |
|
73 |
@cl.on_chat_start
|
74 |
async def on_chat_start():
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
######################################################################
|
83 |
files = None
|
84 |
while files is None:
|
85 |
files = await cl.AskFileMessage(
|
@@ -92,7 +76,6 @@ async def on_chat_start():
|
|
92 |
# Send message to user to let them know we are processing the file
|
93 |
msg = cl.Message(content=f"Processing `{file.name}`...")
|
94 |
await msg.send()
|
95 |
-
######################################################################
|
96 |
|
97 |
model = ChatOpenAI(
|
98 |
model="gpt-3.5-turbo-16k-0613",
|
|
|
29 |
List[Document]: List of Document(s). Each individual document has two
|
30 |
fields: page_content(string) and metadata(dict).
|
31 |
"""
|
|
|
32 |
if file.type != "application/pdf":
|
33 |
raise TypeError("Only PDF files are supported")
|
34 |
|
35 |
with NamedTemporaryFile() as tempfile:
|
36 |
tempfile.write(file.content)
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
loader = PDFPlumberLoader(tempfile.name)
|
39 |
documents = loader.load()
|
40 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
text_splitter = RecursiveCharacterTextSplitter(
|
42 |
chunk_size=3000,
|
43 |
chunk_overlap=100
|
44 |
)
|
45 |
docs = text_splitter.split_documents(documents)
|
|
|
46 |
|
47 |
# We are adding source_id into the metadata here to denote which
|
48 |
# source document it is.
|
|
|
57 |
|
58 |
@cl.on_chat_start
|
59 |
async def on_chat_start():
|
60 |
+
"""This function prepares the environment for the chat
|
61 |
+
with PDF application. It should be decorated with cl.on_chat_start.
|
62 |
+
|
63 |
+
Returns:
|
64 |
+
None
|
65 |
+
"""
|
66 |
+
|
|
|
67 |
files = None
|
68 |
while files is None:
|
69 |
files = await cl.AskFileMessage(
|
|
|
76 |
# Send message to user to let them know we are processing the file
|
77 |
msg = cl.Message(content=f"Processing `{file.name}`...")
|
78 |
await msg.send()
|
|
|
79 |
|
80 |
model = ChatOpenAI(
|
81 |
model="gpt-3.5-turbo-16k-0613",
|