feat: final
- .chainlit/config.toml +78 -0
- .env.sample +2 -0
- .gitignore +166 -0
- .vscode/settings.json +0 -1
- app/app.py +207 -0
- app/prompt.py +26 -0
- chainlit.md +8 -0
- requirements.txt +8 -0
- sample_pdf/NVDA 2QFY24.pdf +0 -0
.chainlit/config.toml
ADDED
@@ -0,0 +1,78 @@
+[project]
+# Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = true
+
+# List of environment variables to be provided by each user to use the app.
+user_env = []
+
+# Duration (in seconds) during which the session is saved when the connection is lost
+session_timeout = 3600
+
+# Enable third parties caching (e.g. LangChain cache)
+cache = false
+
+# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+# follow_symlink = false
+
+[features]
+# Show the prompt playground
+prompt_playground = true
+
+# Authorize users to upload files with messages
+multi_modal = true
+
+# Allows user to use speech to text
+[features.speech_to_text]
+enabled = false
+# See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+# language = "en-US"
+
+[UI]
+# Name of the app and chatbot.
+name = "Chatbot"
+
+# Show the readme while the conversation is empty.
+show_readme_as_default = true
+
+# Description of the app and chatbot. This is used for HTML tags.
+# description = ""
+
+# Large size content are by default collapsed for a cleaner ui
+default_collapse_content = true
+
+# The default value for the expand messages settings.
+default_expand_messages = false
+
+# Hide the chain of thought details from the user in the UI.
+hide_cot = false
+
+# Link to your github repo. This will add a github button in the UI's header.
+github = "https://github.com/LinkedInLearning/hands-on-ai-building-and-deploying-llm-powered-apps-4511409"
+
+# Specify a CSS file that can be used to customize the user interface.
+# The CSS file can be served from the public directory or via an external link.
+# custom_css = "/public/test.css"
+
+# Override default MUI light theme. (Check theme.ts)
+[UI.theme.light]
+#background = "#FAFAFA"
+#paper = "#FFFFFF"
+
+[UI.theme.light.primary]
+#main = "#F80061"
+#dark = "#980039"
+#light = "#FFE7EB"
+
+# Override default MUI dark theme. (Check theme.ts)
+[UI.theme.dark]
+#background = "#FAFAFA"
+#paper = "#FFFFFF"
+
+[UI.theme.dark.primary]
+#main = "#F80061"
+#dark = "#980039"
+#light = "#FFE7EB"
+
+
+[meta]
+generated_by = "0.7.501"
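The generated config can be sanity-checked before launch. A minimal sketch, assuming Python 3.11+ (`tomllib` is stdlib there; on older interpreters, `pip install tomli` and import it as `tomllib`):

import tomllib  # stdlib on Python 3.11+; use the tomli backport otherwise

with open(".chainlit/config.toml", "rb") as f:
    config = tomllib.load(f)

# Spot-check the values the app relies on
assert config["project"]["session_timeout"] == 3600  # session kept an hour after disconnect
assert config["features"]["multi_modal"] is True     # file uploads must stay enabled for PDF QA
print(config["UI"]["name"])                          # "Chatbot"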
.env.sample
ADDED
@@ -0,0 +1,2 @@
+ALLOW_RESET=TRUE
+OPENAI_API_KEY="sk-your-openai-api-key"
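A minimal sketch of loading these variables at startup; `python-dotenv` is an assumption here (it is not pinned in requirements.txt), and exporting the variables in the shell works just as well:

import os

from dotenv import load_dotenv  # assumption: pip install python-dotenv

load_dotenv()  # reads a local .env copied from .env.sample
assert os.environ.get("OPENAI_API_KEY"), "Set OPENAI_API_KEY before launching the app"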
.gitignore
CHANGED
@@ -1,4 +1,170 @@
+# Ruff
+.ruff_cache/
+
+# Chainlit
+.chainlit/.langchain.db
+
+# Chroma
+.chromadb/
+
 .DS_Store
 node_modules
 .tmp
 npm-debug.log
+
+# VSCode
+.vscode/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
.vscode/settings.json
CHANGED
@@ -17,7 +17,6 @@
     "files.autoSave": "afterDelay",
     "screencastMode.onlyKeyboardShortcuts": true,
     "terminal.integrated.fontSize": 18,
-    "workbench.activityBar.visible": true,
     "workbench.colorTheme": "Visual Studio Dark",
     "workbench.fontAliasing": "antialiased",
     "workbench.statusBar.visible": true
app/app.py
ADDED
@@ -0,0 +1,207 @@
+# Chroma compatibility issue resolution
+# https://docs.trychroma.com/troubleshooting#sqlite
+__import__('pysqlite3')
+import sys
+sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
+
+from tempfile import NamedTemporaryFile
+
+import chainlit as cl
+from chainlit.types import AskFileResponse
+
+import chromadb
+from chromadb.config import Settings
+from langchain.chains import ConversationalRetrievalChain, RetrievalQAWithSourcesChain
+from langchain.chains.base import Chain
+from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders import PDFPlumberLoader
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.vectorstores.base import VectorStore
+
+from prompt import EXAMPLE_PROMPT, PROMPT, WELCOME_MESSAGE
+
+
+namespaces = set()
+
+
+def process_file(*, file: AskFileResponse) -> list:
+    if file.type != "application/pdf":
+        raise TypeError("Only PDF files are supported")
+
+    with NamedTemporaryFile() as tempfile:
+        tempfile.write(file.content)
+
+        ######################################################################
+        #
+        # 1. Load the PDF
+        #
+        ######################################################################
+        loader = PDFPlumberLoader(tempfile.name)
+
+        ######################################################################
+        documents = loader.load()
+
+        ######################################################################
+        #
+        # 2. Split the text
+        #
+        ######################################################################
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=3000,
+            chunk_overlap=100
+        )
+        ######################################################################
+
+        docs = text_splitter.split_documents(documents)
+
+        for i, doc in enumerate(docs):
+            doc.metadata["source"] = f"source_{i}"
+
+        if not docs:
+            raise ValueError("PDF file parsing failed.")
+
+        return docs
+
+
+def create_search_engine(*, file: AskFileResponse) -> VectorStore:
+    # Process and save data in the user session
+    docs = process_file(file=file)
+    cl.user_session.set("docs", docs)
+
+    ##########################################################################
+    #
+    # 3. Set the Encoder model for creating embeddings
+    #
+    ##########################################################################
+    encoder = OpenAIEmbeddings(
+        model="text-embedding-ada-002"
+    )
+    ##########################################################################
+
+    # Initialize Chromadb client and settings, reset to ensure we get a clean
+    # search engine
+    client = chromadb.EphemeralClient()
+    client_settings = Settings(
+        allow_reset=True,
+        anonymized_telemetry=False
+    )
+    search_engine = Chroma(
+        client=client,
+        client_settings=client_settings
+    )
+    search_engine._client.reset()
+
+    ##########################################################################
+    #
+    # 4. Create the document search engine. Remember to add
+    #    client_settings using the above settings.
+    #
+    ##########################################################################
+    search_engine = Chroma.from_documents(
+        client=client,
+        documents=docs,
+        embedding=encoder,
+        client_settings=client_settings
+    )
+    ##########################################################################
+
+    return search_engine
+
+
+@cl.on_chat_start
+async def start():
+    files = None
+    while files is None:
+        files = await cl.AskFileMessage(
+            content=WELCOME_MESSAGE,
+            accept=["application/pdf"],
+            max_size_mb=20,
+        ).send()
+
+    file = files[0]
+    msg = cl.Message(content=f"Processing `{file.name}`...")
+    await msg.send()
+
+    try:
+        search_engine = await cl.make_async(create_search_engine)(file=file)
+    except Exception as e:
+        await cl.Message(content=f"Error: {e}").send()
+        raise SystemError
+
+    llm = ChatOpenAI(
+        model='gpt-3.5-turbo-16k-0613',
+        temperature=0,
+        streaming=True
+    )
+
+    ##########################################################################
+    #
+    # 5. Create the chain / tool for RetrievalQAWithSourcesChain.
+    #
+    ##########################################################################
+    chain = RetrievalQAWithSourcesChain.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=search_engine.as_retriever(max_tokens_limit=4097),
+        ######################################################################
+        # 6. Customize prompts to improve summarization and question
+        #    answering performance. Perhaps create your own prompt in prompts.py?
+        ######################################################################
+        chain_type_kwargs={
+            "prompt": PROMPT,
+            "document_prompt": EXAMPLE_PROMPT
+        },
+    )
+    ##########################################################################
+
+    # await msg.update(content=f"`{file.name}` processed. You can now ask questions!")
+    msg.content = f"`{file.name}` processed. You can now ask questions!"
+    await msg.update()
+
+    cl.user_session.set("chain", chain)
+
+
+@cl.on_message
+async def main(message: cl.Message):
+    chain = cl.user_session.get("chain")  # type: RetrievalQAWithSourcesChain
+    cb = cl.AsyncLangchainCallbackHandler()
+    response = await chain.acall(message.content, callbacks=[cb])
+    answer = response["answer"]
+    sources = response["sources"].strip()
+    source_elements = []
+
+    # Get the documents from the user session
+    docs = cl.user_session.get("docs")
+    metadatas = [doc.metadata for doc in docs]
+    all_sources = [m["source"] for m in metadatas]
+
+    # Adding sources to the answer
+    if sources:
+        found_sources = []
+
+        # Add the sources to the message
+        for source in sources.split(","):
+            source_name = source.strip().replace(".", "")
+            # Get the index of the source
+            try:
+                index = all_sources.index(source_name)
+            except ValueError:
+                continue
+            text = docs[index].page_content
+            found_sources.append(source_name)
+            # Create the text element referenced in the message
+            source_elements.append(cl.Text(content=text, name=source_name))
+
+        if found_sources:
+            answer += f"\nSources: {', '.join(found_sources)}"
+        else:
+            answer += "\nNo sources found"
+
+    await cl.Message(content=answer, elements=source_elements).send()
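The source-matching loop in `main` is easier to see in isolation. A standalone sketch with hypothetical values (the `source_{i}` labels are the ones `process_file` writes into each chunk's metadata):

# The LLM ends its answer with a "SOURCES: ..." line; the handler splits it on
# commas, strips periods, and keeps only labels that match a known chunk.
sources = "source_0, source_2."
all_sources = ["source_0", "source_1", "source_2"]

found_sources = []
for source in sources.split(","):
    source_name = source.strip().replace(".", "")
    if source_name in all_sources:  # app.py uses list.index() plus try/except; same effect
        found_sources.append(source_name)

assert found_sources == ["source_0", "source_2"]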
app/prompt.py
ADDED
@@ -0,0 +1,26 @@
+# flake8: noqa
+from langchain.prompts import PromptTemplate
+
+WELCOME_MESSAGE = """\
+Welcome to the Introduction to LLM App Development sample PDF QA application!
+To get started:
+1. Upload a PDF file
+2. Ask any question about the file!
+"""
+
+template = """Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
+If you don't know the answer, just say that you don't know. Don't try to make up an answer.
+ALWAYS return a "SOURCES" field in your answer, with the format "SOURCES: <source1>, <source2>, <source3>, ...".
+
+QUESTION: {question}
+=========
+{summaries}
+=========
+FINAL ANSWER:"""
+
+PROMPT = PromptTemplate(template=template, input_variables=["summaries", "question"])
+
+EXAMPLE_PROMPT = PromptTemplate(
+    template="Content: {page_content}\nSource: {source}",
+    input_variables=["page_content", "source"],
+)
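To preview the exact string the chain sends to the model, the two templates can be formatted by hand. A sketch (run from the app/ directory so `prompt` is importable; the content and question values are made up):

from prompt import EXAMPLE_PROMPT, PROMPT

doc = EXAMPLE_PROMPT.format(page_content="Revenue was $13.51 billion.", source="source_0")
print(PROMPT.format(question="What was quarterly revenue?", summaries=doc))
# A well-behaved model answer would then end with "SOURCES: source_0"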
chainlit.md
ADDED
@@ -0,0 +1,8 @@
+# Welcome to your PDF QA Sample Application! 🚀🤖
+
+Hi Team! 👋 Congratulations on launching your first LLM application. This application is built using OpenAI, Langchain, Chainlit, and Chroma. Its goal is to provide a quick overview of the most basic archetype of an LLM application, together with its prototyping and debugging environment.
+
+## Useful Links 🔗
+
+- **Langchain Documentation:** Get started with [Langchain Documentation](https://python.langchain.com/) 📚
+- **Chainlit Documentation:** Get started with [Chainlit Documentation](https://docs.chainlit.io) 📚
requirements.txt
CHANGED
@@ -1 +1,9 @@
 # Specify Python package requirements for your project here (e.g., Mako==1.1.1). If your project doesn't require these, you can leave this file unchanged or delete it.
+openai==1.2.3
+langchain==0.0.334
+chainlit==0.7.501
+tiktoken==0.5.1
+pdfplumber==0.10.3
+chromadb==0.4.17
+pysqlite3-binary==0.5.2.post1
+ruff==0.1.5
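A quick import check for the pinned stack, as a sketch; run it inside the virtualenv after pip install -r requirements.txt (pysqlite3-binary is consumed indirectly by the sqlite3 swap at the top of app/app.py):

import langchain
import openai

print(langchain.__version__)  # expected: 0.0.334
print(openai.__version__)     # expected: 1.2.3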
sample_pdf/NVDA 2QFY24.pdf
ADDED
Binary file (85.3 kB).