Spaces:

isayahc
/

Insurance-Assistant

Sleeping

App Files Files Community

isayahc committed on Dec 10, 2023

Commit

1a34160

verified ·

1 Parent(s): 5d7a6c0

changed embedding model

Browse files

Files changed (2) hide show

app.py +90 -23
requirements.txt +257 -0

app.py CHANGED Viewed

@@ -14,10 +14,22 @@ from langchain.vectorstores import Chroma
 from langchain.chains import RetrievalQA
 from langchain.document_loaders import PyPDFLoader
 from langchain.embeddings import HuggingFaceHubEmbeddings, OpenAIEmbeddings
 text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
@@ -27,6 +39,12 @@ flan_ul2 = OpenAI()
 global qa
 # embeddings = HuggingFaceHubEmbeddings()
@@ -34,48 +52,97 @@ global qa
 def loading_pdf():
     return "Loading..."
 def pdf_changes(pdf_doc):
-    embeddings = OpenAIEmbeddings()
     loader = PyPDFLoader(pdf_doc.name)
     documents = loader.load()
     texts = text_splitter.split_documents(documents)
     db = Chroma.from_documents(texts, embeddings)
     retriever = db.as_retriever()
-    prompt_template = """You have been given a pdf or pdfs. You must search these pdfs.
-    If you don't know the answer, just say that you don't know, don't try to make up an answer.
-    Only answer the question.
-    {context}
-    Question: {query}
-    Answer:"""
-    PROMPT = PromptTemplate(
-        template=prompt_template, input_variables=["context", "question"]
-    )
-    chain_type_kwargs = {"prompt": PROMPT}
     global qa
-    qa = RetrievalQA.from_chain_type(
-        llm=flan_ul2,
-        chain_type="stuff",
-        retriever=retriever,
-        return_source_documents=True,
-        chain_type_kwargs=chain_type_kwargs,
-    )
     return "Ready"
 def add_text(history, text):
     history = history + [(text, None)]
     return history, ""
 def bot(history):
-    response = infer(history[-1][0])
-    history[-1][1] = response['result']
-    return history
-def infer(question):
     query = question
-    result = qa({"query": query})
     return result

 from langchain.chains import RetrievalQA
 from langchain.document_loaders import PyPDFLoader
+from langchain.memory import VectorStoreRetrieverMemory
+from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.memory import ConversationBufferMemory
+from langchain.embeddings import CohereEmbeddings
 from langchain.embeddings import HuggingFaceHubEmbeddings, OpenAIEmbeddings
+import dotenv
+import os
+dotenv.load_dotenv()
 text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
 global qa
 # embeddings = HuggingFaceHubEmbeddings()
+COHERE_API_KEY = os.getenv("COHERE_API_KEY")
+embeddings = CohereEmbeddings(
+    model="embed-english-light-v3.0",
+    cohere_api_key=COHERE_API_KEY
+)
 def loading_pdf():
     return "Loading..."
 def pdf_changes(pdf_doc):
+    # embeddings = OpenAIEmbeddings()
+    # embeddings = HuggingFaceHubEmbeddings()
+    embeddings = CohereEmbeddings(
+    model="embed-english-light-v3.0",
+    # cohere_api_key=COHERE_API_KEY
+)
     loader = PyPDFLoader(pdf_doc.name)
     documents = loader.load()
     texts = text_splitter.split_documents(documents)
     db = Chroma.from_documents(texts, embeddings)
     retriever = db.as_retriever()
+    # memory = VectorStoreRetrieverMemory(retriever=retriever)
+    memory = ConversationBufferMemory(memory_key="chat_history", input_key="human_input")
+    # prompt_template = """You have been given a pdf or pdfs. You must search these pdfs.
+    # If you don't know the answer, just say that you don't know, don't try to make up an answer.
+    # Only answer the question.
+    # Question: {query}
+    # Answer:"""
+    # PROMPT = PromptTemplate(
+    #     template=prompt_template, input_variables=["context", "question"]
+    # )
+    # template = """You are a chatbot having a conversation with a human.\n\nGiven the following extracted parts of a long document and a question, create a final answer.\n\n{context}\n\n{chat_history}\nHuman: {human_input}\nChatbot:"""
+    template = """
+You are the friendly documentation buddy Arti, who helps the Human in using RAY, the open-source unified framework for scaling AI and Python applications.\
+    Use the following context (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) to answer the question :
+------
+<ctx>
+{context}
+</ctx>
+------
+<hs>
+{history}
+</hs>
+------
+{question}
+Answer:
+"""
+    prompt = PromptTemplate(input_variables=["chat_history", "human_input", "context"], template=template)
+    chain_type_kwargs = {"prompt": prompt}
     global qa
+    # qa = RetrievalQA.from_chain_type(
+    #     llm=flan_ul2,
+    #     memory=memory,
+    #     chain_type="stuff",
+    #     retriever=retriever,
+    #     return_source_documents=True,
+    #     chain_type_kwargs=chain_type_kwargs,
+    # )
+    prompt = PromptTemplate(
+    input_variables=["history", "context", "question"],
+    template=template,
+)
+    memory = ConversationBufferMemory(memory_key="history", input_key="question")
+    qa = RetrievalQAWithSourcesChain.from_chain_type(llm=flan_ul2, retriever=retriever, return_source_documents=True, verbose=True, chain_type_kwargs={
+        "verbose": True,
+        "memory": memory,
+        "prompt": prompt,
+        "document_variable_name": "context"
+    }
+        )
     return "Ready"
 def add_text(history, text):
     history = history + [(text, None)]
     return history, ""
+# def bot(history):
+#     response = infer(history[-1][0])
+#     history[-1][1] = response['result']
+#     return history
 def bot(history):
+    response = infer(history[-1][0], history)
+    sources = [doc.metadata.get("source") for doc in response['source_documents']]
+    src_list = '\n'.join(sources)
+    print_this = response['answer'] + "\n\n\n Sources: \n\n\n" + src_list
+def infer(question, history):
     query = question
+    # result = qa({"query": query, "context":""})
+    # result = qa({"query": query, })
+    result = qa({"query": query, "history": history, "question": question})
     return result

requirements.txt CHANGED Viewed

@@ -500,3 +500,260 @@ xkit==0.0.0
 yarl==1.9.2
 yaspin==3.0.1
 zipp==1.0.0

 yarl==1.9.2
 yaspin==3.0.1
 zipp==1.0.0
+aiofiles==23.2.1
+aiohttp==3.9.1
+aiosignal==1.3.1
+aiostream==0.5.2
+altair==5.1.2
+annotated-types==0.5.0
+anyio==3.7.1
+appdirs==1.4.4
+argcomplete==1.8.1
+astor==0.8.1
+asttokens==2.4.1
+async-timeout==4.0.3
+asyncer==0.0.2
+attrs==23.1.0
+auth0-python==4.4.2
+Babel==2.8.0
+backoff==2.2.1
+beautiful-date==2.2.1
+beautifulsoup4==4.12.2
+bidict==0.22.1
+blessed==1.20.0
+blinker==1.4
+Brotli==1.0.9
+CacheControl==0.12.10
+cachetools==5.3.1
+cachy==0.3.0
+certifi==2023.7.22
+cffi==1.15.1
+chardet==4.0.0
+charset-normalizer==3.2.0
+cleo==0.8.1
+click==8.1.7
+clikit==0.6.2
+cohere==4.37
+colorama==0.4.4
+comm==0.2.0
+command-not-found==0.3
+contourpy==1.2.0
+crashtest==0.3.1
+cryptography==41.0.3
+cycler==0.12.1
+dataclasses-json==0.5.14
+dbus-python==1.2.18
+debugpy==1.8.0
+decorator==5.1.1
+Deprecated==1.2.14
+distlib==0.3.4
+distro==1.7.0
+distro-info==1.1+ubuntu0.1
+exceptiongroup==1.1.3
+executing==2.0.1
+fastapi==0.104.1
+fastapi-socketio==0.0.10
+fastavro==1.9.1
+ffmpy==0.3.1
+filelock==3.6.0
+filetype==1.2.0
+fonttools==4.44.3
+frozenlist==1.4.0
+fsspec==2023.10.0
+gcsa==2.1.0
+gdown==4.7.1
+git-python==1.0.3
+gitdb==4.0.11
+GitPython==3.1.40
+google-api-core==2.11.1
+google-api-python-client==2.99.0
+google-auth==2.23.0
+google-auth-httplib2==0.1.1
+google-auth-oauthlib==0.8.0
+googleapis-common-protos==1.60.0
+gradio==4.4.1
+gradio_client==0.7.0
+graphviz==0.14.2
+greenlet==2.0.2
+grpcio==1.58.0
+gyp==0.1
+h11==0.14.0
+html2image==2.0.4.3
+html5lib==1.1
+httpcore==0.18.0
+httplib2==0.20.2
+httpx==0.25.0
+huggingface-hub==0.19.4
+idna==3.4
+importlib-metadata==6.8.0
+importlib-resources==6.1.1
+inquirer==3.1.4
+ipykernel==6.26.0
+ipython==8.18.0
+jedi==0.19.1
+jeepney==0.7.1
+Jinja2==3.1.2
+joblib==1.3.2
+jsonschema==4.19.2
+jsonschema-specifications==2023.7.1
+jupyter_client==8.6.0
+jupyter_core==5.5.0
+keyring==21.8.0
+kiwisolver==1.4.5
+langchain==0.0.281
+langsmith==0.0.33
+launchpadlib==1.10.16
+Lazify==0.4.0
+lazr.restfulclient==0.14.4
+lazr.uri==1.0.6
+litellm==0.13.2
+livereload==2.6.3
+llama-index==0.9.13
+lockfile==0.12.2
+Markdown==3.3.6
+markdown-it-py==3.0.0
+MarkupSafe==2.0.1
+marshmallow==3.20.1
+matplotlib==3.8.2
+matplotlib-inline==0.1.6
+mdurl==0.1.2
+mkdocs==1.1.2
+more-itertools==8.10.0
+msgpack==1.0.3
+multidict==6.0.4
+mutagen==1.45.1
+mypy-extensions==1.0.0
+nest-asyncio==1.5.8
+netifaces==0.11.0
+nltk==3.8.1
+nodeenv==1.8.0
+numexpr==2.8.5
+numpy==1.25.2
+oauthlib==3.2.0
+open-interpreter==0.1.15
+openai==1.3.8
+openapi-schema-pydantic==1.2.4
+opentelemetry-api==1.20.0
+opentelemetry-exporter-otlp==1.20.0
+opentelemetry-exporter-otlp-proto-common==1.20.0
+opentelemetry-exporter-otlp-proto-grpc==1.20.0
+opentelemetry-exporter-otlp-proto-http==1.20.0
+opentelemetry-instrumentation==0.40b0
+opentelemetry-proto==1.20.0
+opentelemetry-sdk==1.20.0
+opentelemetry-semantic-conventions==0.41b0
+orjson==3.9.10
+packaging==20.9
+pandas==2.1.3
+parso==0.8.3
+pastel==0.2.1
+pexpect==4.8.0
+Pillow==10.1.0
+pipdeptree==2.2.0
+pkginfo==1.8.2
+platformdirs==2.5.1
+poetry==1.1.12
+poetry-core==1.0.7
+prisma==0.10.0
+prompt-toolkit==3.0.41
+protobuf==4.24.3
+psutil==5.9.6
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyarrow==14.0.1
+pyasn1==0.5.0
+pyasn1-modules==0.3.0
+pycparser==2.21
+pycryptodomex==3.11.0
+pydantic==2.5.2
+pydantic_core==2.14.5
+pydeck==0.8.1b0
+pydub==0.25.1
+PyGithub==2.1.1
+Pygments==2.16.1
+PyGObject==3.42.1
+pyinotify==0.9.6
+PyJWT==2.8.0
+pylev==1.2.0
+PyNaCl==1.5.0
+pyOpenSSL==23.2.0
+pyparsing==2.4.7
+PySocks==1.7.1
+python-apt==2.4.0+ubuntu2
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-editor==1.0.4
+python-engineio==4.7.0
+python-graphql-client==0.4.3
+python-multipart==0.0.6
+python-socketio==5.9.0
+pytz==2022.1
+pytz-deprecation-shim==0.1.0.post0
+pyxattr==0.7.2
+PyYAML==6.0.1
+pyzmq==25.1.1
+readchar==4.0.5
+referencing==0.30.2
+regex==2023.10.3
+requests==2.31.0
+requests-oauthlib==1.3.1
+requests-toolbelt==0.9.1
+rich==13.6.0
+rpds-py==0.12.0
+rsa==4.9
+screen-resolution-extra==0.0.0
+SecretStorage==3.3.1
+semantic-version==2.10.0
+shellingham==1.4.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.0
+soupsieve==2.5
+speedtest-cli==2.1.3
+SQLAlchemy==2.0.20
+stack-data==0.6.3
+starlette==0.27.0
+streamlit==1.28.1
+syncer==2.0.3
+systemd-python==234
+tenacity==8.2.3
+termcolor==2.3.0
+tiktoken==0.4.0
+tokenizers==0.15.0
+tokentrim==0.1.13
+toml==0.10.2
+tomli==2.0.1
+tomlkit==0.12.0
+toolz==0.12.0
+tornado==6.3.3
+tqdm==4.66.1
+traitlets==5.13.0
+typer==0.9.0
+typing-inspect==0.9.0
+typing_extensions==4.8.0
+tzdata==2023.3
+tzlocal==4.3.1
+ubuntu-advantage-tools==8001
+ubuntu-drivers-common==0.0.0
+ufw==0.36.1
+unattended-upgrades==0.1
+uptrace==1.20.0
+uritemplate==4.1.1
+urllib3==2.0.4
+userpath==1.8.0
+uvicorn==0.23.2
+validators==0.22.0
+virtualenv==20.13.0+ds
+wadllib==1.3.6
+watchdog==3.0.0
+watchfiles==0.20.0
+wcwidth==0.2.12
+webencodings==0.5.1
+websocket-client==1.6.4
+websockets==11.0.3
+wget==3.2
+wrapt==1.15.0
+xkit==0.0.0
+yarl==1.9.2
+yaspin==3.0.1
+zipp==1.0.0