Spaces:

towardsai-tutors
/

buster

Running

App Files Files Community

jerpint committed on Jul 16, 2023

Commit

e9698e9

0 Parent(s):

First commit

Browse files

Files changed (5) hide show

Procfile +1 -0
cfg.py +133 -0
gradio_app.py +115 -0
requirements.txt +2 -0
setup.sh +2 -0

Procfile ADDED Viewed

	@@ -0,0 +1 @@


1	+ web: source setup.sh && python gradio_app.py

cfg.py ADDED Viewed

	@@ -0,0 +1,133 @@

+import os
+import logging
+from huggingface_hub import hf_hub_download
+from buster.busterbot import Buster, BusterConfig
+from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
+from buster.formatters.documents import DocumentsFormatter
+from buster.formatters.prompts import PromptFormatter
+from buster.retriever import Retriever, SQLiteRetriever
+from buster.tokenizers import GPTTokenizer
+from buster.validators import QuestionAnswerValidator, Validator
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+HUB_TOKEN = os.getenv("HUB_TOKEN")
+REPO_ID = "jerpint/towardsai-buster-data"
+HUB_DB_FILE = "documents.db"
+logger.info(f"Downloading {HUB_DB_FILE} from hub...")
+hf_hub_download(
+    repo_id=REPO_ID,
+    repo_type="dataset",
+    filename=HUB_DB_FILE,
+    token=HUB_TOKEN,
+    local_dir=".",
+)
+buster_cfg = BusterConfig(
+    validator_cfg={
+        "unknown_response_templates": [
+            "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
+        ],
+        "unknown_threshold": 0.85,
+        "embedding_model": "text-embedding-ada-002",
+        "use_reranking": True,
+        "invalid_question_response": "This question does not seem relevant to my current knowledge.",
+        "check_question_prompt": """You are an chatbot answering questions on towardsAI, an artificial intelligence blogs.
+Users will be asking questions about the blog.
+Your job is to determine wether or not a question is a valid question to ask, and should be answered.
+More general questions are not considered valid, even if you might know the response.
+A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
+For example:
+Q: How can I setup my own chatbot?
+true
+Q: What is the meaning of life?
+false
+A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.""",
+        "completion_kwargs": {
+            "model": "gpt-3.5-turbo",
+            "stream": False,
+            "temperature": 0,
+        },
+    },
+    retriever_cfg={
+        "db_path": "./documents.db",
+        "top_k": 3,
+        "thresh": 0.7,
+        "max_tokens": 2000,
+        "embedding_model": "text-embedding-ada-002",
+    },
+    documents_answerer_cfg={
+        "no_documents_message": "No blog posts are available for this question.",
+    },
+    completion_cfg={
+        "completion_kwargs": {
+            "model": "gpt-3.5-turbo",
+            "stream": True,
+            "temperature": 0,
+        },
+    },
+    tokenizer_cfg={
+        "model_name": "gpt-3.5-turbo",
+    },
+    documents_formatter_cfg={
+        "max_tokens": 3500,
+        "formatter": "{content}",
+    },
+    prompt_formatter_cfg={
+        "max_tokens": 3500,
+        "text_before_docs": (
+            "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
+            "If the answer is in the documentation, summarize it in a helpful way to the user. "
+            "If it isn't, simply reply that you cannot answer the question. "
+            "Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
+            "Here is the documentation: "
+            "<DOCUMENTS> "
+        ),
+        "text_after_docs": (
+            "<\DOCUMENTS>\n"
+            "REMEMBER:\n"
+            "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
+            "Here are the rules you must follow:\n"
+            "1) You must only respond with information contained in the documentation above. Say you do not know if the information is not provided.\n"
+            "2) Make sure to format your answers in Markdown format, including code block and snippets.\n"
+            "3) Do not reference any links, urls or hyperlinks in your answers.\n"
+            "4) Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
+            "5) If you do not know the answer to a question, or if it is completely irrelevant to the library usage, simply reply with:\n"
+            "'I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?'"
+            "For example:\n"
+            "What is the meaning of life for a qa bot?\n"
+            "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?"
+            "Now answer the following question:\n"
+        ),
+    },
+)
+# initialize buster with the config in cfg.py (adapt to your needs) ...
+# buster_cfg = cfg.buster_cfg
+retriever: Retriever = SQLiteRetriever(**buster_cfg.retriever_cfg)
+tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
+document_answerer: DocumentAnswerer = DocumentAnswerer(
+    completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
+    documents_formatter=DocumentsFormatter(
+        tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
+    ),
+    prompt_formatter=PromptFormatter(
+        tokenizer=tokenizer, **buster_cfg.prompt_formatter_cfg
+    ),
+    **buster_cfg.documents_answerer_cfg,
+)
+validator: Validator = QuestionAnswerValidator(**buster_cfg.validator_cfg)
+buster: Buster = Buster(
+    retriever=retriever, document_answerer=document_answerer, validator=validator
+)

gradio_app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import os
+import cfg
+import gradio as gr
+import pandas as pd
+from cfg import buster
+import logging
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+USERNAME = os.getenv("BUSTER_USERNAME")
+PASSWORD = os.getenv("BUSTER_PASSWORD")
+def check_auth(username: str, password: str) -> bool:
+    valid_user = username == USERNAME
+    valid_password = password == PASSWORD
+    is_auth = valid_user and valid_password
+    logger.info(f"Log-in attempted by {username=}. {is_auth=}")
+    return is_auth
+def format_sources(matched_documents: pd.DataFrame) -> str:
+    if len(matched_documents) == 0:
+        return ""
+    documents_answer_template: str = "📝 Here are the sources I used to answer your question:\n\n{documents}\n\n{footnote}"
+    document_template: str = "[🔗 {document.title}]({document.url}), relevance: {document.similarity_to_answer:2.1f} %"
+    matched_documents.similarity_to_answer = (
+        matched_documents.similarity_to_answer * 100
+    )
+    documents = "\n".join(
+        [
+            document_template.format(document=document)
+            for _, document in matched_documents.iterrows()
+        ]
+    )
+    footnote: str = "I'm a bot 🤖 and not always perfect."
+    return documents_answer_template.format(documents=documents, footnote=footnote)
+def add_sources(history, completion):
+    if completion.answer_relevant:
+        formatted_sources = format_sources(completion.matched_documents)
+        history.append([None, formatted_sources])
+    return history
+def user(user_input, history):
+    """Adds user's question immediately to the chat."""
+    return "", history + [[user_input, None]]
+def chat(history):
+    user_input = history[-1][0]
+    completion = buster.process_input(user_input)
+    history[-1][1] = ""
+    for token in completion.answer_generator:
+        history[-1][1] += token
+        yield history, completion
+block = gr.Blocks(css="#chatbot .overflow-y-auto{height:500px}")
+with block:
+    with gr.Row():
+        gr.Markdown(
+            "<h3><center>Buster 🤖: A Question-Answering Bot for your documentation</center></h3>"
+        )
+    chatbot = gr.Chatbot()
+    with gr.Row():
+        question = gr.Textbox(
+            label="What's your question?",
+            placeholder="Ask a question to AI stackoverflow here...",
+            lines=1,
+        )
+        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
+    examples = gr.Examples(
+        examples=[
+            "How can I perform backpropagation?",
+            "How do I deal with noisy data?",
+            "How do I deal with noisy data in 2 words?",
+        ],
+        inputs=question,
+    )
+    gr.Markdown(
+        "This application uses GPT to search the docs for relevant info and answer questions."
+    )
+    response = gr.State()
+    submit.click(user, [question, chatbot], [question, chatbot], queue=False).then(
+        chat, inputs=[chatbot], outputs=[chatbot, response]
+    ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])
+    question.submit(user, [question, chatbot], [question, chatbot], queue=False).then(
+        chat, inputs=[chatbot], outputs=[chatbot, response]
+    ).then(add_sources, inputs=[chatbot, response], outputs=[chatbot])
+block.queue(concurrency_count=16)
+block.launch(debug=True, share=False, auth=check_auth)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ git+https://github.com/jerpint/[email protected]
2	+ gradio

setup.sh ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ export GRADIO_SERVER_NAME=0.0.0.0
2	+ export GRADIO_SERVER_PORT=$PORT