jerpint committed on
Commit
c8a1687
·
unverified ·
1 Parent(s): 1b88635

Support for ChatGPT (#65)

Browse files

* Rename Chatbot class to Buster

* move gradio app to .py file

* add new completers class

* add OOD test

* update apps

* update OOD prompt

* Update prompt engineering

buster/apps/gradio_app.ipynb DELETED
@@ -1,138 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "id": "4a6b2b70",
7
- "metadata": {
8
- "scrolled": true
9
- },
10
- "outputs": [],
11
- "source": [
12
- "%load_ext autoreload\n",
13
- "%autoreload 2\n",
14
- "\n",
15
- "import gradio as gr\n",
16
- "\n",
17
- "from buster.chatbot import Chatbot, ChatbotConfig\n",
18
- "\n",
19
- "hf_transformers_cfg = ChatbotConfig(\n",
20
- " documents_file=\"../data/document_embeddings_huggingface.tar.gz\",\n",
21
- " unknown_prompt=\"This doesn't seem to be related to the huggingface library. I am not sure how to answer.\",\n",
22
- " embedding_model=\"text-embedding-ada-002\",\n",
23
- " top_k=3,\n",
24
- " thresh=0.7,\n",
25
- " max_words=3000,\n",
26
- " completion_kwargs={\n",
27
- " \"engine\": \"text-davinci-003\",\n",
28
- " \"max_tokens\": 500,\n",
29
- " },\n",
30
- " link_format=\"gradio\",\n",
31
- " response_footnote=\"I'm a bot 🤖 trained to answer huggingface 🤗 transformers questions. My answers aren't always perfect.\",\n",
32
- " text_before_prompt=\"\"\"You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n",
33
- "Make sure to format your answers in Markdown format, including code block and snippets.\n",
34
- "Do not include any links to urls or hyperlinks in your answers.\n",
35
- "\n",
36
- "If you do not know the answer to a question, or if it is completely irrelevant to the library usage, simply reply with:\n",
37
- "\n",
38
- "'This doesn't seem to be related to the huggingface library.'\n",
39
- "\n",
40
- "For example:\n",
41
- "\n",
42
- "What is the meaning of life for huggingface?\n",
43
- "\n",
44
- "This doesn't seem to be related to the huggingface library.\n",
45
- "\n",
46
- "Now answer the following question:\n",
47
- "\"\"\",\n",
48
- ")\n",
49
- "hf_transformers_chatbot = Chatbot(hf_transformers_cfg)\n",
50
- "\n",
51
- "def chat(question, history):\n",
52
- " history = history or []\n",
53
- " \n",
54
- " answer = hf_transformers_chatbot.process_input(question)\n",
55
- " \n",
56
- " # formatting hack for code blocks to render properly every time\n",
57
- " answer = answer.replace(\"```\", \"\\n```\\n\")\n",
58
- "\n",
59
- " history.append((question, answer))\n",
60
- " return history, history\n",
61
- "\n",
62
- "\n",
63
- "\n",
64
- "block = gr.Blocks(css=\".gradio-container {background-color: lightgray}\")\n",
65
- "\n",
66
- "with block:\n",
67
- " with gr.Row():\n",
68
- " gr.Markdown(\"<h3><center>Buster 🤖: A Question-Answering Bot for Huggingface 🤗 Transformers </center></h3>\")\n",
69
- "\n",
70
- "\n",
71
- " chatbot = gr.Chatbot()\n",
72
- "\n",
73
- " with gr.Row():\n",
74
- " message = gr.Textbox(\n",
75
- " label=\"What's your question?\",\n",
76
- " placeholder=\"What kind of model should I use for sentiment analysis?\",\n",
77
- " lines=1,\n",
78
- " )\n",
79
- " submit = gr.Button(value=\"Send\", variant=\"secondary\").style(full_width=False)\n",
80
- "\n",
81
- " gr.Examples(\n",
82
- " examples=[\n",
83
- " \"What kind of models should I use for images and text?\",\n",
84
- " \"When should I finetune a model vs. training it form scratch?\",\n",
85
- " \"How can I deploy my trained huggingface model?\",\n",
86
- " \"Can you give me some python code to quickly finetune a model on my sentiment analysis dataset?\",\n",
87
- " ],\n",
88
- " inputs=message,\n",
89
- " )\n",
90
- "\n",
91
- " gr.Markdown(\n",
92
- " \"\"\"This simple application uses GPT to search the huggingface 🤗 transformers docs and answer questions.\n",
93
- " For more info on huggingface transformers view the [full documentation.](https://huggingface.co/docs/transformers/index).\"\"\" \n",
94
- " )\n",
95
- "\n",
96
- "\n",
97
- " gr.HTML(\n",
98
- " \"️<center> Created with ❤️ by @jerpint and @hadrienbertrand\"\n",
99
- " )\n",
100
- "\n",
101
- " state = gr.State()\n",
102
- " agent_state = gr.State()\n",
103
- "\n",
104
- " submit.click(chat, inputs=[message, state], outputs=[chatbot, state])\n",
105
- " message.submit(chat, inputs=[message, state], outputs=[chatbot, state])\n",
106
- "\n",
107
- "\n",
108
- "block.launch(debug=True)"
109
- ]
110
- }
111
- ],
112
- "metadata": {
113
- "kernelspec": {
114
- "display_name": "buster",
115
- "language": "python",
116
- "name": "python3"
117
- },
118
- "language_info": {
119
- "codemirror_mode": {
120
- "name": "ipython",
121
- "version": 3
122
- },
123
- "file_extension": ".py",
124
- "mimetype": "text/x-python",
125
- "name": "python",
126
- "nbconvert_exporter": "python",
127
- "pygments_lexer": "ipython3",
128
- "version": "3.10.9"
129
- },
130
- "vscode": {
131
- "interpreter": {
132
- "hash": "bfa91706490f6a3314a87f4853806d905e46027cd889e58fcad4739e8600f624"
133
- }
134
- }
135
- },
136
- "nbformat": 4,
137
- "nbformat_minor": 5
138
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
buster/apps/gradio_app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from buster.buster import Buster, BusterConfig
4
+
5
+ buster_cfg = BusterConfig(
6
+ documents_file="../data/document_embeddings_huggingface.tar.gz",
7
+ unknown_prompt="I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
8
+ embedding_model="text-embedding-ada-002",
9
+ top_k=3,
10
+ thresh=0.7,
11
+ max_words=3000,
12
+ completer_cfg={
13
+ "name": "ChatGPT",
14
+ "text_before_prompt": (
15
+ """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python. """
16
+ """Make sure to format your answers in Markdown format, including code block and snippets. """
17
+ """Do not include any links to urls or hyperlinks in your answers. """
18
+ """If you do not know the answer to a question, or if it is completely irrelevant to the library usage, let the user know you cannot answer with this response:\n"""
19
+ """'I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?'"""
20
+ """For example:\n"""
21
+ """What is the meaning of life for huggingface?\n"""
22
+ """I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?"""
23
+ """Now answer the following question:\n"""
24
+ ),
25
+ "text_before_documents": "Only use these documents as reference:\n",
26
+ "completion_kwargs": {
27
+ "model": "gpt-3.5-turbo",
28
+ },
29
+ },
30
+ response_format="gradio",
31
+ )
32
+ buster = Buster(buster_cfg)
33
+
34
+
35
+ def chat(question, history):
36
+ history = history or []
37
+
38
+ answer = buster.process_input(question)
39
+
40
+ # formatting hack for code blocks to render properly every time
41
+ answer = answer.replace("```", "\n```\n")
42
+
43
+ history.append((question, answer))
44
+ return history, history
45
+
46
+
47
+ block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
48
+
49
+ with block:
50
+ with gr.Row():
51
+ gr.Markdown("<h3><center>Buster 🤖: A Question-Answering Bot for Huggingface 🤗 Transformers </center></h3>")
52
+
53
+ chatbot = gr.Chatbot()
54
+
55
+ with gr.Row():
56
+ message = gr.Textbox(
57
+ label="What's your question?",
58
+ placeholder="What kind of model should I use for sentiment analysis?",
59
+ lines=1,
60
+ )
61
+ submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
62
+
63
+ gr.Examples(
64
+ examples=[
65
+ "What kind of models should I use for images and text?",
66
+ "When should I finetune a model vs. training it form scratch?",
67
+ "How can I deploy my trained huggingface model?",
68
+ "Can you give me some python code to quickly finetune a model on my sentiment analysis dataset?",
69
+ ],
70
+ inputs=message,
71
+ )
72
+
73
+ gr.Markdown(
74
+ """This simple application uses GPT to search the huggingface 🤗 transformers docs and answer questions.
75
+ For more info on huggingface transformers view the [full documentation.](https://huggingface.co/docs/transformers/index)."""
76
+ )
77
+
78
+ gr.HTML("️<center> Created with ❤️ by @jerpint and @hadrienbertrand")
79
+
80
+ state = gr.State()
81
+ agent_state = gr.State()
82
+
83
+ submit.click(chat, inputs=[message, state], outputs=[chatbot, state])
84
+ message.submit(chat, inputs=[message, state], outputs=[chatbot, state])
85
+
86
+
87
+ block.launch(debug=True)
buster/apps/slackbot.py CHANGED
@@ -3,7 +3,7 @@ import os
3
 
4
  from slack_bolt import App
5
 
6
- from buster.chatbot import Chatbot, ChatbotConfig
7
 
8
  logger = logging.getLogger(__name__)
9
  logging.basicConfig(level=logging.INFO)
@@ -14,137 +14,122 @@ ORION_CHANNEL = "C04LYHGUYB0"
14
  PYTORCH_CHANNEL = "C04MEK6N882"
15
  HF_TRANSFORMERS_CHANNEL = "C04NJNCJWHE"
16
 
17
- mila_doc_cfg = ChatbotConfig(
18
  documents_file="../data/document_embeddings_mila.tar.gz",
19
  unknown_prompt="This doesn't seem to be related to cluster usage.",
20
  embedding_model="text-embedding-ada-002",
21
  top_k=3,
22
  thresh=0.7,
23
  max_words=3000,
24
- completion_kwargs={
25
- "engine": "text-davinci-003",
26
- "max_tokens": 200,
27
- },
28
- separator="\n",
29
  response_format="slack",
30
- response_footnote="""I'm a bot 🤖 and not always perfect.
31
- For more info, view the full documentation here (https://docs.mila.quebec/) or contact [email protected]
32
- """,
33
- text_before_prompt="""
34
- You are a slack chatbot assistant answering technical questions about a cluster.
35
- Make sure to format your answers in Markdown format, including code block and snippets.
36
- Do not include any links to urls or hyperlinks in your answers.
37
-
38
- If you do not know the answer to a question, or if it is completely irrelevant to cluster usage, simply reply with:
39
-
40
- 'This doesn't seem to be related to cluster usage.'
41
-
42
- For example:
43
-
44
- What is the meaning of life on the cluster?
45
-
46
- This doesn't seem to be related to cluster usage.
47
-
48
- Now answer the following question:
49
- """,
50
  )
51
- mila_doc_chatbot = Chatbot(mila_doc_cfg)
52
 
53
- orion_cfg = ChatbotConfig(
54
  documents_file="../data/document_embeddings_orion.tar.gz",
55
  unknown_prompt="This doesn't seem to be related to the orion library. I am not sure how to answer.",
56
  embedding_model="text-embedding-ada-002",
57
  top_k=3,
58
  thresh=0.7,
59
  max_words=3000,
60
- completion_kwargs={
61
- "engine": "text-davinci-003",
62
- "max_tokens": 200,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  },
64
- separator="\n",
65
  response_format="slack",
66
- text_before_prompt="""You are a slack chatbot assistant answering technical questions about orion, a hyperparameter optimization library written in python.
67
- Make sure to format your answers in Markdown format, including code block and snippets.
68
- Do not include any links to urls or hyperlinks in your answers.
69
-
70
- If you do not know the answer to a question, or if it is completely irrelevant to the library usage, simply reply with:
71
-
72
- 'This doesn't seem to be related to the orion library.'
73
-
74
- For example:
75
-
76
- What is the meaning of life for orion?
77
-
78
- This doesn't seem to be related to the orion library.
79
-
80
- Now answer the following question:
81
- """,
82
  )
83
- orion_chatbot = Chatbot(orion_cfg)
84
 
85
- pytorch_cfg = ChatbotConfig(
86
  documents_file="../data/document_embeddings_pytorch.tar.gz",
87
  unknown_prompt="This doesn't seem to be related to the pytorch library. I am not sure how to answer.",
88
  embedding_model="text-embedding-ada-002",
89
  top_k=3,
90
  thresh=0.7,
91
  max_words=3000,
92
- completion_kwargs={
93
- "engine": "text-davinci-003",
94
- "max_tokens": 500,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  },
96
- separator="\n",
97
  response_format="slack",
98
- text_before_prompt="""You are a slack chatbot assistant answering technical questions about pytorch, a library to train neural networks written in python.
99
- Make sure to format your answers in Markdown format, including code block and snippets.
100
- Do not include any links to urls or hyperlinks in your answers.
101
-
102
- If you do not know the answer to a question, or if it is completely irrelevant to the library usage, simply reply with:
103
-
104
- 'This doesn't seem to be related to the pytorch library.'
105
-
106
- For example:
107
-
108
- What is the meaning of life for pytorch?
109
-
110
- This doesn't seem to be related to the pytorch library.
111
-
112
- Now answer the following question:
113
- """,
114
  )
115
- pytorch_chatbot = Chatbot(pytorch_cfg)
116
 
117
- hf_transformers_cfg = ChatbotConfig(
118
  documents_file="../data/document_embeddings_huggingface.tar.gz",
119
- unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
120
  embedding_model="text-embedding-ada-002",
121
  top_k=3,
122
  thresh=0.7,
123
  max_words=3000,
124
- completion_kwargs={
125
- "engine": "text-davinci-003",
126
- "max_tokens": 500,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  },
128
- separator="\n",
129
  response_format="slack",
130
- text_before_prompt="""You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.
131
- Make sure to format your answers in Markdown format, including code block and snippets.
132
- Do not include any links to urls or hyperlinks in your answers.
133
-
134
- If you do not know the answer to a question, or if it is completely irrelevant to the library usage, simply reply with:
135
-
136
- 'This doesn't seem to be related to the huggingface library.'
137
-
138
- For example:
139
-
140
- What is the meaning of life for huggingface?
141
-
142
- This doesn't seem to be related to the huggingface library.
143
-
144
- Now answer the following question:
145
- """,
146
  )
147
- hf_transformers_chatbot = Chatbot(hf_transformers_cfg)
148
 
149
  # TODO: eventually move this to a factory of sorts
150
  # Put all the bots in a dict by channel
 
3
 
4
  from slack_bolt import App
5
 
6
+ from buster.buster import Buster, BusterConfig
7
 
8
  logger = logging.getLogger(__name__)
9
  logging.basicConfig(level=logging.INFO)
 
14
  PYTORCH_CHANNEL = "C04MEK6N882"
15
  HF_TRANSFORMERS_CHANNEL = "C04NJNCJWHE"
16
 
17
+ mila_doc_cfg = BusterConfig(
18
  documents_file="../data/document_embeddings_mila.tar.gz",
19
  unknown_prompt="This doesn't seem to be related to cluster usage.",
20
  embedding_model="text-embedding-ada-002",
21
  top_k=3,
22
  thresh=0.7,
23
  max_words=3000,
 
 
 
 
 
24
  response_format="slack",
25
+ completer_cfg={
26
+ "name": "ChatGPT",
27
+ "text_before_prompt": (
28
+ """You are a slack chatbot assistant answering technical questions about the mila cluster. """
29
+ """Make sure to format your answers in Markdown format, including code block and snippets. """
30
+ """Do not include any links to urls or hyperlinks in your answers. """
31
+ """If you do not know the answer to a question, or if it is completely irrelevant to the library usage, simply reply with: """
32
+ """'This doesn't seem to be related to the pytorch library.'\n"""
33
+ """For example:\n"""
34
+ """What is the meaning of life for pytorch?\n"""
35
+ """This doesn't seem to be related to the pytorch library.\n"""
36
+ """Now answer the following question:\n"""
37
+ ),
38
+ "text_before_documents": "Only use these documents as reference:\n",
39
+ "completion_kwargs": {
40
+ "model": "gpt-3.5-turbo",
41
+ },
42
+ },
 
 
43
  )
44
+ mila_doc_chatbot = Buster(mila_doc_cfg)
45
 
46
+ orion_cfg = BusterConfig(
47
  documents_file="../data/document_embeddings_orion.tar.gz",
48
  unknown_prompt="This doesn't seem to be related to the orion library. I am not sure how to answer.",
49
  embedding_model="text-embedding-ada-002",
50
  top_k=3,
51
  thresh=0.7,
52
  max_words=3000,
53
+ completer_cfg={
54
+ "name": "ChatGPT",
55
+ "text_before_prompt": (
56
+ """You are a slack chatbot assistant answering technical questions about orion, a hyperparameter optimization library written in python. """
57
+ """Make sure to format your answers in Markdown format, including code block and snippets. """
58
+ """Do not include any links to urls or hyperlinks in your answers. """
59
+ """If you do not know the answer to a question, or if it is completely irrelevant to the library usage, simply reply with: """
60
+ """'This doesn't seem to be related to the pytorch library.'\n"""
61
+ """For example:\n"""
62
+ """What is the meaning of life for pytorch?\n"""
63
+ """This doesn't seem to be related to the pytorch library.\n"""
64
+ """Now answer the following question:\n"""
65
+ ),
66
+ "text_before_documents": "Only use these documents as reference:\n",
67
+ "completion_kwargs": {
68
+ "model": "gpt-3.5-turbo",
69
+ },
70
  },
 
71
  response_format="slack",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  )
73
+ orion_chatbot = Buster(orion_cfg)
74
 
75
+ pytorch_cfg = BusterConfig(
76
  documents_file="../data/document_embeddings_pytorch.tar.gz",
77
  unknown_prompt="This doesn't seem to be related to the pytorch library. I am not sure how to answer.",
78
  embedding_model="text-embedding-ada-002",
79
  top_k=3,
80
  thresh=0.7,
81
  max_words=3000,
82
+ completer_cfg={
83
+ "name": "ChatGPT",
84
+ "text_before_prompt": (
85
+ """You are a slack chatbot assistant answering technical questions about pytorch, a library to train neural networks written in python. """
86
+ """Make sure to format your answers in Markdown format, including code block and snippets. """
87
+ """Do not include any links to urls or hyperlinks in your answers. """
88
+ """If you do not know the answer to a question, or if it is completely irrelevant to the library usage, simply reply with: """
89
+ """'This doesn't seem to be related to the pytorch library.'\n"""
90
+ """For example:\n"""
91
+ """What is the meaning of life for pytorch?\n"""
92
+ """This doesn't seem to be related to the pytorch library.\n"""
93
+ """Now answer the following question:\n"""
94
+ ),
95
+ "text_before_documents": "Only use these documents as reference:\n",
96
+ "completion_kwargs": {
97
+ "model": "gpt-3.5-turbo",
98
+ },
99
  },
 
100
  response_format="slack",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  )
102
+ pytorch_chatbot = Buster(pytorch_cfg)
103
 
104
+ hf_transformers_cfg = BusterConfig(
105
  documents_file="../data/document_embeddings_huggingface.tar.gz",
106
+ unknown_prompt="I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
107
  embedding_model="text-embedding-ada-002",
108
  top_k=3,
109
  thresh=0.7,
110
  max_words=3000,
111
+ completer_cfg={
112
+ "name": "ChatGPT",
113
+ "text_before_prompt": (
114
+ """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python. """
115
+ """Make sure to format your answers in Markdown format, including code block and snippets. """
116
+ """Do not include any links to urls or hyperlinks in your answers. """
117
+ """If you do not know the answer to a question, or if it is completely irrelevant to the library usage, let the user know you cannot answer. """
118
+ """For example:\n"""
119
+ """What is the meaning of life for huggingface?\n"""
120
+ """This doesn't seem to be related to the huggingface library.\n"""
121
+ """I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?"""
122
+ """"""
123
+ """Now answer the following question:\n"""
124
+ ),
125
+ "text_before_documents": "Only use these documents as reference:\n",
126
+ "completion_kwargs": {
127
+ "model": "gpt-3.5-turbo",
128
+ },
129
  },
 
130
  response_format="slack",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  )
132
+ hf_transformers_chatbot = Buster(hf_transformers_cfg)
133
 
134
  # TODO: eventually move this to a factory of sorts
135
  # Put all the bots in a dict by channel
buster/{chatbot.py → buster.py} RENAMED
@@ -1,14 +1,11 @@
1
  import logging
2
- import os
3
  from dataclasses import dataclass, field
4
- from typing import Iterable
5
 
6
  import numpy as np
7
- import openai
8
  import pandas as pd
9
- import promptlayer
10
  from openai.embeddings_utils import cosine_similarity, get_embedding
11
 
 
12
  from buster.documents import get_documents_manager_from_extension
13
  from buster.formatter import (
14
  Response,
@@ -20,19 +17,9 @@ from buster.formatter import (
20
  logger = logging.getLogger(__name__)
21
  logging.basicConfig(level=logging.INFO)
22
 
23
- # Check if an API key exists for promptlayer, if it does, use it
24
- promptlayer_api_key = os.environ.get("PROMPTLAYER_API_KEY")
25
- if promptlayer_api_key:
26
- logger.info("Enabling prompt layer...")
27
- promptlayer.api_key = promptlayer_api_key
28
-
29
- # replace openai with the promptlayer wrapper
30
- openai = promptlayer.openai
31
- openai.api_key = os.environ.get("OPENAI_API_KEY")
32
-
33
 
34
  @dataclass
35
- class ChatbotConfig:
36
  """Configuration object for a chatbot.
37
 
38
  documents_csv: Path to the csv file containing the documents and their embeddings.
@@ -54,29 +41,32 @@ class ChatbotConfig:
54
  thresh: float = 0.7
55
  max_words: int = 3000
56
  unknown_threshold: float = 0.9 # set to 0 to deactivate
57
-
58
- completion_kwargs: dict = field(
59
  default_factory=lambda: {
60
- "engine": "text-davinci-003",
61
- "max_tokens": 200,
62
- "temperature": None,
63
- "top_p": None,
64
- "frequency_penalty": 1,
65
- "presence_penalty": 1,
 
 
 
 
 
66
  }
67
  )
68
- separator: str = "\n"
69
  response_format: str = "slack"
70
  unknown_prompt: str = "I Don't know how to answer your question."
71
- text_before_documents: str = "You are a chatbot answering questions.\n"
72
- text_before_prompt: str = "Answer the following question:\n"
73
  response_footnote: str = "I'm a bot 🤖 and not always perfect."
74
 
75
 
76
- class Chatbot:
77
- def __init__(self, cfg: ChatbotConfig):
78
  # TODO: right now, the cfg is being passed as an omegaconf, is this what we want?
79
  self.cfg = cfg
 
80
  self._init_documents()
81
  self._init_unk_embedding()
82
  self._init_response_formatter()
@@ -141,67 +131,21 @@ class Chatbot:
141
 
142
  return documents_str
143
 
144
- def prepare_prompt(
145
  self,
146
- question: str,
147
  matched_documents: pd.DataFrame,
148
- text_before_prompt: str,
149
- text_before_documents: str,
150
- ) -> str:
151
- """
152
- Prepare the prompt with prompt engineering.
153
- """
154
- documents_str: str = self.prepare_documents(matched_documents, max_words=self.cfg.max_words)
155
- return text_before_documents + documents_str + text_before_prompt + question
156
-
157
- def get_gpt_response(self, **completion_kwargs) -> Response:
158
- # Call the API to generate a response
159
- logger.info(f"querying GPT...")
160
- try:
161
- response = openai.Completion.create(**completion_kwargs)
162
- except Exception as e:
163
- # log the error and return a generic response instead.
164
- logger.exception("Error connecting to OpenAI API. See traceback:")
165
- return Response("", True, "We're having trouble connecting to OpenAI right now... Try again soon!")
166
-
167
- text = response["choices"][0]["text"]
168
- return Response(text)
169
-
170
- def generate_response(
171
- self, prompt: str, matched_documents: pd.DataFrame, unknown_prompt: str
172
- ) -> tuple[Response, Iterable[Source]]:
173
- """
174
- Generate a response based on the retrieved documents.
175
- """
176
- if len(matched_documents) == 0:
177
- # No matching documents were retrieved, return
178
- sources = tuple()
179
- return Response(unknown_prompt), sources
180
-
181
- logger.info(f"Prompt: {prompt}")
182
- response = self.get_gpt_response(prompt=prompt, **self.cfg.completion_kwargs)
183
- if response:
184
- logger.info(f"GPT Response:\n{response.text}")
185
- relevant = self.check_response_relevance(
186
- response=response.text,
187
- engine=self.cfg.embedding_model,
188
- unk_embedding=self.unk_embedding,
189
- unk_threshold=self.cfg.unknown_threshold,
190
- )
191
- if relevant:
192
- sources = (
193
- Source(dct["source"], dct["url"], dct["similarity"])
194
- for dct in matched_documents.to_dict(orient="records")
195
- )
196
- else:
197
- # Override the answer with a generic unknown prompt, without sources.
198
- response = Response(text=self.cfg.unknown_prompt)
199
- sources = tuple()
200
-
201
- return response, sources
202
 
203
  def check_response_relevance(
204
- self, response: str, engine: str, unk_embedding: np.array, unk_threshold: float
205
  ) -> bool:
206
  """Check to see if a response is relevant to the chatbot's knowledge or not.
207
 
@@ -211,7 +155,7 @@ class Chatbot:
211
  set the unk_threshold to 0 to essentially turn off this feature.
212
  """
213
  response_embedding = get_embedding(
214
- response,
215
  engine=engine,
216
  )
217
  score = cosine_similarity(response_embedding, unk_embedding)
@@ -220,29 +164,45 @@ class Chatbot:
220
  # Likely that the answer is meaningful, add the top sources
221
  return score < unk_threshold
222
 
223
- def process_input(self, question: str, formatter: ResponseFormatter = None) -> str:
224
  """
225
  Main function to process the input question and generate a formatted output.
226
  """
227
 
228
- logger.info(f"User Question:\n{question}")
229
 
230
  # We make sure there is always a newline at the end of the question to avoid completing the question.
231
- if not question.endswith("\n"):
232
- question += "\n"
233
 
234
  matched_documents = self.rank_documents(
235
- query=question,
236
  top_k=self.cfg.top_k,
237
  thresh=self.cfg.thresh,
238
  engine=self.cfg.embedding_model,
239
  )
240
- prompt = self.prepare_prompt(
241
- question=question,
242
- matched_documents=matched_documents,
243
- text_before_prompt=self.cfg.text_before_prompt,
244
- text_before_documents=self.cfg.text_before_documents,
 
 
 
 
 
 
 
 
 
 
 
 
245
  )
246
- response, sources = self.generate_response(prompt, matched_documents, self.cfg.unknown_prompt)
 
 
 
 
247
 
248
  return self.response_formatter(response, sources)
 
1
  import logging
 
2
  from dataclasses import dataclass, field
 
3
 
4
  import numpy as np
 
5
  import pandas as pd
 
6
  from openai.embeddings_utils import cosine_similarity, get_embedding
7
 
8
+ from buster.completers import get_completer
9
  from buster.documents import get_documents_manager_from_extension
10
  from buster.formatter import (
11
  Response,
 
17
  logger = logging.getLogger(__name__)
18
  logging.basicConfig(level=logging.INFO)
19
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  @dataclass
22
+ class BusterConfig:
23
  """Configuration object for a chatbot.
24
 
25
  documents_csv: Path to the csv file containing the documents and their embeddings.
 
41
  thresh: float = 0.7
42
  max_words: int = 3000
43
  unknown_threshold: float = 0.9 # set to 0 to deactivate
44
+ completer_cfg: dict = field(
45
+ # TODO: Put all this in its own config with sane defaults?
46
  default_factory=lambda: {
47
+ "name": "GPT3",
48
+ "text_before_documents": "You are a chatbot answering questions.\n",
49
+ "text_before_prompt": "Answer the following question:\n",
50
+ "completion_kwargs": {
51
+ "engine": "text-davinci-003",
52
+ "max_tokens": 200,
53
+ "temperature": None,
54
+ "top_p": None,
55
+ "frequency_penalty": 1,
56
+ "presence_penalty": 1,
57
+ },
58
  }
59
  )
 
60
  response_format: str = "slack"
61
  unknown_prompt: str = "I Don't know how to answer your question."
 
 
62
  response_footnote: str = "I'm a bot 🤖 and not always perfect."
63
 
64
 
65
+ class Buster:
66
+ def __init__(self, cfg: BusterConfig):
67
  # TODO: right now, the cfg is being passed as an omegaconf, is this what we want?
68
  self.cfg = cfg
69
+ self.completer = get_completer(cfg.completer_cfg)
70
  self._init_documents()
71
  self._init_unk_embedding()
72
  self._init_response_formatter()
 
131
 
132
  return documents_str
133
 
134
+ def add_sources(
135
  self,
136
+ response,
137
  matched_documents: pd.DataFrame,
138
+ unknown_prompt: str,
139
+ ):
140
+ logger.info(f"GPT Response:\n{response.text}")
141
+ sources = (
142
+ Source(dct["source"], dct["url"], dct["similarity"]) for dct in matched_documents.to_dict(orient="records")
143
+ )
144
+
145
+ return sources
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  def check_response_relevance(
148
+ self, completion: str, engine: str, unk_embedding: np.array, unk_threshold: float
149
  ) -> bool:
150
  """Check to see if a response is relevant to the chatbot's knowledge or not.
151
 
 
155
  set the unk_threshold to 0 to essentially turn off this feature.
156
  """
157
  response_embedding = get_embedding(
158
+ completion,
159
  engine=engine,
160
  )
161
  score = cosine_similarity(response_embedding, unk_embedding)
 
164
  # Likely that the answer is meaningful, add the top sources
165
  return score < unk_threshold
166
 
167
+ def process_input(self, user_input: str, formatter: ResponseFormatter = None) -> str:
168
  """
169
  Main function to process the input question and generate a formatted output.
170
  """
171
 
172
+ logger.info(f"User Input:\n{user_input}")
173
 
174
  # We make sure there is always a newline at the end of the question to avoid completing the question.
175
+ if not user_input.endswith("\n"):
176
+ user_input += "\n"
177
 
178
  matched_documents = self.rank_documents(
179
+ query=user_input,
180
  top_k=self.cfg.top_k,
181
  thresh=self.cfg.thresh,
182
  engine=self.cfg.embedding_model,
183
  )
184
+
185
+ if len(matched_documents) == 0:
186
+ response = Response("I did not find any sources to answer your question.")
187
+ sources = tuple()
188
+ return self.response_formatter(response, sources)
189
+
190
+ # generate a completion
191
+ documents: str = self.prepare_documents(matched_documents, max_words=self.cfg.max_words)
192
+ response = self.completer.generate_response(user_input, documents)
193
+ sources = self.add_sources(response, matched_documents, self.cfg.unknown_prompt)
194
+
195
+ # check for relevance
196
+ relevant = self.check_response_relevance(
197
+ completion=response.text,
198
+ engine=self.cfg.embedding_model,
199
+ unk_embedding=self.unk_embedding,
200
+ unk_threshold=self.cfg.unknown_threshold,
201
  )
202
+ if not relevant:
203
+ # answer generated was the chatbot saying it doesn't know how to answer
204
+ # override completion with generic "I don't know"
205
+ response = Response(text=self.cfg.unknown_prompt)
206
+ sources = tuple()
207
 
208
  return self.response_formatter(response, sources)
buster/completers/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from .base import ChatGPTCompleter, GPT3Completer, get_completer
2
+
3
# ``__all__`` must list the public names as strings; listing the objects
# themselves makes ``from buster.completers import *`` raise a TypeError.
__all__ = [
    "get_completer",
    "GPT3Completer",
    "ChatGPTCompleter",
]
buster/completers/base.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from abc import ABC, abstractmethod
4
+
5
+ import openai
6
+ import promptlayer
7
+
8
+ from buster.formatter.base import Response
9
+
10
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# PromptLayer is opt-in: it is only wired up when PROMPTLAYER_API_KEY is set.
promptlayer_api_key = os.environ.get("PROMPTLAYER_API_KEY")
if promptlayer_api_key:
    logger.info("Enabling prompt layer...")
    promptlayer.api_key = promptlayer_api_key

    # From here on the module-level name `openai` refers to the promptlayer wrapper.
    openai = promptlayer.openai
    openai.api_key = os.environ.get("OPENAI_API_KEY")
22
+
23
+
24
class Completer(ABC):
    """Base class for objects that turn a user question and documents into a ``Response``.

    Subclasses supply ``prepare_prompt`` (build the model input) and
    ``complete`` (call the model); ``generate_response`` ties the two together.
    """

    def __init__(self, cfg):
        # cfg is indexed with "name" and "completion_kwargs" by generate_response.
        self.cfg = cfg

    def prepare_prompt(self, user_input, documents):
        """Build the model input from the user question and the documents.

        ``generate_response`` always calls this hook, so it is declared here to
        make the subclass contract explicit.

        Raises:
            NotImplementedError: if the subclass does not override it.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement prepare_prompt()")

    @abstractmethod
    def complete(self, prompt, **completion_kwargs) -> str:
        """Send ``prompt`` to the model and return the generated text.

        ``completion_kwargs`` are the entries of ``cfg["completion_kwargs"]``,
        forwarded as keyword arguments by ``generate_response``.
        """
        ...

    def generate_response(self, user_input, documents) -> Response:
        """Prepare a prompt, query the model and wrap the outcome in a ``Response``.

        Any exception raised while reading the completion kwargs or calling
        ``complete`` is logged with its traceback and converted into a generic
        error ``Response`` instead of propagating.
        """
        # Call the API to generate a response
        prompt = self.prepare_prompt(user_input, documents)
        name = self.cfg["name"]
        logger.info(f"querying model {name}...")
        logger.info(f"{prompt=}")
        try:
            completion_kwargs = self.cfg["completion_kwargs"]
            completion = self.complete(prompt=prompt, **completion_kwargs)
        except Exception:
            # Deliberate best effort: log the error and return a generic response instead.
            logger.exception("Error connecting to OpenAI API. See traceback:")
            # NOTE(review): positional args are presumably (text, error flag, error message) - confirm against Response.
            return Response("", True, "We're having trouble connecting to OpenAI right now... Try again soon!")

        return Response(completion)
47
+
48
+
49
class GPT3Completer(Completer):
    """Completer that sends a single text prompt to ``openai.Completion``."""

    def prepare_prompt(
        self,
        user_input: str,
        documents: str,
    ) -> str:
        """
        Build the text prompt: documents preamble, documents, question preamble, then the question.
        """
        cfg = self.cfg
        pieces = (
            cfg["text_before_documents"],
            documents,
            cfg["text_before_prompt"],
            user_input,
        )
        return "".join(pieces)

    def complete(self, prompt, **completion_kwargs):
        """Call the completion endpoint and return the text of the first choice."""
        api_result = openai.Completion.create(prompt=prompt, **completion_kwargs)
        first_choice = api_result["choices"][0]
        return first_choice["text"]
65
+
66
+
67
class ChatGPTCompleter(Completer):
    """Completer that sends a list of chat messages to ``openai.ChatCompletion``."""

    def prepare_prompt(
        self,
        user_input: str,
        documents: str,
    ) -> list:
        """
        Build the chat messages: a system message holding the instructions and documents,
        followed by the user's question.
        """
        system_content = self.cfg["text_before_documents"] + documents + self.cfg["text_before_prompt"]
        return [
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_input},
        ]

    def complete(self, prompt, **completion_kwargs) -> str:
        """Call the chat endpoint and return the message content of the first choice."""
        chat_result = openai.ChatCompletion.create(messages=prompt, **completion_kwargs)
        first_choice = chat_result["choices"][0]
        return first_choice["message"]["content"]
91
+
92
+
93
def get_completer(completer_cfg):
    """Instantiate the completer selected by ``completer_cfg["name"]``.

    Args:
        completer_cfg: Mapping with at least a "name" entry ("GPT3" or
            "ChatGPT"); it is passed unchanged to the completer constructor.

    Returns:
        An instance of the matching completer class.

    Raises:
        KeyError: if "name" is missing or does not match a known completer.
    """
    completers = {
        "GPT3": GPT3Completer,
        "ChatGPT": ChatGPTCompleter,
    }
    name = completer_cfg["name"]
    try:
        completer_cls = completers[name]
    except KeyError:
        # Same exception type as before, but it now says what was wrong and what is accepted.
        raise KeyError(f"Unknown completer {name!r}; expected one of {sorted(completers)}") from None
    return completer_cls(completer_cfg)
tests/test_chatbot.py CHANGED
@@ -4,7 +4,7 @@ from pathlib import Path
4
  import numpy as np
5
  import pandas as pd
6
 
7
- from buster.chatbot import Chatbot, ChatbotConfig
8
  from buster.documents import DocumentsManager
9
 
10
  TEST_DATA_DIR = Path(__file__).resolve().parent / "data"
@@ -39,61 +39,136 @@ class DocumentsMock(DocumentsManager):
39
  return self.documents
40
 
41
 
42
- def test_chatbot_real_data():
43
- hf_transformers_cfg = ChatbotConfig(
44
- documents_file=DOCUMENTS_FILE,
 
 
 
 
 
45
  unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
46
  embedding_model="text-embedding-ada-002",
47
  top_k=3,
48
  thresh=0.7,
49
  max_words=3000,
50
- completion_kwargs={
51
- "temperature": 0,
52
- "engine": "text-davinci-003",
53
- "max_tokens": 100,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  response_format="slack",
56
- text_before_prompt=(
57
- """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
58
- """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
59
- """Do not include any links to urls or hyperlinks in your answers.\n\n"""
60
- """Now answer the following question:\n"""
61
- ),
 
 
 
 
 
 
 
62
  )
63
- chatbot = Chatbot(hf_transformers_cfg)
64
- answer = chatbot.process_input("What is a transformer?")
65
  assert isinstance(answer, str)
66
 
67
 
68
- def test_chatbot_mock_data(tmp_path, monkeypatch):
69
- gpt_expected_answer = "this is GPT answer"
70
- monkeypatch.setattr("buster.chatbot.get_documents_manager_from_extension", lambda filepath: DocumentsMock)
71
- monkeypatch.setattr("buster.chatbot.get_embedding", lambda x, engine: get_fake_embedding())
72
- monkeypatch.setattr(
73
- "buster.chatbot.openai.Completion.create", lambda **kwargs: {"choices": [{"text": gpt_expected_answer}]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  )
 
 
 
 
75
 
76
- hf_transformers_cfg = ChatbotConfig(
77
- documents_file=tmp_path / "not_a_real_file.tar.gz",
 
 
78
  unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
79
  embedding_model="text-embedding-ada-002",
80
  top_k=3,
81
  thresh=0.7,
82
  max_words=3000,
83
- completion_kwargs={
84
- "temperature": 0,
85
- "engine": "text-davinci-003",
86
- "max_tokens": 100,
87
- },
88
  response_format="slack",
89
- text_before_prompt=(
90
- """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
91
- """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
92
- """Do not include any links to urls or hyperlinks in your answers.\n\n"""
93
- """Now answer the following question:\n"""
94
- ),
 
 
 
 
 
 
 
 
 
 
 
 
95
  )
96
- chatbot = Chatbot(hf_transformers_cfg)
97
- answer = chatbot.process_input("What is a transformer?")
98
  assert isinstance(answer, str)
99
- assert answer.startswith(gpt_expected_answer)
 
4
  import numpy as np
5
  import pandas as pd
6
 
7
+ from buster.buster import Buster, BusterConfig
8
  from buster.documents import DocumentsManager
9
 
10
  TEST_DATA_DIR = Path(__file__).resolve().parent / "data"
 
39
  return self.documents
40
 
41
 
42
def test_chatbot_mock_data(tmp_path, monkeypatch):
    """Exercise Buster end to end with the documents manager, embeddings and completion call stubbed out."""
    expected_completion = "this is GPT answer"

    def fake_documents_manager(filepath):
        return DocumentsMock

    def fake_embedding(x, engine):
        return get_fake_embedding()

    def fake_completion_create(**kwargs):
        return {"choices": [{"text": expected_completion}]}

    monkeypatch.setattr("buster.buster.get_documents_manager_from_extension", fake_documents_manager)
    monkeypatch.setattr("buster.buster.get_embedding", fake_embedding)
    monkeypatch.setattr("openai.Completion.create", fake_completion_create)

    instructions = (
        "You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"
        "Make sure to format your answers in Markdown format, including code block and snippets.\n"
        "Do not include any links to urls or hyperlinks in your answers.\n\n"
        "Now answer the following question:\n"
    )
    completer_cfg = {
        "name": "GPT3",
        "text_before_prompt": instructions,
        "text_before_documents": "",
        "completion_kwargs": {
            "engine": "text-davinci-003",
            "max_tokens": 200,
            "temperature": None,
            "top_p": None,
            "frequency_penalty": 1,
            "presence_penalty": 1,
        },
    }
    cfg = BusterConfig(
        documents_file=tmp_path / "not_a_real_file.tar.gz",
        unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
        embedding_model="text-embedding-ada-002",
        top_k=3,
        thresh=0.7,
        max_words=3000,
        response_format="slack",
        completer_cfg=completer_cfg,
    )

    answer = Buster(cfg).process_input("What is a transformer?")

    assert isinstance(answer, str)
    assert answer.startswith(expected_completion)
79
+
80
+
81
def test_chatbot_real_data__chatGPT():
    """Ask an on-topic question through the ChatGPT completer using the real documents file."""
    instructions = (
        "You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"
        "Make sure to format your answers in Markdown format, including code block and snippets.\n"
        "Do not include any links to urls or hyperlinks in your answers.\n\n"
        "Now answer the following question:\n"
    )
    completer_cfg = {
        "name": "ChatGPT",
        "text_before_prompt": instructions,
        "text_before_documents": "Only use these documents as reference:\n",
        "completion_kwargs": {
            "model": "gpt-3.5-turbo",
        },
    }
    cfg = BusterConfig(
        documents_file=DOCUMENTS_FILE,
        unknown_prompt="I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
        embedding_model="text-embedding-ada-002",
        top_k=3,
        thresh=0.7,
        max_words=3000,
        response_format="slack",
        completer_cfg=completer_cfg,
    )

    answer = Buster(cfg).process_input("What is a transformer?")

    assert isinstance(answer, str)
107
 
108
 
109
def test_chatbot_real_data__chatGPT_OOD():
    """Ask an off-topic question and check that the configured unknown prompt appears in the answer."""
    # The same refusal text is used as the unknown prompt and quoted twice inside the instructions.
    refusal = "I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?"
    instructions = (
        "You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python. "
        "Make sure to format your answers in Markdown format, including code block and snippets. "
        "Do not include any links to urls or hyperlinks in your answers. "
        "If you do not know the answer to a question, or if it is completely irrelevant to the library usage, let the user know you cannot answer. "
        "Use this response: "
        + refusal
        + "For example:\n"
        + "What is the meaning of life for huggingface?\n"
        + refusal
        + "Now answer the following question:\n"
    )
    completer_cfg = {
        "name": "ChatGPT",
        "text_before_prompt": instructions,
        "text_before_documents": "Only use these documents as reference:\n",
        "completion_kwargs": {
            "model": "gpt-3.5-turbo",
        },
    }
    buster_cfg = BusterConfig(
        documents_file=DOCUMENTS_FILE,
        unknown_prompt=refusal,
        embedding_model="text-embedding-ada-002",
        top_k=3,
        thresh=0.7,
        max_words=3000,
        completer_cfg=completer_cfg,
        response_format="gradio",
    )

    answer = Buster(buster_cfg).process_input("What is a good recipe for brocolli soup?")

    assert isinstance(answer, str)
    assert buster_cfg.unknown_prompt in answer
142
 
143
+
144
def test_chatbot_real_data__GPT():
    """Ask an on-topic question through the GPT3 completer using the real documents file."""
    instructions = (
        "You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"
        "Make sure to format your answers in Markdown format, including code block and snippets.\n"
        "Do not include any links to urls or hyperlinks in your answers.\n\n"
        "Now answer the following question:\n"
    )
    completer_cfg = {
        "name": "GPT3",
        "text_before_prompt": instructions,
        "text_before_documents": "",
        "completion_kwargs": {
            "engine": "text-davinci-003",
            "max_tokens": 200,
            "temperature": None,
            "top_p": None,
            "frequency_penalty": 1,
            "presence_penalty": 1,
        },
    }
    cfg = BusterConfig(
        documents_file=DOCUMENTS_FILE,
        unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
        embedding_model="text-embedding-ada-002",
        top_k=3,
        thresh=0.7,
        max_words=3000,
        response_format="slack",
        completer_cfg=completer_cfg,
    )

    answer = Buster(cfg).process_input("What is a transformer?")

    assert isinstance(answer, str)