Spaces:
Runtime error
Runtime error
remove OmegaConf dependency (#27)
Browse files- buster/chatbot.py +41 -42
buster/chatbot.py
CHANGED
@@ -4,7 +4,6 @@ from dataclasses import dataclass, field
|
|
4 |
import numpy as np
|
5 |
import openai
|
6 |
import pandas as pd
|
7 |
-
from omegaconf import OmegaConf
|
8 |
from openai.embeddings_utils import cosine_similarity, get_embedding
|
9 |
|
10 |
from buster.docparser import EMBEDDING_MODEL, read_documents
|
@@ -13,8 +12,48 @@ logger = logging.getLogger(__name__)
|
|
13 |
logging.basicConfig(level=logging.INFO)
|
14 |
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
class Chatbot:
|
17 |
-
def __init__(self, cfg: OmegaConf):
|
18 |
# TODO: right now, the cfg is being passed as an omegaconf, is this what we want?
|
19 |
self.cfg = cfg
|
20 |
self._init_documents()
|
@@ -176,43 +215,3 @@ class Chatbot:
|
|
176 |
formatted_output = self.format_response(response, matched_documents)
|
177 |
|
178 |
return formatted_output
|
179 |
-
|
180 |
-
|
181 |
-
@dataclass
|
182 |
-
class ChatbotConfig:
|
183 |
-
"""Configuration object for a chatbot.
|
184 |
-
|
185 |
-
documents_file: Path to the csv file containing the documents and their embeddings.
|
186 |
-
embedding_model: OpenAI model to use to get embeddings.
|
187 |
-
top_k: Max number of documents to retrieve, ordered by cosine similarity
|
188 |
-
thresh: threshold for cosine similarity to be considered
|
189 |
-
max_chars: maximum number of characters the retrieved documents can be. Will truncate otherwise.
|
190 |
-
completion_kwargs: kwargs for the OpenAI.Completion() method
|
191 |
-
separator: the separator to use, can be either "\n" or <p> depending on rendering.
|
192 |
-
link_format: the type of format to render links with, e.g. slack or markdown
|
193 |
-
unknown_prompt: Prompt to use to generate the "I don't know" embedding to compare to.
|
194 |
-
text_before_prompt: Text to prompt GPT with before the user prompt, but after the documentation.
|
195 |
-
text_after_response: Generic response to add to the chatbot's reply.
|
196 |
-
"""
|
197 |
-
|
198 |
-
documents_file: str = "buster/data/document_embeddings.csv"
|
199 |
-
embedding_model: str = "text-embedding-ada-002"
|
200 |
-
top_k: int = 3
|
201 |
-
thresh: float = 0.7
|
202 |
-
max_chars: int = 3000
|
203 |
-
|
204 |
-
completion_kwargs: dict = field(
|
205 |
-
default_factory=lambda: {
|
206 |
-
"engine": "text-davinci-003",
|
207 |
-
"max_tokens": 200,
|
208 |
-
"temperature": None,
|
209 |
-
"top_p": None,
|
210 |
-
"frequency_penalty": 1,
|
211 |
-
"presence_penalty": 1,
|
212 |
-
}
|
213 |
-
)
|
214 |
-
separator: str = "\n"
|
215 |
-
link_format: str = "slack"
|
216 |
-
unknown_prompt: str = "I Don't know how to answer your question."
|
217 |
-
text_before_prompt: str = "I'm a chatbot, bleep bloop."
|
218 |
-
text_after_response: str = "Answer the following question:\n"
|
|
|
4 |
import numpy as np
|
5 |
import openai
|
6 |
import pandas as pd
|
|
|
7 |
from openai.embeddings_utils import cosine_similarity, get_embedding
|
8 |
|
9 |
from buster.docparser import EMBEDDING_MODEL, read_documents
|
|
|
12 |
logging.basicConfig(level=logging.INFO)
|
13 |
|
14 |
|
15 |
+
@dataclass
|
16 |
+
class ChatbotConfig:
|
17 |
+
"""Configuration object for a chatbot.
|
18 |
+
|
19 |
+
documents_file: Path to the csv file containing the documents and their embeddings.
|
20 |
+
embedding_model: OpenAI model to use to get embeddings.
|
21 |
+
top_k: Max number of documents to retrieve, ordered by cosine similarity
|
22 |
+
thresh: threshold for cosine similarity to be considered
|
23 |
+
max_chars: maximum number of characters the retrieved documents can be. Will truncate otherwise.
|
24 |
+
completion_kwargs: kwargs for the OpenAI.Completion() method
|
25 |
+
separator: the separator to use, can be either "\n" or <p> depending on rendering.
|
26 |
+
link_format: the type of format to render links with, e.g. slack or markdown
|
27 |
+
unknown_prompt: Prompt to use to generate the "I don't know" embedding to compare to.
|
28 |
+
text_before_prompt: Text to prompt GPT with before the user prompt, but after the documentation.
|
29 |
+
text_after_response: Generic response to add to the chatbot's reply.
|
30 |
+
"""
|
31 |
+
|
32 |
+
documents_file: str = "buster/data/document_embeddings.csv"
|
33 |
+
embedding_model: str = "text-embedding-ada-002"
|
34 |
+
top_k: int = 3
|
35 |
+
thresh: float = 0.7
|
36 |
+
max_chars: int = 3000
|
37 |
+
|
38 |
+
completion_kwargs: dict = field(
|
39 |
+
default_factory=lambda: {
|
40 |
+
"engine": "text-davinci-003",
|
41 |
+
"max_tokens": 200,
|
42 |
+
"temperature": None,
|
43 |
+
"top_p": None,
|
44 |
+
"frequency_penalty": 1,
|
45 |
+
"presence_penalty": 1,
|
46 |
+
}
|
47 |
+
)
|
48 |
+
separator: str = "\n"
|
49 |
+
link_format: str = "slack"
|
50 |
+
unknown_prompt: str = "I Don't know how to answer your question."
|
51 |
+
text_before_prompt: str = "I'm a chatbot, bleep bloop."
|
52 |
+
text_after_response: str = "Answer the following question:\n"
|
53 |
+
|
54 |
+
|
55 |
class Chatbot:
|
56 |
+
def __init__(self, cfg: ChatbotConfig):
|
57 |
# TODO: right now, the cfg is being passed as an omegaconf, is this what we want?
|
58 |
self.cfg = cfg
|
59 |
self._init_documents()
|
|
|
215 |
formatted_output = self.format_response(response, matched_documents)
|
216 |
|
217 |
return formatted_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|