Commit 1ea867d (unverified)
jerpint committed
1 Parent(s): d88c550

remove OmegaConf dependency (#27)

Files changed (1):
  1. buster/chatbot.py (+41 -42)
buster/chatbot.py CHANGED
@@ -4,7 +4,6 @@ from dataclasses import dataclass, field
 import numpy as np
 import openai
 import pandas as pd
-from omegaconf import OmegaConf
 from openai.embeddings_utils import cosine_similarity, get_embedding
 
 from buster.docparser import EMBEDDING_MODEL, read_documents
@@ -13,8 +12,48 @@ logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 
 
+@dataclass
+class ChatbotConfig:
+    """Configuration object for a chatbot.
+
+    documents_csv: Path to the csv file containing the documents and their embeddings.
+    embedding_model: OpenAI model to use to get embeddings.
+    top_k: Max number of documents to retrieve, ordered by cosine similarity
+    thresh: threshold for cosine similarity to be considered
+    max_chars: maximum number of characters the retrieved documents can be. Will truncate otherwise.
+    completion_kwargs: kwargs for the OpenAI.Completion() method
+    separator: the separator to use, can be either "\n" or <p> depending on rendering.
+    link_format: the type of format to render links with, e.g. slack or markdown
+    unknown_prompt: Prompt to use to generate the "I don't know" embedding to compare to.
+    text_before_prompt: Text to prompt GPT with before the user prompt, but after the documentation.
+    text_after_response: Generic response to add the the chatbot's reply.
+    """
+
+    documents_file: str = "buster/data/document_embeddings.csv"
+    embedding_model: str = "text-embedding-ada-002"
+    top_k: int = 3
+    thresh: float = 0.7
+    max_chars: int = 3000
+
+    completion_kwargs: dict = field(
+        default_factory=lambda: {
+            "engine": "text-davinci-003",
+            "max_tokens": 200,
+            "temperature": None,
+            "top_p": None,
+            "frequency_penalty": 1,
+            "presence_penalty": 1,
+        }
+    )
+    separator: str = "\n"
+    link_format: str = "slack"
+    unknown_prompt: str = "I Don't know how to answer your question."
+    text_before_prompt: str = "I'm a chatbot, bleep bloop."
+    text_after_response: str = "Answer the following question:\n"
+
+
 class Chatbot:
-    def __init__(self, cfg: OmegaConf):
+    def __init__(self, cfg: ChatbotConfig):
         # TODO: right now, the cfg is being passed as an omegaconf, is this what we want?
         self.cfg = cfg
         self._init_documents()
@@ -176,43 +215,3 @@ class Chatbot:
         formatted_output = self.format_response(response, matched_documents)
 
         return formatted_output
-
-
-@dataclass
-class ChatbotConfig:
-    """Configuration object for a chatbot.
-
-    documents_csv: Path to the csv file containing the documents and their embeddings.
-    embedding_model: OpenAI model to use to get embeddings.
-    top_k: Max number of documents to retrieve, ordered by cosine similarity
-    thresh: threshold for cosine similarity to be considered
-    max_chars: maximum number of characters the retrieved documents can be. Will truncate otherwise.
-    completion_kwargs: kwargs for the OpenAI.Completion() method
-    separator: the separator to use, can be either "\n" or <p> depending on rendering.
-    link_format: the type of format to render links with, e.g. slack or markdown
-    unknown_prompt: Prompt to use to generate the "I don't know" embedding to compare to.
-    text_before_prompt: Text to prompt GPT with before the user prompt, but after the documentation.
-    text_after_response: Generic response to add the the chatbot's reply.
-    """
-
-    documents_file: str = "buster/data/document_embeddings.csv"
-    embedding_model: str = "text-embedding-ada-002"
-    top_k: int = 3
-    thresh: float = 0.7
-    max_chars: int = 3000
-
-    completion_kwargs: dict = field(
-        default_factory=lambda: {
-            "engine": "text-davinci-003",
-            "max_tokens": 200,
-            "temperature": None,
-            "top_p": None,
-            "frequency_penalty": 1,
-            "presence_penalty": 1,
-        }
-    )
-    separator: str = "\n"
-    link_format: str = "slack"
-    unknown_prompt: str = "I Don't know how to answer your question."
-    text_before_prompt: str = "I'm a chatbot, bleep bloop."
-    text_after_response: str = "Answer the following question:\n"
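
Note: with OmegaConf removed, the configuration is a plain dataclass. A minimal usage sketch under that assumption (field names and the Chatbot(cfg) constructor come from the diff above; the snippet assumes the documents CSV exists and OpenAI credentials are configured, and dataclasses.replace is standard-library Python, not part of buster):

from dataclasses import replace

from buster.chatbot import Chatbot, ChatbotConfig

# Defaults come straight from the ChatbotConfig definition in the diff.
cfg = ChatbotConfig()

# Without OmegaConf, overrides are plain keyword arguments or dataclasses.replace.
cfg = replace(cfg, top_k=5, link_format="markdown")

# Assumes buster/data/document_embeddings.csv exists and OPENAI_API_KEY is set.
chatbot = Chatbot(cfg)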