Spaces:
Runtime error
Runtime error
File size: 9,756 Bytes
1696c32 1b88635 6008655 06bca0c 1696c32 1b88635 c6dd20e 6008655 c6dd20e 06bca0c 1b88635 6aad21a 1b88635 c6dd20e c8a1687 c6dd20e d16a006 c8a1687 1696c32 8252b96 c8a1687 8252b96 c8a1687 1696c32 8252b96 c8a1687 c6dd20e 06bca0c 6008655 c8a1687 8252b96 c8a1687 8252b96 c8a1687 1696c32 06bca0c 6008655 1b88635 c8a1687 8252b96 c8a1687 8252b96 c8a1687 6008655 c8a1687 c6dd20e c8a1687 1b88635 06bca0c 6008655 25a0d11 1b88635 c8a1687 8252b96 1b88635 8252b96 c8a1687 8252b96 c8a1687 8252b96 c8a1687 1b88635 06bca0c 8252b96 6008655 25a0d11 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
import os
from pathlib import Path
import numpy as np
import pandas as pd
from buster.busterbot import Buster, BusterConfig, Response
from buster.completers.base import Completer, Completion
from buster.retriever import Retriever
from buster.utils import get_retriever_from_extension
# "data" directory located beside this test module (absolute, symlinks resolved).
TEST_DATA_DIR = Path(__file__).resolve().parent.joinpath("data")
# String path of the embeddings archive inside TEST_DATA_DIR; loaded by the real-data tests.
DOCUMENTS_FILE = str(TEST_DATA_DIR / "document_embeddings_huggingface_subset.tar.gz")
def get_fake_embedding(length=1536, seed=None):
    """Return a random vector to use as a stand-in embedding in tests.

    Args:
        length: Number of components in the vector. Defaults to 1536.
        seed: Optional seed handed to ``np.random.default_rng``. With the
            default ``None`` every call yields a fresh, non-reproducible
            vector; pass an int to get the same vector on every call.

    Returns:
        A plain ``list`` holding ``length`` ``np.float32`` values drawn
        uniformly from the half-open interval [0, 1).
    """
    rng = np.random.default_rng(seed)
    # list() (rather than .tolist()) keeps the elements as np.float32 scalars.
    return list(rng.random(length, dtype=np.float32))
class MockCompleter(Completer):
    """Completer stand-in that always replies with one fixed, pre-set text."""

    def __init__(self, expected_answer):
        """Remember the canned answer.

        Note: the base-class initializer is not called; the only state set on
        the instance is ``expected_answer``.
        """
        self.expected_answer = expected_answer

    def complete(self):
        """Do nothing and return ``None``."""
        return None

    def generate_response(self, user_input, system_prompt) -> Completion:
        """Wrap the canned answer in a ``Completion``; both arguments are ignored."""
        canned_answer = self.expected_answer
        return Completion(canned_answer)
class MockRetriever(Retriever):
    """Retriever stand-in backed by an in-memory table of identical fake documents."""

    def __init__(self, filepath):
        """Record ``filepath`` and build the fake documents table.

        ``filepath`` is only stored on the instance; nothing is read from it
        here. The base-class initializer is not called.
        """
        self.filepath = filepath

        n_samples = 100
        # One value per column; each row of the table repeats exactly this value.
        # The embedding is generated once, so every row shares the same list object.
        row_template = {
            "title": "test",
            "url": "http://url.com",
            "content": "cool text",
            "embedding": get_fake_embedding(),
            "n_tokens": 10,
            "source": "fake source",
        }
        columns = {name: [value] * n_samples for name, value in row_template.items()}
        self.documents = pd.DataFrame.from_dict(columns)

    def get_documents(self, source):
        """Return the whole fake table, regardless of ``source``."""
        return self.documents

    def get_source_display_name(self, source):
        """Return ``source`` unchanged."""
        return source
import logging
# Request INFO-level logging on the root logger as soon as this module is imported.
logging.basicConfig(level=logging.INFO)
def test_chatbot_mock_data(tmp_path, monkeypatch):
    """Run ``Buster.process_input`` with the embedding call, completer and retriever all stubbed.

    The stubbed completer always answers with a fixed text, and the test checks
    that the response text is a string starting with that text.
    """
    gpt_expected_answer = "this is GPT answer"

    def fake_get_embedding(self, prompt, engine):
        # Keep these parameter names: the patched method may be called with keywords.
        return get_fake_embedding()

    def fake_completer_factory(x):
        return MockCompleter(expected_answer=gpt_expected_answer)

    monkeypatch.setattr(Buster, "get_embedding", fake_get_embedding)
    monkeypatch.setattr("buster.busterbot.completer_factory", fake_completer_factory)

    # Text placed ahead of the user's question (stored under "text_before_prompt").
    text_before_prompt = (
        """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
        """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
        """Do not include any links to urls or hyperlinks in your answers.\n\n"""
        """Now answer the following question:\n"""
    )
    completion_kwargs = {
        "engine": "gpt-3.5-turbo",
        "max_tokens": 200,
        "temperature": None,
        "top_p": None,
        "frequency_penalty": 1,
        "presence_penalty": 1,
    }
    hf_transformers_cfg = BusterConfig(
        unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
        embedding_model="text-embedding-ada-002",
        retriever_cfg={"top_k": 3, "thresh": 0.7},
        document_source="fake source",
        completion_cfg={"name": "ChatGPT", "completion_kwargs": completion_kwargs},
        prompt_cfg={
            "max_words": 2000,
            "text_before_documents": "",
            "text_before_prompt": text_before_prompt,
        },
    )

    # The path is never opened by MockRetriever; it only needs to be some path.
    retriever = MockRetriever(tmp_path / "not_a_real_file.tar.gz")
    buster = Buster(cfg=hf_transformers_cfg, retriever=retriever)

    response = buster.process_input("What is a transformer?")

    assert isinstance(response.completion.text, str)
    assert response.completion.text.startswith(gpt_expected_answer)
def test_chatbot_real_data__chatGPT():
    """Ask an on-topic question against the documents in ``DOCUMENTS_FILE``.

    Nothing is stubbed in this test, so it exercises whatever embedding and
    completion backends ``Buster`` uses. Only the type of the response text is
    checked.
    """
    # Text placed ahead of the user's question (stored under "text_before_prompt").
    text_before_prompt = (
        """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
        """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
        """Do not include any links to urls or hyperlinks in your answers.\n\n"""
        """Now answer the following question:\n"""
    )
    completion_cfg = {
        "name": "ChatGPT",
        "completion_kwargs": {"model": "gpt-3.5-turbo"},
    }
    prompt_cfg = {
        "max_words": 2000,
        "text_before_documents": "",
        "text_before_prompt": text_before_prompt,
    }
    hf_transformers_cfg = BusterConfig(
        unknown_prompt="I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
        embedding_model="text-embedding-ada-002",
        completion_cfg=completion_cfg,
        prompt_cfg=prompt_cfg,
    )

    # Pick the retriever class from the file extension, then build it on the same file.
    retriever_cls = get_retriever_from_extension(DOCUMENTS_FILE)
    retriever = retriever_cls(DOCUMENTS_FILE)
    buster = Buster(cfg=hf_transformers_cfg, retriever=retriever)

    response = buster.process_input("What is a transformer?")

    assert isinstance(response.completion.text, str)
def test_chatbot_real_data__chatGPT_OOD():
    """Ask an off-topic question and expect the response to be flagged as not relevant.

    The retriever is built from ``DOCUMENTS_FILE`` and nothing is stubbed, so
    this exercises whatever embedding and completion backends ``Buster`` uses.
    """
    # Shared verbatim by `unknown_prompt` and by the example inside the prompt text.
    unknown_prompt = "I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?"

    text_before_prompt = (
        """You are a chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python. """
        """Make sure to format your answers in Markdown format, including code block and snippets. """
        """Do not include any links to urls or hyperlinks in your answers. """
        """If you do not know the answer to a question, or if it is completely irrelevant to the library usage, let the user know you cannot answer. """
        """Use this response: """
        """'I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?'\n"""
        """For example:\n"""
        """What is the meaning of life for huggingface?\n"""
        # The trailing newline keeps the example answer from running straight
        # into the "Now answer..." instruction when the literals are concatenated.
        """I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?\n"""
        """Now answer the following question:\n"""
    )

    buster_cfg = BusterConfig(
        unknown_prompt=unknown_prompt,
        embedding_model="text-embedding-ada-002",
        completion_cfg={
            "name": "ChatGPT",
            "completion_kwargs": {
                "model": "gpt-3.5-turbo",
            },
        },
        retriever_cfg={
            "top_k": 3,
            "thresh": 0.7,
        },
        prompt_cfg={
            "max_words": 3000,
            "text_before_prompt": text_before_prompt,
            "text_before_documents": "Only use these documents as reference:\n",
        },
    )

    # Pick the retriever class from the file extension, then build it on the same file.
    retriever = get_retriever_from_extension(DOCUMENTS_FILE)(DOCUMENTS_FILE)
    buster = Buster(cfg=buster_cfg, retriever=retriever)

    response = buster.process_input("What is a good recipe for brocolli soup?")

    assert isinstance(response.completion.text, str)
    # Truthiness check instead of `== False` (PEP 8); also works for numpy booleans.
    assert not response.is_relevant
def test_chatbot_real_data__GPT():
    """Ask an on-topic question and expect the response to be flagged as relevant.

    The retriever is built from ``DOCUMENTS_FILE`` and nothing is stubbed, so
    this exercises whatever embedding and completion backends ``Buster`` uses.
    """
    # Shared verbatim by `unknown_prompt` and by the example inside the prompt text.
    unknown_prompt = "I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?"

    text_before_prompt = (
        """You are a chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python. """
        """Make sure to format your answers in Markdown format, including code block and snippets. """
        """Do not include any links to urls or hyperlinks in your answers. """
        """If you do not know the answer to a question, or if it is completely irrelevant to the library usage, let the user know you cannot answer. """
        """Use this response: """
        """'I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?'\n"""
        """For example:\n"""
        """What is the meaning of life for huggingface?\n"""
        # The trailing newline keeps the example answer from running straight
        # into the "Now answer..." instruction when the literals are concatenated.
        """I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?\n"""
        """Now answer the following question:\n"""
    )

    buster_cfg = BusterConfig(
        unknown_prompt=unknown_prompt,
        embedding_model="text-embedding-ada-002",
        completion_cfg={
            "name": "ChatGPT",
            "completion_kwargs": {
                "model": "gpt-3.5-turbo",
            },
        },
        retriever_cfg={
            "top_k": 3,
            "thresh": 0.7,
        },
        prompt_cfg={
            "max_words": 3000,
            "text_before_prompt": text_before_prompt,
            "text_before_documents": "Only use these documents as reference:\n",
        },
    )

    # Pick the retriever class from the file extension, then build it on the same file.
    retriever = get_retriever_from_extension(DOCUMENTS_FILE)(DOCUMENTS_FILE)
    buster = Buster(cfg=buster_cfg, retriever=retriever)

    response = buster.process_input("What is a transformer?")

    assert isinstance(response.completion.text, str)
    # Truthiness check instead of `== True` (PEP 8); also works for numpy booleans.
    assert response.is_relevant
|