Spaces:
Runtime error
Runtime error
File size: 3,813 Bytes
1696c32 1b88635 1696c32 1b88635 1696c32 1b88635 1696c32 1b88635 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import os
from pathlib import Path
import numpy as np
import pandas as pd
from buster.chatbot import Chatbot, ChatbotConfig
from buster.documents import DocumentsManager
# Directory named "data" sitting next to this test module.
TEST_DATA_DIR = Path(__file__).resolve().parent / "data"
# Archive of document embeddings used by the real-data test, kept as a plain
# string path rather than a Path object.
DOCUMENTS_FILE = str(TEST_DATA_DIR / "document_embeddings_huggingface_subset.tar.gz")
def get_fake_embedding(length=1536, seed=None):
    """Return a random vector standing in for a real text embedding.

    Args:
        length: Number of components in the returned vector. The default of
            1536 presumably mirrors the output size of the embedding model
            named in the tests below -- confirm against the provider.
        seed: Optional seed forwarded to ``np.random.default_rng``. ``None``
            (the default) draws fresh, unpredictable values on every call;
            pass an integer to obtain a reproducible vector.

    Returns:
        A list of ``length`` ``np.float32`` values drawn uniformly from
        the half-open interval [0, 1).
    """
    rng = np.random.default_rng(seed)
    # Wrapped in list() rather than .tolist() so the elements stay np.float32
    # instead of being converted to built-in Python floats.
    return list(rng.random(length, dtype=np.float32))
class DocumentsMock(DocumentsManager):
    """In-memory ``DocumentsManager`` substitute backed by placeholder rows.

    The table is built entirely in ``__init__``; nothing is read from the
    path that is passed in.
    """

    def __init__(self, filepath):
        """Store ``filepath`` and build a 100-row table of dummy documents.

        The base-class initializer is intentionally not called here.
        """
        self.filepath = filepath

        row_count = 100
        # Generated once: every row of the "embedding" column refers to this
        # same list object, so all documents carry identical vectors.
        shared_embedding = get_fake_embedding()
        columns = {
            "title": ["test"] * row_count,
            "url": ["http://url.com"] * row_count,
            "content": ["cool text"] * row_count,
            "embedding": [shared_embedding] * row_count,
            "n_tokens": [10] * row_count,
            "source": ["fake source"] * row_count,
        }
        self.documents = pd.DataFrame.from_dict(columns)

    def add(self, documents):
        """Accept ``documents`` and discard them; the table never changes."""
        return None

    def get_documents(self, source):
        """Return the full placeholder table; ``source`` is ignored."""
        return self.documents
def test_chatbot_real_data():
    """Ask one question of a ``Chatbot`` configured with ``DOCUMENTS_FILE``.

    Nothing is patched in this test, so it presumably reaches the real
    embedding and completion services -- confirm before running offline.
    Only the type of the reply is checked.
    """
    completion_settings = {
        "temperature": 0,
        "engine": "text-davinci-003",
        "max_tokens": 100,
    }
    # Adjacent literals are concatenated into a single prompt prefix.
    prompt_prefix = (
        """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
        """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
        """Do not include any links to urls or hyperlinks in your answers.\n\n"""
        """Now answer the following question:\n"""
    )
    config = ChatbotConfig(
        documents_file=DOCUMENTS_FILE,
        unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
        embedding_model="text-embedding-ada-002",
        top_k=3,
        thresh=0.7,
        max_words=3000,
        completion_kwargs=completion_settings,
        response_format="slack",
        text_before_prompt=prompt_prefix,
    )

    bot = Chatbot(config)
    reply = bot.process_input("What is a transformer?")

    assert isinstance(reply, str)
def test_chatbot_mock_data(tmp_path, monkeypatch):
    """Ask one question of a ``Chatbot`` whose collaborators are all patched.

    Three names inside ``buster.chatbot`` are replaced: the documents-manager
    lookup returns ``DocumentsMock``, the embedding call returns a random
    vector, and the completion call returns a canned reply. The test then
    checks that the answer is a string beginning with that canned reply.
    """
    gpt_expected_answer = "this is GPT answer"

    def fake_manager_lookup(filepath):
        # Hands back the class itself, not an instance.
        return DocumentsMock

    def fake_get_embedding(x, engine):
        return get_fake_embedding()

    def fake_completion_create(**kwargs):
        return {"choices": [{"text": gpt_expected_answer}]}

    monkeypatch.setattr("buster.chatbot.get_documents_manager_from_extension", fake_manager_lookup)
    monkeypatch.setattr("buster.chatbot.get_embedding", fake_get_embedding)
    monkeypatch.setattr("buster.chatbot.openai.Completion.create", fake_completion_create)

    completion_settings = {
        "temperature": 0,
        "engine": "text-davinci-003",
        "max_tokens": 100,
    }
    # Adjacent literals are concatenated into a single prompt prefix.
    prompt_prefix = (
        """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
        """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
        """Do not include any links to urls or hyperlinks in your answers.\n\n"""
        """Now answer the following question:\n"""
    )
    config = ChatbotConfig(
        # This path is never created on disk by the test.
        documents_file=tmp_path / "not_a_real_file.tar.gz",
        unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
        embedding_model="text-embedding-ada-002",
        top_k=3,
        thresh=0.7,
        max_words=3000,
        completion_kwargs=completion_settings,
        response_format="slack",
        text_before_prompt=prompt_prefix,
    )

    bot = Chatbot(config)
    reply = bot.process_input("What is a transformer?")

    assert isinstance(reply, str)
    assert reply.startswith(gpt_expected_answer)
|