Spaces:
Runtime error
Runtime error
import os | |
from pathlib import Path | |
import numpy as np | |
import pandas as pd | |
from buster.chatbot import Chatbot, ChatbotConfig | |
from buster.documents import DocumentsManager | |
# Directory named "data" located next to this module (resolved to an absolute path).
TEST_DATA_DIR = Path(__file__).resolve().parent / "data"
# Plain-string path of the embeddings archive inside TEST_DATA_DIR.
DOCUMENTS_FILE = os.path.join(
    os.fspath(TEST_DATA_DIR),
    "document_embeddings_huggingface_subset.tar.gz",
)
def get_fake_embedding(length=1536, seed=None):
    """Return a fake embedding made of random float32 values in [0, 1).

    Args:
        length: Number of values in the returned embedding.
        seed: Optional seed forwarded to ``np.random.default_rng``. With the
            default ``None`` every call draws fresh entropy, exactly as
            before; pass a value to get a reproducible embedding.

    Returns:
        A list of ``length`` ``np.float32`` scalars.
    """
    rng = np.random.default_rng(seed)
    # list() keeps the elements as np.float32 scalars (unlike ndarray.tolist()).
    return list(rng.random(length, dtype=np.float32))
class DocumentsMock(DocumentsManager):
    """Documents manager stand-in backed by a fixed in-memory DataFrame.

    The constructor does not call the parent initializer; it only records the
    given path and builds 100 identical rows of placeholder data.
    """

    def __init__(self, filepath):
        """Store ``filepath`` and build the placeholder ``documents`` table."""
        self.filepath = filepath

        row_count = 100
        # One value per column; every row of the table repeats it. The
        # embedding is generated once, so all rows share the same list.
        constant_columns = {
            "title": "test",
            "url": "http://url.com",
            "content": "cool text",
            "embedding": get_fake_embedding(),
            "n_tokens": 10,
            "source": "fake source",
        }
        self.documents = pd.DataFrame.from_dict(
            {column: [value] * row_count for column, value in constant_columns.items()}
        )

    def add(self, documents):
        """Accept ``documents`` and do nothing with them."""
        return None

    def get_documents(self, source):
        """Return the full placeholder table; ``source`` is not used."""
        return self.documents
def test_chatbot_real_data():
    """Check that a Chatbot built on DOCUMENTS_FILE answers with a string.

    Nothing is patched in this test, so it presumably relies on the real
    documents archive and the real embedding/completion backends -- confirm
    before running it in an offline environment.
    """
    completion_settings = {
        "temperature": 0,
        "engine": "text-davinci-003",
        "max_tokens": 100,
    }
    # Adjacent literals are concatenated into one prompt preamble.
    prompt_preamble = (
        """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
        """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
        """Do not include any links to urls or hyperlinks in your answers.\n\n"""
        """Now answer the following question:\n"""
    )

    config = ChatbotConfig(
        documents_file=DOCUMENTS_FILE,
        unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
        embedding_model="text-embedding-ada-002",
        top_k=3,
        thresh=0.7,
        max_words=3000,
        completion_kwargs=completion_settings,
        response_format="slack",
        text_before_prompt=prompt_preamble,
    )

    bot = Chatbot(config)
    reply = bot.process_input("What is a transformer?")

    assert isinstance(reply, str)
def test_chatbot_mock_data(tmp_path, monkeypatch):
    """Check the Chatbot's answer when documents, embeddings and completion are patched.

    Three attributes under ``buster.chatbot`` are replaced: the documents
    manager lookup, the embedding function and ``openai.Completion.create``.
    The answer must be a string that starts with the canned completion text.
    """
    canned_completion = "this is GPT answer"

    def fake_manager_lookup(filepath):
        # Returns the mock class itself, not an instance.
        return DocumentsMock

    def fake_get_embedding(x, engine):
        # Both arguments are ignored; a fresh random embedding is returned.
        return get_fake_embedding()

    def fake_completion_create(**kwargs):
        return {"choices": [{"text": canned_completion}]}

    monkeypatch.setattr("buster.chatbot.get_documents_manager_from_extension", fake_manager_lookup)
    monkeypatch.setattr("buster.chatbot.get_embedding", fake_get_embedding)
    monkeypatch.setattr("buster.chatbot.openai.Completion.create", fake_completion_create)

    completion_settings = {
        "temperature": 0,
        "engine": "text-davinci-003",
        "max_tokens": 100,
    }
    # Adjacent literals are concatenated into one prompt preamble.
    prompt_preamble = (
        """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
        """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
        """Do not include any links to urls or hyperlinks in your answers.\n\n"""
        """Now answer the following question:\n"""
    )

    config = ChatbotConfig(
        documents_file=tmp_path / "not_a_real_file.tar.gz",
        unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
        embedding_model="text-embedding-ada-002",
        top_k=3,
        thresh=0.7,
        max_words=3000,
        completion_kwargs=completion_settings,
        response_format="slack",
        text_before_prompt=prompt_preamble,
    )

    bot = Chatbot(config)
    reply = bot.process_input("What is a transformer?")

    assert isinstance(reply, str)
    assert reply.startswith(canned_completion)