File size: 3,813 Bytes
1696c32
 
 
1b88635
 
 
1696c32
1b88635
1696c32
 
 
 
 
1b88635
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1696c32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b88635
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
from pathlib import Path

import numpy as np
import pandas as pd

from buster.chatbot import Chatbot, ChatbotConfig
from buster.documents import DocumentsManager

# Directory named "data" that sits next to this (resolved) test module.
TEST_DATA_DIR = Path(__file__).resolve().parent.joinpath("data")
# Plain-string path of the embeddings archive inside TEST_DATA_DIR.
DOCUMENTS_FILE = str(TEST_DATA_DIR / "document_embeddings_huggingface_subset.tar.gz")


def get_fake_embedding(length=1536):
    """Return a list of ``length`` random float32 values.

    A fresh, unseeded NumPy generator is created on every call, so the
    values differ from one call to the next.
    """
    generator = np.random.default_rng()
    values = generator.random(length, dtype=np.float32)
    # Converting via list() keeps each element as a NumPy float32 scalar.
    return list(values)


class DocumentsMock(DocumentsManager):
    """DocumentsManager subclass that serves a synthetic in-memory table."""

    def __init__(self, filepath):
        """Store ``filepath`` and build a 100-row DataFrame of fake documents.

        The base-class initializer is not called here.
        """
        self.filepath = filepath

        n_samples = 100
        # Values for a single row; the embedding is generated once, so every
        # row of the table ends up referencing the same embedding list.
        row_template = {
            "title": "test",
            "url": "http://url.com",
            "content": "cool text",
            "embedding": get_fake_embedding(),
            "n_tokens": 10,
            "source": "fake source",
        }
        columns = {name: [value] * n_samples for name, value in row_template.items()}
        self.documents = pd.DataFrame.from_dict(columns)

    def add(self, documents):
        """Accept ``documents`` and do nothing with them."""
        return None

    def get_documents(self, source):
        """Return the whole fake table; ``source`` is ignored."""
        table = self.documents
        return table


def test_chatbot_real_data():
    """Build a Chatbot on the archive at DOCUMENTS_FILE and ask one question.

    The only assertion is that ``process_input`` returns a ``str``.
    NOTE(review): nothing is patched in this test, so it presumably relies on
    the real embedding/completion backends being reachable — confirm.
    """
    prompt_header = (
        """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
        """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
        """Do not include any links to urls or hyperlinks in your answers.\n\n"""
        """Now answer the following question:\n"""
    )
    completion_settings = {
        "temperature": 0,
        "engine": "text-davinci-003",
        "max_tokens": 100,
    }

    config = ChatbotConfig(
        documents_file=DOCUMENTS_FILE,
        unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
        embedding_model="text-embedding-ada-002",
        top_k=3,
        thresh=0.7,
        max_words=3000,
        completion_kwargs=completion_settings,
        response_format="slack",
        text_before_prompt=prompt_header,
    )

    bot = Chatbot(config)
    reply = bot.process_input("What is a transformer?")
    assert isinstance(reply, str)


def test_chatbot_mock_data(tmp_path, monkeypatch):
    """Run the chatbot with its document, embedding and completion hooks stubbed.

    Three names under ``buster.chatbot`` are replaced through ``monkeypatch``:
    the documents-manager lookup (returns ``DocumentsMock``), ``get_embedding``
    (returns a fake embedding) and ``openai.Completion.create`` (returns a
    canned completion). The answer must be a ``str`` that starts with the
    canned completion text.
    """
    gpt_expected_answer = "this is GPT answer"

    def fake_manager_lookup(filepath):
        # Hands back the mock class itself, whatever path is requested.
        return DocumentsMock

    def fake_get_embedding(x, engine):
        # Both arguments are ignored; a fresh random embedding is returned.
        return get_fake_embedding()

    def fake_completion_create(**kwargs):
        return {"choices": [{"text": gpt_expected_answer}]}

    monkeypatch.setattr("buster.chatbot.get_documents_manager_from_extension", fake_manager_lookup)
    monkeypatch.setattr("buster.chatbot.get_embedding", fake_get_embedding)
    monkeypatch.setattr("buster.chatbot.openai.Completion.create", fake_completion_create)

    prompt_header = (
        """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
        """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
        """Do not include any links to urls or hyperlinks in your answers.\n\n"""
        """Now answer the following question:\n"""
    )
    completion_settings = {
        "temperature": 0,
        "engine": "text-davinci-003",
        "max_tokens": 100,
    }

    config = ChatbotConfig(
        documents_file=tmp_path / "not_a_real_file.tar.gz",
        unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
        embedding_model="text-embedding-ada-002",
        top_k=3,
        thresh=0.7,
        max_words=3000,
        completion_kwargs=completion_settings,
        response_format="slack",
        text_before_prompt=prompt_header,
    )

    bot = Chatbot(config)
    reply = bot.process_input("What is a transformer?")
    assert isinstance(reply, str)
    assert reply.startswith(gpt_expected_answer)