File size: 3,694 Bytes
b110593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: [email protected]
//

package clients

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// Test_getTokensCount verifies that getTokensCount returns the expected token
// counts for a range of OpenAI models, and a descriptive error for an unknown
// model. The cookbook cases cross-check the per-message overhead accounting
// against examples published by OpenAI.
func Test_getTokensCount(t *testing.T) {
	prompt := `

	Summarize the following in a tweet:



	As generative language models such as GPT-4 continue to push the boundaries of what AI can do,

	the excitement surrounding its potential is spreading quickly. Many applications and projects are

	built on top of GPT-4 to extend its capabilities and features. Additionally, many tools were created

	in order to interact with large language models, like LangChain as an example. Auto-GPT is one of the fastest

	rising open-source python projects harnessing the power of GPT-4!

	`
	messages := []message{
		{Role: "user", Content: prompt},
	}
	// Example messages from: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
	// added for sanity check that getTokensCount method computes tokens accordingly to above examples provided by OpenAI
	exampleMessages := []message{
		{
			Role:    "system",
			Content: "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.",
		},
		{
			Role:    "system",
			Name:    "example_user",
			Content: "New synergies will help drive top-line growth.",
		},
		{
			Role:    "system",
			Name:    "example_assistant",
			Content: "Things working well together will increase revenue.",
		},
		{
			Role:    "system",
			Name:    "example_user",
			Content: "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.",
		},
		{
			Role:    "system",
			Name:    "example_assistant",
			Content: "Let's talk later when we're less busy about how to do better.",
		},
		{
			Role:    "user",
			Content: "This late pivot means we don't have time to boil the ocean for the client deliverable.",
		},
	}
	tests := []struct {
		name     string
		model    string
		messages []message
		want     int    // expected token count when no error is expected
		wantErr  string // exact error message; empty means success is expected
	}{
		{
			name:     "text-davinci-002",
			model:    "text-davinci-002",
			messages: messages,
			want:     128,
		},
		{
			name:     "text-davinci-003",
			model:    "text-davinci-003",
			messages: messages,
			want:     128,
		},
		{
			name:     "gpt-3.5-turbo",
			model:    "gpt-3.5-turbo",
			messages: messages,
			want:     122,
		},
		{
			name:     "gpt-4",
			model:    "gpt-4",
			messages: messages,
			want:     121,
		},
		{
			name:     "gpt-4-32k",
			model:    "gpt-4-32k",
			messages: messages,
			want:     121,
		},
		{
			name:     "non-existent-model",
			model:    "non-existent-model",
			messages: messages,
			wantErr:  "encoding for model non-existent-model: no encoding for model non-existent-model",
		},
		{
			name:     "OpenAI cookbook example - gpt-3.5-turbo-0301",
			model:    "gpt-3.5-turbo-0301",
			messages: exampleMessages,
			want:     127,
		},
		{
			name:     "OpenAI cookbook example - gpt-4",
			model:    "gpt-4",
			messages: exampleMessages,
			want:     129,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := getTokensCount(tt.model, tt.messages)
			// Branch on the expectation, not the outcome: an unexpected
			// error in a success case now fails with a clear NoError
			// message instead of a confusing EqualError against "".
			if tt.wantErr != "" {
				assert.EqualError(t, err, tt.wantErr)
			} else {
				assert.NoError(t, err)
				assert.Equal(t, tt.want, got)
			}
		})
	}
}