File size: 610 Bytes
41dd156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import pandas as pd
from datasets import load_dataset

def get_samples():
    """Return one validation example per similarity score as a DataFrame.

    Loads the "mteb/stsbenchmark-sts" dataset and, for each score from 5
    down to 0 in steps of 0.5, picks the first row of the ``validation``
    split whose ``score`` equals that value.

    Returns:
        A ``pandas.DataFrame`` with the columns ``sentence1``,
        ``sentence2`` and ``score``; one row per target score, ordered
        from the highest score (5) to the lowest (0).

    Raises:
        IndexError: If the validation split has no row for one or more of
            the target scores.
    """
    # Order matters: it is the row order of the returned frame.
    target_scores = (5, 4.5, 4, 3.5, 3, 2.5, 2, 1.5, 1, 0.5, 0)
    wanted = set(target_scores)

    validation = load_dataset("mteb/stsbenchmark-sts")["validation"]

    # Scan the split once and remember the first row seen for each target
    # score, rather than filtering the whole split once per score.
    first_by_score = {}
    for example in validation:
        score = example["score"]
        if score in wanted and score not in first_by_score:
            first_by_score[score] = example
            if len(first_by_score) == len(wanted):
                break  # every target score has its first example

    missing = [score for score in target_scores if score not in first_by_score]
    if missing:
        raise IndexError(
            f"no validation example found for score(s): {missing}"
        )

    test_samples = pd.DataFrame(
        [first_by_score[score] for score in target_scores],
        columns=['sentence1', 'sentence2', 'score'],
    )
    return test_samples