File size: 1,024 Bytes
ec77f50
00a6def
 
 
 
 
 
 
26f466f
 
00a6def
 
 
0c62ce7
00a6def
 
0c62ce7
 
 
 
 
 
 
 
 
00a6def
0c62ce7
 
26f466f
00a6def
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Hex display color for each entity-group label; annotate_sentence looks
# labels up here when building highlighted segments.
colors = {
    'GeneOrGeneProduct': '#aad4aa',  # Pastel green
    'DiseaseOrPhenotypicFeature': '#f8b400',  # Pastel orange
    'ChemicalEntity': '#a4c2f4',  # Pastel blue
    'OrganismTaxon': '#ffb6c1',  # Pastel pink
    'SequenceVariant': '#e2b0ff',  # Pastel purple
    'CellLine': '#ffcc99',  # Pastel peach
}

def annotate_sentence(sentence, predictions):
    """Split *sentence* into plain-text and labelled-entity segments.

    Args:
        sentence: The text the predictions refer to.
        predictions: Iterable of dicts, each providing ``'start'`` and
            ``'end'`` offsets into *sentence* and an ``'entity_group'``
            label. Spans are consumed in the order given.
            NOTE(review): presumably sorted and non-overlapping — confirm
            against the caller; no reordering or overlap handling is done.

    Returns:
        A list of segments in sentence order. Text outside any prediction,
        and spans whose label is ``'null'``, are plain strings; every other
        span is a ``(text, entity_group, color)`` tuple with the color taken
        from the module-level ``colors`` mapping. Empty segments are
        omitted. With no predictions the whole sentence is returned as a
        single plain segment (or an empty list for an empty sentence).

    Raises:
        KeyError: If a non-``'null'`` ``entity_group`` has no entry in
            ``colors``.
    """
    output = []
    cursor = 0  # offset just past the last prediction handled so far
    for p in predictions:
        start, end = p['start'], p['end']

        # Plain text between the previous prediction and this one.
        gap = sentence[cursor:start]
        if gap:
            output.append(gap)

        # The predicted span itself; 'null' spans stay as plain text.
        span = sentence[start:end]
        if span:
            label = p['entity_group']
            output.append(
                span if label == 'null' else (span, label, colors[label])
            )

        cursor = end

    # Trailing text after the last prediction. Slice from the cursor rather
    # than the loop variable so an empty `predictions` does not hit an
    # unbound name.
    tail = sentence[cursor:]
    if tail:
        output.append(tail)
    return output