usta-llm-demo/v2/tokenizer.json
{
"the": 0,
"capital": 1,
"of": 2,
"united": 3,
"state": 4,
"is": 5,
"not": 6,
"london": 7,
"france": 8,
"paris": 9,
"and": 10,
"berlin": 11,
"germany": 12,
"rome": 13,
"in": 14,
"italy": 15,
"madrid": 16,
"spain": 17,
"lisbon": 18,
"portugal": 19,
"kingdom": 20,
"washington": 21,
"although": 22,
"these": 23,
"place": 24,
"are": 25,
"often": 26,
"mention": 27,
"together": 28,
"each": 29,
"country": 30,
"has": 31,
"its": 32,
"own": 33,
"identity": 34,
"any": 35,
"european": 36,
"city": 37,
"remain": 38,
"important": 39,
"with": 40,
"a": 41,
"rich": 42,
"history": 43,
"culture": 44,
"europe": 45,
"made": 46,
"many": 47,
"unique": 48,
"world": 49,
"while": 50,
"known": 51,
"for": 52,
"art": 53,
"fashion": 54,
"famous": 55,
"they": 56,
"ed": 57,
"s": 58,
".": 59,
",": 60,
" ": 61,
"<unk>": 62,
"<pad>": 63
}
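For context, this file is a flat token-to-id vocabulary: lowercase words, a few suffix and punctuation entries ("ed", "s", ".", ",", " "), plus the special tokens "<unk>" (62) and "<pad>" (63). The sketch below shows one way such a file could be loaded and used, assuming a simple whitespace word-level tokenizer with an <unk> fallback and <pad> padding; the encode/decode helpers and the file path are illustrative assumptions, not code taken from the usta-llm-demo repository, which may handle the space and suffix entries differently.

import json

# Assumption: a plain word-level lookup over the vocabulary above.
with open("tokenizer.json", "r", encoding="utf-8") as f:
    vocab = json.load(f)                # token -> id, e.g. {"the": 0, ..., "<pad>": 63}

id_to_token = {i: t for t, i in vocab.items()}
UNK_ID = vocab["<unk>"]
PAD_ID = vocab["<pad>"]

def encode(text, max_len=None):
    """Map each lowercased whitespace-separated token to its id; unknown tokens become <unk>."""
    ids = [vocab.get(word, UNK_ID) for word in text.lower().split()]
    if max_len is not None:             # pad or truncate to a fixed length
        ids = ids[:max_len] + [PAD_ID] * max(0, max_len - len(ids))
    return ids

def decode(ids):
    """Join ids back into a string, skipping padding."""
    return " ".join(id_to_token[i] for i in ids if i != PAD_ID)

print(encode("the capital of france is paris ."))    # [0, 1, 2, 8, 5, 9, 59]
print(decode(encode("the capital of france is paris .")))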