diff --git "a/data/Austro-Asiatic.json" "b/data/Austro-Asiatic.json" --- "a/data/Austro-Asiatic.json" +++ "b/data/Austro-Asiatic.json" @@ -2,3108 +2,4133 @@ "name": "Austro-Asiatic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Mon-Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jah Hut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jah Hut", "iso_1_code": null, "iso_3_code": "jah", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1167", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1166", + "scripts": [], + "own_tokenizer": false }, { "name": "North Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chewong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cheq Wong", "iso_1_code": null, "iso_3_code": "cwg", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1170", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1169", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Batek", "iso_1_code": null, "iso_3_code": "btq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1172", + "scripts": [], + "own_tokenizer": false }, { "name": "Jehai", "iso_1_code": null, "iso_3_code": "jhi", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1173", + "scripts": [], + "own_tokenizer": false }, { "name": "Minriq", "iso_1_code": null, "iso_3_code": "mnq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1174", + "scripts": [], + "own_tokenizer": false }, { "name": "Mintil", "iso_1_code": null, "iso_3_code": "mzt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1175", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1171", + "scripts": [], + "own_tokenizer": false }, { "name": "Tonga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ten\u2019edn", "iso_1_code": null, "iso_3_code": "tnz", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1177", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1176", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kintaq", "iso_1_code": null, "iso_3_code": "knq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1179", + "scripts": [], + "own_tokenizer": false }, { "name": "Kensiu", "iso_1_code": null, "iso_3_code": "kns", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1180", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1178", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1168", + "scripts": [], + "own_tokenizer": false }, { "name": "Senoic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lanoh", "iso_1_code": null, "iso_3_code": "lnh", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1182", + "scripts": [], + "own_tokenizer": false }, { "name": "Sab\u00fcm", "iso_1_code": null, "iso_3_code": "sbo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1183", + "scripts": [], + "own_tokenizer": false }, { "name": "Semai", "iso_1_code": null, "iso_3_code": "sea", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1184", + "scripts": [], + "own_tokenizer": false }, { "name": "Semnam", "iso_1_code": null, "iso_3_code": "ssm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1185", + "scripts": [], + "own_tokenizer": false }, { "name": "Temiar", "iso_1_code": null, "iso_3_code": "tea", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1186", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1181", + "scripts": [], + "own_tokenizer": false }, { "name": "South Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mah Meri", "iso_1_code": null, "iso_3_code": "mhe", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1188", + "scripts": [], + "own_tokenizer": false }, { "name": "Semelai", "iso_1_code": null, "iso_3_code": "sza", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1189", + "scripts": [], + "own_tokenizer": false }, { "name": "Semaq Beri", "iso_1_code": null, "iso_3_code": "szc", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1190", + "scripts": [], + "own_tokenizer": false }, { "name": "Temoq", "iso_1_code": null, "iso_3_code": "tmo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1191", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1187", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1165", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Mon-Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Alak", "iso_1_code": null, "iso_3_code": "alk", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1195", + "scripts": [], + "own_tokenizer": false }, { "name": "Bahnar", "iso_1_code": null, "iso_3_code": "bdq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1196", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Romam", "iso_1_code": null, "iso_3_code": "rmx", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1197", + "scripts": [], + "own_tokenizer": false }, { "name": "Tampuan", "iso_1_code": null, "iso_3_code": "tpu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1198", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1194", + "scripts": [], + "own_tokenizer": false }, { "name": "East Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cua", "iso_1_code": null, "iso_3_code": "cua", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1200", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1199", + "scripts": [], + "own_tokenizer": false }, { "name": "North Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Katua", "iso_1_code": null, "iso_3_code": "kta", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1202", + "scripts": [], + "own_tokenizer": false }, { "name": "Kachok", "iso_1_code": null, "iso_3_code": "xkk", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1203", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kayong", "iso_1_code": null, "iso_3_code": "kxy", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1205", + "scripts": [], + "own_tokenizer": false }, { "name": "Takua", "iso_1_code": null, "iso_3_code": "tkz", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1206", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1204", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Trieng", "iso_1_code": null, "iso_3_code": "stg", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1208", + "scripts": [], + "own_tokenizer": false }, { "name": "Talieng", "iso_1_code": null, "iso_3_code": "tdf", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1209", + "scripts": [], + "own_tokenizer": false }, { "name": "Duan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Halang Doan", "iso_1_code": null, "iso_3_code": "hld", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1211", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1210", + "scripts": [], + "own_tokenizer": false }, { "name": "Jeh-Halang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Halang", "iso_1_code": null, "iso_3_code": "hal", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1213", + "scripts": [], + "own_tokenizer": false }, { "name": "Jeh", "iso_1_code": null, "iso_3_code": "jeh", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1214", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1212", + "scripts": [], + "own_tokenizer": false }, { "name": "Rengao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Rengao", "iso_1_code": null, "iso_3_code": "ren", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1216", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1215", + "scripts": [], + "own_tokenizer": false }, { "name": "Sedang-Todrah", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sedang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hre", "iso_1_code": null, "iso_3_code": "hre", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1219", + "scripts": [], + "own_tokenizer": false }, { "name": "Sedang", "iso_1_code": null, "iso_3_code": "sed", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1220", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1218", + "scripts": [], + "own_tokenizer": false }, { "name": "Todrah-Monom", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Monom", "iso_1_code": null, "iso_3_code": "moo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1222", + "scripts": [], + "own_tokenizer": false }, { "name": "Todrah", "iso_1_code": null, "iso_3_code": "tdr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1223", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1221", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1217", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1207", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1201", + "scripts": [], + "own_tokenizer": false }, { "name": "South Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Stieng, Budeh", "iso_1_code": null, "iso_3_code": "stt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1225", + "scripts": [], + "own_tokenizer": false }, { "name": "Sre-Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mnong, Eastern", "iso_1_code": null, "iso_3_code": "mng", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1229", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1228", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern-Central Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mnong, Central", "iso_1_code": null, "iso_3_code": "cmo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1231", + "scripts": [ + "Latn", + "Khmr" + ], + "own_tokenizer": false }, { "name": "Mnong, Southern", "iso_1_code": null, "iso_3_code": "mnn", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1232", + "scripts": [], + "own_tokenizer": false }, { "name": "Kraol", "iso_1_code": null, "iso_3_code": "rka", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1233", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1230", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1227", + "scripts": [], + "own_tokenizer": false }, { "name": "Sre", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Maa", "iso_1_code": null, "iso_3_code": "cma", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1235", + "scripts": [], + "own_tokenizer": false }, { "name": "Koho", "iso_1_code": null, "iso_3_code": "kpm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1236", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1234", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1226", + "scripts": [], + "own_tokenizer": false }, { "name": "Stieng-Chrau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chrau", "iso_1_code": null, "iso_3_code": "crw", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1238", + "scripts": [], + "own_tokenizer": false }, { "name": "Mel-Khaonh", "iso_1_code": null, "iso_3_code": "hkn", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1239", + "scripts": [], + "own_tokenizer": false }, { "name": "Stieng, Bulo", "iso_1_code": null, "iso_3_code": "sti", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1240", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1237", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1224", + "scripts": [], + "own_tokenizer": false }, { "name": "West Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lavi", "iso_1_code": null, "iso_3_code": "lvi", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1242", + "scripts": [], + "own_tokenizer": false }, { "name": "Brao-Kravet", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Brao", "iso_1_code": null, "iso_3_code": "brb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1244", + "scripts": [], + "own_tokenizer": false }, { "name": "Krung", "iso_1_code": null, "iso_3_code": "krr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1245", + "scripts": [], + "own_tokenizer": false }, { "name": "Kavet", "iso_1_code": null, "iso_3_code": "krv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1246", + "scripts": [], + "own_tokenizer": false }, { "name": "Sou", "iso_1_code": null, "iso_3_code": "sqq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1247", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1243", + "scripts": [], + "own_tokenizer": false }, { "name": "Laven", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Laven", "iso_1_code": null, "iso_3_code": "lbo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1249", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1248", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyaheun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyaheun", "iso_1_code": null, "iso_3_code": "nev", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1251", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1250", + "scripts": [], + "own_tokenizer": false }, { "name": "Oi-The", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Oy", "iso_1_code": null, "iso_3_code": "oyb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1253", + "scripts": [], + "own_tokenizer": false }, { "name": "Sapuan", "iso_1_code": null, "iso_3_code": "spu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1254", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1252", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1241", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1193", + "scripts": [], + "own_tokenizer": false }, { "name": "Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ta\u2019oih", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ir", "iso_1_code": null, "iso_3_code": "irr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1258", + "scripts": [], + "own_tokenizer": false }, { "name": "Ong", "iso_1_code": null, "iso_3_code": "oog", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1259", + "scripts": [], + "own_tokenizer": false }, { "name": "Ta\u2019oih, Upper", "iso_1_code": null, "iso_3_code": "tth", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1260", + "scripts": [], + "own_tokenizer": false }, { "name": "Ta\u2019oih, Lower", "iso_1_code": null, "iso_3_code": "tto", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1261", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1257", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1256", + "scripts": [], + "own_tokenizer": false }, { "name": "East Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Katu-Pacoh", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Katu, Eastern", "iso_1_code": null, "iso_3_code": "ktv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1264", + "scripts": [], + "own_tokenizer": false }, { "name": "Katu, Western", "iso_1_code": null, "iso_3_code": "kuf", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1265", + "scripts": [], + "own_tokenizer": false }, { "name": "Pacoh", "iso_1_code": null, "iso_3_code": "pac", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1266", + "scripts": [], + "own_tokenizer": false }, { "name": "Phuong", "iso_1_code": null, "iso_3_code": "phg", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1267", + "scripts": [], + "own_tokenizer": false }, { "name": "Tareng", "iso_1_code": null, "iso_3_code": "tgr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1268", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1263", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngeq-Nkriang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kriang", "iso_1_code": null, "iso_3_code": "ngt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1270", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1269", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1262", + "scripts": [], + "own_tokenizer": false }, { "name": "West Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bru, Eastern", "iso_1_code": null, "iso_3_code": "bru", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1273", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bru, Western", "iso_1_code": null, "iso_3_code": "brv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1274", + "scripts": [], + "own_tokenizer": false }, { "name": "Katang, Northern", "iso_1_code": null, "iso_3_code": "ncq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1275", + "scripts": [ + "Laoo" + ], + "own_tokenizer": false }, { "name": "Katang, Southern", "iso_1_code": null, "iso_3_code": "sct", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1276", + "scripts": [], + "own_tokenizer": false }, { "name": "So", "iso_1_code": null, "iso_3_code": "sss", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1277", + "scripts": [], + "own_tokenizer": false }, { "name": "Khua", "iso_1_code": null, "iso_3_code": "xhv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1278", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1272", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kuay", "iso_1_code": null, "iso_3_code": "kdt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1280", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyeu", "iso_1_code": null, "iso_3_code": "nyl", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1281", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1279", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1271", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1255", + "scripts": [], + "own_tokenizer": false }, { "name": "Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khmer", "iso_1_code": "km", "iso_3_code": "khm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1283", + "scripts": [ + "Khmr" + ], + "own_tokenizer": false }, { "name": "Khmer, Northern", "iso_1_code": null, "iso_3_code": "kxm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1284", + "scripts": [ + "Thai" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1282", + "scripts": [], + "own_tokenizer": false }, { "name": "Pearic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pear", "iso_1_code": null, "iso_3_code": "pcb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1287", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1286", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chong", "iso_1_code": null, "iso_3_code": "cog", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1290", + "scripts": [], + "own_tokenizer": false }, { "name": "Chung", "iso_1_code": null, "iso_3_code": "scq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1291", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1289", + "scripts": [], + "own_tokenizer": false }, { "name": "Samre", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Somray", "iso_1_code": null, "iso_3_code": "smu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1293", + "scripts": [], + "own_tokenizer": false }, { "name": "Samre", "iso_1_code": null, "iso_3_code": "sxm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1294", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1292", + "scripts": [], + "own_tokenizer": false }, { "name": "Suoy", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Su\u2019ung", "iso_1_code": null, "iso_3_code": "syo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1296", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1295", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1288", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1285", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1192", + "scripts": [], + "own_tokenizer": false }, { "name": "Monic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mon", "iso_1_code": null, "iso_3_code": "mnw", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1298", + "scripts": [ + "Mymr" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1297", + "scripts": [], + "own_tokenizer": false }, { "name": "Nicobar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Car", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nicobarese, Car", "iso_1_code": null, "iso_3_code": "caq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1301", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1300", + "scripts": [], + "own_tokenizer": false }, { "name": "Chowra-Teressa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chaura", "iso_1_code": null, "iso_3_code": "crv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1303", + "scripts": [], + "own_tokenizer": false }, { "name": "Teressa", "iso_1_code": null, "iso_3_code": "tef", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1304", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1302", + "scripts": [], + "own_tokenizer": false }, { "name": "Great Nicobar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nicobarese, Southern", "iso_1_code": null, "iso_3_code": "nik", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1306", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1305", + "scripts": [], + "own_tokenizer": false }, { "name": "Nancowry", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nicobarese, Central", "iso_1_code": null, "iso_3_code": "ncb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1308", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1307", + "scripts": [], + "own_tokenizer": false }, { "name": "Shom Peng", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Shom Peng", "iso_1_code": null, "iso_3_code": "sii", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1310", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1309", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1299", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Mon-Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khasian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "War-Jaintia", "iso_1_code": null, "iso_3_code": "aml", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1313", + "scripts": [], + "own_tokenizer": false }, { "name": "Khasi", "iso_1_code": null, "iso_3_code": "kha", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1314", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lyngngam", "iso_1_code": null, "iso_3_code": "lyg", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1315", + "scripts": [], + "own_tokenizer": false }, { "name": "Pnar", "iso_1_code": null, "iso_3_code": "pbv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1316", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1312", + "scripts": [], + "own_tokenizer": false }, { "name": "Khmuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khao", "iso_1_code": null, "iso_3_code": "xao", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1319", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1318", + "scripts": [], + "own_tokenizer": false }, { "name": "Mal-Khmu\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khmu\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khuen", "iso_1_code": null, "iso_3_code": "khf", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1322", + "scripts": [], + "own_tokenizer": false }, { "name": "Khmu", "iso_1_code": null, "iso_3_code": "kjg", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1323", + "scripts": [], + "own_tokenizer": false }, { "name": "O\u2019du", "iso_1_code": null, "iso_3_code": "tyh", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1324", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1321", + "scripts": [], + "own_tokenizer": false }, { "name": "Mal-Prai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mal", "iso_1_code": null, "iso_3_code": "mlf", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1326", + "scripts": [], + "own_tokenizer": false }, { "name": "Prai", "iso_1_code": null, "iso_3_code": "prt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1327", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1325", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1320", + "scripts": [], + "own_tokenizer": false }, { "name": "Mlabri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mlabri", "iso_1_code": null, "iso_3_code": "mra", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1329", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1328", + "scripts": [], + "own_tokenizer": false }, { "name": "Xinh Mul", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Phong-Kniang", "iso_1_code": null, "iso_3_code": "pnx", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1331", + "scripts": [], + "own_tokenizer": false }, { "name": "Puoc", "iso_1_code": null, "iso_3_code": "puo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1332", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1330", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1317", + "scripts": [], + "own_tokenizer": false }, { "name": "Mang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mang", "iso_1_code": null, "iso_3_code": "zng", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1334", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1333", + "scripts": [], + "own_tokenizer": false }, { "name": "Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Angkuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hu", "iso_1_code": null, "iso_3_code": "huo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1338", + "scripts": [], + "own_tokenizer": false }, { "name": "Kon Keu", "iso_1_code": null, "iso_3_code": "kkn", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1339", + "scripts": [], + "own_tokenizer": false }, { "name": "Man Met", "iso_1_code": null, "iso_3_code": "mml", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1340", + "scripts": [], + "own_tokenizer": false }, { "name": "Mok", "iso_1_code": null, "iso_3_code": "mqt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1341", + "scripts": [], + "own_tokenizer": false }, { "name": "Samtao", "iso_1_code": null, "iso_3_code": "stu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1342", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai Loi", "iso_1_code": null, "iso_3_code": "tlq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1343", + "scripts": [], + "own_tokenizer": false }, { "name": "Muak Sa-aak", "iso_1_code": null, "iso_3_code": "ukk", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1344", + "scripts": [], + "own_tokenizer": false }, { "name": "U", "iso_1_code": null, "iso_3_code": "uuu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1345", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiorr", "iso_1_code": null, "iso_3_code": "xko", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1346", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1337", + "scripts": [], + "own_tokenizer": false }, { "name": "Bit-Khang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bit", "iso_1_code": null, "iso_3_code": "bgk", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1348", + "scripts": [], + "own_tokenizer": false }, { "name": "Bumang", "iso_1_code": null, "iso_3_code": "bvp", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1349", + "scripts": [], + "own_tokenizer": false }, { "name": "Kh\u00e1ng", "iso_1_code": null, "iso_3_code": "kjm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1350", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1347", + "scripts": [], + "own_tokenizer": false }, { "name": "Lametic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Con", "iso_1_code": null, "iso_3_code": "cno", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1352", + "scripts": [], + "own_tokenizer": false }, { "name": "Rmeet", "iso_1_code": null, "iso_3_code": "lbn", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1353", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1351", + "scripts": [], + "own_tokenizer": false }, { "name": "Waic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bulang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Blang", "iso_1_code": null, "iso_3_code": "blr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1356", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1355", + "scripts": [], + "own_tokenizer": false }, { "name": "Lawa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lawa, Western", "iso_1_code": null, "iso_3_code": "lcp", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1358", + "scripts": [ + "Thai" + ], + "own_tokenizer": false }, { "name": "Lawa, Eastern", "iso_1_code": null, "iso_3_code": "lwl", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1359", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1357", + "scripts": [], + "own_tokenizer": false }, { "name": "Wa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Wa, Parauk", "iso_1_code": null, "iso_3_code": "prk", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1361", + "scripts": [], + "own_tokenizer": false }, { "name": "Awa", "iso_1_code": null, "iso_3_code": "vwa", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1362", + "scripts": [], + "own_tokenizer": false }, { "name": "Wa, Vo", "iso_1_code": null, "iso_3_code": "wbm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1363", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1360", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1354", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1336", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Danau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Danau", "iso_1_code": null, "iso_3_code": "dnu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1366", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1365", + "scripts": [], + "own_tokenizer": false }, { "name": "Palaung", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Palaung, Ruching", "iso_1_code": null, "iso_3_code": "pce", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1368", + "scripts": [], + "own_tokenizer": false }, { "name": "Palaung, Shwe", "iso_1_code": null, "iso_3_code": "pll", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1369", + "scripts": [], + "own_tokenizer": false }, { "name": "Palaung, Rumai", "iso_1_code": null, "iso_3_code": "rbb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1370", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1367", + "scripts": [], + "own_tokenizer": false }, { "name": "Riang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Riang Lang", "iso_1_code": null, "iso_3_code": "ril", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1372", + "scripts": [], + "own_tokenizer": false }, { "name": "Riang Lai", "iso_1_code": null, "iso_3_code": "yin", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1373", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1371", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1364", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1335", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1311", + "scripts": [], + "own_tokenizer": false }, { "name": "Palyu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bugan", "iso_1_code": null, "iso_3_code": "bbh", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1375", + "scripts": [], + "own_tokenizer": false }, { "name": "Bolyu", "iso_1_code": null, "iso_3_code": "ply", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1376", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1374", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Monic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyahkur", "iso_1_code": null, "iso_3_code": "cbn", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1378", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1377", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kemiehua", "iso_1_code": null, "iso_3_code": "kfj", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1380", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuanhua", "iso_1_code": null, "iso_3_code": "xnh", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1381", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1379", + "scripts": [], + "own_tokenizer": false }, { "name": "Viet-Muong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Chut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Arem", "iso_1_code": null, "iso_3_code": "aem", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1384", + "scripts": [], + "own_tokenizer": false }, { "name": "Maleng", "iso_1_code": null, "iso_3_code": "pkt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1385", + "scripts": [], + "own_tokenizer": false }, { "name": "Chut", "iso_1_code": null, "iso_3_code": "scb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1386", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1383", + "scripts": [], + "own_tokenizer": false }, { "name": "Cuoi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hung", "iso_1_code": null, "iso_3_code": "hnu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1388", + "scripts": [], + "own_tokenizer": false }, { "name": "Tho", "iso_1_code": null, "iso_3_code": "tou", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1389", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1387", + "scripts": [], + "own_tokenizer": false }, { "name": "Muong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bo", "iso_1_code": null, "iso_3_code": "bgl", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1391", + "scripts": [], + "own_tokenizer": false }, { "name": "Muong", "iso_1_code": null, "iso_3_code": "mtq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1392", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngu\u00f4n", "iso_1_code": null, "iso_3_code": "nuo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1393", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1390", + "scripts": [], + "own_tokenizer": false }, { "name": "Thavung", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aheu", "iso_1_code": null, "iso_3_code": "thm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1395", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1394", + "scripts": [], + "own_tokenizer": false }, { "name": "Vietnamese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Vietnamese", "iso_1_code": "vi", "iso_3_code": "vie", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "1397", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "1396", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1382", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1164", + "scripts": [], + "own_tokenizer": false }, { "name": "Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "North Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kherwari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Agariya", "iso_1_code": null, "iso_3_code": "agi", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1401", + "scripts": [], + "own_tokenizer": false }, { "name": "Bijori", "iso_1_code": null, "iso_3_code": "bix", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1402", + "scripts": [], + "own_tokenizer": false }, { "name": "Kodaku", "iso_1_code": null, "iso_3_code": "ksz", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1403", + "scripts": [], + "own_tokenizer": false }, { "name": "Mundari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Asuri", "iso_1_code": null, "iso_3_code": "asr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1405", + "scripts": [], + "own_tokenizer": false }, { "name": "Birhor", "iso_1_code": null, "iso_3_code": "biy", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1406", + "scripts": [], + "own_tokenizer": false }, { "name": "Koda", "iso_1_code": null, "iso_3_code": "cdz", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1407", + "scripts": [], + "own_tokenizer": false }, { "name": "Kol", "iso_1_code": null, "iso_3_code": "ekl", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1408", + "scripts": [], + "own_tokenizer": false }, { "name": "Ho", "iso_1_code": null, "iso_3_code": "hoc", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1409", + "scripts": [ + "Latn", + "Wara" + ], + "own_tokenizer": false }, { "name": "Korwa", "iso_1_code": null, "iso_3_code": "kfp", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1410", + "scripts": [], + "own_tokenizer": false }, { "name": "Mundari", "iso_1_code": null, "iso_3_code": "unr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1411", + "scripts": [], + "own_tokenizer": false }, { "name": "Munda", "iso_1_code": null, "iso_3_code": "unx", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1412", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1404", + "scripts": [], + "own_tokenizer": false }, { "name": "Santali", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mahali", "iso_1_code": null, "iso_3_code": "mjx", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1414", + "scripts": [], + "own_tokenizer": false }, { "name": "Santhali", "iso_1_code": null, "iso_3_code": "sat", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1415", + "scripts": [ + "Latn", + "Olck" + ], + "own_tokenizer": false }, { "name": "Turi", "iso_1_code": null, "iso_3_code": "trd", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1416", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1413", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1400", + "scripts": [], + "own_tokenizer": false }, { "name": "Korku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Korku", "iso_1_code": null, "iso_3_code": "kfq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1418", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1417", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1399", + "scripts": [], + "own_tokenizer": false }, { "name": "South Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kharia-Juang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Juang", "iso_1_code": null, "iso_3_code": "jun", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1421", + "scripts": [ + "Orya" + ], + "own_tokenizer": false }, { "name": "Kharia", "iso_1_code": null, "iso_3_code": "khr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1422", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1420", + "scripts": [], + "own_tokenizer": false }, { "name": "Koraput Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gutob-Remo-Geta\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Geta\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gata\u2019", "iso_1_code": null, "iso_3_code": "gaq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1426", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1425", + "scripts": [], + "own_tokenizer": false }, { "name": "Gutob-Remo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bondo", "iso_1_code": null, "iso_3_code": "bfw", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1428", + "scripts": [], + "own_tokenizer": false }, { "name": "Gadaba, Bodo", "iso_1_code": null, "iso_3_code": "gbj", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1429", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1427", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1424", + "scripts": [], + "own_tokenizer": false }, { "name": "Sora-Juray-Gorum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gorum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Parenga", "iso_1_code": null, "iso_3_code": "pcj", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1432", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1431", + "scripts": [], + "own_tokenizer": false }, { "name": "Sora-Juray", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Juray", "iso_1_code": null, "iso_3_code": "juy", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1434", + "scripts": [], + "own_tokenizer": false }, { "name": "Sora", "iso_1_code": null, "iso_3_code": "srb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1435", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1433", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1430", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1423", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1419", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1398", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1163", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file