c-bone commited on
Commit
b81d54d
·
verified ·
1 Parent(s): ce67f5a

Upload CIFTokenizer files

Browse files
Files changed (3) hide show
  1. id_to_token.json +1 -0
  2. token_to_id.json +1 -0
  3. tokenizer_config.json +1 -0
id_to_token.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"0": "Si", "1": "C", "2": "Pb", "3": "I", "4": "Br", "5": "Cl", "6": "Eu", "7": "O", "8": "Fe", "9": "Sb", "10": "In", "11": "S", "12": "N", "13": "U", "14": "Mn", "15": "Lu", "16": "Se", "17": "Tl", "18": "Hf", "19": "Ir", "20": "Ca", "21": "Ta", "22": "Cr", "23": "K", "24": "Pm", "25": "Mg", "26": "Zn", "27": "Cu", "28": "Sn", "29": "Ti", "30": "B", "31": "W", "32": "P", "33": "H", "34": "Pd", "35": "As", "36": "Co", "37": "Np", "38": "Tc", "39": "Hg", "40": "Pu", "41": "Al", "42": "Tm", "43": "Tb", "44": "Ho", "45": "Nb", "46": "Ge", "47": "Zr", "48": "Cd", "49": "V", "50": "Sr", "51": "Ni", "52": "Rh", "53": "Th", "54": "Na", "55": "Ru", "56": "La", "57": "Re", "58": "Y", "59": "Er", "60": "Ce", "61": "Pt", "62": "Ga", "63": "Li", "64": "Cs", "65": "F", "66": "Ba", "67": "Te", "68": "Mo", "69": "Gd", "70": "Pr", "71": "Bi", "72": "Sc", "73": "Ag", "74": "Rb", "75": "Dy", "76": "Yb", "77": "Nd", "78": "Au", "79": "Os", "80": "Pa", "81": "Sm", "82": "Be", "83": "Ac", "84": "Xe", "85": "Kr", "86": "He", "87": "Ne", "88": "Ar", "89": "0", "90": "1", "91": "2", "92": "3", "93": "4", "94": "5", "95": "6", "96": "7", "97": "8", "98": "9", "99": "_cell_length_b", "100": "_atom_site_occupancy", "101": "_atom_site_attached_hydrogens", "102": "_cell_length_a", "103": "_cell_angle_beta", "104": "_symmetry_equiv_pos_as_xyz", "105": "_cell_angle_gamma", "106": "_atom_site_fract_x", "107": "_symmetry_space_group_name_H-M", "108": "_symmetry_Int_Tables_number", "109": "_chemical_formula_structural", "110": "_chemical_name_systematic", "111": "_atom_site_fract_y", "112": "_atom_site_symmetry_multiplicity", "113": "_chemical_formula_sum", "114": "_atom_site_label", "115": "_atom_site_type_symbol", "116": "_cell_length_c", "117": "_atom_site_B_iso_or_equiv", "118": "_symmetry_equiv_pos_site_id", "119": "_cell_volume", "120": "_atom_site_fract_z", "121": "_cell_angle_alpha", "122": "_cell_formula_units_Z", "123": "loop_", "124": "data_", "125": "_atom_type_symbol", "126": "_atom_type_electronegativity", "127": "_atom_type_radius", "128": "_atom_type_ionic_radius", "129": "_atom_type_oxidation_number", "130": "x", "131": "y", "132": "z", "133": ".", "134": "(", "135": ")", "136": "+", "137": "-", "138": "/", "139": "'", "140": ",", "141": " ", "142": "\n", "143": "P6/mmm", "144": "Imma", "145": "P4_32_12", "146": "P4_2/mnm", "147": "Fd-3m", "148": "P3m1", "149": "P-3", "150": "P4mm", "151": "P4_332", "152": "P4/nnc", "153": "P2_12_12", "154": "Pnn2", "155": "Pbcn", "156": "P4_2/n", "157": "Cm", "158": "R3m", "159": "Cmce", "160": "Aea2", "161": "P-42_1m", "162": "P-42m", "163": "P2_13", "164": "R-3", "165": "Fm-3", "166": "Cmm2", "167": "Pn-3n", "168": "P6/mcc", "169": "P-6m2", "170": "P3_2", "171": "P-3m1", "172": "P3_212", "173": "I23", "174": "P-62m", "175": "P4_2nm", "176": "Pma2", "177": "Pmma", "178": "I-42m", "179": "P-31c", "180": "Pa-3", "181": "Pmmn", "182": "Pmmm", "183": "P4_2/ncm", "184": "I4/mcm", "185": "I-4m2", "186": "P3_1", "187": "Pcc2", "188": "Cmcm", "189": "I222", "190": "Fddd", "191": "P312", "192": "Cccm", "193": "P6_1", "194": "F-43c", "195": "P6_322", "196": "Pm-3", "197": "P3_121", "198": "P6_4", "199": "Ia-3d", "200": "Pm-3m", "201": "P2_1/c", "202": "C222_1", "203": "Pc", "204": "P4/n", "205": "Pba2", "206": "Ama2", "207": "Pbcm", "208": "P31m", "209": "Pcca", "210": "P222", "211": "P-43n", "212": "Pccm", "213": "P6_422", "214": "F23", "215": "P42_12", "216": "C222", "217": "Pnnn", "218": "P6_3cm", "219": "P4_12_12", "220": "P6/m", "221": "Fmm2", "222": "I4_1/a", "223": "P4/mbm", "224": "Pmn2_1", "225": "P4_2bc", "226": "P4_22_12", "227": "I-43d", "228": "I4/m", "229": "P4bm", "230": "Fdd2", "231": "P3", "232": "P6_122", "233": "Pnc2", "234": "P4_2/mcm", "235": "P4_122", "236": "Cmc2_1", "237": "P-6c2", "238": "R32", "239": "P4_1", "240": "P4_232", "241": "Pnna", "242": "P422", "243": "Pban", "244": "Cc", "245": "I4_122", "246": "P6_3/m", "247": "P6_3mc", "248": "I4_1/amd", "249": "P4_2", "250": "P4/nmm", "251": "Pmna", "252": "P4/m", "253": "Fm-3m", "254": "P4/mmm", "255": "Imm2", "256": "P4/ncc", "257": "P-62c", "258": "Ima2", "259": "P6_5", "260": "P2/c", "261": "P4/nbm", "262": "Ibam", "263": "P6_522", "264": "P6_3/mmc", "265": "I4/mmm", "266": "Fmmm", "267": "P2/m", "268": "P-4b2", "269": "I-4", "270": "C2/m", "271": "P4_2/mmc", "272": "P4", "273": "Fd-3c", "274": "P4_3", "275": "P2_1/m", "276": "I-43m", "277": "P-42c", "278": "F4_132", "279": "Pm", "280": "Pccn", "281": "P-4n2", "282": "P4_132", "283": "P23", "284": "I4cm", "285": "R3c", "286": "Amm2", "287": "Immm", "288": "Iba2", "289": "I4", "290": "Fd-3", "291": "P1", "292": "Pbam", "293": "P4_2/nbc", "294": "Im-3", "295": "P4_2/nnm", "296": "Pmc2_1", "297": "P-31m", "298": "R-3m", "299": "Ia-3", "300": "P622", "301": "F222", "302": "P2", "303": "P-1", "304": "Pmm2", "305": "P-4", "306": "Aem2", "307": "P6_222", "308": "P-3c1", "309": "P4_322", "310": "I422", "311": "Pnma", "312": "P6_3", "313": "P3c1", "314": "Pn-3", "315": "P4nc", "316": "P-6", "317": "P4/mcc", "318": "I2_12_12_1", "319": "P4_2/mbc", "320": "P31c", "321": "Ccc2", "322": "P4_2/nmc", "323": "P6_3/mcm", "324": "C2", "325": "Pbca", "326": "P-4c2", "327": "I4_1cd", "328": "P2_1", "329": "P3_112", "330": "P4_2mc", "331": "Pn-3m", "332": "C2/c", "333": "R3", "334": "P-43m", "335": "I432", "336": "P222_1", "337": "I-42d", "338": "I-4c2", "339": "P6cc", "340": "P6_2", "341": "P3_221", "342": "P321", "343": "Pca2_1", "344": "I4_1/acd", "345": "I4_132", "346": "F432", "347": "Pna2_1", "348": "Ccce", "349": "Ibca", "350": "P4/mnc", "351": "I4_1md", "352": "P2_12_12_1", "353": "R-3c", "354": "I2_13", "355": "P-4m2", "356": "Pm-3n", "357": "I4mm", "358": "F-43m", "359": "Pnnm", "360": "P-42_1c", "361": "Cmmm", "362": "P6mm", "363": "P4_2cm", "364": "P4_2/m", "365": "Im-3m", "366": "Fm-3c", "367": "I4_1", "368": "P4cc", "369": "Cmme", "370": "<unk>"}
token_to_id.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Si": 0, "C": 1, "Pb": 2, "I": 3, "Br": 4, "Cl": 5, "Eu": 6, "O": 7, "Fe": 8, "Sb": 9, "In": 10, "S": 11, "N": 12, "U": 13, "Mn": 14, "Lu": 15, "Se": 16, "Tl": 17, "Hf": 18, "Ir": 19, "Ca": 20, "Ta": 21, "Cr": 22, "K": 23, "Pm": 24, "Mg": 25, "Zn": 26, "Cu": 27, "Sn": 28, "Ti": 29, "B": 30, "W": 31, "P": 32, "H": 33, "Pd": 34, "As": 35, "Co": 36, "Np": 37, "Tc": 38, "Hg": 39, "Pu": 40, "Al": 41, "Tm": 42, "Tb": 43, "Ho": 44, "Nb": 45, "Ge": 46, "Zr": 47, "Cd": 48, "V": 49, "Sr": 50, "Ni": 51, "Rh": 52, "Th": 53, "Na": 54, "Ru": 55, "La": 56, "Re": 57, "Y": 58, "Er": 59, "Ce": 60, "Pt": 61, "Ga": 62, "Li": 63, "Cs": 64, "F": 65, "Ba": 66, "Te": 67, "Mo": 68, "Gd": 69, "Pr": 70, "Bi": 71, "Sc": 72, "Ag": 73, "Rb": 74, "Dy": 75, "Yb": 76, "Nd": 77, "Au": 78, "Os": 79, "Pa": 80, "Sm": 81, "Be": 82, "Ac": 83, "Xe": 84, "Kr": 85, "He": 86, "Ne": 87, "Ar": 88, "0": 89, "1": 90, "2": 91, "3": 92, "4": 93, "5": 94, "6": 95, "7": 96, "8": 97, "9": 98, "_cell_length_b": 99, "_atom_site_occupancy": 100, "_atom_site_attached_hydrogens": 101, "_cell_length_a": 102, "_cell_angle_beta": 103, "_symmetry_equiv_pos_as_xyz": 104, "_cell_angle_gamma": 105, "_atom_site_fract_x": 106, "_symmetry_space_group_name_H-M": 107, "_symmetry_Int_Tables_number": 108, "_chemical_formula_structural": 109, "_chemical_name_systematic": 110, "_atom_site_fract_y": 111, "_atom_site_symmetry_multiplicity": 112, "_chemical_formula_sum": 113, "_atom_site_label": 114, "_atom_site_type_symbol": 115, "_cell_length_c": 116, "_atom_site_B_iso_or_equiv": 117, "_symmetry_equiv_pos_site_id": 118, "_cell_volume": 119, "_atom_site_fract_z": 120, "_cell_angle_alpha": 121, "_cell_formula_units_Z": 122, "loop_": 123, "data_": 124, "_atom_type_symbol": 125, "_atom_type_electronegativity": 126, "_atom_type_radius": 127, "_atom_type_ionic_radius": 128, "_atom_type_oxidation_number": 129, "x": 130, "y": 131, "z": 132, ".": 133, "(": 134, ")": 135, "+": 136, "-": 137, "/": 138, "'": 139, ",": 140, " ": 141, "\n": 142, "P6/mmm_sg": 143, "Imma_sg": 144, "P4_32_12_sg": 145, "P4_2/mnm_sg": 146, "Fd-3m_sg": 147, "P3m1_sg": 148, "P-3_sg": 149, "P4mm_sg": 150, "P4_332_sg": 151, "P4/nnc_sg": 152, "P2_12_12_sg": 153, "Pnn2_sg": 154, "Pbcn_sg": 155, "P4_2/n_sg": 156, "Cm_sg": 157, "R3m_sg": 158, "Cmce_sg": 159, "Aea2_sg": 160, "P-42_1m_sg": 161, "P-42m_sg": 162, "P2_13_sg": 163, "R-3_sg": 164, "Fm-3_sg": 165, "Cmm2_sg": 166, "Pn-3n_sg": 167, "P6/mcc_sg": 168, "P-6m2_sg": 169, "P3_2_sg": 170, "P-3m1_sg": 171, "P3_212_sg": 172, "I23_sg": 173, "P-62m_sg": 174, "P4_2nm_sg": 175, "Pma2_sg": 176, "Pmma_sg": 177, "I-42m_sg": 178, "P-31c_sg": 179, "Pa-3_sg": 180, "Pmmn_sg": 181, "Pmmm_sg": 182, "P4_2/ncm_sg": 183, "I4/mcm_sg": 184, "I-4m2_sg": 185, "P3_1_sg": 186, "Pcc2_sg": 187, "Cmcm_sg": 188, "I222_sg": 189, "Fddd_sg": 190, "P312_sg": 191, "Cccm_sg": 192, "P6_1_sg": 193, "F-43c_sg": 194, "P6_322_sg": 195, "Pm-3_sg": 196, "P3_121_sg": 197, "P6_4_sg": 198, "Ia-3d_sg": 199, "Pm-3m_sg": 200, "P2_1/c_sg": 201, "C222_1_sg": 202, "Pc_sg": 203, "P4/n_sg": 204, "Pba2_sg": 205, "Ama2_sg": 206, "Pbcm_sg": 207, "P31m_sg": 208, "Pcca_sg": 209, "P222_sg": 210, "P-43n_sg": 211, "Pccm_sg": 212, "P6_422_sg": 213, "F23_sg": 214, "P42_12_sg": 215, "C222_sg": 216, "Pnnn_sg": 217, "P6_3cm_sg": 218, "P4_12_12_sg": 219, "P6/m_sg": 220, "Fmm2_sg": 221, "I4_1/a_sg": 222, "P4/mbm_sg": 223, "Pmn2_1_sg": 224, "P4_2bc_sg": 225, "P4_22_12_sg": 226, "I-43d_sg": 227, "I4/m_sg": 228, "P4bm_sg": 229, "Fdd2_sg": 230, "P3_sg": 231, "P6_122_sg": 232, "Pnc2_sg": 233, "P4_2/mcm_sg": 234, "P4_122_sg": 235, "Cmc2_1_sg": 236, "P-6c2_sg": 237, "R32_sg": 238, "P4_1_sg": 239, "P4_232_sg": 240, "Pnna_sg": 241, "P422_sg": 242, "Pban_sg": 243, "Cc_sg": 244, "I4_122_sg": 245, "P6_3/m_sg": 246, "P6_3mc_sg": 247, "I4_1/amd_sg": 248, "P4_2_sg": 249, "P4/nmm_sg": 250, "Pmna_sg": 251, "P4/m_sg": 252, "Fm-3m_sg": 253, "P4/mmm_sg": 254, "Imm2_sg": 255, "P4/ncc_sg": 256, "P-62c_sg": 257, "Ima2_sg": 258, "P6_5_sg": 259, "P2/c_sg": 260, "P4/nbm_sg": 261, "Ibam_sg": 262, "P6_522_sg": 263, "P6_3/mmc_sg": 264, "I4/mmm_sg": 265, "Fmmm_sg": 266, "P2/m_sg": 267, "P-4b2_sg": 268, "I-4_sg": 269, "C2/m_sg": 270, "P4_2/mmc_sg": 271, "P4_sg": 272, "Fd-3c_sg": 273, "P4_3_sg": 274, "P2_1/m_sg": 275, "I-43m_sg": 276, "P-42c_sg": 277, "F4_132_sg": 278, "Pm_sg": 279, "Pccn_sg": 280, "P-4n2_sg": 281, "P4_132_sg": 282, "P23_sg": 283, "I4cm_sg": 284, "R3c_sg": 285, "Amm2_sg": 286, "Immm_sg": 287, "Iba2_sg": 288, "I4_sg": 289, "Fd-3_sg": 290, "P1_sg": 291, "Pbam_sg": 292, "P4_2/nbc_sg": 293, "Im-3_sg": 294, "P4_2/nnm_sg": 295, "Pmc2_1_sg": 296, "P-31m_sg": 297, "R-3m_sg": 298, "Ia-3_sg": 299, "P622_sg": 300, "F222_sg": 301, "P2_sg": 302, "P-1_sg": 303, "Pmm2_sg": 304, "P-4_sg": 305, "Aem2_sg": 306, "P6_222_sg": 307, "P-3c1_sg": 308, "P4_322_sg": 309, "I422_sg": 310, "Pnma_sg": 311, "P6_3_sg": 312, "P3c1_sg": 313, "Pn-3_sg": 314, "P4nc_sg": 315, "P-6_sg": 316, "P4/mcc_sg": 317, "I2_12_12_1_sg": 318, "P4_2/mbc_sg": 319, "P31c_sg": 320, "Ccc2_sg": 321, "P4_2/nmc_sg": 322, "P6_3/mcm_sg": 323, "C2_sg": 324, "Pbca_sg": 325, "P-4c2_sg": 326, "I4_1cd_sg": 327, "P2_1_sg": 328, "P3_112_sg": 329, "P4_2mc_sg": 330, "Pn-3m_sg": 331, "C2/c_sg": 332, "R3_sg": 333, "P-43m_sg": 334, "I432_sg": 335, "P222_1_sg": 336, "I-42d_sg": 337, "I-4c2_sg": 338, "P6cc_sg": 339, "P6_2_sg": 340, "P3_221_sg": 341, "P321_sg": 342, "Pca2_1_sg": 343, "I4_1/acd_sg": 344, "I4_132_sg": 345, "F432_sg": 346, "Pna2_1_sg": 347, "Ccce_sg": 348, "Ibca_sg": 349, "P4/mnc_sg": 350, "I4_1md_sg": 351, "P2_12_12_1_sg": 352, "R-3c_sg": 353, "I2_13_sg": 354, "P-4m2_sg": 355, "Pm-3n_sg": 356, "I4mm_sg": 357, "F-43m_sg": 358, "Pnnm_sg": 359, "P-42_1c_sg": 360, "Cmmm_sg": 361, "P6mm_sg": 362, "P4_2cm_sg": 363, "P4_2/m_sg": 364, "Im-3m_sg": 365, "Fm-3c_sg": 366, "I4_1_sg": 367, "P4cc_sg": 368, "Cmme_sg": 369, "<unk>": 370}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name": "CIFTokenizer", "unk_token": "<unk>"}