# Sanity check for generated sample files: for every `{n}.csv` (n = 6..49) in
# the step_128 samples directory, list the entries of the `sequence` column
# whose tokenized length, minus 2, is not equal to n.
import pandas as pd
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D")

for expected_len in range(6, 50):
    csv_path = f'/home/tc415/discrete-diffusion-guidance/samples/step_128/{expected_len}.csv'
    candidates = pd.read_csv(csv_path)['sequence'].tolist()

    # NOTE(review): the 2 subtracted below presumably accounts for special
    # tokens the tokenizer wraps around each sequence -- confirm against the
    # tokenizer configuration.
    mismatched = [
        candidate
        for candidate in candidates
        if len(tokenizer(candidate)['input_ids']) - 2 != expected_len
    ]

    # One report line per file; an empty list means every sequence matched.
    print(f"{expected_len}.csv: {mismatched}")