import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)


class Paraphraser:
    """Generate diverse paraphrases of input text using a T5-based model.

    Uses diverse beam search (beam groups + diversity penalty) so the
    returned candidates differ from one another rather than being
    near-duplicates.
    """

    def __init__(self, model_name='humarin/chatgpt_paraphraser_on_T5_base'):
        """Load the tokenizer and seq2seq model.

        Args:
            model_name: Hugging Face model identifier or local path.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        # Disable dropout etc. for deterministic inference.
        self.model.eval()

    def paraphrase(self, text, num_return_sequences=5, num_beams=10,
                   num_beam_groups=5, diversity_penalty=0.8):
        """Return a list of paraphrases of *text*.

        Args:
            text: Input sentence to paraphrase.
            num_return_sequences: Number of candidates to return
                (must be <= num_beams).
            num_beams: Total beam count for beam search.
            num_beam_groups: Number of groups for diverse beam search
                (must evenly divide num_beams).
            diversity_penalty: Penalty encouraging inter-group diversity.

        Returns:
            List of paraphrase strings; an empty list on failure
            (errors are printed, not raised — best-effort contract
            preserved from the original implementation).
        """
        try:
            # Task prefix expected by the T5 paraphrase checkpoint.
            input_text = "paraphrase: " + text + " "
            # Tokenizer __call__ replaces the deprecated encode_plus.
            encoding = self.tokenizer(input_text, return_tensors="pt")
            # Inference only: no_grad avoids building the autograd graph,
            # which model.eval() alone does not prevent.
            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=encoding["input_ids"],
                    # Pass the attention mask explicitly so padding (if any)
                    # is handled correctly and no warning is emitted.
                    attention_mask=encoding["attention_mask"],
                    max_length=256,
                    num_beams=num_beams,
                    num_beam_groups=num_beam_groups,
                    num_return_sequences=num_return_sequences,
                    diversity_penalty=diversity_penalty,
                    early_stopping=True,
                )
            return [
                self.tokenizer.decode(output, skip_special_tokens=True)
                for output in outputs
            ]
        except Exception as e:
            # Best-effort: report and return an empty result rather than
            # propagating, matching the original error contract.
            print(f"Error in paraphrasing: {e}")
            return []