|
import gradio as gr |
|
import re |
|
import torch |
|
import asyncio |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline |
|
|
|
|
|
# Checkpoint identifier passed to `from_pretrained` for tokenizer and model.
model_name = "Vamsi/T5_Paraphrase_Paws"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Half precision is only worthwhile on a GPU. On CPU, float16 kernels are
# slow or unavailable depending on the PyTorch build, so use float32 there.
_use_cuda = torch.cuda.is_available()

model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if _use_cuda else torch.float32,
)

# Shared generation pipeline used by `paraphrase_text`.
# `truncation=True` cuts over-long inputs instead of failing on them.
# `device` places the model on the first GPU when present, else CPU (-1).
paraphrase_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
    device=0 if _use_cuda else -1,
)
|
|
|
def split_sentences(text):
    """Split ``text`` into sentences with a lightweight regex.

    A sentence boundary is any run of whitespace that directly follows
    ``.``, ``!`` or ``?``. Leading and trailing whitespace is ignored.

    Args:
        text: The string to split.

    Returns:
        A list of non-empty sentence strings; an empty list when ``text``
        is empty or contains only whitespace.
    """
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    # Splitting an empty string yields [''], so drop empty fragments.
    return [piece for piece in pieces if piece]
|
|
|
async def paraphrase_text(text):
    """Paraphrase ``text`` sentence by sentence and return the joined result.

    The input is split with ``split_sentences``; every non-empty sentence is
    wrapped in the ``paraphrase: ... </s>`` prompt and the whole batch is
    passed to ``paraphrase_pipeline`` on a worker thread, so the event loop
    is not blocked while the model generates.

    Args:
        text: Raw user input. ``None``, empty and whitespace-only values are
            all treated as "nothing to paraphrase".

    Returns:
        The generated sentences joined by single spaces, or a warning
        message when there is no input.
    """
    # Guard against None as well as blank strings before calling .strip().
    if not text or not text.strip():
        return "⚠️ Please enter some text to paraphrase."

    prompts = [
        f"paraphrase: {sentence} </s>"
        for sentence in split_sentences(text)
        if sentence
    ]

    # The pipeline call is blocking, so run it in a thread. The keyword
    # arguments are forwarded unchanged as generation settings.
    paraphrased_results = await asyncio.to_thread(
        paraphrase_pipeline,
        prompts,
        max_length=80,
        do_sample=True,
        temperature=0.7,
        top_p=0.85,
        top_k=50,
        repetition_penalty=1.2,
        num_return_sequences=1,
        batch_size=8,
    )

    return " ".join(result["generated_text"] for result in paraphrased_results)
|
|
|
|
|
# Build the UI: one input box, one output box, and a button that runs
# `paraphrase_text` on the input and writes the result to the output.
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Fast & Parallel T5 Paraphraser")
    input_box = gr.Textbox(
        label="Enter text",
        placeholder="Type your text to paraphrase...",
        lines=10,
    )
    output_box = gr.Textbox(label="Paraphrased Text", lines=10)
    button = gr.Button("Paraphrase")

    # `queue` is an event-listener option, not a `launch()` argument, so the
    # request to bypass the queue is expressed on the click handler.
    button.click(
        paraphrase_text,
        inputs=input_box,
        outputs=output_box,
        queue=False,
    )

if __name__ == "__main__":
    # share=True asks Gradio to create a public share link for the app.
    demo.launch(share=True)
|
|