import gradio as gr
import re
import torch
import asyncio
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Load the T5 paraphrase model
model_name = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16)
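
# NOTE (assumption): float16 weights presume a CUDA runtime; on CPU-only
# hardware, loading in the default float32 is the safer choice, e.g.:
#   model = AutoModelForSeq2SeqLM.from_pretrained(model_name)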

# Initialize the paraphrase pipeline
paraphrase_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
)
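
# Sanity check (illustrative): a single call returns a list with one dict
# holding a "generated_text" key, e.g.
#   paraphrase_pipeline("paraphrase: The weather is nice today. </s>", max_length=80)
#   -> [{"generated_text": "..."}]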

def split_sentences(text):
    """Split text into sentences using regex (faster than nltk)."""
    return re.split(r'(?<=[.!?])\s+', text.strip())
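
# Example (illustrative): split_sentences("Hi there. How are you?")
# returns ["Hi there.", "How are you?"]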

async def paraphrase_text(text):
    """Paraphrase the input text sentence by sentence without blocking the event loop."""
    if not text.strip():
        return "⚠️ Please enter some text to paraphrase."

    sentences = split_sentences(text)

    # Run the blocking pipeline call in a worker thread; this checkpoint
    # expects the "paraphrase: ..." task prefix on each input
    paraphrased_results = await asyncio.to_thread(
        paraphrase_pipeline,
        [f"paraphrase: {sentence} </s>" for sentence in sentences if sentence],
        max_length=80,
        do_sample=True,
        temperature=0.7,
        top_p=0.85,
        top_k=50,
        repetition_penalty=1.2,
        num_return_sequences=1,
        batch_size=8,
    )

    # Extract and join the paraphrased sentences
    paraphrased_sentences = [result["generated_text"] for result in paraphrased_results]
    return " ".join(paraphrased_sentences)

# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Fast & Parallel T5 Paraphraser")
    input_box = gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10)
    output_box = gr.Textbox(label="Paraphrased Text", lines=10)
    button = gr.Button("Paraphrase")
    # Disable queueing on this event so requests run immediately
    button.click(paraphrase_text, inputs=input_box, outputs=output_box, queue=False)

if __name__ == "__main__":
    demo.launch(share=True)