t5-paraphrase-generation

Sleeping

App Files Files Community

t5-paraphrase-generation / app.py

vikigitonga11

Update app.py

6baacdd verified 6 months ago

raw

history blame

1.98 kB

	import gradio as gr
	import re
	import torch
	import asyncio
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

	# Load T5 paraphrase model
	model_name = "Vamsi/T5_Paraphrase_Paws"
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# Half precision is only worthwhile (and only reliably supported) on a GPU.
	# On a CPU-only host fall back to float32 so generation does not hit
	# missing/slow half-precision CPU kernels.
	_use_cuda = torch.cuda.is_available()
	model = AutoModelForSeq2SeqLM.from_pretrained(
	    model_name,
	    torch_dtype=torch.float16 if _use_cuda else torch.float32,
	)

	# Initialize paraphrase pipeline
	paraphrase_pipeline = pipeline(
	    "text2text-generation",
	    model=model,
	    tokenizer=tokenizer,
	    truncation=True,
	    # 0 selects the first CUDA device, -1 keeps everything on the CPU.
	    device=0 if _use_cuda else -1,
	)

	# Whitespace run that directly follows sentence-ending punctuation.
	_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')


	def split_sentences(text):
	    """Split text into sentences using a regex (no NLTK dependency).

	    A sentence boundary is any run of whitespace that immediately follows
	    ``.``, ``!`` or ``?``. The punctuation stays attached to the sentence
	    it ends; the whitespace separating sentences is discarded.

	    Args:
	        text: Input string. Leading and trailing whitespace is ignored.

	    Returns:
	        A list of non-empty sentence strings in their original order.
	        Empty or whitespace-only input yields an empty list.
	    """
	    # re.split returns [''] for an empty string; filter so callers never
	    # receive a phantom empty sentence.
	    return [piece for piece in _SENTENCE_BOUNDARY.split(text.strip()) if piece]

	async def paraphrase_text(text):
	    """Paraphrase the input text one sentence at a time.

	    The text is split into sentences, each sentence is sent to the T5
	    paraphrase pipeline, and the generated sentences are joined back
	    together with single spaces (original line breaks are not preserved).
	    The blocking pipeline call runs in a worker thread so the event loop
	    stays responsive.

	    Args:
	        text: The user-supplied text. ``None``, empty, or whitespace-only
	            input is treated as "nothing to paraphrase".

	    Returns:
	        The paraphrased text, or a warning message when there is no input.
	    """
	    # Guard against None as well as blank strings so .strip() cannot raise.
	    if not text or not text.strip():
	        return "⚠️ Please enter some text to paraphrase."

	    # One prompt per non-empty sentence.
	    # NOTE(review): the trailing "</s>" presumably mirrors the prompt format
	    # from the model's usage example — confirm against the model card.
	    prompts = [
	        f"paraphrase: {sentence} </s>"
	        for sentence in split_sentences(text)
	        if sentence
	    ]

	    # Apply T5 paraphrasing with sampling-based generation settings.
	    paraphrased_results = await asyncio.to_thread(
	        paraphrase_pipeline,
	        prompts,
	        max_length=80,
	        do_sample=True,
	        temperature=0.7,
	        top_p=0.85,
	        top_k=50,
	        repetition_penalty=1.2,
	        num_return_sequences=1,
	        batch_size=8,
	    )

	    # Extract and join paraphrased sentences
	    return " ".join(result['generated_text'] for result in paraphrased_results)

	# Define Gradio Interface (queueing disabled for the paraphrase event)
	with gr.Blocks() as demo:
	    gr.Markdown("# 🚀 Fast & Parallel T5 Paraphraser")
	    input_box = gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10)
	    output_box = gr.Textbox(label="Paraphrased Text", lines=10)
	    button = gr.Button("Paraphrase")

	    # Queueing is configured per event listener; launch() takes no
	    # `queue` keyword, so the opt-out belongs here.
	    button.click(paraphrase_text, inputs=input_box, outputs=output_box, queue=False)

	if __name__ == "__main__":
	    demo.launch(share=True)