vikigitonga11's picture
Update app.py
3f7d7a9 verified
raw
history blame
2.04 kB
import gradio as gr
import re
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Load the T5 paraphrase checkpoint and its matching tokenizer.
model_name = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# The model is served from CPU (see below), so load the weights in float32.
# Half precision only pays off on GPU; on CPU it is slower or unsupported
# depending on the PyTorch build, and T5 is prone to fp16 overflow.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float32)
# Keep the model on CPU.
model.to("cpu")
# Build the text2text-generation pipeline used by the request handler.
# truncation=True is forwarded to the pipeline so over-long inputs are
# truncated rather than passed through at full length.
paraphrase_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
)
def split_sentences(text):
    """Break ``text`` into sentence-like chunks with a single regex.

    Leading and trailing whitespace is removed first. The text is then cut
    at every run of whitespace that directly follows ``.``, ``!`` or ``?``;
    the punctuation stays attached to the preceding chunk. Empty input
    yields a list containing one empty string.
    """
    trimmed = text.strip()
    # Look-behind keeps the terminator on the sentence; only the whitespace
    # after it is consumed by the split.
    boundary = re.compile(r'(?<=[.!?])\s+')
    return boundary.split(trimmed)
def paraphrase_text(text):
    """Paraphrase ``text`` sentence by sentence and return the joined result.

    Args:
        text: Raw user input. ``None``, an empty string, or whitespace-only
            input is treated as "nothing to paraphrase".

    Returns:
        The paraphrased sentences joined with single spaces, or a warning
        message when there is nothing to paraphrase.
    """
    # Guard against None as well as blank input: calling .strip() on None
    # would raise AttributeError instead of returning the friendly warning.
    if not text or not text.strip():
        return "⚠️ Please enter some text to paraphrase."

    # One prompt per non-empty sentence, in the "paraphrase: ..." format.
    # NOTE(review): the explicit " </s>" suffix is kept as-is; confirm the
    # tokenizer does not already append its own end-of-sequence token.
    prompts = [
        f"paraphrase: {sentence} </s>"
        for sentence in split_sentences(text)
        if sentence
    ]

    # Sampled generation with conservative settings; one output per prompt.
    paraphrased_results = paraphrase_pipeline(
        prompts,
        max_length=80,
        do_sample=True,
        temperature=0.7,
        top_p=0.85,
        top_k=50,
        repetition_penalty=1.2,
        num_return_sequences=1,
        batch_size=8,
    )

    # Stitch the generated sentences back together in their original order.
    return " ".join(result['generated_text'] for result in paraphrased_results)
# Build the Gradio UI: one multi-line input box, one multi-line output box,
# wired to paraphrase_text.
_input_box = gr.Textbox(
    label="Enter text",
    placeholder="Type your text to paraphrase...",
    lines=10,
)
_output_box = gr.Textbox(label="Paraphrased Text", lines=10)

demo = gr.Interface(
    fn=paraphrase_text,
    inputs=_input_box,
    outputs=_output_box,
    title="🎨 Controlled T5 Paraphraser",
    description="Enter text and get a well-structured paraphrased version without randomness!",
    theme="huggingface",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()