vikigitonga11 committed
Commit 0316316 · verified · 1 Parent(s): 4d3e068

Update app.py

Files changed (1)
  app.py  +28 −16
app.py CHANGED
@@ -1,39 +1,51 @@
  import gradio as gr
  import re
+ import torch
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

- # Load PEGASUS paraphrase model
- model_name = "tuner007/pegasus_paraphrase"
+ # Load T5 paraphrase model (faster than PEGASUS)
+ model_name = "Vamsi/T5_Paraphrase_Paws"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16)  # Use fp16 for speed

- # Initialize pipeline
- paraphrase_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, truncation=True)
+ # Move model to CPU (remove if using GPU)
+ model.to("cpu")
+
+ # Initialize paraphrase pipeline with optimized settings
+ paraphrase_pipeline = pipeline(
+     "text2text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     truncation=True
+ )

  def split_sentences(text):
-     """Split text into sentences using regex instead of NLTK."""
-     return re.split(r'(?<=[.!?])\s+', text.strip())  # Split at sentence-ending punctuation
+     """Split text into sentences using regex (faster than nltk)."""
+     return re.split(r'(?<=[.!?])\s+', text.strip())

  def paraphrase_text(text):
      """Paraphrases input text while maintaining sentence structure."""
      if not text.strip():
          return "⚠️ Please enter some text to paraphrase."

-     sentences = split_sentences(text)  # Use regex to split sentences
-     paraphrased_sentences = [
-         paraphrase_pipeline(sentence, max_length=60, do_sample=False)[0]['generated_text']
-         for sentence in sentences if sentence
-     ]
-
-     return " ".join(paraphrased_sentences)  # Reassemble into a paragraph
+     sentences = split_sentences(text)
+
+     # Apply T5 paraphrasing to each sentence
+     paraphrased_results = paraphrase_pipeline(
+         [f"paraphrase: {sentence} </s>" for sentence in sentences if sentence],
+         max_length=50, do_sample=True, batch_size=8, num_return_sequences=1  # Faster settings
+     )
+
+     paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
+     return " ".join(paraphrased_sentences)

  # Define Gradio Interface
  demo = gr.Interface(
      fn=paraphrase_text,
      inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
      outputs=gr.Textbox(label="Paraphrased Text", lines=10),
-     title="📝 PEGASUS Paraphraser",
-     description="Enter text and let AI generate a paraphrased version using the PEGASUS model!",
+     title="🚀 Fast & Clean T5 Paraphraser",
+     description="Enter text and let AI generate a paraphrased version using an optimized T5 model!",
      theme="huggingface"
  )
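For a quick local check of what this commit changes, the standalone sketch below exercises the same checkpoint and "paraphrase: ... </s>" prompt format that the updated app.py relies on. The sample sentence, the paraphraser variable name, and the float32 fallback note are illustrative additions rather than part of the commit; if half-precision weights turn out to be slow or unsupported on CPU in a given PyTorch build, loading without torch_dtype=torch.float16 (i.e. in float32) is the conservative swap.

# Standalone smoke test (a sketch, not part of the Space).
# Assumes the "Vamsi/T5_Paraphrase_Paws" checkpoint downloads successfully.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

model_name = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Mirrors the commit: fp16 weights on CPU. If this is slow or raises errors on
# your PyTorch build, drop torch_dtype=torch.float16 to load in float32 instead.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16).to("cpu")

paraphraser = pipeline("text2text-generation", model=model, tokenizer=tokenizer, truncation=True)

# The checkpoint expects the same "paraphrase: ... </s>" prompt used in paraphrase_text().
prompt = "paraphrase: The quick brown fox jumps over the lazy dog. </s>"
result = paraphraser(prompt, max_length=50, do_sample=True, num_return_sequences=1)
print(result[0]["generated_text"])

Running this prints one sampled paraphrase of the prompt; since do_sample=True, the output will vary between runs.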