vikigitonga11 committed
Commit 0316316 · verified · 1 Parent(s): 4d3e068

Update app.py

Files changed (1)
  app.py  +28 −16
app.py CHANGED
@@ -1,39 +1,51 @@
  import gradio as gr
  import re
+ import torch
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

- # Load PEGASUS paraphrase model
- model_name = "tuner007/pegasus_paraphrase"
+ # Load T5 paraphrase model (faster than PEGASUS)
+ model_name = "Vamsi/T5_Paraphrase_Paws"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16)  # Use fp16 for speed

- # Initialize pipeline
- paraphrase_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, truncation=True)
+ # Move model to CPU (remove if using GPU)
+ model.to("cpu")
+
+ # Initialize paraphrase pipeline with optimized settings
+ paraphrase_pipeline = pipeline(
+     "text2text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     truncation=True
+ )

  def split_sentences(text):
-     """Split text into sentences using regex instead of NLTK."""
-     return re.split(r'(?<=[.!?])\s+', text.strip())  # Split at sentence-ending punctuation
+     """Split text into sentences using regex (faster than nltk)."""
+     return re.split(r'(?<=[.!?])\s+', text.strip())

  def paraphrase_text(text):
      """Paraphrases input text while maintaining sentence structure."""
      if not text.strip():
          return "⚠️ Please enter some text to paraphrase."

-     sentences = split_sentences(text)  # Use regex to split sentences
-     paraphrased_sentences = [
-         paraphrase_pipeline(sentence, max_length=60, do_sample=False)[0]['generated_text']
-         for sentence in sentences if sentence
-     ]
-
-     return " ".join(paraphrased_sentences)  # Reassemble into a paragraph
+     sentences = split_sentences(text)
+
+     # Apply T5 paraphrasing to each sentence
+     paraphrased_results = paraphrase_pipeline(
+         [f"paraphrase: {sentence} </s>" for sentence in sentences if sentence],
+         max_length=50, do_sample=True, batch_size=8, num_return_sequences=1  # Faster settings
+     )
+
+     paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
+     return " ".join(paraphrased_sentences)

  # Define Gradio Interface
  demo = gr.Interface(
      fn=paraphrase_text,
      inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
      outputs=gr.Textbox(label="Paraphrased Text", lines=10),
-     title="📝 PEGASUS Paraphraser",
-     description="Enter text and let AI generate a paraphrased version using the PEGASUS model!",
+     title="🚀 Fast & Clean T5 Paraphraser",
+     description="Enter text and let AI generate a paraphrased version using an optimized T5 model!",
      theme="huggingface"
  )
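For a quick local check of what this commit changes, the standalone sketch below exercises the same checkpoint and "paraphrase: ... </s>" prompt format that the updated app.py relies on. The sample sentence, the paraphraser variable name, and the float32 fallback note are illustrative additions rather than part of the commit; if half-precision weights turn out to be slow or unsupported on CPU in a given PyTorch build, loading without torch_dtype=torch.float16 (i.e. in float32) is the conservative swap.

# Standalone smoke test (a sketch, not part of the Space).
# Assumes the "Vamsi/T5_Paraphrase_Paws" checkpoint downloads successfully.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

model_name = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Mirrors the commit: fp16 weights on CPU. If this is slow or raises errors on
# your PyTorch build, drop torch_dtype=torch.float16 to load in float32 instead.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16).to("cpu")

paraphraser = pipeline("text2text-generation", model=model, tokenizer=tokenizer, truncation=True)

# The checkpoint expects the same "paraphrase: ... </s>" prompt used in paraphrase_text().
prompt = "paraphrase: The quick brown fox jumps over the lazy dog. </s>"
result = paraphraser(prompt, max_length=50, do_sample=True, num_return_sequences=1)
print(result[0]["generated_text"])

Running this prints one sampled paraphrase of the prompt; since do_sample=True, the output will vary between runs.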