vikigitonga11 committed
Commit 8371da7 · verified · 1 Parent(s): f612c6f

Update app.py

Files changed (1)
  1. app.py +27 -14
app.py CHANGED
@@ -1,39 +1,52 @@
 import gradio as gr
 import re
+import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
-# Load the Vamsi/T5 Paraphrase model
-model_name = "Vamsi/T5_Paraphrase_Paws"
+# Load PEGASUS paraphrase model
+model_name = "tuner007/pegasus_paraphrase"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
-# Initialize pipeline
-paraphrase_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, truncation=True)
+# Move model to GPU if available (for faster processing)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+
+# Initialize pipeline with batch processing and optimized settings
+paraphrase_pipeline = pipeline(
+    "text2text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device=0 if torch.cuda.is_available() else -1,  # Use GPU if available
+    truncation=True
+)
 
 def split_sentences(text):
-    """Split text into sentences using regex instead of NLTK."""
-    return re.split(r'(?<=[.!?])\s+', text.strip())  # Split at sentence-ending punctuation
+    """Split text into sentences using regex (faster than nltk)."""
+    return re.split(r'(?<=[.!?])\s+', text.strip())
 
 def paraphrase_text(text):
     """Paraphrases input text while maintaining sentence structure."""
     if not text.strip():
         return "⚠️ Please enter some text to paraphrase."
 
-    sentences = split_sentences(text)  # Use regex to split sentences
-    paraphrased_sentences = [
-        paraphrase_pipeline(f"paraphrase: {sentence}", max_length=60, do_sample=False)[0]['generated_text']
-        for sentence in sentences if sentence
-    ]
+    sentences = split_sentences(text)
 
-    return " ".join(paraphrased_sentences)  # Reassemble into a paragraph
+    # Process multiple sentences in one batch (improves speed)
+    paraphrased_results = paraphrase_pipeline(
+        sentences, max_length=60, do_sample=False, batch_size=4  # Increase batch_size for speed
+    )
+
+    paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
+    return " ".join(paraphrased_sentences)
 
 # Define Gradio Interface
 demo = gr.Interface(
     fn=paraphrase_text,
     inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
     outputs=gr.Textbox(label="Paraphrased Text", lines=10),
-    title="🔄 T5 Paraphraser",
-    description="Enter text and let AI generate a paraphrased version using the T5 model!",
+    title="🚀 Fast PEGASUS Paraphraser",
+    description="Enter text and let AI generate a paraphrased version using the optimized PEGASUS model!",
     theme="huggingface"
 )
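
For reference, a minimal standalone sketch of the batched pipeline call this commit introduces. The two sample sentences are made up for illustration, and CPU execution is assumed here, so the GPU placement lines from the commit are left out:

# Minimal sketch of the batched PEGASUS paraphrase call from this commit.
# Sample sentences are illustrative; CPU execution is assumed.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

model_name = "tuner007/pegasus_paraphrase"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

paraphrase_pipeline = pipeline(
    "text2text-generation", model=model, tokenizer=tokenizer, truncation=True
)

sentences = ["The weather is nice today.", "I will visit the market tomorrow."]
results = paraphrase_pipeline(sentences, max_length=60, do_sample=False, batch_size=4)
print(" ".join(r["generated_text"] for r in results))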