vikigitonga11 committed
Commit 4d3e068 · verified · 1 parent: 847952d

Update app.py

Files changed (1): app.py (+15, -26)
app.py CHANGED
@@ -1,50 +1,39 @@
 import gradio as gr
 import re
-import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
-# Load PEGASUS model in optimized mode
+# Load PEGASUS paraphrase model
 model_name = "tuner007/pegasus_paraphrase"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16)  # Use half precision
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
-# Move model to CPU for consistency
-model.to("cpu")
-
-# Initialize paraphrase pipeline with optimized settings
-paraphrase_pipeline = pipeline(
-    "text2text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    truncation=True
-)
+# Initialize pipeline
+paraphrase_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, truncation=True)
 
 def split_sentences(text):
-    """Split text into sentences using regex (faster than nltk)."""
-    return re.split(r'(?<=[.!?])\s+', text.strip())
+    """Split text into sentences using regex instead of NLTK."""
+    return re.split(r'(?<=[.!?])\s+', text.strip())  # Split at sentence-ending punctuation
 
 def paraphrase_text(text):
     """Paraphrases input text while maintaining sentence structure."""
     if not text.strip():
         return "⚠️ Please enter some text to paraphrase."
 
-    sentences = split_sentences(text)
-
-    # Batch processing with optimized settings
-    paraphrased_results = paraphrase_pipeline(
-        sentences, max_length=50, do_sample=False, batch_size=8  # Reduced max_length & increased batch_size
-    )
-
-    paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
-    return " ".join(paraphrased_sentences)
+    sentences = split_sentences(text)  # Use regex to split sentences
+    paraphrased_sentences = [
+        paraphrase_pipeline(sentence, max_length=60, do_sample=False)[0]['generated_text']
+        for sentence in sentences if sentence
+    ]
+
+    return " ".join(paraphrased_sentences)  # Reassemble into a paragraph
 
 # Define Gradio Interface
 demo = gr.Interface(
     fn=paraphrase_text,
     inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
     outputs=gr.Textbox(label="Paraphrased Text", lines=10),
-    title="🚀 Faster PEGASUS Paraphraser",
-    description="Enter text and let AI generate a paraphrased version using an optimized PEGASUS model!",
+    title="📝 PEGASUS Paraphraser",
+    description="Enter text and let AI generate a paraphrased version using the PEGASUS model!",
     theme="huggingface"
 )
 
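For reference, a minimal standalone sketch of the per-sentence generation pattern this commit switches to. The model name, pipeline setup, and generation arguments are taken from the diff above; the sample sentence and the paraphraser variable name are hypothetical, and the first run will download the PEGASUS checkpoint.

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Same model and pipeline setup as the updated app.py.
model_name = "tuner007/pegasus_paraphrase"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
paraphraser = pipeline("text2text-generation", model=model, tokenizer=tokenizer, truncation=True)

# One call per sentence, as in the new paraphrase_text(); the pipeline
# returns a list of dicts, each carrying a 'generated_text' key.
sentence = "The weather was lovely, so we decided to walk to the park."  # hypothetical sample input
result = paraphraser(sentence, max_length=60, do_sample=False)
print(result[0]["generated_text"])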