Update app.py
Browse files
app.py
CHANGED
@@ -3,20 +3,21 @@ import re
|
|
3 |
import torch
|
4 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
5 |
|
6 |
-
# Load T5 paraphrase model
|
7 |
model_name = "Vamsi/T5_Paraphrase_Paws"
|
8 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
9 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16) # Use fp16 for speed
|
10 |
|
11 |
-
# Move model to CPU
|
12 |
model.to("cpu")
|
13 |
|
14 |
-
# Initialize paraphrase pipeline
|
15 |
paraphrase_pipeline = pipeline(
|
16 |
"text2text-generation",
|
17 |
model=model,
|
18 |
tokenizer=tokenizer,
|
19 |
truncation=True
|
|
|
20 |
)
|
21 |
|
22 |
def split_sentences(text):
|
@@ -30,20 +31,12 @@ def paraphrase_text(text):
|
|
30 |
|
31 |
sentences = split_sentences(text)
|
32 |
|
33 |
-
# Apply T5 paraphrasing
|
34 |
paraphrased_results = paraphrase_pipeline(
|
35 |
[f"paraphrase: {sentence} </s>" for sentence in sentences if sentence],
|
36 |
-
max_length=
|
37 |
-
do_sample=True,
|
38 |
-
temperature=0.7, # More controlled creativity
|
39 |
-
top_p=0.85, # Reduce randomness
|
40 |
-
top_k=50,
|
41 |
-
repetition_penalty=1.2, # Avoid excessive repetition
|
42 |
-
num_return_sequences=1, # Only one paraphrase per sentence
|
43 |
-
batch_size=8
|
44 |
)
|
45 |
|
46 |
-
# Extract and join paraphrased sentences
|
47 |
paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
|
48 |
return " ".join(paraphrased_sentences)
|
49 |
|
@@ -52,8 +45,8 @@ demo = gr.Interface(
|
|
52 |
fn=paraphrase_text,
|
53 |
inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
|
54 |
outputs=gr.Textbox(label="Paraphrased Text", lines=10),
|
55 |
-
title="
|
56 |
-
description="Enter text and
|
57 |
theme="huggingface"
|
58 |
)
|
59 |
|
|
|
3 |
import torch
|
4 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
5 |
|
6 |
+
# Load T5 paraphrase model (faster than PEGASUS)
model_name = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# The model is pinned to CPU below, so load it in float32: half-precision
# kernels on CPU are slow and missing for some ops in several PyTorch builds.
# Switch to torch.float16 only together with moving the model to a GPU.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float32)

# Move model to CPU (change to "cuda" if using GPU)
model.to("cpu")
|
13 |
|
14 |
+
# Build the paraphrase pipeline once at import time so every request reuses it.
paraphrase_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,  # the comma here was missing, which made the module a SyntaxError
    temperature=0.5,  # sampling temperature; only has an effect when do_sample=True
)
|
22 |
|
23 |
def split_sentences(text):
|
|
|
31 |
|
32 |
sentences = split_sentences(text)
|
33 |
|
34 |
+
# Apply T5 paraphrasing to each sentence
|
35 |
paraphrased_results = paraphrase_pipeline(
|
36 |
[f"paraphrase: {sentence} </s>" for sentence in sentences if sentence],
|
37 |
+
max_length=50, do_sample=True, batch_size=8, num_return_sequences=1 # Faster settings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
)
|
39 |
|
|
|
40 |
paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
|
41 |
return " ".join(paraphrased_sentences)
|
42 |
|
|
|
45 |
fn=paraphrase_text,
|
46 |
inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
|
47 |
outputs=gr.Textbox(label="Paraphrased Text", lines=10),
|
48 |
+
title="🚀 Fast & Clean T5 Paraphraser",
|
49 |
+
description="Enter text and let AI generate a paraphrased version using an optimized T5 model!",
|
50 |
theme="huggingface"
|
51 |
)
|
52 |
|