vikigitonga11 committed on
Commit
ddc7bfd
·
verified ·
1 Parent(s): 3f7d7a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -15
app.py CHANGED
@@ -3,20 +3,21 @@ import re
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
 
6
- # Load T5 paraphrase model
7
  model_name = "Vamsi/T5_Paraphrase_Paws"
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16) # Use fp16 for speed
10
 
11
- # Move model to CPU
12
  model.to("cpu")
13
 
14
- # Initialize paraphrase pipeline
15
  paraphrase_pipeline = pipeline(
16
  "text2text-generation",
17
  model=model,
18
  tokenizer=tokenizer,
19
  truncation=True
 
20
  )
21
 
22
  def split_sentences(text):
@@ -30,20 +31,12 @@ def paraphrase_text(text):
30
 
31
  sentences = split_sentences(text)
32
 
33
- # Apply T5 paraphrasing with controlled creativity
34
  paraphrased_results = paraphrase_pipeline(
35
  [f"paraphrase: {sentence} </s>" for sentence in sentences if sentence],
36
- max_length=80,
37
- do_sample=True,
38
- temperature=0.7, # More controlled creativity
39
- top_p=0.85, # Reduce randomness
40
- top_k=50,
41
- repetition_penalty=1.2, # Avoid excessive repetition
42
- num_return_sequences=1, # Only one paraphrase per sentence
43
- batch_size=8
44
  )
45
 
46
- # Extract and join paraphrased sentences
47
  paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
48
  return " ".join(paraphrased_sentences)
49
 
@@ -52,8 +45,8 @@ demo = gr.Interface(
52
  fn=paraphrase_text,
53
  inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
54
  outputs=gr.Textbox(label="Paraphrased Text", lines=10),
55
- title="🎨 Controlled T5 Paraphraser",
56
- description="Enter text and get a well-structured paraphrased version without randomness!",
57
  theme="huggingface"
58
  )
59
 
 
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
 
6
+ # Load T5 paraphrase model (faster than PEGASUS)
7
  model_name = "Vamsi/T5_Paraphrase_Paws"
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16) # Use fp16 for speed
10
 
11
+ # Move model to CPU (remove if using GPU)
12
  model.to("cpu")
13
 
14
+ # Initialize paraphrase pipeline with optimized settings
15
  paraphrase_pipeline = pipeline(
16
  "text2text-generation",
17
  model=model,
18
  tokenizer=tokenizer,
19
  truncation=True,
20
+ temperature=0.5,
21
  )
22
 
23
  def split_sentences(text):
 
31
 
32
  sentences = split_sentences(text)
33
 
34
+ # Apply T5 paraphrasing to each sentence
35
  paraphrased_results = paraphrase_pipeline(
36
  [f"paraphrase: {sentence} </s>" for sentence in sentences if sentence],
37
+ max_length=50, do_sample=True, batch_size=8, num_return_sequences=1 # Faster settings
 
 
 
 
 
 
 
38
  )
39
 
 
40
  paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
41
  return " ".join(paraphrased_sentences)
42
 
 
45
  fn=paraphrase_text,
46
  inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
47
  outputs=gr.Textbox(label="Paraphrased Text", lines=10),
48
+ title="🚀 Fast & Clean T5 Paraphraser",
49
+ description="Enter text and let AI generate a paraphrased version using an optimized T5 model!",
50
  theme="huggingface"
51
  )
52