BigSalmon commited on
Commit
5016ffb
·
1 Parent(s): 0b4e67f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st

st.title("Paraphrase")

# Single checkpoint for both model and tokenizer.
# BUG FIX: the original loaded the model from "eugenesiow/bart-paraphrase"
# but the tokenizer from "geckos/pegasus-fined-tuned-on-paraphrase" — a
# Pegasus tokenizer paired with a BART model has a mismatched vocabulary,
# so the token ids fed to generate() would be meaningless. Model and
# tokenizer must come from the same checkpoint.
MODEL_NAME = "eugenesiow/bart-paraphrase"

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Run on GPU when available; the tokenized inputs are moved to the same
# device inside translate_to_english.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Sidebar controls: sampling temperature and how many candidates to return.
temp = st.sidebar.slider("Temperature", 0.7, 1.5)
number_of_outputs = st.sidebar.slider("Number of Outputs", 1, 10)
14
def translate_to_english(model, tokenizer, text):
    """Generate paraphrase candidates for *text* with a seq2seq model.

    Sampling is configured by the module-level sidebar values `temp`
    (temperature) and `number_of_outputs` (candidates returned); tensors
    are moved to the module-level `device`.

    Args:
        model: a Hugging Face seq2seq model already placed on `device`.
        tokenizer: the tokenizer matching `model`.
        text: input sentence to paraphrase.

    Returns:
        list[str]: decoded paraphrase candidates (one per sampled sequence).
    """
    translated_text = []
    # NOTE(review): manually appending " </s>" is a T5-era habit; modern
    # tokenizers append EOS themselves — kept for behavioural parity.
    text = text + " </s>"
    # `padding="max_length"` replaces the deprecated `pad_to_max_length=True`
    # kwarg (removed in recent transformers releases); semantics are the same.
    encoding = tokenizer.encode_plus(text, padding="max_length", return_tensors="pt")
    input_ids = encoding["input_ids"].to(device)
    attention_masks = encoding["attention_mask"].to(device)
    beam_outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_masks,
        do_sample=True,           # sample rather than pure beam search
        max_length=256,
        temperature=temp,         # from the sidebar slider
        top_k=120,
        top_p=0.98,
        early_stopping=True,
        num_return_sequences=number_of_outputs,
    )
    for beam_output in beam_outputs:
        sent = tokenizer.decode(
            beam_output,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        # (debug print removed — the results are rendered by st.write below)
        translated_text.append(sent)
    return translated_text
34
+
35
# Main UI: take a sentence, echo it back, then show the paraphrases.
user_input = st.text_input("Okay")
st.text("What you wrote: ")
st.write(user_input)
st.text("Output: ")

# Only run the (slow) generation step once the user has typed something.
if user_input:
    candidates = translate_to_english(model, tokenizer, user_input)
    st.write(candidates or "No translation found")