Fixed typo / added English BERT comparison / changed description
Browse files
app.py
CHANGED
@@ -22,28 +22,45 @@ txt="a polynomial [MASK] from 3-SAT." #reduction
|
|
22 |
#print(res["sequence"])
|
23 |
#print(res["score"])
|
24 |
|
25 |
-
#
|
|
|
|
|
|
|
26 |
|
|
|
|
|
|
|
|
|
27 |
def unmask_words(txt_with_mask,k_suggestions=5):
|
28 |
-
|
|
|
29 |
labels={}
|
30 |
-
for res in
|
31 |
labels["".join(res["token_str"].split(" "))]=res["score"]
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
#trying our function
|
35 |
#val=unmask_words(txt)
|
36 |
|
37 |
import gradio as gr
|
38 |
-
description="""CC bert is a MLM model pretrained on data collected from ~200k papers
|
39 |
-
|
40 |
or contact [[email protected]]([email protected]).
|
41 |
-
|
42 |
"""
|
|
|
43 |
examples=[["as pspace is [MASK] under complement."],
|
44 |
["n!-(n-1)[MASK]"],
|
45 |
["[MASK] these two classes is a major problem."],
|
46 |
-
["This would show that the polynomial
|
47 |
["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
|
48 |
and their combined [MASK]"""]
|
49 |
]
|
@@ -53,7 +70,7 @@ examples=[["as pspace is [MASK] under complement."],
|
|
53 |
input_box=gr.inputs.Textbox(lines=20,placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",label="Enter the masked text:")
|
54 |
interface=gr.Interface(fn=unmask_words,inputs=[input_box,
|
55 |
gr.inputs.Slider(1,10,1,5,label="No of Suggestions:")],
|
56 |
-
outputs=gr.outputs.Label(label="top words:"),
|
57 |
examples=examples,
|
58 |
theme="darkhuggingface",
|
59 |
title="CC-Bert MLM",description=description,allow_flagging=True)
|
|
|
22 |
#print(res["sequence"])
|
23 |
#print(res["score"])
|
24 |
|
25 |
+
#now for BERT on English
|
26 |
+
default_name="bert-base-uncased"
|
27 |
+
|
28 |
+
tokenizer = AutoTokenizer.from_pretrained(default_name)
|
29 |
|
30 |
+
model = TFAutoModelForMaskedLM.from_pretrained(default_name)
|
31 |
+
unmasker_bert = FillMaskPipeline(model=model,tokenizer=tokenizer)
|
32 |
+
|
33 |
+
#make a function out of the unmasker
def _scores(results):
    # Map each suggested token (with any spaces stripped out) to its score.
    # Shared by both pipelines so the dict-building logic is not duplicated.
    return {"".join(res["token_str"].split(" ")): res["score"] for res in results}


def unmask_words(txt_with_mask, k_suggestions=5):
    """Fill the [MASK] in *txt_with_mask* with both models.

    Parameters
    ----------
    txt_with_mask : str
        Text containing a [MASK] token to be filled in.
    k_suggestions : int, optional
        Number of candidate words to request from each model (default 5).

    Returns
    -------
    tuple[dict, dict]
        (CC-BERT suggestions, English-BERT suggestions); each dict maps a
        candidate word to its model score, suitable for a gradio Label.
    """
    # CC-BERT (the `unmasker` pipeline defined earlier in this file).
    labels = _scores(unmasker(txt_with_mask, top_k=k_suggestions))
    # Plain English BERT, for comparison.
    labels_bert = _scores(unmasker_bert(txt_with_mask, top_k=k_suggestions))
    return labels, labels_bert
|
48 |
+
|
49 |
+
|
50 |
|
51 |
#trying our function
|
52 |
#val=unmask_words(txt)
|
53 |
|
54 |
import gradio as gr
|
55 |
+
# Markdown blurb shown above the demo interface.
description = """CC bert is a MLM model pretrained on data collected from ~200k papers on arXiv comprising of mathematical proofs and theorems. The aim of this interface is to show the difference between english and scientific english pretraining.
For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)
or contact [[email protected]]([email protected]).
"""

# Clickable sample inputs; each inner list carries the single textbox value.
examples = [
    ["as pspace is [MASK] under complement."],
    ["n!-(n-1)[MASK]"],
    ["[MASK] these two classes is a major problem."],
    ["This would show that the polynomial hierarchy at the second [MASK], which is considered only"],
    ["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
and their combined [MASK]"""],
]
|
|
|
70 |
# Text area where the user enters a sentence containing [MASK].
input_box = gr.inputs.Textbox(
    lines=20,
    placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",
    label="Enter the masked text:",
)

# Two Label panes — one per model — so CC-BERT and plain English BERT
# predictions can be compared side by side.
interface = gr.Interface(
    fn=unmask_words,
    inputs=[
        input_box,
        gr.inputs.Slider(1, 10, 1, 5, label="No of Suggestions:"),
    ],
    outputs=[
        gr.outputs.Label(label="top words:"),
        gr.outputs.Label(label="top words eng-bert:"),
    ],
    examples=examples,
    theme="darkhuggingface",
    title="CC-Bert MLM",
    description=description,
    allow_flagging=True,
)
|