Shrey committed on
Commit
20223ca
·
1 Parent(s): 87b291b

fixed typo / added English bert comparison / changed description

Browse files
Files changed (1) hide show
  1. app.py +26 -9
app.py CHANGED
@@ -22,28 +22,45 @@ txt="a polynomial [MASK] from 3-SAT." #reduction
22
  #print(res["sequence"])
23
  #print(res["score"])
24
 
25
- #make a function out of the unmasker
 
 
 
26
 
 
 
 
 
27
  def unmask_words(txt_with_mask,k_suggestions=5):
28
- results=unmasker(txt_with_mask,top_k=k_suggestions)
 
29
  labels={}
30
- for res in results:
31
  labels["".join(res["token_str"].split(" "))]=res["score"]
32
- return labels
 
 
 
 
 
 
 
 
 
33
 
34
  #trying our function
35
  #val=unmask_words(txt)
36
 
37
  import gradio as gr
38
- description="""CC bert is a MLM model pretrained on data collected from ~200k papers in mainly Computational Complexity
39
- or related domain. For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)
40
41
-
42
  """
 
43
  examples=[["as pspace is [MASK] under complement."],
44
  ["n!-(n-1)[MASK]"],
45
  ["[MASK] these two classes is a major problem."],
46
- ["This would show that the polynomial heirarchy at the second [MASK], which is considered only"],
47
  ["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
48
  and their combined [MASK]"""]
49
  ]
@@ -53,7 +70,7 @@ examples=[["as pspace is [MASK] under complement."],
53
  input_box=gr.inputs.Textbox(lines=20,placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",label="Enter the masked text:")
54
  interface=gr.Interface(fn=unmask_words,inputs=[input_box,
55
  gr.inputs.Slider(1,10,1,5,label="No of Suggestions:")],
56
- outputs=gr.outputs.Label(label="top words:"),
57
  examples=examples,
58
  theme="darkhuggingface",
59
  title="CC-Bert MLM",description=description,allow_flagging=True)
 
22
  #print(res["sequence"])
23
  #print(res["score"])
24
 
25
# Build a second fill-mask pipeline backed by plain English BERT so its
# suggestions can be shown side-by-side with the CC-Bert model above.
# NOTE(review): AutoTokenizer / TFAutoModelForMaskedLM / FillMaskPipeline are
# presumably imported earlier in this file from `transformers` — confirm.
default_name = "bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(default_name)
model = TFAutoModelForMaskedLM.from_pretrained(default_name)

unmasker_bert = FillMaskPipeline(model=model, tokenizer=tokenizer)
34
  def unmask_words(txt_with_mask,k_suggestions=5):
35
+ results_cc=unmasker(txt_with_mask,top_k=k_suggestions)
36
+
37
  labels={}
38
+ for res in results_cc:
39
  labels["".join(res["token_str"].split(" "))]=res["score"]
40
+
41
+ results_bert=unmasker_bert(txt_with_mask,top_k=k_suggestions)
42
+
43
+ labels_bert={}
44
+ for res in results_bert:
45
+ labels_bert["".join(res["token_str"].split(" "))]=res["score"]
46
+
47
+ return labels,labels_bert
48
+
49
+
50
 
51
  #trying our function
52
  #val=unmask_words(txt)
53
 
54
import gradio as gr

# User-facing copy rendered above the demo (markdown link is rendered by
# gradio).  Grammar fixed: "an MLM", "comprising" (not "comprising of"),
# "English" capitalized.
description = """CC bert is an MLM model pretrained on data collected from ~200k papers on arXiv comprising mathematical proofs and theorems. The aim of this interface is to show the difference between English and scientific-English pretraining.
For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)

"""

# Example masked sentences pre-loaded into the interface.
examples = [
    ["as pspace is [MASK] under complement."],
    ["n!-(n-1)[MASK]"],
    ["[MASK] these two classes is a major problem."],
    ["This would show that the polynomial hierarchy at the second [MASK], which is considered only"],
    ["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
and their combined [MASK]"""],
]
 
# Wire the demo together: one textbox + slider in, two label panels out
# (CC-Bert suggestions vs. English-BERT suggestions).
input_box = gr.inputs.Textbox(
    lines=20,
    placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",
    label="Enter the masked text:",
)

interface = gr.Interface(
    fn=unmask_words,
    inputs=[
        input_box,
        gr.inputs.Slider(1, 10, 1, 5, label="No of Suggestions:"),
    ],
    outputs=[
        gr.outputs.Label(label="top words:"),
        gr.outputs.Label(label="top words eng-bert:"),
    ],
    examples=examples,
    theme="darkhuggingface",
    title="CC-Bert MLM",
    description=description,
    allow_flagging=True,
)