Shrey committed on
Commit
20223ca
·
1 Parent(s): 87b291b

fixed typo / added English bert comparison / changed description

Browse files
Files changed (1) hide show
  1. app.py +26 -9
app.py CHANGED
@@ -22,28 +22,45 @@ txt="a polynomial [MASK] from 3-SAT." #reduction
22
  #print(res["sequence"])
23
  #print(res["score"])
24
 
25
- #make a function out of the unmasker
 
 
 
26
 
 
 
 
 
27
  def unmask_words(txt_with_mask,k_suggestions=5):
28
- results=unmasker(txt_with_mask,top_k=k_suggestions)
 
29
  labels={}
30
- for res in results:
31
  labels["".join(res["token_str"].split(" "))]=res["score"]
32
- return labels
 
 
 
 
 
 
 
 
 
33
 
34
  #trying our function
35
  #val=unmask_words(txt)
36
 
37
  import gradio as gr
38
- description="""CC bert is a MLM model pretrained on data collected from ~200k papers in mainly Computational Complexity
39
- or related domain. For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)
40
41
-
42
  """
 
43
  examples=[["as pspace is [MASK] under complement."],
44
  ["n!-(n-1)[MASK]"],
45
  ["[MASK] these two classes is a major problem."],
46
- ["This would show that the polynomial heirarchy at the second [MASK], which is considered only"],
47
  ["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
48
  and their combined [MASK]"""]
49
  ]
@@ -53,7 +70,7 @@ examples=[["as pspace is [MASK] under complement."],
53
  input_box=gr.inputs.Textbox(lines=20,placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",label="Enter the masked text:")
54
  interface=gr.Interface(fn=unmask_words,inputs=[input_box,
55
  gr.inputs.Slider(1,10,1,5,label="No of Suggestions:")],
56
- outputs=gr.outputs.Label(label="top words:"),
57
  examples=examples,
58
  theme="darkhuggingface",
59
  title="CC-Bert MLM",description=description,allow_flagging=True)
 
22
  #print(res["sequence"])
23
  #print(res["score"])
24
 
25
# Build a second fill-mask pipeline backed by plain English BERT so its
# suggestions can be shown side-by-side with the CC-Bert model above.
# NOTE(review): AutoTokenizer / TFAutoModelForMaskedLM / FillMaskPipeline are
# presumably imported earlier in this file from `transformers` — confirm.
default_name = "bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(default_name)
model = TFAutoModelForMaskedLM.from_pretrained(default_name)

unmasker_bert = FillMaskPipeline(model=model, tokenizer=tokenizer)
34
  def unmask_words(txt_with_mask,k_suggestions=5):
35
+ results_cc=unmasker(txt_with_mask,top_k=k_suggestions)
36
+
37
  labels={}
38
+ for res in results_cc:
39
  labels["".join(res["token_str"].split(" "))]=res["score"]
40
+
41
+ results_bert=unmasker_bert(txt_with_mask,top_k=k_suggestions)
42
+
43
+ labels_bert={}
44
+ for res in results_bert:
45
+ labels_bert["".join(res["token_str"].split(" "))]=res["score"]
46
+
47
+ return labels,labels_bert
48
+
49
+
50
 
51
  #trying our function
52
  #val=unmask_words(txt)
53
 
54
import gradio as gr

# User-facing copy rendered above the demo (markdown link is rendered by
# gradio).  Grammar fixed: "an MLM", "comprising" (not "comprising of"),
# "English" capitalized.
description = """CC bert is an MLM model pretrained on data collected from ~200k papers on arXiv comprising mathematical proofs and theorems. The aim of this interface is to show the difference between English and scientific-English pretraining.
For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)

"""

# Example masked sentences pre-loaded into the interface.
examples = [
    ["as pspace is [MASK] under complement."],
    ["n!-(n-1)[MASK]"],
    ["[MASK] these two classes is a major problem."],
    ["This would show that the polynomial hierarchy at the second [MASK], which is considered only"],
    ["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
and their combined [MASK]"""],
]
 
# Wire the demo together: one textbox + slider in, two label panels out
# (CC-Bert suggestions vs. English-BERT suggestions).
input_box = gr.inputs.Textbox(
    lines=20,
    placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",
    label="Enter the masked text:",
)

interface = gr.Interface(
    fn=unmask_words,
    inputs=[
        input_box,
        gr.inputs.Slider(1, 10, 1, 5, label="No of Suggestions:"),
    ],
    outputs=[
        gr.outputs.Label(label="top words:"),
        gr.outputs.Label(label="top words eng-bert:"),
    ],
    examples=examples,
    theme="darkhuggingface",
    title="CC-Bert MLM",
    description=description,
    allow_flagging=True,
)