dexay committed on
Commit
85c9131
·
1 Parent(s): 3b7e628

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -3,7 +3,10 @@ import pandas as pd
3
  import transformers
4
  from transformers import pipeline, TokenClassificationPipeline, BertForTokenClassification , AutoTokenizer , TextClassificationPipeline , AutoModelForSequenceClassification
5
 
6
- x = st.text_area('enter')
 
 
 
7
 
8
  #model.to("cpu")
9
  tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-large-cased-v1.1", truncation = True, padding=True, model_max_length=512,)
@@ -13,6 +16,7 @@ model_checkpoint = BertForTokenClassification.from_pretrained("dexay/Ner2HgF", )
13
  model_re = AutoModelForSequenceClassification.from_pretrained("dexay/reDs3others", )
14
  token_classifier = pipeline("token-classification", tokenizer = tokenizer,model=model_checkpoint, )
15
 
 
16
 
17
  biotext = x
18
 
@@ -144,6 +148,9 @@ for itsent in az:
144
 
145
  #lstSentEnc,lstSentEnt,lstSentbilbl
146
 
 
 
 
147
  # Relation extraction part
148
 
149
  token_classifier = pipeline("text-classification", tokenizer = tokenizer,model=model_re,
@@ -203,7 +210,7 @@ edccan = []
203
 
204
 
205
  for i in range(len(outrelbl)):
206
- if outrelbl[i]== "other":
207
  edccan += [[lstSentEnc[i],lstSentEnt[i][0], lstSentEnt[i][1],lstSentbilbl[i][0]+" "+outrelbl[i][:-7]+" "+lstSentbilbl[i][1]]]
208
 
209
  edccandf = pd.DataFrame(edccan, columns= ["Sentence", "Entity 1", "Entity 2", "Relation"] )
 
3
  import transformers
4
  from transformers import pipeline, TokenClassificationPipeline, BertForTokenClassification , AutoTokenizer , TextClassificationPipeline , AutoModelForSequenceClassification
5
 
6
+ st.header("Knowledge extraction on Endocrine disruptors")
7
+ st.text("This tool lets you extract relation triples concerning interactions between: endocrine disrupting chemicals, hormones, receptors and cancers.")
8
+ st.text("It is the result of an end of studies project within ESI school and dedicated to biomedical researchers looking to extract precise information about the subject without digging into long publications.")
9
+ x = st.text_area('Entre you text on EDCs:')
10
 
11
  #model.to("cpu")
12
  tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-large-cased-v1.1", truncation = True, padding=True, model_max_length=512,)
 
16
  model_re = AutoModelForSequenceClassification.from_pretrained("dexay/reDs3others", )
17
  token_classifier = pipeline("token-classification", tokenizer = tokenizer,model=model_checkpoint, )
18
 
19
+ st.text("Knowledge extraction is in progress ...")
20
 
21
  biotext = x
22
 
 
148
 
149
  #lstSentEnc,lstSentEnt,lstSentbilbl
150
 
151
+ st.text("Entities detected, Next: Relation detection ...")
152
+
153
+
154
  # Relation extraction part
155
 
156
  token_classifier = pipeline("text-classification", tokenizer = tokenizer,model=model_re,
 
210
 
211
 
212
  for i in range(len(outrelbl)):
213
+ if outrelbl[i] != "other":
214
  edccan += [[lstSentEnc[i],lstSentEnt[i][0], lstSentEnt[i][1],lstSentbilbl[i][0]+" "+outrelbl[i][:-7]+" "+lstSentbilbl[i][1]]]
215
 
216
  edccandf = pd.DataFrame(edccan, columns= ["Sentence", "Entity 1", "Entity 2", "Relation"] )