Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,10 @@ import pandas as pd
|
|
3 |
import transformers
|
4 |
from transformers import pipeline, TokenClassificationPipeline, BertForTokenClassification , AutoTokenizer , TextClassificationPipeline , AutoModelForSequenceClassification
|
5 |
|
6 |
-
|
|
|
|
|
|
|
7 |
|
8 |
#model.to("cpu")
|
9 |
tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-large-cased-v1.1", truncation = True, padding=True, model_max_length=512,)
|
@@ -13,6 +16,7 @@ model_checkpoint = BertForTokenClassification.from_pretrained("dexay/Ner2HgF", )
|
|
13 |
model_re = AutoModelForSequenceClassification.from_pretrained("dexay/reDs3others", )
|
14 |
token_classifier = pipeline("token-classification", tokenizer = tokenizer,model=model_checkpoint, )
|
15 |
|
|
|
16 |
|
17 |
biotext = x
|
18 |
|
@@ -144,6 +148,9 @@ for itsent in az:
|
|
144 |
|
145 |
#lstSentEnc,lstSentEnt,lstSentbilbl
|
146 |
|
|
|
|
|
|
|
147 |
# Relation extraction part
|
148 |
|
149 |
token_classifier = pipeline("text-classification", tokenizer = tokenizer,model=model_re,
|
@@ -203,7 +210,7 @@ edccan = []
|
|
203 |
|
204 |
|
205 |
for i in range(len(outrelbl)):
|
206 |
-
if outrelbl[i]
|
207 |
edccan += [[lstSentEnc[i],lstSentEnt[i][0], lstSentEnt[i][1],lstSentbilbl[i][0]+" "+outrelbl[i][:-7]+" "+lstSentbilbl[i][1]]]
|
208 |
|
209 |
edccandf = pd.DataFrame(edccan, columns= ["Sentence", "Entity 1", "Entity 2", "Relation"] )
|
|
|
3 |
import transformers
|
4 |
from transformers import pipeline, TokenClassificationPipeline, BertForTokenClassification , AutoTokenizer , TextClassificationPipeline , AutoModelForSequenceClassification
|
5 |
|
6 |
+
st.header("Knowledge extraction on Endocrine disruptors")
|
7 |
+
st.text("This tool lets you extract relation triples concerning interactions between: endocrine disrupting chemicals, hormones, receptors and cancers.")
|
8 |
+
st.text("It is the result of an end of studies project within ESI school and dedicated to biomedical researchers looking to extract precise information about the subject without digging into long publications.")
|
9 |
+
x = st.text_area('Entre you text on EDCs:')
|
10 |
|
11 |
#model.to("cpu")
|
12 |
tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-large-cased-v1.1", truncation = True, padding=True, model_max_length=512,)
|
|
|
16 |
model_re = AutoModelForSequenceClassification.from_pretrained("dexay/reDs3others", )
|
17 |
token_classifier = pipeline("token-classification", tokenizer = tokenizer,model=model_checkpoint, )
|
18 |
|
19 |
+
st.text("Knowledge extraction is in progress ...")
|
20 |
|
21 |
biotext = x
|
22 |
|
|
|
148 |
|
149 |
#lstSentEnc,lstSentEnt,lstSentbilbl
|
150 |
|
151 |
+
st.text("Entities detected, Next: Relation detection ...")
|
152 |
+
|
153 |
+
|
154 |
# Relation extraction part
|
155 |
|
156 |
token_classifier = pipeline("text-classification", tokenizer = tokenizer,model=model_re,
|
|
|
210 |
|
211 |
|
212 |
for i in range(len(outrelbl)):
|
213 |
+
if outrelbl[i] != "other":
|
214 |
edccan += [[lstSentEnc[i],lstSentEnt[i][0], lstSentEnt[i][1],lstSentbilbl[i][0]+" "+outrelbl[i][:-7]+" "+lstSentbilbl[i][1]]]
|
215 |
|
216 |
edccandf = pd.DataFrame(edccan, columns= ["Sentence", "Entity 1", "Entity 2", "Relation"] )
|