# Streamlit demo: predict patent acceptance (REJECTED/ACCEPTED) with a
# fine-tuned classifier over the HUPD patent dataset sample.
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
# Hugging Face Hub id of the fine-tuned patent-decision classifier.
language_model_path = "juliaannjose/finetuned_model"

# Load the HUPD sample split; the UI below uses its
# patent_number, abstract and claims columns.
with st.spinner("Setting up the app..."):
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        # NOTE(review): "blob/main" URLs serve an HTML viewer page; raw file
        # downloads normally use "resolve/main" — confirm the HUPD loading
        # script accepts this form before changing it.
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,  # no IPCR-label filtering
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-21",
        val_filing_start_date="2016-01-22",
        val_filing_end_date="2016-01-31",
    )

# Drop-down menu listing every patent number in the train split.
_patent_id = st.selectbox(
    "Select the Patent Number",
    dataset_dict["train"]["patent_number"],
)
# display abstract and claim
def get_abs_claim(_patent_id):
    """Return the (abstract, claims) text for the selected patent number.

    Bug fix: the original indexed the dataset with the constant expression
    ``["patent_number"] == _patent_id`` — a list-vs-string comparison that is
    always ``False`` (i.e. row 0), so every selection displayed the first
    row's abstract and claims. We instead locate the row whose
    ``patent_number`` matches the selection.
    """
    train = dataset_dict["train"]
    # Index of the first row whose patent_number equals the selection;
    # the selectbox options come from this same column, so a match exists.
    idx = train["patent_number"].index(_patent_id)
    row = train[idx]
    return row["abstract"], row["claims"]
# Echo the selection, then show the matching abstract and claims.
st.write(_patent_id)
_abstract, _claim = get_abs_claim(_patent_id)
st.write(_abstract)  # display abstract
st.write(_claim)  # display claims
# model and tokenizer initialization
@st.cache_resource
def load_model(language_model_path):
    """Fetch and cache the fine-tuned tokenizer/classifier pair.

    ``st.cache_resource`` ensures the (expensive) download and construction
    happen once per server process, not on every Streamlit rerun.
    """
    return (
        AutoTokenizer.from_pretrained(language_model_path),
        AutoModelForSequenceClassification.from_pretrained(language_model_path),
    )
tokenizer, model = load_model(language_model_path)

# The classifier scores the concatenation of abstract and claims.
input_text = _abstract + _claim
# Tokenize into PyTorch tensors for the model call below.
inputs = tokenizer(
    input_text,
    truncation=True,  # clip to the model's maximum sequence length
    padding=True,
    return_tensors="pt",
)
# Map the classifier's output index to a human-readable decision.
id2label = {0: "REJECTED", 1: "ACCEPTED"}

# Run the model only after the user presses Submit.
if st.button("Submit"):
    with torch.no_grad():  # inference only — no gradients needed
        logits = model(**inputs).logits
    decision = id2label[logits.argmax().item()]
    st.write(decision)