File size: 2,158 Bytes
2d5fb99
bbf7aff
 
90928a3
2d5fb99
bbf7aff
 
 
bebd184
2d5fb99
90928a3
 
 
 
 
 
 
 
 
 
 
2d5fb99
 
 
 
 
 
 
 
bbf7aff
fd97dee
 
dfc2950
 
 
bbf7aff
 
2d5fb99
821b19f
bbf7aff
7daebfc
 
2d5fb99
bbf7aff
 
7daebfc
 
 
 
 
 
 
 
 
 
 
bbf7aff
 
 
 
 
 
 
 
 
2d5fb99
 
bbf7aff
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset

# Hugging Face Hub id of the fine-tuned sequence-classification model
# used below to predict the patent decision (ACCEPTED/REJECTED).
language_model_path = "juliaannjose/finetuned_model"

# Load the HUPD "sample" configuration so the UI can offer patent numbers
# and display the matching abstract/claims. This runs at import time on
# every Streamlit rerun; the spinner covers the (potentially slow) download.
# NOTE(review): `data_files` points at a "/blob/" page URL — presumably the
# HUPD loading script resolves this itself; confirm against the dataset card.
with st.spinner("Setting up the app..."):
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,  # loader option passed through to the HUPD script — no label filtering
        # Date windows defining the train/validation splits of the sample.
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-21",
        val_filing_start_date="2016-01-22",
        val_filing_end_date="2016-01-31",
    )


# Drop-down of every patent number in the training split; the user picks
# the application whose outcome should be predicted.
_patent_options = dataset_dict["train"]["patent_number"]
_patent_id = st.selectbox(
    "Select the Patent Number",
    _patent_options,
)


def get_abs_claim(_patent_id, _dataset=None):
    """Return the (abstract, claims) pair for the given patent number.

    Args:
        _patent_id: patent number to look up.
        _dataset: optional column-addressable mapping exposing the
            "patent_number", "abstract" and "claims" columns as sequences;
            defaults to the HUPD train split loaded at module level.

    Returns:
        Tuple ``(abstract, claims)`` for the matching row.

    Raises:
        ValueError: if the patent number is not present in the dataset.
    """
    ds = dataset_dict["train"] if _dataset is None else _dataset
    # BUG FIX: the original indexed with `["patent_number"] == _patent_id`,
    # which compares a *list* to a string — always False — and, since
    # False == 0, silently returned row 0 for every selection. Look up the
    # actual row index of the requested patent number instead.
    row_idx = ds["patent_number"].index(_patent_id)
    return ds["abstract"][row_idx], ds["claims"][row_idx]


st.write(_patent_id)
_abstract, _claim = get_abs_claim(_patent_id)
st.write(_abstract)  # display abstract
st.write(_claim)  # display claims


# Model and tokenizer initialization; st.cache_resource re-serves the same
# objects across Streamlit reruns instead of re-downloading them each time.
@st.cache_resource
def load_model(language_model_path):
    """Fetch and cache the fine-tuned tokenizer and classifier from the Hub."""
    return (
        AutoTokenizer.from_pretrained(language_model_path),
        AutoModelForSequenceClassification.from_pretrained(language_model_path),
    )


tokenizer, model = load_model(language_model_path)
# Model input: the abstract and claims concatenated into a single string.
input_text = _abstract + _claim
# Tokenize (truncated/padded) and return PyTorch tensors for inference.
# NOTE(review): this runs on every rerun, even before Submit is clicked.
inputs = tokenizer(
    input_text,
    truncation=True,
    padding=True,
    return_tensors="pt",
)

# Map classifier output indices to human-readable decisions.
# NOTE(review): assumes the fine-tuned model uses 0=REJECTED, 1=ACCEPTED —
# confirm against the model's `config.id2label`.
id2label = {0: "REJECTED", 1: "ACCEPTED"}
# Run inference only when the user clicks the Submit button.
if st.button("Submit"):
    with torch.no_grad():  # inference only — no gradient tracking needed
        logits = model(**inputs).logits

    # argmax over the single example's logits picks the predicted class.
    predicted_class_id = logits.argmax().item()
    pred_label = id2label[predicted_class_id]
    st.write(pred_label)