File size: 1,359 Bytes
2d5fb99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import streamlit as st
from transformers import pipeline
from datasets import load_dataset, Dataset, DatasetDict

# Load the "sample" configuration of the HUPD patent dataset.
# The UI below only reads the patent number, abstract and claim columns.
# Filing-date windows that delimit the train and validation splits.
_hupd_filing_windows = {
    "train_filing_start_date": "2016-01-01",
    "train_filing_end_date": "2016-01-21",
    "val_filing_start_date": "2016-01-22",
    "val_filing_end_date": "2016-01-31",
}
dataset_dict = load_dataset(
    "HUPD/hupd",
    name="sample",
    data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    icpr_label=None,
    **_hupd_filing_windows,
)

# Hub identifier of the fine-tuned language model used for classification.
language_model_path = "juliaannjose/finetuned_model"


@st.cache_resource
def _load_classifier(model_path):
    """Return a transformers pipeline for ``model_path``, built only once.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the model would be re-instantiated each
    time the user picks a patent or presses the button.

    NOTE: ``st.cache_resource`` is available from Streamlit 1.18; on older
    releases the equivalent decorator is ``st.experimental_singleton``.

    Args:
        model_path: Model identifier passed to ``pipeline(model=...)``.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    return pipeline(model=model_path)


# Pipeline used by the submit handler; shared across script reruns.
classifier_model = _load_classifier(language_model_path)

# Drop-down menu offering every patent number found in the training split.
_patent_number_options = dataset_dict["train"]["patent_number"]
_patent_id = st.selectbox("Select the Patent Number", _patent_number_options)

# Look up the abstract and claims of the patent chosen in the drop-down.
#
# The previous expression `["patent_number"] == _patent_id` compared a list
# literal with the selected value, which is always False, so the dataset was
# indexed with False and the selection was ignored. Locate the row explicitly.
_train_split = dataset_dict["train"]
# list.index returns the first match if a patent number occurs more than once.
_selected_row_index = list(_train_split["patent_number"]).index(_patent_id)
_selected_row = _train_split[_selected_row_index]

_abstract = _selected_row["abstract"]
# The HUPD schema names this column "claims" (plural); "claim" is not a column.
_claim = _selected_row["claims"]

# Display the abstract and claims of the selected patent.
st.write(_abstract)
st.write(_claim)

# Run the classifier only when the user presses "Submit".
if st.button("Submit"):
    # The abstract and claim text are concatenated into a single input string.
    # truncation=True is forwarded to the tokenizer so that inputs longer than
    # the model's maximum sequence length are cut instead of raising an error.
    # NOTE(review): assumes the model loads as a text-classification pipeline,
    # which accepts tokenizer keyword arguments at call time - confirm.
    results = classifier_model([_abstract + _claim], truncation=True)
    st.write(results)