File size: 2,158 Bytes
2d5fb99
bbf7aff
 
90928a3
2d5fb99
bbf7aff
 
 
bebd184
2d5fb99
90928a3
 
 
 
 
 
 
 
 
 
 
2d5fb99
 
 
 
 
 
 
 
bbf7aff
fd97dee
 
dfc2950
 
 
bbf7aff
 
2d5fb99
821b19f
bbf7aff
7daebfc
 
2d5fb99
bbf7aff
 
7daebfc
 
 
 
 
 
 
 
 
 
 
bbf7aff
 
 
 
 
 
 
 
 
2d5fb99
 
bbf7aff
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset

# Hugging Face Hub id of the fine-tuned sequence-classification model
# used below to predict the patent decision (ACCEPTED/REJECTED).
language_model_path = "juliaannjose/finetuned_model"

# Load the HUPD "sample" configuration so the UI can offer patent numbers
# and display the matching abstract/claims. This runs at import time on
# every Streamlit rerun; the spinner covers the (potentially slow) download.
# NOTE(review): `data_files` points at a "/blob/" page URL — presumably the
# HUPD loading script resolves this itself; confirm against the dataset card.
with st.spinner("Setting up the app..."):
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,  # loader option passed through to the HUPD script — no label filtering
        # Date windows defining the train/validation splits of the sample.
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-21",
        val_filing_start_date="2016-01-22",
        val_filing_end_date="2016-01-31",
    )


# Drop-down of every patent number in the training split; the user picks
# the application whose outcome should be predicted.
_patent_options = dataset_dict["train"]["patent_number"]
_patent_id = st.selectbox(
    "Select the Patent Number",
    _patent_options,
)


def get_abs_claim(_patent_id, _dataset=None):
    """Return the (abstract, claims) pair for the given patent number.

    Args:
        _patent_id: patent number to look up.
        _dataset: optional column-addressable mapping exposing the
            "patent_number", "abstract" and "claims" columns as sequences;
            defaults to the HUPD train split loaded at module level.

    Returns:
        Tuple ``(abstract, claims)`` for the matching row.

    Raises:
        ValueError: if the patent number is not present in the dataset.
    """
    ds = dataset_dict["train"] if _dataset is None else _dataset
    # BUG FIX: the original indexed with `["patent_number"] == _patent_id`,
    # which compares a *list* to a string — always False — and, since
    # False == 0, silently returned row 0 for every selection. Look up the
    # actual row index of the requested patent number instead.
    row_idx = ds["patent_number"].index(_patent_id)
    return ds["abstract"][row_idx], ds["claims"][row_idx]


st.write(_patent_id)
_abstract, _claim = get_abs_claim(_patent_id)
st.write(_abstract)  # display abstract
st.write(_claim)  # display claims


# Model and tokenizer initialization; st.cache_resource re-serves the same
# objects across Streamlit reruns instead of re-downloading them each time.
@st.cache_resource
def load_model(language_model_path):
    """Fetch and cache the fine-tuned tokenizer and classifier from the Hub."""
    return (
        AutoTokenizer.from_pretrained(language_model_path),
        AutoModelForSequenceClassification.from_pretrained(language_model_path),
    )


tokenizer, model = load_model(language_model_path)
# Model input: the abstract and claims concatenated into a single string.
input_text = _abstract + _claim
# Tokenize (truncated/padded) and return PyTorch tensors for inference.
# NOTE(review): this runs on every rerun, even before Submit is clicked.
inputs = tokenizer(
    input_text,
    truncation=True,
    padding=True,
    return_tensors="pt",
)

# Map classifier output indices to human-readable decisions.
# NOTE(review): assumes the fine-tuned model uses 0=REJECTED, 1=ACCEPTED —
# confirm against the model's `config.id2label`.
id2label = {0: "REJECTED", 1: "ACCEPTED"}
# Run inference only when the user clicks the Submit button.
if st.button("Submit"):
    with torch.no_grad():  # inference only — no gradient tracking needed
        logits = model(**inputs).logits

    # argmax over the single example's logits picks the predicted class.
    predicted_class_id = logits.argmax().item()
    pred_label = id2label[predicted_class_id]
    st.write(pred_label)