import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import pandas as pd
# finetuned model
language_model_path = "juliaannjose/finetuned_model"
# load the dataset so the patent number, abstract and claims columns can be used in the UI
with st.spinner("Setting up the app..."):
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-21",
        val_filing_start_date="2016-01-22",
        val_filing_end_date="2016-01-31",
    )
    df_train = pd.DataFrame(dataset_dict["train"])
    df_val = pd.DataFrame(dataset_dict["validation"])
    df = pd.concat([df_train, df_val], ignore_index=True)
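# Note: name="sample" selects HUPD's small sample configuration, which covers
# applications filed in January 2016 only; that is why the train/validation
# date ranges above are so narrow.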
# drop down menu with patent numbers
_patent_id = st.selectbox(
"Select the Patent Number",
options=df["patent_number"],
)
# display abstract and claim
def get_abs_claim(_pid):
    # get the abstract and claims corresponding to this patent id
    st.write(_pid)
    row = df.loc[df["patent_number"] == _pid]
    _abs = row["abstract"].iloc[0]
    _cl = row["claims"].iloc[0]
    st.write(_abs)
    return _abs, _cl
st.write(_patent_id)
_abstract, _claim = get_abs_claim(_patent_id)
# st.write(_abstract) # display abstract
# st.write(_claim) # display claims
# model and tokenizer initialization
@st.cache_resource
def load_model(language_model_path):
    tokenizer = AutoTokenizer.from_pretrained(language_model_path)
    model = AutoModelForSequenceClassification.from_pretrained(language_model_path)
    return tokenizer, model
tokenizer, model = load_model(language_model_path)
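# Note: st.cache_resource caches the tokenizer and model across Streamlit reruns,
# so they are downloaded and loaded only once rather than on every interaction.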
# input to our model
input_text = _abstract + _claim
# get tokens
inputs = tokenizer(
    input_text,
    truncation=True,
    padding=True,
    return_tensors="pt",
)
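# Note: truncation=True clips the combined abstract + claims text to the
# tokenizer's maximum input length (typically 512 tokens for BERT-style models),
# so very long claims sections are cut off before reaching the model.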
# map the model's output class ids to labels
id2label = {0: "REJECTED", 1: "ACCEPTED"}
# when submit button clicked, run the model and get result
if st.button("Submit"):
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_id = logits.argmax().item()
    pred_label = id2label[predicted_class_id]
    st.write(pred_label)
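    # Optional sketch (not part of the original app): the logits could also be
    # turned into probabilities with softmax to show the model's confidence, e.g.:
    # probs = torch.softmax(logits, dim=-1)
    # st.write({label: round(probs[0][i].item(), 3) for i, label in id2label.items()})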