Spaces:
Runtime error
Runtime error
File size: 2,640 Bytes
2d5fb99 bbf7aff 90928a3 2efaef6 2d5fb99 bbf7aff bebd184 2d5fb99 9bcd196 90928a3 2efaef6 2d5fb99 2efaef6 2d5fb99 bbf7aff fd97dee 5fd502b dfc2950 9bcd196 bbf7aff 2d5fb99 bbf7aff 9bcd196 2d5fb99 bbf7aff 7daebfc bbf7aff 2d5fb99 bbf7aff 8aad599 bbf7aff 8aad599 bbf7aff 9bcd196 8aad599 0cd3e56 8aad599 0cd3e56 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import pandas as pd
# Identifier of the fine-tuned model; it is passed to load_model(), which
# hands it to from_pretrained() for both the tokenizer and the classifier.
language_model_path = "juliaannjose/finetuned_model"
# Load the dataset; the UI uses its patent number, abstract and claims columns.
#
# Streamlit re-executes this script on every widget interaction, so the load
# lives in a cached function: the dataset is fetched and converted to a
# DataFrame only on the first run, and later reruns reuse the cached frame.
@st.cache_data(show_spinner=False)
def _load_patent_frame():
    """Return the train and validation splits of the HUPD sample as one frame.

    Both splits are converted to DataFrames and concatenated with a fresh
    0..n-1 index (``ignore_index=True``).
    """
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-21",
        val_filing_start_date="2016-01-22",
        val_filing_end_date="2016-01-31",
    )
    df_train = pd.DataFrame(dataset_dict["train"])
    df_val = pd.DataFrame(dataset_dict["validation"])
    return pd.concat([df_train, df_val], ignore_index=True)


# The cache's own spinner is disabled above so only this message is shown.
with st.spinner("Loading..."):
    df = _load_patent_frame()
# Let the user choose a patent: the dropdown lists every value found in the
# "patent_number" column of the loaded frame.
patent_number_choices = df["patent_number"]
_patent_id = st.selectbox("Select the Patent Number", options=patent_number_choices)
# look up the abstract and claims of a patent
def get_abs_claim(_pid, frame=None):
    """Return the ``(abstract, claims)`` pair for the patent number ``_pid``.

    Args:
        _pid: Value to look up in the ``patent_number`` column.
        frame: DataFrame with ``patent_number``, ``abstract`` and ``claims``
            columns. Defaults to the module-level ``df``.

    Returns:
        A tuple ``(abstract, claims)`` taken from the first matching row; if
        several rows share the same patent number the later ones are ignored.

    Raises:
        IndexError: If no row has the given patent number.
    """
    if frame is None:
        frame = df
    # Filter once and read both columns from the same selection instead of
    # scanning the frame separately for each column.
    matches = frame.loc[frame["patent_number"] == _pid, ["abstract", "claims"]]
    if matches.empty:
        raise IndexError(f"no patent with number {_pid!r}")
    return matches["abstract"].iloc[0], matches["claims"].iloc[0]
# Fetch the texts for the selected patent, then render each one under its
# own heading (abstract first, claims second).
_abstract, _claim = get_abs_claim(_patent_id)
for section_heading, section_text in (("Abstract:", _abstract), ("Claim:", _claim)):
    st.title(section_heading)
    st.write(section_text)
# Tokenizer and classifier setup; the st.cache_resource decorator caches the
# returned pair so reruns of the script reuse it.
@st.cache_resource
def load_model(language_model_path):
    """Load the tokenizer and sequence-classification model.

    Both are created with ``from_pretrained(language_model_path)`` and
    returned as the tuple ``(tokenizer, model)``.
    """
    return (
        AutoTokenizer.from_pretrained(language_model_path),
        AutoModelForSequenceClassification.from_pretrained(language_model_path),
    )
tokenizer, model = load_model(language_model_path)

# class index -> label shown as the final result
id2label = {0: "REJECTED", 1: "ACCEPTED"}
# class index -> heading shown above the probability of the predicted class
score_heading = {0: "Rejection Score:", 1: "Acceptance Score:"}

# when submit button clicked, run the model and get result
if st.button("Submit"):
    # Input to our model: the abstract immediately followed by the claims.
    # NOTE(review): the two texts are joined without a separator -- confirm
    # this matches how the model was fine-tuned before changing it.
    input_text = _abstract + _claim
    # Tokenize only after the button is pressed; the script reruns on every
    # widget interaction and the tokens are needed for prediction only.
    inputs = tokenizer(
        input_text,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = model(**inputs)
    probability = torch.nn.functional.softmax(outputs.logits, dim=1)
    # One text was tokenized, so row 0 holds its class probabilities.
    class_probabilities = probability[0]
    predicted_class_id = class_probabilities.argmax().item()
    pred_label = id2label[predicted_class_id]
    st.title("Predicted Patentability")
    # The heading and the score both follow the predicted class, so they can
    # never disagree with the "Result" line (previously possible on a tie).
    st.write(score_heading[predicted_class_id])
    st.write(class_probabilities[predicted_class_id].item())
    st.write("Result:", pred_label)
|