DrishtiSharma committed on
Commit
1ac4555
·
verified ·
1 Parent(s): 7f183cb

Update quick_ref.txt

Browse files
Files changed (1) hide show
  1. quick_ref.txt +95 -0
quick_ref.txt CHANGED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+ from datasets import load_dataset
5
+ import pandas as pd
6
+
7
+
8
+ # finetuned model
9
+ language_model_path = "juliaannjose/finetuned_model"
10
+
11
+ # load the dataset to
12
+ # use the patent number, abstract and claim columns for UI
13
@st.cache_data(show_spinner=False)
def _load_patent_frame():
    """Load the HUPD sample split and return train + validation as one frame.

    Streamlit re-executes the whole script on every widget interaction, so
    the dataset load and DataFrame conversion are cached to run only once
    per set of arguments instead of on every rerun.

    Returns:
        A DataFrame holding the "train" rows followed by the "validation"
        rows, re-indexed from 0.
    """
    dataset_dict = load_dataset(
        "HUPD/hupd",
        name="sample",
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-21",
        val_filing_start_date="2016-01-22",
        val_filing_end_date="2016-01-31",
    )
    df_train = pd.DataFrame(dataset_dict["train"])
    df_val = pd.DataFrame(dataset_dict["validation"])
    return pd.concat([df_train, df_val], ignore_index=True)


# Keep the original spinner; the cache's own spinner is disabled above so
# only one progress indicator is shown.
with st.spinner("Loading..."):
    df = _load_patent_frame()
27
+
28
+
29
+ # drop down menu with patent numbers
30
# Let the user pick one patent number out of the combined frame.
_patent_id = st.selectbox(label="Select the Patent Number", options=df["patent_number"])
34
+
35
+
36
+ # display abstract and claim
37
def get_abs_claim(_pid, frame=None):
    """Return the abstract and claims text for one patent.

    Args:
        _pid: Patent number to look up in the ``patent_number`` column.
        frame: Optional DataFrame with ``patent_number``, ``abstract`` and
            ``claims`` columns. Defaults to the module-level ``df``.

    Returns:
        An ``(abstract, claims)`` tuple taken from the first matching row.

    Raises:
        IndexError: If no row carries the requested patent number.
    """
    source = df if frame is None else frame
    # Build the boolean mask once and reuse it for both columns.
    matches = source.loc[source["patent_number"] == _pid, ["abstract", "claims"]]
    if matches.empty:
        raise IndexError(f"no patent found with number {_pid!r}")
    return matches["abstract"].values[0], matches["claims"].values[0]
42
+
43
+
44
# Look up the selected patent and render its abstract, then its claims,
# each under its own title.
_abstract, _claim = get_abs_claim(_patent_id)
for _heading, _body in (("Abstract:", _abstract), ("Claim:", _claim)):
    st.title(_heading)
    st.write(_body)
49
+
50
+
51
+ # model and tokenizer initialization
52
@st.cache_resource
def load_model(language_model_path):
    """Load the tokenizer and sequence-classification model for a checkpoint.

    Args:
        language_model_path: Identifier or path handed unchanged to both
            ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(language_model_path),
        AutoModelForSequenceClassification.from_pretrained(language_model_path),
    )
57
+
58
+
59
tokenizer, model = load_model(language_model_path)
# The classifier input is the abstract directly followed by the claims.
input_text = _abstract + _claim
# Encode the text with truncation and padding enabled, returned as
# PyTorch tensors.
_encode_options = {"truncation": True, "padding": True, "return_tensors": "pt"}
inputs = tokenizer(input_text, **_encode_options)
69
+
70
+ # get predictions
71
# Class index -> decision label shown to the user.
id2label = {0: "REJECTED", 1: "ACCEPTED"}
# Inference only runs once the Submit button has been pressed.
if st.button("Submit"):
    with torch.no_grad():
        outputs = model(**inputs)
        probability = torch.softmax(outputs.logits, dim=1)

    predicted_class_id = probability.argmax().item()
    pred_label = id2label[predicted_class_id]
    st.title("Predicted Patentability")

    # Report whichever class score is larger; when the two are equal the
    # acceptance score is the one reported.
    _reject_score, _accept_score = probability[0][0], probability[0][1]
    if _reject_score > _accept_score:
        _caption, _score = "Rejection Score:", _reject_score
    else:
        _caption, _score = "Acceptance Score:", _accept_score
    st.write(_caption)
    st.write(_score.item())
    st.write("Result:", pred_label)
88
+
89
+
90
+
91
+
92
+
93
+ -----------------------------------------------------------
94
+
95
+