DrishtiSharma committed on
Commit
ad2e060
·
verified ·
1 Parent(s): 754a44f

Delete quick_ref.txt

Browse files
Files changed (1) hide show
  1. quick_ref.txt +0 -98
quick_ref.txt DELETED
@@ -1,98 +0,0 @@
1
- https://huggingface.co/spaces/juliaannjose/hupd_patent_classifier/
2
-
3
-
4
- import streamlit as st
5
- import torch
6
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
- from datasets import load_dataset
8
- import pandas as pd
9
-
10
-
11
- # finetuned model
12
- language_model_path = "juliaannjose/finetuned_model"
13
-
14
- # load the dataset to
15
- # use the patent number, abstract and claim columns for UI
16
- with st.spinner("Loading..."):
17
- dataset_dict = load_dataset(
18
- "HUPD/hupd",
19
- name="sample",
20
- data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
21
- icpr_label=None,
22
- train_filing_start_date="2016-01-01",
23
- train_filing_end_date="2016-01-21",
24
- val_filing_start_date="2016-01-22",
25
- val_filing_end_date="2016-01-31",
26
- )
27
- df_train = pd.DataFrame(dataset_dict["train"])
28
- df_val = pd.DataFrame(dataset_dict["validation"])
29
- df = pd.concat([df_train, df_val], ignore_index=True)
30
-
31
-
32
- # drop down menu with patent numbers
33
- _patent_id = st.selectbox(
34
- "Select the Patent Number",
35
- options=df["patent_number"],
36
- )
37
-
38
-
39
- # display abstract and claim
40
- def get_abs_claim(_pid):
41
- # get abstract and claim corresponding to this patent id
42
- _abs = df.loc[df["patent_number"] == _pid]["abstract"]
43
- _cl = df.loc[df["patent_number"] == _pid]["claims"]
44
- return _abs.values[0], _cl.values[0]
45
-
46
-
47
- _abstract, _claim = get_abs_claim(_patent_id)
48
- st.title("Abstract:") # display abstract
49
- st.write(_abstract)
50
- st.title("Claim:") # display claims
51
- st.write(_claim)
52
-
53
-
54
- # model and tokenizer initialization
55
- @st.cache_resource
56
- def load_model(language_model_path):
57
- tokenizer = AutoTokenizer.from_pretrained(language_model_path)
58
- model = AutoModelForSequenceClassification.from_pretrained(language_model_path)
59
- return tokenizer, model
60
-
61
-
62
- tokenizer, model = load_model(language_model_path)
63
- # input to our model
64
- input_text = _abstract + _claim
65
- # get tokens
66
- inputs = tokenizer(
67
- input_text,
68
- truncation=True,
69
- padding=True,
70
- return_tensors="pt",
71
- )
72
-
73
- # get predictions
74
- id2label = {0: "REJECTED", 1: "ACCEPTED"}
75
- # when submit button clicked, run the model and get result
76
- if st.button("Submit"):
77
- with torch.no_grad():
78
- outputs = model(**inputs)
79
- probability = torch.nn.functional.softmax(outputs.logits, dim=1)
80
-
81
- predicted_class_id = probability.argmax().item()
82
- pred_label = id2label[predicted_class_id]
83
- st.title("Predicted Patentability")
84
- if probability[0][0] > probability[0][1]:
85
- st.write("Rejection Score:")
86
- st.write(probability[0][0].item())
87
- else:
88
- st.write("Acceptance Score:")
89
- st.write(probability[0][1].item())
90
- st.write("Result:", pred_label)
91
-
92
-
93
-
94
-
95
-
96
- -----------------------------------------------------------
97
-
98
-