theresatvan committed on
Commit
4372a92
·
1 Parent(s): 8da68fd

Fix merged changes

Browse files
Files changed (1) hide show
  1. app.py +0 -32
app.py CHANGED
@@ -1,15 +1,10 @@
1
  import streamlit as st
2
- <<<<<<< HEAD
3
- from datasets import load_dataset, Features, Value, Sequence
4
- =======
5
  from datasets import load_dataset
6
- >>>>>>> 81414ba96ac55f927033c62ee5c2db6c6a22349c
7
  from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
8
 
9
  decision_to_str = {'REJECTED': 0, 'ACCEPTED': 1, 'PENDING': 2, 'CONT-REJECTED': 3, 'CONT-ACCEPTED': 4, 'CONT-PENDING': 5}
10
 
11
  dataset_dict = load_dataset('HUPD/hupd',
12
- <<<<<<< HEAD
13
  name='sample',
14
  data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
15
  icpr_label=None,
@@ -26,25 +21,6 @@ tokenizer_abstract = DistilBertTokenizer.from_pretrained('theresatvan/hupd-disti
26
 
27
  model_claims = DistilBertForSequenceClassification.from_pretrained('theresatvan/hupd-distilbert-claims')
28
  tokenizer_claims = DistilBertTokenizer.from_pretrained('theresatvan/hupd-distilbert-claims')
29
- =======
30
- name='all',
31
- data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
32
- icpr_label=None,
33
- force_extract=True,
34
- train_filing_start_date='2016-01-01',
35
- train_filing_end_date='2016-01-01',
36
- val_filing_start_date='2017-01-01',
37
- val_filing_end_date='2017-05-31',
38
- )
39
-
40
- dataset = dataset_dict['validation'].filter(lambda e: e['decision'] in ['REJECTED', 'ACCEPTED'])
41
-
42
- model_abstract = DistilBertForSequenceClassification('theresatvan/hupd-distilbert-abstract')
43
- tokenizer_abstract = DistilBertTokenizer('theresatvan/hupd-distilbert-abstract')
44
-
45
- model_claims = DistilBertForSequenceClassification('theresatvan/hupd-distilbert-claims')
46
- tokenizer_claims = DistilBertTokenizer('theresatvan/hupd-distilbert-claims')
47
- >>>>>>> 81414ba96ac55f927033c62ee5c2db6c6a22349c
48
 
49
 
50
  def predict(model_abstract, model_claims, tokenizer_abstract, tokenizer_claims, input):
@@ -75,21 +51,13 @@ if __name__ == '__main__':
75
  st.title = "Can I Patent This?"
76
 
77
  form = st.form('patent-prediction-form')
78
- <<<<<<< HEAD
79
  dropdown = [example['application_number'] for example in dataset]
80
- =======
81
- dropdown = []
82
- >>>>>>> 81414ba96ac55f927033c62ee5c2db6c6a22349c
83
 
84
  input_application = form.selectbox('Select a patent\'s application number', patents_dropdown)
85
  submit = form.form_submit_button("Submit")
86
 
87
  if submit:
88
- <<<<<<< HEAD
89
  input = dataset.filter(lambda e: e['patent_number'] == input_application)
90
- =======
91
- input = dataset.filter(lambda e: e['application_number'] == input_application)
92
- >>>>>>> 81414ba96ac55f927033c62ee5c2db6c6a22349c
93
 
94
  label, prob = predict(model_abstract, model_claims, tokenizer_abstract, tokenizer_claims, input)
95
 
 
1
  import streamlit as st
 
 
 
2
  from datasets import load_dataset
 
3
  from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
4
 
5
  decision_to_str = {'REJECTED': 0, 'ACCEPTED': 1, 'PENDING': 2, 'CONT-REJECTED': 3, 'CONT-ACCEPTED': 4, 'CONT-PENDING': 5}
6
 
7
  dataset_dict = load_dataset('HUPD/hupd',
 
8
  name='sample',
9
  data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
10
  icpr_label=None,
 
21
 
22
  model_claims = DistilBertForSequenceClassification.from_pretrained('theresatvan/hupd-distilbert-claims')
23
  tokenizer_claims = DistilBertTokenizer.from_pretrained('theresatvan/hupd-distilbert-claims')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
 
26
  def predict(model_abstract, model_claims, tokenizer_abstract, tokenizer_claims, input):
 
51
  st.title = "Can I Patent This?"
52
 
53
  form = st.form('patent-prediction-form')
 
54
  dropdown = [example['application_number'] for example in dataset]
 
 
 
55
 
56
  input_application = form.selectbox('Select a patent\'s application number', patents_dropdown)
57
  submit = form.form_submit_button("Submit")
58
 
59
  if submit:
 
60
  input = dataset.filter(lambda e: e['patent_number'] == input_application)
 
 
 
61
 
62
  label, prob = predict(model_abstract, model_claims, tokenizer_abstract, tokenizer_claims, input)
63