AIEcosystem committed on
Commit
5ea6f29
·
verified ·
1 Parent(s): 41afe7c

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +79 -93
src/streamlit_app.py CHANGED
@@ -69,7 +69,7 @@ st.markdown(
69
  # --- Page Configuration and UI Elements ---
70
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
71
 
72
- st.subheader("Compliance", divider="gray")
73
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
74
 
75
  expander = st.expander("**Important notes on the ProductTag**")
@@ -122,114 +122,100 @@ if not comet_initialized:
122
 
123
  # --- Label Definitions ---
124
  labels = [
125
- "medical_record_number",
126
- "date_of_birth",
127
- "ssn",
128
- "date",
129
- "first_name",
130
- "email",
131
- "last_name",
132
- "customer_id",
133
- "employee_id",
134
- "name",
135
- "street_address",
136
- "phone_number",
137
- "ipv4",
138
- "credit_card_number",
139
- "license_plate",
140
- "address",
141
- "user_name",
142
- "device_identifier",
143
- "bank_routing_number",
144
- "date_time",
145
- "company_name",
146
- "unique_identifier",
147
- "biometric_identifier",
148
- "account_number",
149
- "city",
150
- "certificate_license_number",
151
- "time",
152
- "postcode",
153
- "vehicle_identifier",
154
- "coordinate",
155
- "country",
156
- "api_key",
157
- "ipv6",
158
- "password",
159
- "health_plan_beneficiary_number",
160
- "national_id",
161
- "tax_id",
162
- "url",
163
- "state",
164
- "swift_bic",
165
- "cvv",
166
- "pin"
167
  ]
 
168
 
169
  # Create a mapping dictionary for labels to categories
170
  category_mapping = {
171
- "Personal Identifiers": [
172
- "date_of_birth",
173
- "first_name",
174
- "last_name",
175
- "name",
176
- "biometric_identifier",
177
- "user_name",
178
- "password"
 
 
 
179
  ],
180
 
181
- "Contact & Location Information": [
182
- "email",
183
- "street_address",
184
- "phone_number",
185
- "address",
186
- "city",
187
- "postcode",
188
- "coordinate",
189
- "country",
190
- "state",
191
- "url"
192
  ],
193
 
194
- "Financial & Business Data": [
195
- "credit_card_number",
196
- "bank_routing_number",
197
- "account_number",
198
- "swift_bic",
199
- "cvv",
200
- "pin",
201
- "company_name",
202
- "api_key"
203
  ],
204
 
205
- "Government & Official IDs": [
206
- "ssn",
207
- "license_plate",
208
- "certificate_license_number",
209
- "national_id",
210
- "tax_id",
211
- "medical_record_number",
212
- "health_plan_beneficiary_number"
213
  ],
214
 
215
- "Technical & System Data ": [
216
- "ipv4",
217
- "device_identifier",
218
- "ipv6"
219
  ],
220
 
221
- "Unique Identifiers & Registration Numbers": [
222
- "customer_id",
223
- "employee_id",
224
- "unique_identifier",
225
- "vehicle_identifier"
226
  ],
227
 
228
- "Date & Time Stamps": [
229
- "date",
230
- "date_time",
231
- "time"
232
  ]
 
 
 
 
 
 
 
 
 
 
 
233
  }
234
 
235
 
@@ -239,7 +225,7 @@ category_mapping = {
239
  def load_ner_model():
240
  """Loads the GLiNER model and caches it."""
241
  try:
242
- return GLiNER.from_pretrained("gretelai/gretel-gliner-bi-large-v1.0", nested_ner=True, num_gen_sequences=2, gen_constraints= labels)
243
  except Exception as e:
244
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
245
  st.stop()
 
69
  # --- Page Configuration and UI Elements ---
70
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
71
 
72
+ st.subheader("Legal", divider="gray")
73
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
74
 
75
  expander = st.expander("**Important notes on the ProductTag**")
 
122
 
123
  # --- Label Definitions ---
124
  labels = [
125
+ "Plaintiff",
126
+ "Petitioner",
127
+
128
+ "Defendant",
129
+ "Respondent",
130
+
131
+ "Appellant",
132
+ "Appellee",
133
+
134
+ "Debtor",
135
+ "Creditor",
136
+
137
+ "Signer",
138
+ "Witness",
139
+ "Courts",
140
+ "Judges",
141
+ "Lawyers",
142
+ "Attorneys",
143
+ "Statutes",
144
+ "Laws",
145
+ "Provisions",
146
+ "Case Citations",
147
+ "Legal Documents"
148
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  ]
150
+
151
 
152
  # Create a mapping dictionary for labels to categories
153
  category_mapping = {
154
+ "Parties": [
155
+ "Plaintiff",
156
+ "Petitioner",
157
+ "Defendant",
158
+ "Respondent",
159
+ "Appellant",
160
+ "Appellee",
161
+ "Debtor",
162
+ "Creditor",
163
+ "Signer",
164
+ "Witness"
165
  ],
166
 
167
+ "Judicial & Governmental Bodies": [
168
+ "Courts",
169
+ "Judges",
170
+ "Lawyers",
171
+ "Attorneys"
172
+
 
 
 
 
 
173
  ],
174
 
175
+ "Legal Instruments & Concepts": [
176
+ "Statutes",
177
+ "Laws",
178
+ "Provisions",
179
+ "Case_citations",
180
+ "Legal_documents"
181
+
 
 
182
  ],
183
 
184
+ "Dates & Timeframes:": [
185
+ "Effective_dates",
186
+ "Execution_dates",
187
+ "Expiration_dates"
188
+
189
+
 
 
190
  ],
191
 
192
+ "Financial & Monetary Entities:": [
193
+ "Money",
194
+ "Amounts"
195
+
196
  ],
197
 
198
+ "Contracts": [
199
+ "Contract_terms"
200
+
 
 
201
  ],
202
 
203
+ "Court Judgments:": [
204
+ "Case_number",
205
+ "Witnesses",
206
+
207
  ]
208
+
209
+
210
+
211
+ "Criminal Law:": [
212
+ "Crimes",
213
+ "Offenses",
214
+ "Victims"
215
+
216
+ ]
217
+
218
+
219
  }
220
 
221
 
 
225
  def load_ner_model():
226
  """Loads the GLiNER model and caches it."""
227
  try:
228
+ return GLiNER.from_pretrained("knowledgator/gliner-decoder-large-v1.0", nested_ner=True, num_gen_sequences=2, gen_constraints= labels)
229
  except Exception as e:
230
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
231
  st.stop()