AIEcosystem committed (verified)
Commit f90ad55 · 1 Parent(s): 5024c50

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +44 -94
src/streamlit_app.py CHANGED
@@ -13,7 +13,6 @@ from typing import Optional
from gliner import GLiNER
from comet_ml import Experiment

-
st.markdown(
    """
    <style>
@@ -27,7 +26,7 @@ st.markdown(
    background-color: #B2F2B2; /* A pale green for the sidebar */
    secondary-background-color: #B2F2B2;
    }
-
+
    /* Expander background color */
    .streamlit-expanderContent {
    background-color: #F5FFFA;
@@ -61,99 +60,55 @@ st.markdown(
    unsafe_allow_html=True
)

- # --- Page Configuration and UI Elements ---
- st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
-
- st.subheader("HR.ai", divider="orange")
- st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
-
- expander = st.expander("**Important notes**")
- expander.write("""
- **Named Entities:** This HR.ai predicts sixty (60) labels:
- "Email", "Phone_number", "Street_address", "City", "State", "Zip_code", "Country", "Date_of_birth", "Gender", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Temporary", "Terminated", "Active", "Retired", "Job_title", "Employment_type", "Year", "Date", "Company", "Organization", "Role", "Position",
- "Performance_review", "Performance_rating", "Performance_score", "Sick_days", "Vacation_days", "Leave_of_absence", "Holidays", "Pension", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance","Retire_date",
- "Pay_rate", "Hourly_wage", "Annual_salary", "Overtime_pay", "Tax", "Social_security", "Deductions", "Job_posting", "Job_description", "Interview_type", "Applicant", "Candidate", "Referral", "Job_board", "Recruiter",
- "Contract", "Offer_letter", "Agreement", "Training_course", "Certification", "Skill"
-
- Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
-
- **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
-
- **Usage Limits:** You can request results unlimited times for one (1) month.
-
- **Supported Languages:** English, German, French, Italian, Spanish, Portuguese
-
- **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
-
- For any errors or inquiries, please contact us at info@nlpblogs.com
- """)
+ # --- Page Configuration and UI Elements ---
+ st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
+ st.subheader("HR.ai", divider="orange")
+ st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
+ expander = st.expander("**Important notes**")
+ expander.write("""**Named Entities:** This HR.ai predicts sixty (60) labels: "Email", "Phone_number", "Street_address", "City", "State", "Zip_code", "Country", "Date_of_birth", "Gender", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Temporary", "Terminated", "Active", "Retired", "Job_title", "Employment_type", "Year", "Date", "Company", "Organization", "Role", "Position", "Performance_review", "Performance_rating", "Performance_score", "Sick_days", "Vacation_days", "Leave_of_absence", "Holidays", "Pension", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Retire_date", "Pay_rate", "Hourly_wage", "Annual_salary", "Overtime_pay", "Tax", "Social_security", "Deductions", "Job_posting", "Job_description", "Interview_type", "Applicant", "Candidate", "Referral", "Job_board", "Recruiter", "Contract", "Offer_letter", "Agreement", "Training_course", "Certification", "Skill"
+
+ Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
+
+ **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
+
+ **Usage Limits:** You can request results unlimited times for one (1) month.
+
+ **Supported Languages:** English, German, French, Italian, Spanish, Portuguese
+
+ **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
+
+ For any errors or inquiries, please contact us at info@nlpblogs.com""")

with st.sidebar:
-
-
    st.write("Use the following code to embed the HR.ai web app on your website. Feel free to adjust the width and height values to fit your page.")
    code = '''
-     <iframe
-         src="https://aiecosystem-hr-ai.hf.space"
-         frameborder="0"
-         width="850"
-         height="450"
-     ></iframe>
+     <iframe src="https://aiecosystem-hr-ai.hf.space" frameborder="0" width="850" height="450"></iframe>
    '''
    st.code(code, language="html")
-
    st.text("")
    st.text("")
    st.divider()
-
-
-
    st.subheader("Ready to build your own NER Web App? 🚀", divider="orange")
    st.link_button("NER Builder", "https://nlpblogs.com", type="primary")

-
-
# --- Comet ML Setup ---
COMET_API_KEY = os.environ.get("COMET_API_KEY")
COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
-
comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
+
if not comet_initialized:
    st.warning("Comet ML not initialized. Check environment variables.")

# --- Label Definitions ---
- labels = [
-     "Email", "Phone_number", "Street_address", "City", "State", "Zip_code", "Country",
-     "Date_of_birth", "Gender", "Marital_status", "Person",
-     "Full_time", "Part_time", "Contract", "Temporary", "Terminated", "Active", "Retired",
-     "Job_title", "Employment_type", "Year", "Date", "Company", "Organization", "Role", "Position",
-     "Performance_review", "Performance_rating", "Performance_score",
-     "Sick_days", "Vacation_days", "Leave_of_absence", "Holidays",
-     "Pension", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance","Retire_date",
-     "Pay_rate", "Hourly_wage", "Annual_salary", "Overtime_pay",
-     "Tax", "Social_security", "Deductions",
-     "Job_posting", "Job_description", "Interview_type", "Applicant", "Candidate", "Referral", "Job_board", "Recruiter",
-     "Contract", "Offer_letter", "Agreement",
-     "Training_course", "Certification", "Skill"]
-
-
+ labels = ["Email", "Phone_number", "Street_address", "City", "State", "Zip_code", "Country", "Date_of_birth", "Gender", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Temporary", "Terminated", "Active", "Retired", "Job_title", "Employment_type", "Year", "Date", "Company", "Organization", "Role", "Position", "Performance_review", "Performance_rating", "Performance_score", "Sick_days", "Vacation_days", "Leave_of_absence", "Holidays", "Pension", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Retire_date", "Pay_rate", "Hourly_wage", "Annual_salary", "Overtime_pay", "Tax", "Social_security", "Deductions", "Job_posting", "Job_description", "Interview_type", "Applicant", "Candidate", "Referral", "Job_board", "Recruiter", "Contract", "Offer_letter", "Agreement", "Training_course", "Certification", "Skill"]

# Create a mapping dictionary for labels to categories
category_mapping = {
-
-
    "Contact Information": ["Email", "Phone_number", "Street_address", "City", "State", "Zip_code", "Country"],
    "Personal Details": ["Date_of_birth", "Gender", "Marital_status", "Person"],
    "Employment Status": ["Full_time", "Part_time", "Contract", "Temporary", "Terminated", "Active", "Retired"],
    "Employment Information" : ["Job_title", "Employment_type", "Year", "Date", "Company", "Organization", "Role", "Position"],
-
    "Performance": ["Performance_review", "Performance_rating", "Performance_score"],
    "Attendance": ["Sick_days", "Vacation_days", "Leave_of_absence", "Holidays"],
    "Benefits": ["Pension", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance","Retire_date"],
@@ -164,22 +119,17 @@ category_mapping = {
    "Professional_Development": ["Training_course", "Certification", "Skill"]
}

-
# --- Model Loading ---
@st.cache_resource
def load_ner_model():
    """Loads the GLiNER model and caches it."""
    try:
-         return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", nested_ner=True, num_gen_sequences=2)
-
+         return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", nested_ner=True, num_gen_sequences=2, gen_constraints=labels)
    except Exception as e:
        st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
        st.stop()
-
model = load_ner_model()

-
-
# Flatten the mapping to a single dictionary
reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}

@@ -205,7 +155,6 @@ if st.button("Results"):

    if not df.empty:
        df['category'] = df['label'].map(reverse_category_mapping)
-
        if comet_initialized:
            experiment = Experiment(
                api_key=COMET_API_KEY,
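Note on context for this hunk: the DataFrame df used here (and in the hunks that follow) is built in unchanged lines that this diff does not show, presumably from the text area mentioned in the app's notes. A minimal sketch of how it is typically produced with the objects defined above, assuming GLiNER's predict_entities API (the app's actual prediction code may differ):

import pandas as pd

# Hypothetical input; in the app the text presumably comes from a text area widget (not shown in this diff).
text = "Jane Doe was hired as a Data Scientist in 2023 with an annual salary of $95,000."

# model, labels and reverse_category_mapping are defined in the hunks above.
entities = model.predict_entities(text, labels, threshold=0.5)

# Each prediction is a dict with "text", "label", "score", "start", "end",
# matching the columns listed in the Glossary of tags below.
df = pd.DataFrame(entities)
if not df.empty:
    df['category'] = df['label'].map(reverse_category_mapping)  # same mapping step as in the hunk above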
@@ -214,9 +163,22 @@ if st.button("Results"):
            )
            experiment.log_parameter("input_text", text)
            experiment.log_table("predicted_entities", df)
-
+
        st.subheader("Extracted Entities", divider = "orange")
-         st.dataframe(df.style.set_properties(**{"border": "2px solid gray", "color": "blue", "font-size": "16px"}))
+
+         # Create tabs for each category
+         category_names = sorted(list(category_mapping.keys()))
+         category_tabs = st.tabs(category_names)
+
+         for i, category_name in enumerate(category_names):
+             with category_tabs[i]:
+                 df_category_filtered = df[df['category'] == category_name]
+                 if not df_category_filtered.empty:
+                     st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
+                 else:
+                     st.info(f"No entities found for the '{category_name}' category.")
+
+         st.divider()

        with st.expander("See Glossary of tags"):
            st.write('''
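Review-style aside, not part of the commit: the new tab loop above pairs each tab with its category name by index. An equivalent formulation zips the two sequences directly, which avoids the manual indexing; the rendered result is the same:

# Alternative to the enumerate/index pattern added above (behavior identical).
for tab, category_name in zip(st.tabs(category_names), category_names):
    with tab:
        df_category_filtered = df[df['category'] == category_name]
        if not df_category_filtered.empty:
            st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
        else:
            st.info(f"No entities found for the '{category_name}' category.")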
@@ -227,34 +189,30 @@ if st.button("Results"):
            - **start**: ['index of the start of the corresponding entity']
            - **end**: ['index of the end of the corresponding entity']
            ''')
-
        st.divider()
-
-
+
        # Tree map
        st.subheader("Tree map", divider = "orange")
        fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
        fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
        st.plotly_chart(fig_treemap)
-
+
        # Pie and Bar charts
        grouped_counts = df['category'].value_counts().reset_index()
        grouped_counts.columns = ['category', 'count']
-
        col1, col2 = st.columns(2)
+
        with col1:
            st.subheader("Pie chart", divider = "orange")
-             fig_pie = px.pie(grouped_counts, values='count', names='category',
-                              hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
+             fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
            fig_pie.update_traces(textposition='inside', textinfo='percent+label')
            st.plotly_chart(fig_pie)
-
+
        with col2:
            st.subheader("Bar chart", divider = "orange")
-             fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True,
-                              title='Occurrences of predicted categories')
+             fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
            st.plotly_chart(fig_bar)
-
+
        # Most Frequent Entities
        st.subheader("Most Frequent Entities", divider="orange")
        word_counts = df['text'].value_counts().reset_index()
@@ -267,15 +225,10 @@ if st.button("Results"):
            st.plotly_chart(fig_repeating_bar)
        else:
            st.warning("No entities were found that occur more than once.")
-
-
-
-
-
-
+
        # Download Section
        st.divider()
-
+
        dfa = pd.DataFrame(
            data={
                'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
@@ -289,12 +242,11 @@ if st.button("Results"):
                ]
            }
        )
-
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w") as myzip:
            myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
            myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
-
+
        with stylable_container(
            key="download_button",
            css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
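Aside: if you want to sanity-check the archive assembled in the hunk above, the in-memory buffer can be inspected with the standard zipfile module. A quick sketch, not part of the app; buf.getvalue() is presumably the bytes handed to st.download_button in the unchanged lines below:

import io, zipfile

data = buf.getvalue()  # the bytes offered for download
with zipfile.ZipFile(io.BytesIO(data)) as z:
    # Expected: ['Summary of the results.csv', 'Glossary of tags.csv']
    print(z.namelist())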
@@ -305,17 +257,15 @@ if st.button("Results"):
                file_name="nlpblogs_results.zip",
                mime="application/zip",
            )
-
+
        if comet_initialized:
            experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
            experiment.end()
-
    else: # If df is empty
        st.warning("No entities were found in the provided text.")
-
+
    end_time = time.time()
    elapsed_time = end_time - start_time
-
    st.text("")
    st.text("")
    st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
 