Maria Tsilimos committed on
Commit
6aa1b23
·
unverified ·
1 Parent(s): 6d77d99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -135
app.py CHANGED
@@ -6,46 +6,28 @@ from transformers import pipeline
6
  from streamlit_extras.stylable_container import stylable_container
7
  import plotly.express as px
8
  import zipfile
9
-
10
-
11
  import os
12
- from comet_ml import Experiment
13
-
14
-
15
 
16
- st.subheader("7-Persian Named Entity Recognition Web App", divider = "red")
17
- st.link_button("by nlpblogs", "https://nlpblogs.com", type = "tertiary")
18
 
19
  expander = st.expander("**Important notes on the 7-Persian Named Entity Recognition Web App**")
20
  expander.write('''
21
-
22
- **Named Entities:**
23
- This 7-Persian Named Entity Recognition Web App predicts seven (7) labels (“person”, “location”, “money”, “organization”, “date”, “percent value”, “time”). Results are presented in an easy-to-read table, visualized in an interactive tree map, pie chart, and bar chart, and are available for download along with a Glossary of tags.
24
- Please check and adjust the language settings in your computer, so the Persian characters are handled properly in your downloaded file.
25
-
26
- **How to Use:**
27
- Type or paste your text and press Ctrl + Enter. Then, click the 'Results' button to extract and tag entities in your text data.
28
-
29
- **Usage Limits:**
30
- Unlimited number of Result requests.
31
-
32
- **Customization:**
33
- To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
34
-
35
- **Technical issues:**
36
- If your connection times out, please refresh the page or reopen the app's URL.
37
-
38
- For any errors or inquiries, please contact us at info@nlpblogs.com
39
-
40
- ''')
41
-
42
 
43
  with st.sidebar:
44
  container = st.container(border=True)
45
  container.write("**Named Entity Recognition (NER)** is the task of extracting and tagging entities in text data. Entities can be persons, organizations, locations, countries, products, events etc.")
46
- st.subheader("Related NLP Web Apps", divider = "red")
47
- st.link_button("14-Named Entity Recognition Web App", "https://nlpblogs.com/shop/named-entity-recognition-ner/14-named-entity-recognition-web-app/", type = "primary")
48
-
49
 
50
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
51
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
@@ -57,7 +39,15 @@ else:
57
  comet_initialized = False
58
  st.warning("Comet ML not initialized. Check environment variables.")
59
 
 
 
 
60
 
 
 
 
 
 
61
 
62
  text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", key='my_text_area')
63
  st.write("**Input text**: ", text)
@@ -66,114 +56,104 @@ def clear_text():
66
  st.session_state['my_text_area'] = ""
67
 
68
  st.button("Clear text", on_click=clear_text)
69
-
70
  st.divider()
71
 
72
  if st.button("Results"):
73
- with st.spinner("Wait for it...", show_time=True):
74
- time.sleep(5)
75
- model = pipeline("token-classification", model="HooshvareLab/bert-fa-base-uncased-ner-peyma", aggregation_strategy = "max")
76
- text1 = model(text)
77
-
78
- df1 = pd.DataFrame(text1)
79
- pattern = r'[^\w\s]'
80
- df1['word'] = df1['word'].replace(pattern, '', regex=True)
81
-
82
- df2 = df1.replace('', 'Unknown')
83
- df = df2.dropna()
84
-
85
- if comet_initialized:
86
- experiment = Experiment(
87
- api_key=COMET_API_KEY,
88
- workspace=COMET_WORKSPACE,
89
- project_name=COMET_PROJECT_NAME,
90
- )
91
- experiment.log_parameter("input_text", text)
92
- experiment.log_table("predicted_entities", df)
93
-
94
- properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
95
- df_styled = df.style.set_properties(**properties)
96
- st.dataframe(df_styled)
97
-
98
- with st.expander("See Glossary of tags"):
99
- st.write('''
100
- '**word**': ['entity extracted from your text data']
101
-
102
- '**score**': ['accuracy score; how accurately a tag has been assigned to a given entity']
103
-
104
- '**entity_group**': ['label (tag) assigned to a given extracted entity']
105
-
106
- '**start**': ['index of the start of the corresponding entity']
107
-
108
- '**end**': ['index of the end of the corresponding entity']
109
-
110
- **What does B and I mean in front of each entity_group?**
111
-
112
- Supposing that there are two words (word A, word B).
113
-
114
- **B** indicates that word A is the beginning of an entity_group and **I** indicates that word B is inside that entity_group.
115
-
116
- For example, **Los** is the beginning of the entity_group **Location** and **Angeles** is inside the entity_group **Location**.
117
-
118
- Los (B-LOC) - Beginning of the entity_group **Location**
119
-
120
- Angeles (I-LOC) - Inside the entity_group **Location**
121
-
122
- ''')
123
-
124
- if df is not None:
125
- fig = px.treemap(df, path=[px.Constant("all"), 'word', 'entity_group'],
126
- values='score', color='entity_group')
127
- fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
128
- st.subheader("Tree map", divider = "red")
129
- st.plotly_chart(fig)
130
  if comet_initialized:
131
- experiment.log_figure(figure=fig, figure_name="entity_treemap")
132
-
133
- if df is not None:
134
- value_counts1 = df['entity_group'].value_counts()
135
- df1 = pd.DataFrame(value_counts1)
136
- final_df = df1.reset_index().rename(columns={"index": "entity_group"})
137
- col1, col2 = st.columns(2)
138
- with col1:
139
- fig1 = px.pie(final_df, values='count', names='entity_group', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted labels')
140
- fig1.update_traces(textposition='inside', textinfo='percent+label')
141
- st.subheader("Pie Chart", divider = "red")
142
- st.plotly_chart(fig1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  if comet_initialized:
144
- experiment.log_figure(figure=fig1, figure_name="label_pie_chart")
145
- with col2:
146
- fig2 = px.bar(final_df, x="count", y="entity_group", color="entity_group", text_auto=True, title='Occurrences of predicted labels')
147
- st.subheader("Bar Chart", divider = "red")
148
- st.plotly_chart(fig2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  if comet_initialized:
150
- experiment.log_figure(figure=fig2, figure_name="label_bar_chart")
151
-
152
- dfa = pd.DataFrame(
153
- data={
154
- 'word': ['entity extracted from your text data'], 'score': ['accuracy score; how accurately a tag has been assigned to a given entity'], 'entity_group': ['label (tag) assigned to a given extracted entity'],
155
- 'start': ['index of the start of the corresponding entity'],
156
- 'end': ['index of the end of the corresponding entity'],
157
- })
158
- buf = io.BytesIO()
159
- with zipfile.ZipFile(buf, "w") as myzip:
160
- myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
161
- myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
162
-
163
-
164
- with stylable_container(
165
- key="download_button",
166
- css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
167
- ):
168
- st.download_button(
169
- label="Download zip file",
170
- data=buf.getvalue(),
171
- file_name="zip file.zip",
172
- mime="application/zip",
173
- )
174
- if comet_initialized:
175
- experiment.log_asset(buf.getvalue(), file_name="downloadable_results.zip")
176
 
177
- st.divider()
178
- if comet_initialized:
179
- experiment.end()
 
6
  from streamlit_extras.stylable_container import stylable_container
7
  import plotly.express as px
8
  import zipfile
 
 
9
  import os
10
+ from comet_ml import Experiment # Comet ML is imported, but not used in the exact same way for caching
 
 
11
 
12
+ st.subheader("7-Persian Named Entity Recognition Web App", divider="red")
13
+ st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
14
 
15
  expander = st.expander("**Important notes on the 7-Persian Named Entity Recognition Web App**")
16
  expander.write('''
17
+ **Named Entities:** This 7-Persian Named Entity Recognition Web App predicts seven (7) labels (“person”, “location”, “money”, “organization”, “date”, “percent value”, “time”). Results are presented in an easy-to-read table, visualized in an interactive tree map, pie chart, and bar chart, and are available for download along with a Glossary of tags. Please check and adjust the language settings in your computer, so the Persian characters are handled properly in your downloaded file.
18
+ **How to Use:** Type or paste your text and press Ctrl + Enter. Then, click the 'Results' button to extract and tag entities in your text data.
19
+ **Usage Limits:** Unlimited number of Result requests.
20
+
21
+ **Customization:** To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
22
+ **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
23
+ For any errors or inquiries, please contact us at info@nlpblogs.com
24
+ ''')
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  with st.sidebar:
27
  container = st.container(border=True)
28
  container.write("**Named Entity Recognition (NER)** is the task of extracting and tagging entities in text data. Entities can be persons, organizations, locations, countries, products, events etc.")
29
+ st.subheader("Related NLP Web Apps", divider="red")
30
+ st.link_button("14-Named Entity Recognition Web App", "https://nlpblogs.com/shop/named-entity-recognition-ner/14-named-entity-recognition-web-app/", type="primary")
 
31
 
32
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
33
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
 
39
  comet_initialized = False
40
  st.warning("Comet ML not initialized. Check environment variables.")
41
 
42
+ # --- Caching the model with st.cache_resource ---
43
+ @st.cache_resource
44
+ def load_ner_model():
45
 
46
+ return pipeline("token-classification", model="HooshvareLab/bert-fa-base-uncased-ner-peyma", aggregation_strategy="max")
47
+
48
+ # Load the model using the cached function
49
+ model = load_ner_model()
50
+ # --- End Caching ---
51
 
52
  text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", key='my_text_area')
53
  st.write("**Input text**: ", text)
 
56
  st.session_state['my_text_area'] = ""
57
 
58
  st.button("Clear text", on_click=clear_text)
 
59
  st.divider()
60
 
61
  if st.button("Results"):
62
+ if not text.strip(): # Add a check for empty input
63
+ st.warning("Please enter some text to process.")
64
+ else:
65
+ with st.spinner("Wait for it...", show_time=True):
66
+ # No need for time.sleep(5) here unless it's for artificial delay
67
+ # The model is already loaded thanks to st.cache_resource
68
+ text1 = model(text)
69
+
70
+ df1 = pd.DataFrame(text1)
71
+ pattern = r'[^\w\s]'
72
+ df1['word'] = df1['word'].replace(pattern, '', regex=True)
73
+ df2 = df1.replace('', 'Unknown')
74
+ df = df2.dropna()
75
+
76
+ # Initialize Comet ML experiment here, as it's per-run
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  if comet_initialized:
78
+ experiment = Experiment(
79
+ api_key=COMET_API_KEY,
80
+ workspace=COMET_WORKSPACE,
81
+ project_name=COMET_PROJECT_NAME,
82
+ )
83
+ experiment.log_parameter("input_text", text)
84
+ experiment.log_table("predicted_entities", df)
85
+
86
+ properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
87
+ df_styled = df.style.set_properties(**properties)
88
+ st.dataframe(df_styled)
89
+
90
+ with st.expander("See Glossary of tags"):
91
+ st.write('''
92
+ '**word**': ['entity extracted from your text data']
93
+ '**score**': ['accuracy score; how accurately a tag has been assigned to a given entity']
94
+ '**entity_group**': ['label (tag) assigned to a given extracted entity']
95
+ '**start**': ['index of the start of the corresponding entity']
96
+ '**end**': ['index of the end of the corresponding entity']
97
+ **What does B and I mean in front of each entity_group?**
98
+ Supposing that there are two words (word A, word B).
99
+ **B** indicates that word A is the beginning of an entity_group and **I** indicates that word B is inside that entity_group.
100
+ For example, **Los** is the beginning of the entity_group **Location** and **Angeles** is inside the entity_group **Location**.
101
+ Los (B-LOC) - Beginning of the entity_group **Location**
102
+ Angeles (I-LOC) - Inside the entity_group **Location**
103
+ ''')
104
+
105
+ if df is not None and not df.empty: # Added check for empty DataFrame
106
+ fig = px.treemap(df, path=[px.Constant("all"), 'word', 'entity_group'],
107
+ values='score', color='entity_group')
108
+ fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
109
+ st.subheader("Tree map", divider="red")
110
+ st.plotly_chart(fig)
111
  if comet_initialized:
112
+ experiment.log_figure(figure=fig, figure_name="entity_treemap")
113
+
114
+ if df is not None and not df.empty: # Added check for empty DataFrame
115
+ value_counts1 = df['entity_group'].value_counts()
116
+ df1 = pd.DataFrame(value_counts1)
117
+ final_df = df1.reset_index().rename(columns={"index": "entity_group"})
118
+ col1, col2 = st.columns(2)
119
+ with col1:
120
+ fig1 = px.pie(final_df, values='count', names='entity_group', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted labels')
121
+ fig1.update_traces(textposition='inside', textinfo='percent+label')
122
+ st.subheader("Pie Chart", divider="red")
123
+ st.plotly_chart(fig1)
124
+ if comet_initialized:
125
+ experiment.log_figure(figure=fig1, figure_name="label_pie_chart")
126
+ with col2:
127
+ fig2 = px.bar(final_df, x="count", y="entity_group", color="entity_group", text_auto=True, title='Occurrences of predicted labels')
128
+ st.subheader("Bar Chart", divider="red")
129
+ st.plotly_chart(fig2)
130
+ if comet_initialized:
131
+ experiment.log_figure(figure=fig2, figure_name="label_bar_chart")
132
+
133
+ dfa = pd.DataFrame(
134
+ data={
135
+ 'word': ['entity extracted from your text data'], 'score': ['accuracy score; how accurately a tag has been assigned to a given entity'], 'entity_group': ['label (tag) assigned to a given extracted entity'],
136
+ 'start': ['index of the start of the corresponding entity'],
137
+ 'end': ['index of the end of the corresponding entity'],
138
+ })
139
+ buf = io.BytesIO()
140
+ with zipfile.ZipFile(buf, "w") as myzip:
141
+ myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
142
+ myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
143
+
144
+ with stylable_container(
145
+ key="download_button",
146
+ css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
147
+ ):
148
+ st.download_button(
149
+ label="Download zip file",
150
+ data=buf.getvalue(),
151
+ file_name="zip file.zip",
152
+ mime="application/zip",
153
+ )
154
  if comet_initialized:
155
+ experiment.log_asset(buf.getvalue(), file_name="downloadable_results.zip")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
+ st.divider()
158
+ if comet_initialized:
159
+ experiment.end()