AIEcosystem committed on
Commit
19caa3e
·
verified ·
1 Parent(s): 19b756c

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +346 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,348 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ import os
2
+ os.environ['HF_HOME'] = '/tmp'
3
+ import time
4
  import streamlit as st
5
+ import pandas as pd
6
+ import io
7
+ import plotly.express as px
8
+ import zipfile
9
+ import json
10
+ from cryptography.fernet import Fernet
11
+ from streamlit_extras.stylable_container import stylable_container
12
+ from typing import Optional
13
+ from gliner import GLiNER
14
+ from comet_ml import Experiment
15
+ from transformers import pipeline
16
+
17
+
18
+
19
+
20
# st.set_page_config must be the FIRST Streamlit command executed on the page;
# calling it after another st.* call (the CSS injection below, in the original
# ordering) raises StreamlitAPIException. Moved to the top of the page setup.
st.set_page_config(
    layout="wide",
    page_title="English Keyphrase"
)

# App-wide CSS theme, injected once at startup.
st.markdown(
    """
    <style>
    /* Main app background with a subtle rainbow gradient */
    .stApp {
        background: linear-gradient(135deg, #f0f8ff, #f5f0ff, #fff0f5);
        color: #000000;
        font-family: 'Inter', sans-serif;
    }

    /* Rainbow gradient for the sidebar */
    /* NOTE(review): .css-1d36184 is an auto-generated Streamlit class name and
       may break on Streamlit upgrades — confirm against the deployed version. */
    .css-1d36184, .css-1d36184:hover, .css-1d36184:focus {
        background: linear-gradient(180deg, #FFC0CB, #FFD700, #98FB98, #ADD8E6, #BA55D3);
        secondary-background-color: #FFC080;
    }

    /* Expander background color with a slight transparency */
    .streamlit-expanderContent {
        background-color: rgba(255, 255, 255, 0.7);
        border-radius: 10px;
    }

    /* Expander header with a gentle gradient and bold text */
    .streamlit-expanderHeader {
        background: linear-gradient(90deg, #FADADD, #FFF9E0, #E0FFF8);
        border-radius: 10px;
        font-weight: bold;
    }

    /* Text Area with a light background and subtle border */
    .stTextArea textarea {
        background-color: #FFF0F5;
        color: #000000;
        border: 1px solid #ccc;
        border-radius: 8px;
    }

    /* Button with a solid color and elegant hover effect */
    .stButton > button {
        background-color: #FF69B4;
        color: #FFFFFF;
        font-weight: bold;
        border-radius: 12px;
        transition: all 0.2s ease-in-out;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    .stButton > button:hover {
        background-color: #FFB6C1;
        box-shadow: 0 6px 8px rgba(0, 0, 0, 0.15);
        transform: translateY(-2px);
    }

    /* Warning box with a soft orange and rounded corners */
    .stAlert.st-warning {
        background-color: #FFDDAA;
        color: #000000;
        border-radius: 10px;
        border-left: 5px solid #FFA500;
    }

    /* Success box with a fresh green and rounded corners */
    .stAlert.st-success {
        background-color: #D4EDDA;
        color: #155724;
        border-radius: 10px;
        border-left: 5px solid #28A745;
    }

    /* Custom CSS to make the title text rainbow-colored */
    h1 {
        background: linear-gradient(45deg, #FF69B4, #FFD700, #00FF7F, #00BFFF, #8A2BE2);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        font-size: 3em;
        font-weight: 800;
    }

    </style>
    """,
    unsafe_allow_html=True
)
107
+
108
+
109
+
110
# --- Comet ML Setup ---
# Experiment tracking is optional: it is considered enabled only when all
# three Comet credentials are present (non-empty) in the environment.
COMET_API_KEY = os.environ.get("COMET_API_KEY")
COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
comet_initialized = all((COMET_API_KEY, COMET_WORKSPACE, COMET_PROJECT_NAME))

if not comet_initialized:
    # Non-fatal: the app still runs, it just skips experiment logging.
    st.warning("Comet ML not initialized. Check environment variables.")
118
+
119
+
120
+
121
+
122
+
123
# --- UI Header and Notes ---
st.subheader("AcademiaMiner", divider="rainbow")
st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")

# Fixed unbalanced markdown: the original label "**Important notes*" was
# missing the closing asterisk, so the bold marker rendered literally.
expander = st.expander("**Important notes**")
expander.write('''
**Named Entities:** This AcademiaMiner extracts keyphrases from English academic and scientific papers.

Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.

**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.

**Usage Limits:** You can request results unlimited times for one (1) month.

**Supported Languages:** English

**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.

For any errors or inquiries, please contact us at [email protected]'''
)
143
+
144
+
145
+
146
# Sidebar: embed snippet for third-party sites plus a promo link.
with st.sidebar:
    st.write("Use the following code to embed the AcademiaMiner web app on your website. Feel free to adjust the width and height values to fit your page.")
    embed_snippet = '''
    <iframe
        src="https://aiecosystem-business-core.hf.space"
        frameborder="0"
        width="850"
        height="450"
    ></iframe>
    '''
    st.code(embed_snippet, language="html")
    # Two empty st.text calls act as vertical spacing.
    st.text("")
    st.text("")
    st.divider()
    st.subheader("🚀 Ready to build your own NER Web App?", divider="rainbow")
    st.link_button("NER Builder", "https://nlpblogs.com", type="primary")
162
+
163
+
164
@st.cache_resource
def load_ner_model():
    """Load and cache the keyphrase-extraction token-classification pipeline.

    The previous revision defined TWO functions named ``load_ner_model``: the
    first tried to load a GLiNER model and referenced an undefined ``labels``
    variable (a guaranteed NameError that its own except clause turned into
    ``st.stop()``, halting the app), and was then shadowed by this pipeline
    loader anyway. Only the working loader is kept; error handling from the
    first loader is preserved so a download/network failure still stops the
    app with a readable message instead of a traceback.

    Returns:
        transformers.Pipeline: a token-classification pipeline that merges
        sub-tokens with ``aggregation_strategy="max"`` and drops the "O"
        (non-entity) label.
    """
    try:
        return pipeline(
            "token-classification",
            model="ml6team/keyphrase-extraction-kbir-inspec",
            aggregation_strategy="max",
            stride=128,  # overlap window for texts longer than the model limit
            ignore_labels=["O"],
        )
    except Exception as e:
        st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
        st.stop()


model = load_ner_model()
184
+
185
+
186
+
187
# Free-text input; Ctrl+Enter commits the widget value into session state
# under the key 'my_text_area'.
text = st.text_area(
    "Type or paste your text below, and then press Ctrl + Enter",
    height=250,
    key='my_text_area',
)


def clear_text():
    """Reset the text-area widget to an empty string (button callback)."""
    st.session_state['my_text_area'] = ""


st.button("Clear text", on_click=clear_text)
194
+
195
+
196
# --- Results handler ---
# Reconstructed from a broken multi-source template. Defects fixed here:
#   * `text_for_ner`, `source_type`, `start_time_overall`, `max_attempts` and
#     st.session_state['source_type_attempts'] were all used but never
#     defined (NameError at runtime) — now defined locally.
#   * `except`/`finally` clauses had no matching `try:` (SyntaxError).
#   * The Comet `experiment` was created AFTER the first
#     `experiment.log_figure` call site; it is now created before any logging.
#   * Leftover "URL / upload a file" branches from the template are removed —
#     this app only accepts typed/pasted text.
if st.button("Results"):
    start_time_overall = time.time()
    experiment = None  # defined up-front so the finally block can test it safely

    # Session-scoped usage counter shown in the summary line.
    if 'source_type_attempts' not in st.session_state:
        st.session_state['source_type_attempts'] = 0
    # NOTE(review): no quota is defined anywhere in the source ("unlimited"
    # per the notes) — 100 is a display placeholder, confirm intended limit.
    max_attempts = 100

    if not text.strip():
        st.warning("Please enter some text to extract entities.")
    else:
        st.session_state['source_type_attempts'] += 1
        source_type = "typed_text"  # only input mode this app supports
        text_for_ner = text
        try:
            with st.spinner("Analyzing text...", show_time=True):
                entities = model(text_for_ner)

                # Normalize pipeline output into a flat record list, skipping
                # (with a visible warning) any entity missing expected keys.
                data = []
                if entities:
                    for entity in entities:
                        if all(k in entity for k in ['word', 'entity_group', 'score', 'start', 'end']):
                            data.append({
                                'word': entity['word'],
                                'entity_group': entity['entity_group'],
                                'score': entity['score'],
                                'start': entity['start'],
                                'end': entity['end']
                            })
                        else:
                            st.warning(f"Skipping malformed entity encountered: {entity}. Missing expected keys.")
                    df = pd.DataFrame(data)
                else:
                    df = pd.DataFrame(columns=['word', 'entity_group', 'score', 'start', 'end'])

                if not df.empty:
                    # Strip punctuation from extracted words; rows reduced to
                    # the empty string become 'Unknown'.
                    pattern = r'[^\w\s]'
                    df['word'] = df['word'].replace(pattern, '', regex=True)
                    df = df.replace('', 'Unknown')

                    # Create the Comet experiment BEFORE any log_* call below.
                    if comet_initialized:
                        experiment = Experiment(
                            api_key=COMET_API_KEY,
                            workspace=COMET_WORKSPACE,
                            project_name=COMET_PROJECT_NAME,
                        )
                        experiment.log_parameter("input_source_type", source_type)
                        experiment.log_parameter("input_content_length", len(text_for_ner))
                        experiment.log_table("predicted_entities", df)

                    st.subheader("All Extracted Keyphrases", divider="rainbow")
                    st.dataframe(df, use_container_width=True)

                    with st.expander("See Glossary of tags"):
                        st.write('''
                        **word**: ['entity extracted from your text data']

                        **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']

                        **entity_group**: ['label (tag) assigned to a given extracted entity']

                        **start**: ['index of the start of the corresponding entity']

                        **end**: ['index of the end of the corresponding entity']

                        ''')
                    st.divider()

                    st.subheader("Most Frequent Keyphrases", divider="rainbow")
                    word_counts = df['word'].value_counts().reset_index()
                    word_counts.columns = ['word', 'count']
                    df_frequent = word_counts.sort_values(by='count', ascending=False).head(15)

                    if not df_frequent.empty:
                        tab1, tab2 = st.tabs(["Table", "Chart"])

                        with tab1:
                            st.dataframe(df_frequent, use_container_width=True)

                        with tab2:
                            fig_frequent_bar = px.bar(
                                df_frequent,
                                x='count',
                                y='word',
                                orientation='h',
                                title='Top Frequent Keyphrases by Count',
                                color='count',
                                color_continuous_scale=px.colors.sequential.Viridis
                            )
                            fig_frequent_bar.update_layout(yaxis={'categoryorder': 'total ascending'})
                            st.plotly_chart(fig_frequent_bar, use_container_width=True)

                            if comet_initialized and experiment is not None:
                                experiment.log_figure(figure=fig_frequent_bar, figure_name="frequent_keyphrases_bar_chart")
                    else:
                        st.info("No keyphrases found with more than one occurrence to display in tabs.")

                    st.divider()

                    st.subheader("Treemap of All Keyphrases", divider="rainbow")
                    fig_treemap = px.treemap(
                        df,
                        path=[px.Constant("all"), 'entity_group', 'word'],
                        values='score',
                        color='word',
                        color_continuous_scale=px.colors.sequential.Plasma
                    )
                    fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
                    st.plotly_chart(fig_treemap, use_container_width=True)

                    if comet_initialized and experiment is not None:
                        experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")

                    # --- Download Section ---
                    dfa = pd.DataFrame(
                        data={
                            'Column Name': ['word', 'entity_group', 'score', 'start', 'end'],
                            'Description': [
                                'entity extracted from your text data',
                                'label (tag) assigned to a given extracted entity',
                                'accuracy score; how accurately a tag has been assigned to a given entity',
                                'index of the start of the corresponding entity',
                                'index of the end of the corresponding entity'
                            ]
                        }
                    )
                    # Bundle results + glossary into a single in-memory zip.
                    buf = io.BytesIO()
                    with zipfile.ZipFile(buf, "w") as myzip:
                        myzip.writestr("Summary_of_results.csv", df.to_csv(index=False))
                        myzip.writestr("Most_frequent_keyphrases.csv", df_frequent.to_csv(index=False))
                        myzip.writestr("Glossary_of_tags.csv", dfa.to_csv(index=False))

                    with stylable_container(
                        key="download_button",
                        css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
                    ):
                        st.download_button(
                            label="Download zip file",
                            data=buf.getvalue(),
                            file_name="nlpblogs_ner_results.zip",
                            mime="application/zip",
                        )
                    st.divider()
                else:
                    st.warning("No entities found to generate visualizations.")
        except Exception as e:
            st.error(f"An unexpected error occurred during processing: {e}")
        finally:
            # Always close the Comet experiment and report timing, even when
            # processing failed part-way through.
            if comet_initialized and experiment is not None:
                try:
                    experiment.end()
                except Exception as comet_e:
                    st.warning(f"Comet ML experiment.end() failed: {comet_e}")
            elapsed_time_overall = time.time() - start_time_overall
            st.info(f"Results processed in **{elapsed_time_overall:.2f} seconds**.")
            st.write(f"Number of times you requested results: **{st.session_state['source_type_attempts']}/{max_attempts}**")