Spaces:
Sleeping
Sleeping
Delete app.py
Browse files
app.py
DELETED
@@ -1,233 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
os.environ['HF_HOME'] = '/tmp'
|
3 |
-
import time
|
4 |
-
import streamlit as st
|
5 |
-
import pandas as pd
|
6 |
-
import io
|
7 |
-
import plotly.express as px
|
8 |
-
import zipfile
|
9 |
-
import json
|
10 |
-
from cryptography.fernet import Fernet
|
11 |
-
from streamlit_extras.stylable_container import stylable_container
|
12 |
-
from typing import Optional
|
13 |
-
from gliner import GLiNER
|
14 |
-
from comet_ml import Experiment
|
15 |
-
|
16 |
-
# --- Page Configuration and UI Elements ---
# Wide layout plus the browser-tab title for the whole app.
st.set_page_config(page_title="Named Entity Recognition App", layout="wide")

# App title with an orange rule, followed by the author link.
st.subheader("ProductTag", divider="orange")
st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")

# Collapsible usage notes: supported labels, how to run, limits and support contact.
# NOTE(review): leading whitespace inside this string was not recoverable from the
# reviewed copy, and the contact address appears redacted there -- confirm both.
expander = st.expander("**Important notes on the ProductTag**")
with expander:
    st.write("""
**Named Entities:** This ProductTag predicts twenty-four (24) labels: "Product", "Service", "Organization", "Company", "Currency", "City", "Country", "Region", "Market", "Store", "Shop", "Customer_segment", "Demographics", "Target_market", "Market_segment", "Fiscal_period", "Timeframe", "Date", "Campaign", "Advertisement", "Event", "Media_platform", "Media_channel", "Social_media_platform"
Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
**Usage Limits:** You can request results unlimited times for one (1) week.
**Supported Languages:** English
**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
For any errors or inquiries, please contact us at [email protected]
""")
|
32 |
-
|
33 |
-
# HTML snippet visitors can copy to embed the hosted app in their own page.
# NOTE(review): any indentation inside this literal was lost in the reviewed
# copy; it is reproduced exactly as shown there.
code = '''
<iframe
src="https://aiecosystem-producttag.hf.space"
frameborder="0"
width="850"
height="450"
></iframe>
'''

# NOTE(review): everything below is assumed to live in the sidebar; the original
# nesting was not recoverable -- confirm.
with st.sidebar:
    # Promotional header and link to the NER File Builder product page.
    st.subheader("Build your own NER Web App in a minute without writing a single line of code.", divider="orange")
    st.link_button("NER File Builder", "https://nlpblogs.com/shop/named-entity-recognition-ner/ner-file-builder/", type="primary")

    # Two empty text elements act as vertical spacing.
    for _ in range(2):
        st.text("")

    # Embed instructions followed by the copyable snippet.
    st.write("Use the following code to embed the ProductTag web app on your website. Feel free to adjust the width and height values to fit your page.")
    st.code(code, language="html")
|
50 |
-
|
51 |
-
# --- Comet ML Setup ---
# Credentials are read from the environment; a missing variable yields None.
COMET_API_KEY = os.getenv("COMET_API_KEY")
COMET_WORKSPACE = os.getenv("COMET_WORKSPACE")
COMET_PROJECT_NAME = os.getenv("COMET_PROJECT_NAME")

# Experiment logging is enabled only when all three values are set and non-empty.
comet_initialized = all((COMET_API_KEY, COMET_WORKSPACE, COMET_PROJECT_NAME))
if not comet_initialized:
    st.warning("Comet ML not initialized. Check environment variables.")
|
59 |
-
|
60 |
-
# --- Model Loading ---
@st.cache_resource
def load_ner_model():
    """Return the pretrained GLiNER model, cached through ``st.cache_resource``.

    If loading raises for any reason, an error box with the exception text is
    shown and the current script run is halted via ``st.stop()``.
    """
    try:
        ner_model = GLiNER.from_pretrained(
            "knowledgator/gliner-multitask-large-v0.5",
            nested_ner=True,
        )
    except Exception as e:
        st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
        st.stop()
    else:
        return ner_model


model = load_ner_model()
|
71 |
-
|
72 |
-
# --- Label Definitions ---
# Single source of truth: every high-level category and the entity labels it groups.
# NOTE(review): two keys end with a trailing space. They are kept byte-for-byte
# because the category string is copied into the results table (and therefore
# into the exported CSV and chart legends); strip them only as a deliberate
# output change.
category_mapping = {
    "Product & Service Details ": ["Product", "Service", "Organization", "Company"],
    "Financial Details": ["Currency"],
    "Location & Geographic Information": ["City", "Country", "Region", "Market", "Store", "Shop"],
    "Customer & Market Segments ": ["Customer_segment", "Demographics", "Target_market", "Market_segment"],
    "Time-Based Information": ["Fiscal_period", "Timeframe", "Date"],
    "Marketing & Campaign Details": ["Campaign", "Advertisement", "Event"],
    "Digital & Media Information": ["Media_platform", "Media_channel", "Social_media_platform"],
}

# Flat list of labels handed to the model. It is derived from the mapping
# (dicts preserve insertion order) so the two can no longer drift apart; the
# resulting order is identical to the previously hand-written list.
labels = [label for label_list in category_mapping.values() for label in label_list]

# Inverse lookup: label -> category.
reverse_category_mapping = {
    label: category
    for category, label_list in category_mapping.items()
    for label in label_list
}
|
97 |
-
|
98 |
-
# --- Text Input and Clear Button ---
def clear_text():
    """Blank the text area by resetting its session-state entry."""
    st.session_state["my_text_area"] = ""


# The widget key ties the text area to the session-state entry reset above.
text = st.text_area(
    "Type or paste your text below, and then press Ctrl + Enter",
    key="my_text_area",
    height=250,
)

# The callback is passed via on_click rather than called inline.
st.button("Clear text", on_click=clear_text)
st.divider()
|
107 |
-
|
108 |
-
# --- Results Section ---
# NOTE(review): the indentation of this section was not recoverable from the
# reviewed copy. The nesting below is reconstructed from syntax (the
# `else:  # If df is empty` pairing and names that only exist when entities
# were found). Confirm the placement of the timing footer against the original.

# Markdown glossary shown in the expander under the results table.
_GLOSSARY_MARKDOWN = '''
- **text**: ['entity extracted from your text data']
- **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
- **label**: ['label (tag) assigned to a given extracted entity']
- **category**: ['the high-level category for the label']
- **start**: ['index of the start of the corresponding entity']
- **end**: ['index of the end of the corresponding entity']
'''

# Tabular glossary written into the download archive.
_GLOSSARY_ROWS = {
    'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
    'Description': [
        'entity extracted from your text data',
        'label (tag) assigned to a given extracted entity',
        'accuracy score; how accurately a tag has been assigned to a given entity',
        'index of the start of the corresponding entity',
        'index of the end of the corresponding entity',
        'the broader category the entity belongs to',
    ],
}


def _show_entity_table(df):
    """Render the styled entity table and the glossary expander."""
    st.subheader("Extracted Entities", divider="orange")
    st.dataframe(df.style.set_properties(**{"border": "2px solid gray", "color": "blue", "font-size": "16px"}))

    with st.expander("See Glossary of tags"):
        st.write(_GLOSSARY_MARKDOWN)


def _show_category_charts(df):
    """Render the treemap plus per-category pie and bar charts; return the treemap figure."""
    st.subheader("Tree map", divider="orange")
    fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
    fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    st.plotly_chart(fig_treemap)

    # Number of extracted entities per category, shared by the pie and bar charts.
    grouped_counts = df['category'].value_counts().reset_index()
    grouped_counts.columns = ['category', 'count']

    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Pie chart", divider="orange")
        fig_pie = px.pie(grouped_counts, values='count', names='category',
                         hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
        fig_pie.update_traces(textposition='inside', textinfo='percent+label')
        st.plotly_chart(fig_pie)

    with col2:
        st.subheader("Bar chart", divider="orange")
        fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True,
                         title='Occurrences of predicted categories')
        st.plotly_chart(fig_bar)

    return fig_treemap


def _show_repeated_entities(df):
    """Render a table and bar chart of entity texts that occur more than once."""
    st.subheader("Most Frequent Entities", divider="orange")
    word_counts = df['text'].value_counts().reset_index()
    word_counts.columns = ['Entity', 'Count']
    repeating_entities = word_counts[word_counts['Count'] > 1]
    if repeating_entities.empty:
        st.warning("No entities were found that occur more than once.")
        return

    st.dataframe(repeating_entities, use_container_width=True)
    fig_repeating_bar = px.bar(repeating_entities, x='Entity', y='Count', color='Entity')
    fig_repeating_bar.update_layout(xaxis={'categoryorder': 'total descending'})
    st.plotly_chart(fig_repeating_bar)


def _build_results_zip(df):
    """Return the bytes of a zip holding the results CSV and the glossary CSV."""
    dfa = pd.DataFrame(data=_GLOSSARY_ROWS)
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as myzip:
        myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
        myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
    return buf.getvalue()


def _show_download_button(df):
    """Render the yellow download button serving the results archive."""
    with stylable_container(
        key="download_button",
        css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
    ):
        # NOTE(review): the file name says "markettag" although this app is
        # ProductTag -- looks like a leftover from another app; confirm before renaming.
        st.download_button(
            label="Download results and glossary (zip)",
            data=_build_results_zip(df),
            file_name="markettag_results.zip",
            mime="application/zip",
        )


if st.button("Results"):
    start_time = time.time()
    if not text.strip():
        st.warning("Please enter some text to extract entities.")
    else:
        with st.spinner("Extracting entities...", show_time=True):
            entities = model.predict_entities(text, labels)
            df = pd.DataFrame(entities)

            if not df.empty:
                df['category'] = df['label'].map(reverse_category_mapping)

                # Experiment tracking is optional; None means "do not log".
                experiment = None
                if comet_initialized:
                    experiment = Experiment(
                        api_key=COMET_API_KEY,
                        workspace=COMET_WORKSPACE,
                        project_name=COMET_PROJECT_NAME,
                    )

                # try/finally guarantees the experiment is closed even when
                # logging or rendering raises part-way through.
                try:
                    if experiment is not None:
                        experiment.log_parameter("input_text", text)
                        experiment.log_table("predicted_entities", df)

                    _show_entity_table(df)
                    st.divider()

                    fig_treemap = _show_category_charts(df)
                    _show_repeated_entities(df)

                    # Download Section
                    st.divider()
                    _show_download_button(df)

                    if experiment is not None:
                        experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
                finally:
                    if experiment is not None:
                        experiment.end()

            else:  # If df is empty
                st.warning("No entities were found in the provided text.")

        # Timing footer: wall-clock time from the button press to the end of rendering.
        end_time = time.time()
        elapsed_time = end_time - start_time

        st.text("")
        st.text("")
        st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|