Spaces:

AIEcosystem
/

AcademiaMiner

Sleeping

App Files Files Community

AIEcosystem committed 1 day ago

Commit

19caa3e

verified ·

1 Parent(s): 19b756c

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +346 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,348 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+import os
+os.environ['HF_HOME'] = '/tmp'
+import time
 import streamlit as st
+import pandas as pd
+import io
+import plotly.express as px
+import zipfile
+import json
+from cryptography.fernet import Fernet
+from streamlit_extras.stylable_container import stylable_container
+from typing import Optional
+from gliner import GLiNER
+from comet_ml import Experiment
+from transformers import pipeline
+st.markdown(
+    """
+    <style>
+    /* Main app background with a subtle rainbow gradient */
+    .stApp {
+        background: linear-gradient(135deg, #f0f8ff, #f5f0ff, #fff0f5);
+        color: #000000;
+        font-family: 'Inter', sans-serif;
+    }
+    /* Rainbow gradient for the sidebar */
+    .css-1d36184, .css-1d36184:hover, .css-1d36184:focus {
+        background: linear-gradient(180deg, #FFC0CB, #FFD700, #98FB98, #ADD8E6, #BA55D3);
+        secondary-background-color: #FFC080;
+    }
+    /* Expander background color with a slight transparency */
+    .streamlit-expanderContent {
+        background-color: rgba(255, 255, 255, 0.7);
+        border-radius: 10px;
+    }
+    /* Expander header with a gentle gradient and bold text */
+    .streamlit-expanderHeader {
+        background: linear-gradient(90deg, #FADADD, #FFF9E0, #E0FFF8);
+        border-radius: 10px;
+        font-weight: bold;
+    }
+    /* Text Area with a light background and subtle border */
+    .stTextArea textarea {
+        background-color: #FFF0F5;
+        color: #000000;
+        border: 1px solid #ccc;
+        border-radius: 8px;
+    }
+    /* Button with a solid color and elegant hover effect */
+    .stButton > button {
+        background-color: #FF69B4;
+        color: #FFFFFF;
+        font-weight: bold;
+        border-radius: 12px;
+        transition: all 0.2s ease-in-out;
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+    }
+    .stButton > button:hover {
+        background-color: #FFB6C1;
+        box-shadow: 0 6px 8px rgba(0, 0, 0, 0.15);
+        transform: translateY(-2px);
+    }
+    /* Warning box with a soft orange and rounded corners */
+    .stAlert.st-warning {
+        background-color: #FFDDAA;
+        color: #000000;
+        border-radius: 10px;
+        border-left: 5px solid #FFA500;
+    }
+    /* Success box with a fresh green and rounded corners */
+    .stAlert.st-success {
+        background-color: #D4EDDA;
+        color: #155724;
+        border-radius: 10px;
+        border-left: 5px solid #28A745;
+    }
+    /* Custom CSS to make the title text rainbow-colored */
+    h1 {
+        background: linear-gradient(45deg, #FF69B4, #FFD700, #00FF7F, #00BFFF, #8A2BE2);
+        -webkit-background-clip: text;
+        -webkit-text-fill-color: transparent;
+        font-size: 3em;
+        font-weight: 800;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+st.set_page_config(
+    layout="wide",
+    page_title="English Keyphrase"
+)
+# --- Comet ML Setup ---
+COMET_API_KEY = os.environ.get("COMET_API_KEY")
+COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
+COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
+comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
+if not comet_initialized:
+    st.warning("Comet ML not initialized. Check environment variables.")
+# --- UI Header and Notes ---
+st.subheader("AcademiaMiner", divider="rainbow")
+st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
+expander = st.expander("**Important notes*")
+expander.write('''
+**Named Entities:** This AcademiaMiner extracts keyphrases from English academic and scientific papers.
+Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
+**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
+**Usage Limits:** You can request results unlimited times for one (1) month.
+**Supported Languages:** English
+**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
+For any errors or inquiries, please contact us at [email protected]'''
+)
+with st.sidebar:
+    st.write("Use the following code to embed the AcademiaMiner web app on your website. Feel free to adjust the width and height values to fit your page.")
+    code = '''
+    <iframe
+	src="https://aiecosystem-business-core.hf.space"
+	frameborder="0"
+	width="850"
+	height="450"
+    ></iframe>
+    '''
+    st.code(code, language="html")
+    st.text("")
+    st.text("")
+    st.divider()
+    st.subheader("🚀 Ready to build your own NER Web App?", divider="rainbow")
+    st.link_button("NER Builder", "https://nlpblogs.com", type="primary")
+@st.cache_resource
+def load_ner_model():
+    """Loads the GLiNER model and caches it."""
+    try:
+        return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", nested_ner=True, num_gen_sequences=2, gen_constraints= labels)
+    except Exception as e:
+        st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
+        st.stop()
+model = load_ner_model()
+@st.cache_resource
+def load_ner_model():
+    return pipeline("token-classification",
+                    model="ml6team/keyphrase-extraction-kbir-inspec",
+                    aggregation_strategy="max",
+                    stride=128,
+                    ignore_labels=["O"])
+model = load_ner_model()
+text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
+def clear_text():
+    """Clears the text area."""
+    st.session_state['my_text_area'] = ""
+st.button("Clear text", on_click=clear_text)
+if st.button("Results"):
+    start_time = time.time()
+    if not text.strip():
+        st.warning("Please enter some text to extract entities.")
+    else:
+        with st.spinner("Analyzing text...", show_time=True):
+            entities = model(text_for_ner)
+            data = []
+            if entities:
+                for entity in entities:
+                    if all(k in entity for k in ['word', 'entity_group', 'score', 'start', 'end']):
+                        data.append({
+                                    'word': entity['word'],
+                                    'entity_group': entity['entity_group'],
+                                    'score': entity['score'],
+                                    'start': entity['start'],
+                                    'end': entity['end']
+                                })
+                    else:
+                        st.warning(f"Skipping malformed entity encountered: {entity}. Missing expected keys.")
+                        df = pd.DataFrame(data)
+                    else:
+                        df = pd.DataFrame(columns=['word', 'entity_group', 'score', 'start', 'end'])
+                    if not df.empty:
+                        pattern = r'[^\w\s]'
+                        df['word'] = df['word'].replace(pattern, '', regex=True)
+                        df = df.replace('', 'Unknown')
+                        st.subheader("All Extracted Keyphrases", divider="rainbow")
+                        st.dataframe(df, use_container_width=True)
+                        with st.expander("See Glossary of tags"):
+                            st.write('''
+                            **word**: ['entity extracted from your text data']
+                            **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
+                            **entity_group**: ['label (tag) assigned to a given extracted entity']
+                            **start**: ['index of the start of the corresponding entity']
+                            **end**: ['index of the end of the corresponding entity']
+                            ''')
+                        st.divider()
+                        st.subheader("Most Frequent Keyphrases", divider="rainbow")
+                        word_counts = df['word'].value_counts().reset_index()
+                        word_counts.columns = ['word', 'count']
+                        df_frequent = word_counts.sort_values(by='count', ascending=False).head(15)
+                        if not df_frequent.empty:
+                            tab1, tab2 = st.tabs(["Table", "Chart"])
+                            with tab1:
+                                st.dataframe(df_frequent, use_container_width=True)
+                            with tab2:
+                                fig_frequent_bar = px.bar(
+                                    df_frequent,
+                                    x='count',
+                                    y='word',
+                                    orientation='h',
+                                    title='Top Frequent Keyphrases by Count',
+                                    color='count',
+                                    color_continuous_scale=px.colors.sequential.Viridis
+                                )
+                                fig_frequent_bar.update_layout(yaxis={'categoryorder':'total ascending'})
+                                st.plotly_chart(fig_frequent_bar, use_container_width=True)
+                                if comet_initialized and 'experiment' in locals():
+                                    experiment.log_figure(figure=fig_frequent_bar, figure_name="frequent_keyphrases_bar_chart")
+                        else:
+                            st.info("No keyphrases found with more than one occurrence to display in tabs.")
+                        st.divider()
+                        experiment = None
+                        if comet_initialized:
+                            experiment = Experiment(
+                                api_key=COMET_API_KEY,
+                                workspace=COMET_WORKSPACE,
+                                project_name=COMET_PROJECT_NAME,
+                            )
+                            experiment.log_parameter("input_source_type", source_type)
+                            experiment.log_parameter("input_content_length", len(text_for_ner))
+                            experiment.log_table("predicted_entities", df)
+                        st.subheader("Treemap of All Keyphrases", divider="rainbow")
+                        fig_treemap = px.treemap(
+                            df,
+                            path=[px.Constant("all"), 'entity_group', 'word'],
+                            values='score',
+                            color='word',
+                            color_continuous_scale=px.colors.sequential.Plasma
+                        )
+                        fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
+                        st.plotly_chart(fig_treemap, use_container_width=True)
+                        if comet_initialized and experiment:
+                            experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
+                        # --- Download Section ---
+                        dfa = pd.DataFrame(
+                            data={
+                                'Column Name': ['word', 'entity_group', 'score', 'start', 'end'],
+                                'Description': [
+                                    'entity extracted from your text data',
+                                    'label (tag) assigned to a given extracted entity',
+                                    'accuracy score; how accurately a tag has been assigned to a given entity',
+                                    'index of the start of the corresponding entity',
+                                    'index of the end of the corresponding entity'
+                                ]
+                            }
+                        )
+                        buf = io.BytesIO()
+                        with zipfile.ZipFile(buf, "w") as myzip:
+                            if not df.empty:
+                                myzip.writestr("Summary_of_results.csv", df.to_csv(index=False))
+                                myzip.writestr("Most_frequent_keyphrases.csv", df_frequent.to_csv(index=False))
+                            myzip.writestr("Glossary_of_tags.csv", dfa.to_csv(index=False))
+                        with stylable_container(
+                            key="download_button",
+                            css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
+                        ):
+                            st.download_button(
+                                label="Download zip file",
+                                data=buf.getvalue(),
+                                file_name="nlpblogs_ner_results.zip",
+                                mime="application/zip",
+                            )
+                        st.divider()
+                    else:
+                        st.warning("No entities found to generate visualizations.")
+            else:
+                st.warning("No meaningful text found to process. Please enter a URL, upload a text file, or type/paste text.")
+        except Exception as e:
+            st.error(f"An unexpected error occurred during processing: {e}")
+        finally:
+            if comet_initialized and experiment is not None:
+                try:
+                    experiment.end()
+                except Exception as comet_e:
+                    st.warning(f"Comet ML experiment.end() failed: {comet_e}")
+            if start_time_overall is not None:
+                end_time_overall = time.time()
+                elapsed_time_overall = end_time_overall - start_time_overall
+                st.info(f"Results processed in **{elapsed_time_overall:.2f} seconds**.")
+            st.write(f"Number of times you requested results: **{st.session_state['source_type_attempts']}/{max_attempts}**")
+    else:
+        st.warning("Please enter some text, a URL, or upload a file to analyze.")