Spaces:

AIEcosystem
/

AcademiaMiner

Sleeping

App Files Files Community

AcademiaMiner / src /streamlit_app.py

AIEcosystem

Update src/streamlit_app.py

19caa3e verified 6 days ago

raw

history blame

14.2 kB

	import os
	os.environ['HF_HOME'] = '/tmp'
	import time
	import streamlit as st
	import pandas as pd
	import io
	import plotly.express as px
	import zipfile
	import json
	from cryptography.fernet import Fernet
	from streamlit_extras.stylable_container import stylable_container
	from typing import Optional
	from gliner import GLiNER
	from comet_ml import Experiment
	from transformers import pipeline




	# Page configuration is issued before any other Streamlit call: Streamlit
	# documents set_page_config as the first Streamlit command of a script, and
	# the original order (CSS injection first) violated that.
	st.set_page_config(
	    layout="wide",
	    page_title="English Keyphrase",
	)

	# Custom theme for the whole app (background, sidebar, expanders, text area,
	# buttons, alert boxes and the rainbow <h1>). The text is injected verbatim
	# as a <style> element, so it is kept exactly as authored.
	_CUSTOM_CSS = """
	<style>
	/* Main app background with a subtle rainbow gradient */
	.stApp {
	background: linear-gradient(135deg, #f0f8ff, #f5f0ff, #fff0f5);
	color: #000000;
	font-family: 'Inter', sans-serif;
	}

	/* Rainbow gradient for the sidebar */
	.css-1d36184, .css-1d36184:hover, .css-1d36184:focus {
	background: linear-gradient(180deg, #FFC0CB, #FFD700, #98FB98, #ADD8E6, #BA55D3);
	secondary-background-color: #FFC080;
	}

	/* Expander background color with a slight transparency */
	.streamlit-expanderContent {
	background-color: rgba(255, 255, 255, 0.7);
	border-radius: 10px;
	}

	/* Expander header with a gentle gradient and bold text */
	.streamlit-expanderHeader {
	background: linear-gradient(90deg, #FADADD, #FFF9E0, #E0FFF8);
	border-radius: 10px;
	font-weight: bold;
	}

	/* Text Area with a light background and subtle border */
	.stTextArea textarea {
	background-color: #FFF0F5;
	color: #000000;
	border: 1px solid #ccc;
	border-radius: 8px;
	}

	/* Button with a solid color and elegant hover effect */
	.stButton > button {
	background-color: #FF69B4;
	color: #FFFFFF;
	font-weight: bold;
	border-radius: 12px;
	transition: all 0.2s ease-in-out;
	box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
	}
	.stButton > button:hover {
	background-color: #FFB6C1;
	box-shadow: 0 6px 8px rgba(0, 0, 0, 0.15);
	transform: translateY(-2px);
	}

	/* Warning box with a soft orange and rounded corners */
	.stAlert.st-warning {
	background-color: #FFDDAA;
	color: #000000;
	border-radius: 10px;
	border-left: 5px solid #FFA500;
	}

	/* Success box with a fresh green and rounded corners */
	.stAlert.st-success {
	background-color: #D4EDDA;
	color: #155724;
	border-radius: 10px;
	border-left: 5px solid #28A745;
	}

	/* Custom CSS to make the title text rainbow-colored */
	h1 {
	background: linear-gradient(45deg, #FF69B4, #FFD700, #00FF7F, #00BFFF, #8A2BE2);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-size: 3em;
	font-weight: 800;
	}

	</style>
	"""

	# unsafe_allow_html is required for the raw <style> tag to be rendered
	# instead of escaped.
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)



	# --- Comet ML Setup ---
	# The three Comet settings are read from the environment in one pass;
	# experiment logging is considered available only when all of them are set
	# to a non-empty value.
	COMET_API_KEY, COMET_WORKSPACE, COMET_PROJECT_NAME = (
	    os.environ.get(var_name)
	    for var_name in ("COMET_API_KEY", "COMET_WORKSPACE", "COMET_PROJECT_NAME")
	)
	comet_initialized = all((COMET_API_KEY, COMET_WORKSPACE, COMET_PROJECT_NAME))

	# Surface the missing configuration in the UI rather than failing later.
	if not comet_initialized:
	    st.warning("Comet ML not initialized. Check environment variables.")





	# --- UI Header and Notes ---
	# App title with a rainbow divider, followed by the author link.
	st.subheader("AcademiaMiner", divider="rainbow")
	st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")

	# Collapsible panel holding the usage notes; the text is written into the
	# expander container.
	expander = st.expander("*Important notes")
	with expander:
	    st.write('''
	Named Entities: This AcademiaMiner extracts keyphrases from English academic and scientific papers.

	Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.

	How to Use: Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.

	Usage Limits: You can request results unlimited times for one (1) month.

	Supported Languages: English

	Technical issues: If your connection times out, please refresh the page or reopen the app's URL.

	For any errors or inquiries, please contact us at [email protected]'''
	    )



	# HTML snippet offered to users who want to embed the app in their own page.
	# NOTE(review): the src URL names a "business-core" Space rather than
	# AcademiaMiner — confirm this is the intended embed target.
	code = '''
	<iframe
	src="https://aiecosystem-business-core.hf.space"
	frameborder="0"
	width="850"
	height="450"
	></iframe>
	'''

	# Sidebar: embed instructions + snippet, spacing, then a promo link.
	with st.sidebar:
	    st.write("Use the following code to embed the AcademiaMiner web app on your website. Feel free to adjust the width and height values to fit your page.")
	    st.code(code, language="html")
	    # Two empty text elements act as vertical spacing before the divider.
	    for _ in range(2):
	        st.text("")
	    st.divider()
	    st.subheader("🚀 Ready to build your own NER Web App?", divider="rainbow")
	    st.link_button("NER Builder", "https://nlpblogs.com", type="primary")


	@st.cache_resource
	def load_ner_model():
	    """Build the keyphrase-extraction pipeline; cached via st.cache_resource.

	    Returns:
	        A transformers "token-classification" pipeline for the
	        ml6team/keyphrase-extraction-kbir-inspec model, configured with
	        aggregation_strategy="max", stride=128 and the "O" label ignored.

	    Note:
	        The file previously defined this function twice. The first (GLiNER)
	        definition referenced an undefined name ``labels``; the resulting
	        NameError was caught by its broad ``except`` and ended in
	        ``st.stop()``, so the script halted before this loader was ever
	        reached. Only the pipeline loader — whose output keys ('word',
	        'entity_group', 'score', 'start', 'end') are the ones the results
	        code reads — is kept.
	    """
	    return pipeline(
	        "token-classification",
	        model="ml6team/keyphrase-extraction-kbir-inspec",
	        aggregation_strategy="max",
	        stride=128,
	        ignore_labels=["O"],
	    )


	# Load errors are reported at the call site so the cached function itself
	# stays free of UI calls; without a model nothing else in the script is
	# usable, so the run is stopped.
	try:
	    model = load_ner_model()
	except Exception as e:
	    st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
	    st.stop()



	def clear_text():
	    """Blank the text area by resetting its session-state entry."""
	    st.session_state["my_text_area"] = ""


	# Main input widget; its value is stored in session state under the key
	# 'my_text_area', which is what the clear callback overwrites.
	text = st.text_area(
	    "Type or paste your text below, and then press Ctrl + Enter",
	    height=250,
	    key="my_text_area",
	)

	# The reset runs as an on_click callback of the button.
	st.button("Clear text", on_click=clear_text)


	def _entities_to_frame(entities):
	    """Turn pipeline output into a DataFrame plus the list of rejected items.

	    Args:
	        entities: iterable of dict-like entity records (or None/empty).

	    Returns:
	        (df, skipped): ``df`` has the columns word, entity_group, score,
	        start, end and one row per record that carries all five keys;
	        ``skipped`` holds the records that were missing at least one key.
	    """
	    columns = ['word', 'entity_group', 'score', 'start', 'end']
	    rows, skipped = [], []
	    for entity in entities or []:
	        if all(key in entity for key in columns):
	            rows.append({key: entity[key] for key in columns})
	        else:
	            skipped.append(entity)
	    return pd.DataFrame(rows, columns=columns), skipped


	def _build_results_zip(df, df_frequent, glossary):
	    """Pack the three result tables as CSV files into an in-memory zip.

	    Returns:
	        The zip archive as bytes.
	    """
	    buf = io.BytesIO()
	    with zipfile.ZipFile(buf, "w") as archive:
	        archive.writestr("Summary_of_results.csv", df.to_csv(index=False))
	        archive.writestr("Most_frequent_keyphrases.csv", df_frequent.to_csv(index=False))
	        archive.writestr("Glossary_of_tags.csv", glossary.to_csv(index=False))
	    return buf.getvalue()


	# --- Results ---
	# Runs keyphrase extraction on the text-area content and renders the tables,
	# charts and download. The previous version of this section could not run:
	# it had `except`/`finally` clauses without a `try`, dangling `else`
	# branches, and read names that are never defined in this script
	# (text_for_ner, source_type, start_time_overall, max_attempts) as well as
	# `experiment` before it was created.
	if st.button("Results"):
	    start_time = time.time()
	    if not text.strip():
	        st.warning("Please enter some text to extract entities.")
	    else:
	        # Per-session request counter. The key was read but never written
	        # before; it is initialised and incremented here. No maximum is
	        # shown because the script defines no limit.
	        attempts = st.session_state.get('source_type_attempts', 0) + 1
	        st.session_state['source_type_attempts'] = attempts

	        experiment = None
	        try:
	            with st.spinner("Analyzing text...", show_time=True):
	                entities = model(text)

	            df, skipped = _entities_to_frame(entities)
	            for bad_entity in skipped:
	                st.warning(f"Skipping malformed entity encountered: {bad_entity}. Missing expected keys.")

	            if df.empty:
	                st.warning("No entities found to generate visualizations.")
	            else:
	                # Strip punctuation from the extracted phrases; a phrase that
	                # becomes empty is shown as 'Unknown'.
	                df['word'] = df['word'].replace(r'[^\w\s]', '', regex=True)
	                df = df.replace('', 'Unknown')

	                # The experiment is created before any chart is drawn so that
	                # every figure below can be logged to it.
	                if comet_initialized:
	                    experiment = Experiment(
	                        api_key=COMET_API_KEY,
	                        workspace=COMET_WORKSPACE,
	                        project_name=COMET_PROJECT_NAME,
	                    )
	                    # The text area is the only input this script offers.
	                    experiment.log_parameter("input_source_type", "text_area")
	                    experiment.log_parameter("input_content_length", len(text))
	                    # NOTE(review): Comet's log_table documents a filename with
	                    # an extension such as .csv — confirm against the installed
	                    # comet_ml version.
	                    experiment.log_table("predicted_entities.csv", df)

	                st.subheader("All Extracted Keyphrases", divider="rainbow")
	                st.dataframe(df, use_container_width=True)

	                with st.expander("See Glossary of tags"):
	                    st.write('''
	word: ['entity extracted from your text data']

	score: ['accuracy score; how accurately a tag has been assigned to a given entity']

	entity_group: ['label (tag) assigned to a given extracted entity']

	start: ['index of the start of the corresponding entity']

	end: ['index of the end of the corresponding entity']

	''')
	                st.divider()

	                st.subheader("Most Frequent Keyphrases", divider="rainbow")
	                # value_counts already orders by descending count; keep the
	                # top 15. df is non-empty here, so this table is never empty
	                # and the old "no keyphrases" branch was unreachable.
	                df_frequent = (
	                    df['word']
	                    .value_counts()
	                    .rename_axis('word')
	                    .reset_index(name='count')
	                    .head(15)
	                )

	                tab1, tab2 = st.tabs(["Table", "Chart"])
	                with tab1:
	                    st.dataframe(df_frequent, use_container_width=True)
	                with tab2:
	                    fig_frequent_bar = px.bar(
	                        df_frequent,
	                        x='count',
	                        y='word',
	                        orientation='h',
	                        title='Top Frequent Keyphrases by Count',
	                        color='count',
	                        color_continuous_scale=px.colors.sequential.Viridis,
	                    )
	                    fig_frequent_bar.update_layout(yaxis={'categoryorder': 'total ascending'})
	                    st.plotly_chart(fig_frequent_bar, use_container_width=True)
	                if experiment is not None:
	                    experiment.log_figure(figure=fig_frequent_bar, figure_name="frequent_keyphrases_bar_chart")

	                st.divider()

	                st.subheader("Treemap of All Keyphrases", divider="rainbow")
	                fig_treemap = px.treemap(
	                    df,
	                    path=[px.Constant("all"), 'entity_group', 'word'],
	                    values='score',
	                    color='word',
	                    color_continuous_scale=px.colors.sequential.Plasma,
	                )
	                fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
	                st.plotly_chart(fig_treemap, use_container_width=True)
	                if experiment is not None:
	                    experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")

	                # --- Download Section ---
	                dfa = pd.DataFrame(
	                    data={
	                        'Column Name': ['word', 'entity_group', 'score', 'start', 'end'],
	                        'Description': [
	                            'entity extracted from your text data',
	                            'label (tag) assigned to a given extracted entity',
	                            'accuracy score; how accurately a tag has been assigned to a given entity',
	                            'index of the start of the corresponding entity',
	                            'index of the end of the corresponding entity',
	                        ],
	                    }
	                )
	                zip_bytes = _build_results_zip(df, df_frequent, dfa)

	                with stylable_container(
	                    key="download_button",
	                    css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
	                ):
	                    st.download_button(
	                        label="Download zip file",
	                        data=zip_bytes,
	                        file_name="nlpblogs_ner_results.zip",
	                        mime="application/zip",
	                    )
	                st.divider()
	        except Exception as e:
	            # Top-level boundary of the request: report the failure in the UI
	            # instead of letting the script crash.
	            st.error(f"An unexpected error occurred during processing: {e}")
	        finally:
	            # Always close the Comet experiment, even after a failure.
	            if experiment is not None:
	                try:
	                    experiment.end()
	                except Exception as comet_e:
	                    st.warning(f"Comet ML experiment.end() failed: {comet_e}")
	            elapsed_time = time.time() - start_time
	            st.info(f"Results processed in {elapsed_time:.2f} seconds.")
	            st.write(f"Number of times you requested results: {attempts}")