# Source: translation-languages/src/streamlit_app.py
# Uploaded by nlpblogs -- "Update src/streamlit_app.py" (commit d33a2b8, verified), 6.77 kB
import streamlit as st
from cryptography.fernet import Fernet
import time
import io
from transformers import pipeline
from streamlit_extras.stylable_container import stylable_container
import json
from PyPDF2 import PdfReader
import docx
# Page heading.
st.subheader("Named Entity Recognition (NER)", divider="red")

# Reuse the Fernet key already held in the session state; generate and store
# a new one only when the session does not have one yet. Either way the key
# is exposed as the module-level name `key`.
if 'fernet_key' in st.session_state:
    key = st.session_state.fernet_key
else:
    key = Fernet.generate_key()
    st.session_state.fernet_key = key
# functions for generating and validating Fernet tokens
def generate_fernet_token(key, data):
    """Encrypt the string ``data`` with ``key`` and return the Fernet token.

    Args:
        key: Fernet key used to build the cipher.
        data: Text to protect; it is encoded with ``str.encode()`` before
            being encrypted.

    Returns:
        The token produced by ``Fernet.encrypt``.
    """
    return Fernet(key).encrypt(data.encode())
def validate_fernet_token(key, token, ttl_seconds):
    """Decrypt ``token`` with ``key``, enforcing a maximum age.

    Args:
        key: Fernet key used to build the cipher.
        token: Token to decrypt.
        ttl_seconds: Passed to ``Fernet.decrypt`` as ``ttl``.

    Returns:
        ``(decrypted_text, None)`` on success, or ``(None, message)`` when
        decrypting or decoding raises. Note that *every* failure is reported
        with the "Expired token" prefix, not only expiry.
    """
    # Built outside the try block, so a problem constructing the cipher
    # propagates to the caller instead of being reported as an expired token.
    cipher = Fernet(key)
    try:
        plaintext = cipher.decrypt(token, ttl=ttl_seconds).decode()
    except Exception as exc:
        return None, f"Expired token: {exc}"
    else:
        return plaintext, None
# sidebar
# Markdown shown inside the sidebar expander.
# NOTE(review): the contact address appears as the literal "[email protected]"
# placeholder in this file -- confirm the intended e-mail address.
_NER_DEMO_NOTES = '''
**Supported File Formats**
This app accepts files in .pdf and .docx formats.
**How to Use**
Upload your file first. Then, click the 'Results' button.
**Usage Limits**
You can request results up to 5 times.
**Subscription Management**
This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own Named Entity Recognition (NER) Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app within five business days. If you wish to delete your Account with us, please contact us at [email protected]
**Authorization**
For security purposes, your authorization access expires hourly. To restore access, click the "Request Authorization" button.
**Customization**
To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
**File Handling and Errors**
The app may display an error message if your file is corrupt, or has other errors.
For any errors or inquiries, please contact us at [email protected]
'''

with st.sidebar:
    # The button's return value is not used; it only renders the label.
    st.button("DEMO APP")
    expander = st.expander("**Important notes on the Demo Named Entity Recognition (NER) App**")
    expander.write(_NER_DEMO_NOTES)
# Upper bound on how many times results may be requested in one session.
max_attempts = 5

# Per-session request counter, created at zero when absent. It is incremented
# each time the "Results" button is pressed (not when a file is uploaded).
if 'file_upload_attempts_ner' not in st.session_state:
    st.session_state['file_upload_attempts_ner'] = 0
# upload file
upload_file_ner = st.file_uploader(
    "Upload your file. Accepted file formats include: .pdf, .docx",
    type=['pdf', 'docx'],
)

# `text` stays None unless a whole document was extracted, so the code that
# consumes it can tell "nothing usable" apart from real content.
text = None
if upload_file_ner is not None:
    # Dispatch on the extension of the uploaded file name.
    file_extension = upload_file_ner.name.split('.')[-1].lower()
    if file_extension == 'pdf':
        try:
            pdf_reader = PdfReader(upload_file_ner)
            # Build the full text first and assign it in one step: if any page
            # raises, `text` is left as None rather than holding a partial
            # document that would later be processed as if it were complete.
            # `or ""` is defensive, tolerating a page that yields None.
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
            st.write("Extracted Text:")
            st.write(text)
        except Exception as e:
            st.error(f"An error occurred while reading PDF: {e}")
    elif file_extension == 'docx':
        try:
            doc = docx.Document(upload_file_ner)
            # One line of text per paragraph.
            text = "\n".join(para.text for para in doc.paragraphs)
            st.write("Extracted Text:")
            st.write(text)
        except Exception as e:
            st.error(f"An error occurred while reading docx: {e}")
    else:
        # Any other extension: warn and halt this script run.
        st.warning("Unsupported file type.")
        st.stop()
@st.cache_resource(show_spinner=False)
def _load_ner_pipeline():
    """Build the NER pipeline once and let Streamlit reuse the cached object.

    Without caching, the model is constructed again on every "Results" click.
    The cache's own spinner is disabled because the caller already shows one.
    """
    return pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")  # Using a standard NER pipeline


if st.button("Results", key="results_ner"):
    # Enforce the per-session request limit before doing any work.
    if st.session_state['file_upload_attempts_ner'] >= max_attempts:
        st.error(f"You have requested results {max_attempts} times. You have reached your daily request limit.")
        st.stop()
    # Every click that passes the limit check counts as one request, even if
    # there turns out to be no file or no text to process.
    st.session_state['file_upload_attempts_ner'] += 1
    if upload_file_ner and text:  # Ensure text is available before processing
        with st.spinner('Processing for Named Entities...'):
            try:
                pipe_ner = _load_ner_pipeline()
                ner_results = pipe_ner(text)
                st.write("**Named Entity Recognition Results**: ")
                st.write(ner_results)
                # You can further process and display the results in a more user-friendly way
                # Example of generating a token (you might want to tokenize the NER results)
                # st.session_state.fernet_token_ner = generate_fernet_token(key, json.dumps(ner_results))
                # st.download_button(
                # label="Download NER Results (JSON)",
                # data=json.dumps(ner_results),
                # file_name="ner_results.json",
                # mime="application/json",
                # on_click=None,
                # type="primary",
                # use_container_width=True,
                # disabled=not ner_results
                # )
            except Exception as e:
                st.error(f"An unexpected error occurred during NER processing: {e}")
    elif not upload_file_ner:
        st.warning("Please upload a file first.")
    elif not text:
        st.warning("Could not extract text from the uploaded file.")
# NOTE(review): the original indentation of this branch was lost. It is
# attached to the outer `if st.button(...)` because, as part of the inner
# chain above, it could never be reached. Confirm the intended nesting.
elif 'fernet_token_ner' in st.session_state:
    # Results not requested on this run: drop any stored NER token.
    del st.session_state['fernet_token_ner']
# The following section seems to be related to a translation feature that is not fully defined (get_translation_pipeline_en_el, key_ner).
# Since the app is for NER, I'll comment out this part to avoid errors. If you need translation as well, you'll need to define those elements.
# decrypted_data_streamlit_ner, error_streamlit_ner = validate_fernet_token(key_ner, st.session_state.fernet_token_ner if 'fernet_token_ner' in st.session_state else None, ttl_seconds=3600)
# if error_streamlit_ner:
# if 'translated_text_ner' in locals():
# st.warning("Please press Request Authorization.")
# if st.button("Request Authorization", key="request_auth_ner"):
# st.session_state.fernet_token_ner = generate_fernet_token(key_ner, translated_text_ner)
# st.success("Authorization granted")
# decrypted_data_streamlit_ner, error_streamlit_ner = validate_fernet_token(key_ner, st.session_state.fernet_token_ner, ttl_seconds=3600)
# if error_streamlit_ner:
# st.error(f"Your authorization has expired: {error_streamlit_ner}")
# st.stop()
# st.divider()
# Footer: show how many of the allowed requests this session has used,
# framed by a divider above and below.
st.divider()
requests_used = st.session_state['file_upload_attempts_ner']
st.write(f"Number of times you requested results: {requests_used}/{max_attempts}")
st.divider()