File size: 6,772 Bytes
3b293de
0dd0885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d33a2b8
0dd0885
009a568
0dd0885
 
d33a2b8
0dd0885
d33a2b8
 
0dd0885
d33a2b8
 
 
 
 
0dd0885
d33a2b8
 
 
 
 
0dd0885
d33a2b8
 
0dd0885
d33a2b8
0dd0885
d33a2b8
0dd0885
d33a2b8
 
 
0dd0885
 
 
ac15b3a
0dd0885
7946909
d33a2b8
0dd0885
 
d33a2b8
0dd0885
 
 
d33a2b8
0dd0885
 
 
 
 
d33a2b8
0dd0885
d33a2b8
0dd0885
 
 
 
 
 
 
d33a2b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0dd0885
ac15b3a
 
 
d33a2b8
 
 
 
 
 
 
 
 
 
 
 
 
 
3b293de
0dd0885
ac15b3a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import streamlit as st
from cryptography.fernet import Fernet
import time
import io
from transformers import pipeline
from streamlit_extras.stylable_container import stylable_container
import json
from PyPDF2 import PdfReader
import docx

# Page heading for the NER demo.
st.subheader("Named Entity Recognition (NER)", divider="red")

# Create a Fernet key only when session state does not hold one yet, so the
# same key is reused on later reruns of this script.
if 'fernet_key' not in st.session_state:
    st.session_state['fernet_key'] = Fernet.generate_key()
key = st.session_state['fernet_key']

# helpers for issuing and validating Fernet tokens
def generate_fernet_token(key, data):
    """Encrypt ``data`` with ``key`` and return the resulting Fernet token.

    Args:
        key: Fernet key used to build the cipher.
        data: Text to protect; it is encoded to bytes before encryption.

    Returns:
        The token produced by ``Fernet.encrypt``.
    """
    return Fernet(key).encrypt(data.encode())

def validate_fernet_token(key, token, ttl_seconds):
    """Decrypt ``token`` with ``key``, rejecting it if older than ``ttl_seconds``.

    Args:
        key: Fernet key the token was encrypted with.
        token: Token to validate, or ``None`` when no token has been issued.
        ttl_seconds: Maximum accepted token age, forwarded to ``Fernet.decrypt``
            as ``ttl``.

    Returns:
        ``(decrypted_text, None)`` on success, otherwise ``(None, message)``
        where ``message`` describes why validation failed.
    """
    # A missing token is not an expired one: report it explicitly instead of
    # letting decrypt() fail on the None and be mislabelled below.
    if token is None:
        return None, "Missing token: no authorization token has been issued."

    fernet = Fernet(key)
    try:
        decrypted_data = fernet.decrypt(token, ttl=ttl_seconds).decode()
        return decrypted_data, None
    except Exception as e:
        # Fernet signals malformed, tampered and too-old tokens with the same
        # InvalidToken error, so the message must not claim expiry specifically.
        return None, f"Invalid or expired token: {e}"

# sidebar: demo badge followed by a collapsible panel of usage notes
with st.sidebar:
    st.button("DEMO APP")
    notes_title = "**Important notes on the Demo Named Entity Recognition (NER) App**"
    usage_notes = st.expander(notes_title)
    # The notes text is user-facing and rendered as written.
    usage_notes.write('''
        **Supported File Formats**
    This app accepts files in .pdf and .docx formats.

        **How to Use**
    Upload your file first. Then, click the 'Results' button.

        **Usage Limits**
    You can request results up to 5 times.

        **Subscription Management**
    This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own Named Entity Recognition (NER) Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app within five business days. If you wish to delete your Account with us, please contact us at [email protected]

        **Authorization**
    For security purposes, your authorization access expires hourly. To restore access, click the "Request Authorization" button.

        **Customization**
    To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.

        **File Handling and Errors**
    The app may display an error message if your file is corrupt, or has other errors.

    For any errors or inquiries, please contact us at [email protected]
    ''')

# Cap on "Results" requests, and the session counter that tracks how many
# have been made so far (initialised to 0 the first time the script runs).
max_attempts = 5
if 'file_upload_attempts_ner' not in st.session_state:
    st.session_state['file_upload_attempts_ner'] = 0

# upload file, extract its text, and run NER on request
@st.cache_resource(show_spinner=False)
def _load_ner_pipeline():
    """Build the NER pipeline once and reuse it across reruns.

    Constructing the pipeline loads the model, which is far too expensive to
    repeat on every "Results" click; ``st.cache_resource`` keeps the instance.
    """
    return pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")


def _extract_pdf_text(pdf_file):
    """Return the text of every page of ``pdf_file`` concatenated together."""
    # `or ""` keeps the join safe should a page yield no text.
    return "".join(page.extract_text() or "" for page in PdfReader(pdf_file).pages)


def _extract_docx_text(docx_file):
    """Return the paragraphs of ``docx_file`` joined by newlines."""
    return "\n".join(para.text for para in docx.Document(docx_file).paragraphs)


upload_file_ner = st.file_uploader("Upload your file. Accepted file formats include: .pdf, .docx", type=['pdf', 'docx'])
text = None
if upload_file_ner is not None:
    file_extension = upload_file_ner.name.split('.')[-1].lower()
    if file_extension == 'pdf':
        try:
            # Assigned only after the whole document was read, so a failure
            # part-way through never leaves partial text to be analysed.
            text = _extract_pdf_text(upload_file_ner)
            st.write("Extracted Text:")
            st.write(text)
        except Exception as e:
            st.error(f"An error occurred while reading PDF: {e}")
    elif file_extension == 'docx':
        try:
            text = _extract_docx_text(upload_file_ner)
            st.write("Extracted Text:")
            st.write(text)
        except Exception as e:
            st.error(f"An error occurred while reading docx: {e}")
    else:
        st.warning("Unsupported file type.")
        st.stop()

    if st.button("Results", key="results_ner"):
        # Enforce the per-session request limit before doing any work.
        if st.session_state['file_upload_attempts_ner'] >= max_attempts:
            st.error(f"You have requested results {max_attempts} times. You have reached your daily request limit.")
            st.stop()
        st.session_state['file_upload_attempts_ner'] += 1
        if text:  # Ensure text is available before processing
            with st.spinner('Processing for Named Entities...'):
                try:
                    pipe_ner = _load_ner_pipeline()
                    ner_results = pipe_ner(text)
                    st.write("**Named Entity Recognition Results**: ")
                    st.write(ner_results)
                except Exception as e:
                    st.error(f"An unexpected error occurred during NER processing: {e}")
        else:
            # Extraction failed or the document contained no text.
            st.warning("Could not extract text from the uploaded file.")

elif 'fernet_token_ner' in st.session_state:
    # No file is uploaded any more: drop any token left from an earlier run.
    del st.session_state['fernet_token_ner']

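# NOTE: The authorization flow below is disabled because it relies on names that are never defined in this file (key_ner, translated_text_ner); it appears to be left over from a translation feature (cf. get_translation_pipeline_en_el).
# Since this app only performs NER, it stays commented out to avoid errors. Before re-enabling it, define those names (or use `key` and the NER output instead) and pass `ttl_seconds=` rather than `ttl=` to validate_fernet_token.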
# decrypted_data_streamlit_ner, error_streamlit_ner = validate_fernet_token(key_ner, st.session_state.fernet_token_ner if 'fernet_token_ner' in st.session_state else None, ttl_seconds=3600)
# if error_streamlit_ner:
#     if 'translated_text_ner' in locals():
#         st.warning("Please press Request Authorization.")
#         if st.button("Request Authorization", key="request_auth_ner"):
#             st.session_state.fernet_token_ner = generate_fernet_token(key_ner, translated_text_ner)
#             st.success("Authorization granted")
#             decrypted_data_streamlit_ner, error_streamlit_ner = validate_fernet_token(key_ner, st.session_state.fernet_token_ner, ttl=3600)
#         if error_streamlit_ner:
#             st.error(f"Your authorization has expired: {error_streamlit_ner}")
#             st.stop()
#         st.divider()

# Footer: show how many of the allowed result requests have been used.
st.divider()
requests_used = st.session_state['file_upload_attempts_ner']
st.write(f"Number of times you requested results: {requests_used}/{max_attempts}")
st.divider()