File size: 8,740 Bytes
43ecbbe
 
 
 
 
 
58ef32f
 
76ff9fc
d75f942
76ff9fc
43ecbbe
 
 
945f037
43ecbbe
 
 
 
d75f942
 
 
 
486f34e
 
d75f942
 
 
 
 
 
 
 
76ff9fc
 
 
d75f942
76ff9fc
 
d75f942
76ff9fc
 
d75f942
 
 
 
 
 
76ff9fc
d75f942
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58ef32f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43ecbbe
 
58ef32f
43ecbbe
 
 
 
 
 
58ef32f
 
 
 
 
0a98e8e
58ef32f
 
 
 
 
 
 
 
 
43ecbbe
58ef32f
 
 
 
 
 
 
43ecbbe
58ef32f
 
 
465defd
b802176
ce9d6b8
b802176
43ecbbe
58ef32f
 
ce9d6b8
 
465defd
58ef32f
43ecbbe
 
58ef32f
 
 
 
b802176
 
ce9d6b8
 
 
b802176
 
43ecbbe
58ef32f
b802176
58ef32f
 
 
da2e635
 
 
 
 
58ef32f
465defd
58ef32f
 
 
 
465defd
58ef32f
 
43ecbbe
58ef32f
 
 
 
da2e635
58ef32f
 
 
 
 
 
 
 
 
 
 
 
 
77b66e4
58ef32f
 
 
 
77b66e4
 
 
 
 
58ef32f
 
 
77b66e4
029fde9
58ef32f
 
 
 
 
 
77b66e4
58ef32f
029fde9
58ef32f
77b66e4
58ef32f
 
b802176
 
 
58ef32f
 
 
 
 
 
 
 
 
 
77b66e4
76ff9fc
58ef32f
76ff9fc
43ecbbe
58ef32f
 
 
 
21af307
 
 
58ef32f
21af307
58ef32f
 
 
 
c30652a
 
029fde9
58ef32f
 
 
 
 
76ff9fc
43ecbbe
58ef32f
 
 
 
b802176
58ef32f
 
43ecbbe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
import io
import os
import time
from datetime import datetime

import boto3
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st


### Config
# Page-level settings (browser tab title, icon, wide layout). This is the
# first Streamlit command issued by the script.
st.set_page_config(
    page_title="Offensive Speech Recognition",
    page_icon="⚠️",
    layout="wide"
)



# Initialize AWS session with credentials from Hugging Face secrets.
# The keys are read from the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
# environment variables; os.environ.get yields None when one is unset.
# NOTE(review): presumably boto3 then falls back to its default credential
# lookup — confirm for the deployment environment.
session = boto3.Session(
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)

# Initialize S3 resource and a handle on the bucket that stores the
# prediction history; save_history() and load_history() go through `bucket`.
s3 = session.resource("s3")
bucket_name = 'llepogam-app-history'
bucket = s3.Bucket(bucket_name)


# URL of the history object in the bucket.
# NOTE(review): HISTORY_FILE is not referenced anywhere else in the visible
# code (history is read and written via `bucket`) — confirm before removing.
HISTORY_FILE = "https://llepogam-app-history.s3.eu-north-1.amazonaws.com/history.csv"

   

def save_history():
    """Persist the current session's history to S3 as ``history.csv``.

    The list held in ``st.session_state.history`` is converted to a
    DataFrame, serialized to CSV (without the index column) and uploaded to
    the module-level ``bucket`` under the key ``history.csv``.

    The CSV is built in memory instead of going through a fixed ``/tmp``
    path: a shared file name can be overwritten by another session between
    the write and the upload, and it was left on disk whenever the upload
    raised before the cleanup step.

    Returns:
        None. Failures are reported in the UI through ``st.error`` rather
        than raised, so a storage problem does not interrupt the page.
    """
    try:
        history_df = pd.DataFrame(st.session_state.history)
        csv_bytes = history_df.to_csv(index=False).encode("utf-8")
        # upload_fileobj expects a binary file-like object.
        bucket.upload_fileobj(io.BytesIO(csv_bytes), "history.csv")
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        st.error(f"Error saving history to S3: {str(e)}")

def load_history():
    """Load the persisted history from S3.

    Downloads the ``history.csv`` object from the module-level ``bucket``
    into an in-memory buffer and parses it with pandas. No temporary file is
    used, so concurrent sessions cannot clash on a shared path and nothing
    is left on disk when the download or the parsing fails.

    Returns:
        list[dict]: One dict per CSV row (``DataFrame.to_dict('records')``).
        An empty list is returned, and the error is shown with ``st.error``,
        when the object cannot be downloaded or parsed.
    """
    try:
        buffer = io.BytesIO()
        bucket.download_fileobj("history.csv", buffer)
        # Rewind: the download leaves the cursor at the end of the buffer.
        buffer.seek(0)
        history_df = pd.read_csv(buffer)
        return history_df.to_dict('records')
    except Exception as e:
        # UI boundary: fall back to an empty history instead of crashing.
        st.error(f"Error loading history from S3: {str(e)}")
        return []
    
# Fetch the stored history from S3 only when this session does not hold one
# yet; subsequent reruns of the script reuse the list kept in session state.
if "history" not in st.session_state:
    st.session_state["history"] = load_history()

# Stylesheet for the prediction result box and its three severity variants
# (red / orange / green), injected into the page as raw HTML.
_PREDICTION_BOX_CSS = """
    <style>
    .prediction-box {
        padding: 20px;
        border-radius: 5px;
        margin: 10px 0;
    }
    .high-severity {
        background-color: rgba(255, 0, 0, 0.1);
        border: 1px solid red;
    }
    .medium-severity {
        background-color: rgba(255, 165, 0, 0.1);
        border: 1px solid orange;
    }
    .low-severity {
        background-color: rgba(0, 255, 0, 0.1);
        border: 1px solid green;
    }
    </style>
"""
st.markdown(_PREDICTION_BOX_CSS, unsafe_allow_html=True)


def hate_speech_detection(text):
    """Send *text* to the prediction API and return its decoded answer.

    Args:
        text: The text to classify; it is posted as the ``"Text"`` field of
            the JSON request body.

    Returns:
        tuple: ``(result, None)`` on success, where ``result`` is the decoded
        JSON object and is guaranteed to contain the ``"prediction"`` and
        ``"probability"`` keys that the rest of the script reads;
        ``(None, message)`` on failure, where ``message`` is a user-facing
        description of what went wrong. The function never raises.
    """
    url = "https://llepogam-hate-speech-detection-api.hf.space/predict"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(
            url,
            headers=headers,
            json={"Text": text},
            timeout=200
        )
        response.raise_for_status()
        payload = response.json()
    except requests.exceptions.Timeout:
        return None, "API request timed out. Please try again."
    except requests.exceptions.RequestException as e:
        return None, f"API error: {str(e)}"
    except Exception as e:
        return None, f"Unexpected error: {str(e)}"

    # A 2xx answer can still carry an unexpected body; report it as an error
    # here so the caller does not fail on a missing key.
    if not isinstance(payload, dict):
        return None, "API error: unexpected response format."
    missing_keys = [
        key for key in ("prediction", "probability") if key not in payload
    ]
    if missing_keys:
        return None, f"API error: response is missing {', '.join(missing_keys)}."

    return payload, None

def get_severity_class(probability):
    """Map a predicted probability to the CSS class of the result box.

    Returns ``"high-severity"`` for values strictly above 0.7,
    ``"medium-severity"`` for values strictly above 0.4 (up to and including
    0.7), and ``"low-severity"`` for everything else.
    """
    # Ordered from the strictest threshold down; the first one exceeded wins.
    severity_levels = (
        (0.7, "high-severity"),
        (0.4, "medium-severity"),
    )
    for lower_bound, css_class in severity_levels:
        if probability > lower_bound:
            return css_class
    return "low-severity"

# Header Section
# Page title followed by a short Markdown introduction describing what the
# app does and the four steps of a typical interaction.
st.title("🚫 Offensive Speech Detection")
st.markdown("""
This application helps identify potentially offensive content in text provided by an user. 

It uses a trained neural network to analyze text and determine if it contains offensive speech. 


**How it works:**
1. Enter your text in the input box below
2. The model will analyze the content and provide a prediction based on the model
3. Results show both the classification and value predicted by the model
4. The results is saved in the prediction history
""")


# FAQ Section
# Static questions and answers shown inside an expander.
# NOTE(review): the figures quoted in the answers (0.5 decision threshold,
# 73.1% accuracy, 0.3 / 0.7 confidence bounds) are plain text; nothing in
# this script computes them, so they need to be kept in sync with the model
# by hand.
with st.expander("❓ Frequently Asked Questions"):
    st.markdown("""
    **Q: What is considered offensive speech?**
    - A: The model is using a dataset of tweets, which were tagged as offensive or not. More information on the dataset can be found here : https://huggingface.co/datasets/christophsonntag/OLID

    **Q: What type of model it is?**
    - A: It is a neural network with an initial preprocessing, a vectorization, an embedding layers and GRU layers 
                
    **Q: How is the prediction done?**
    - A: The model predicts a value between 1 and 0. The closer it is to 1, the more offensive is the prediction.  When the prediction is higher than 0.5, the text is considered as offensive

    **Q: How accurate is the detection?**
    - A: The model created has an accuracy of 73.1%, which means than prediction are correct almost 3 times out of four. When the targeted values is below 0.3 or higher than 0.7, it means than there is a high level of confidence in the prediction 

    """)


# Clear button. It is handled ahead of the text area on purpose: the stored
# value for the "user_input" key is reset here, before the widget that owns
# that key is created further down.
if st.button("Clear Input", key="clear_button"):
    st.session_state["user_input"] = ""

# Text input, capped at `max_chars` characters.
max_chars = 500
user_input = st.text_area(
    label="Enter text to analyze:",
    key="user_input",
    height=100,
    max_chars=max_chars,
    help=f"Enter the text you want to analyze for offensive content. Maximum {max_chars} characters.",
)

# Live counter of how many characters can still be typed.
st.caption(f"Characters remaining: {max_chars - len(user_input)}")



# Process input
# `user_input` is the current value of the text area. An empty string means
# nothing was entered, so nothing is rendered; whitespace-only input gets a
# warning instead of an API call.
if user_input:
    if not user_input.strip():
        st.warning("Please enter some text to analyze.")
    else:
        # The spinner only wraps the slow part: the remote prediction call.
        with st.spinner("Analyzing text..."):
            result, error = hate_speech_detection(user_input)

        if error:
            st.error(f"Error: {error}")
        else:
            try:
                # Read both fields up front so that a malformed API answer
                # becomes a readable error instead of a crashed page.
                probability = float(result['probability'])
                is_offensive = result['prediction'] == 'offensive'
            except (KeyError, TypeError, ValueError):
                st.error("Error: Unexpected response format from the API.")
            else:
                # The CSS class selects the colour of the result box.
                severity_class = get_severity_class(probability)
                final_prediction = "Offensive" if is_offensive else "Not Offensive"

                st.markdown(f"""
                <div class="prediction-box {severity_class}">
                    <h3>Analysis Results</h3>
                    <p><strong>Prediction:</strong> {final_prediction}</p>
                    <p><strong>Prediction Value:</strong> {probability:.2f}</p>
                </div>
                """, unsafe_allow_html=True)

                # Gauge showing the predicted value on a 0-1 scale with
                # green / orange / red bands.
                fig = go.Figure(go.Indicator(
                    mode="gauge+number",
                    value=probability,
                    title={'text': "Confidence Level"},
                    number={'valueformat': '.2f'},
                    gauge={
                        'axis': {'range': [0, 1]},
                        'bar': {'color': "darkblue"},
                        'steps': [
                            {'range': [0, 0.3], 'color': "lightgreen"},
                            {'range': [0.3, 0.7], 'color': "orange"},
                            {'range': [0.7, 1], 'color': "red"}
                        ]
                    }
                ))
                fig.update_layout(height=300)
                st.plotly_chart(fig, use_container_width=True)

                # Record the analysis in the session history and persist it.
                st.session_state.history.append({
                    'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    'text': user_input,
                    'prediction': final_prediction,
                    'prediction_value': probability
                })
                save_history()

# History Section
# Shows the 20 most recent analyses, newest first. Timestamps are stored as
# "%Y-%m-%d %H:%M:%S" strings, so sorting them as text is chronological.
if st.session_state.history:
    # The label's scroll emoji was stored mis-encoded ("πŸ“œ"); restored.
    with st.expander("📜 Analysis History"):
        history_df = pd.DataFrame(st.session_state.history)
        history_df_output = (history_df
                     .sort_values('timestamp', ascending=False)
                     .head(20))
        st.dataframe(
            history_df_output,
            column_config={
                "timestamp": "Time",
                "text": "Input Text",
                "prediction": "Prediction",
                "prediction_value": st.column_config.NumberColumn(
                    "Prediction Value",
                    format="%.2f"
                )
            },
            hide_index=True
        )
        


# Footer
# Horizontal rule followed by a centered credit line rendered as raw HTML.
# The heart emoji had been stored as a mis-encoded character; restored.
st.markdown("---")
st.markdown("""
    <div style='text-align: center'>
        <p>Developed with ❤️ by Louis Le Pogam</p>
    </div>
""", unsafe_allow_html=True)