File size: 8,740 Bytes
43ecbbe 58ef32f 76ff9fc d75f942 76ff9fc 43ecbbe 945f037 43ecbbe d75f942 486f34e d75f942 76ff9fc d75f942 76ff9fc d75f942 76ff9fc d75f942 76ff9fc d75f942 58ef32f 43ecbbe 58ef32f 43ecbbe 58ef32f 0a98e8e 58ef32f 43ecbbe 58ef32f 43ecbbe 58ef32f 465defd b802176 ce9d6b8 b802176 43ecbbe 58ef32f ce9d6b8 465defd 58ef32f 43ecbbe 58ef32f b802176 ce9d6b8 b802176 43ecbbe 58ef32f b802176 58ef32f da2e635 58ef32f 465defd 58ef32f 465defd 58ef32f 43ecbbe 58ef32f da2e635 58ef32f 77b66e4 58ef32f 77b66e4 58ef32f 77b66e4 029fde9 58ef32f 77b66e4 58ef32f 029fde9 58ef32f 77b66e4 58ef32f b802176 58ef32f 77b66e4 76ff9fc 58ef32f 76ff9fc 43ecbbe 58ef32f 21af307 58ef32f 21af307 58ef32f c30652a 029fde9 58ef32f 76ff9fc 43ecbbe 58ef32f b802176 58ef32f 43ecbbe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 |
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import requests
from datetime import datetime
import time
import os
import boto3
### Config
# Browser-tab title, tab icon and page layout for the app.
# The icon must be a real emoji (or shortcode / image); the previously
# mis-encoded text could not be rendered as an icon.
st.set_page_config(
    page_title="Offensive Speech Recognition",
    page_icon="⚠️",
    layout="wide",
)
# AWS session built from credentials exposed as environment variables
# (per the original note, these are provided as Hugging Face secrets).
session = boto3.Session(
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
)

# Handle on the S3 bucket used by save_history() / load_history()
bucket_name = 'llepogam-app-history'
s3 = session.resource("s3")
bucket = s3.Bucket(bucket_name)

# URL of the history object in the bucket.
# NOTE(review): not referenced anywhere else in the visible script.
HISTORY_FILE = "https://llepogam-app-history.s3.eu-north-1.amazonaws.com/history.csv"
def save_history():
    """Upload the current prediction history to S3 as ``history.csv``.

    Reads ``st.session_state.history``, serialises it to CSV (no index
    column) and uploads it to the module-level ``bucket`` under the key
    ``history.csv``.

    Returns:
        None. Any failure is reported to the user with ``st.error`` rather
        than raised.
    """
    import io  # local import: only needed for the in-memory upload buffer

    try:
        history_df = pd.DataFrame(st.session_state.history)
        # Serialise in memory instead of going through a fixed /tmp path:
        # a shared file name can be clobbered or deleted by another session
        # running at the same time, and was left behind if the upload failed.
        csv_bytes = history_df.to_csv(index=False).encode("utf-8")
        bucket.upload_fileobj(io.BytesIO(csv_bytes), "history.csv")
    except Exception as e:
        # UI boundary: surface the problem instead of crashing the page
        st.error(f"Error saving history to S3: {str(e)}")
def load_history():
    """Download ``history.csv`` from S3 and return it as a list of records.

    Returns:
        list[dict]: one dict per CSV row (``DataFrame.to_dict('records')``).
        An empty list is returned when anything goes wrong; the error is
        shown to the user with ``st.error``.
    """
    import io  # local import: only needed for the in-memory download buffer

    try:
        # Download into memory instead of a fixed /tmp path: with a shared
        # file name, one session could remove the file while another was
        # still reading it, producing a spurious empty history.
        buffer = io.BytesIO()
        bucket.download_fileobj("history.csv", buffer)
        buffer.seek(0)  # rewind so pandas reads from the start
        history_df = pd.read_csv(buffer)
        return history_df.to_dict('records')
    except Exception as e:
        # UI boundary: surface the problem and fall back to an empty history
        st.error(f"Error loading history from S3: {str(e)}")
        return []
# Fetch the stored history only when this session has none yet;
# subsequent reruns of the script reuse the value kept in session state.
if 'history' not in st.session_state:
    st.session_state['history'] = load_history()
# Custom CSS
# Injects the style sheet used by the result card rendered further down:
# `.prediction-box` sets the card's padding/rounding, and one of the three
# `*-severity` classes (the values returned by get_severity_class) tints it
# red, orange or green. `unsafe_allow_html=True` is needed so the <style>
# tag is emitted as HTML.
st.markdown("""
<style>
.prediction-box {
padding: 20px;
border-radius: 5px;
margin: 10px 0;
}
.high-severity {
background-color: rgba(255, 0, 0, 0.1);
border: 1px solid red;
}
.medium-severity {
background-color: rgba(255, 165, 0, 0.1);
border: 1px solid orange;
}
.low-severity {
background-color: rgba(0, 255, 0, 0.1);
border: 1px solid green;
}
</style>
""", unsafe_allow_html=True)
def hate_speech_detection(text):
    """Ask the hosted prediction endpoint to classify *text*.

    Returns a ``(result, error)`` pair. On success ``result`` is the decoded
    JSON body and ``error`` is ``None``; on any failure ``result`` is
    ``None`` and ``error`` is a message suitable for display.
    """
    endpoint = "https://llepogam-hate-speech-detection-api.hf.space/predict"
    request_headers = {
        "accept": "application/json",
        "Content-Type": "application/json",
    }
    payload = {"Text": text}

    try:
        api_response = requests.post(
            endpoint,
            headers=request_headers,
            json=payload,
            timeout=200,
        )
        # Convert HTTP error statuses into an exception handled below
        api_response.raise_for_status()
        decoded = api_response.json()
    except requests.exceptions.Timeout:
        # Handled before RequestException so timeouts get their own message
        return None, "API request timed out. Please try again."
    except requests.exceptions.RequestException as exc:
        return None, f"API error: {str(exc)}"
    except Exception as exc:
        return None, f"Unexpected error: {str(exc)}"

    return decoded, None
def get_severity_class(probability):
    """Map a prediction value to the CSS class used for the result card.

    Values strictly above 0.7 are "high-severity", values strictly above
    0.4 are "medium-severity", and everything else is "low-severity".
    """
    # Ordered from the highest cut-off down; the first one exceeded wins
    cutoffs = (
        (0.7, "high-severity"),
        (0.4, "medium-severity"),
    )
    for lower_bound, css_class in cutoffs:
        if probability > lower_bound:
            return css_class
    return "low-severity"
# Header Section
# Page title plus a short description of what the app does.
st.title("🚫 Offensive Speech Detection")
# The blank lines inside the Markdown keep the intro, the "How it works"
# heading and the numbered list as separate blocks when rendered.
st.markdown("""
This application helps identify potentially offensive content in text provided by an user.
It uses a trained neural network to analyze text and determine if it contains offensive speech.

**How it works:**

1. Enter your text in the input box below
2. The model will analyze the content and provide a prediction based on the model
3. Results show both the classification and value predicted by the model
4. The results is saved in the prediction history
""")
# FAQ Section
# Collapsible list of questions and answers about the model.
with st.expander("❓ Frequently Asked Questions"):
    # A blank line separates each question/answer pair; without it Markdown
    # treats the next question as a continuation of the previous bullet.
    st.markdown("""
**Q: What is considered offensive speech?**
- A: The model is using a dataset of tweets, which were tagged as offensive or not. More information on the dataset can be found here : https://huggingface.co/datasets/christophsonntag/OLID

**Q: What type of model it is?**
- A: It is a neural network with an initial preprocessing, a vectorization, an embedding layers and GRU layers

**Q: How is the prediction done?**
- A: The model predicts a value between 1 and 0. The closer it is to 1, the more offensive is the prediction. When the prediction is higher than 0.5, the text is considered as offensive

**Q: How accurate is the detection?**
- A: The model created has an accuracy of 73.1%, which means than prediction are correct almost 3 times out of four. When the targeted values is below 0.3 or higher than 0.7, it means than there is a high level of confidence in the prediction
""")
# Clear button - must come BEFORE the text_area widget, because the value
# stored under the widget's key is reset here before the widget is created
clear_requested = st.button("Clear Input", key="clear_button")
if clear_requested:
    st.session_state["user_input"] = ""

# Text Input Section
max_chars = 500
user_input = st.text_area(
    "Enter text to analyze:",
    key="user_input",
    max_chars=max_chars,
    height=100,
    help="Enter the text you want to analyze for offensive content. Maximum 500 characters.",
)

# Character counter shown under the input box
chars_remaining = max_chars - len(user_input)
st.caption(f"Characters remaining: {chars_remaining}")
# Process input
# Nothing happens for an empty box; whitespace-only input gets a warning;
# anything else is sent to the API, rendered, and appended to the history.
if user_input and not user_input.strip():
    st.warning("Please enter some text to analyze.")
elif user_input:
    with st.spinner("Analyzing text..."):
        result, error = hate_speech_detection(user_input)

    if error:
        st.error(f"Error: {error}")
    else:
        probability = result['probability']
        # CSS class that tints the result card
        severity_class = get_severity_class(probability)
        final_prediction = (
            "Offensive" if result['prediction'] == 'offensive' else "Not Offensive"
        )

        # Result card
        st.markdown(f"""
<div class="prediction-box {severity_class}">
<h3>Analysis Results</h3>
<p><strong>Prediction:</strong> {final_prediction}</p>
<p><strong>Prediction Value:</strong> {probability:.2f}</p>
</div>
""", unsafe_allow_html=True)

        # Gauge over [0, 1] with three coloured bands, drawn with Plotly
        gauge_bands = [
            {'range': [0, 0.3], 'color': "lightgreen"},
            {'range': [0.3, 0.7], 'color': "orange"},
            {'range': [0.7, 1], 'color': "red"},
        ]
        gauge_settings = {
            'axis': {'range': [0, 1]},
            'bar': {'color': "darkblue"},
            'steps': gauge_bands,
        }
        indicator = go.Indicator(
            mode="gauge+number",
            value=probability,
            title={'text': "Confidence Level"},
            number={'valueformat': '.2f'},
            gauge=gauge_settings,
        )
        fig = go.Figure(indicator)
        fig.update_layout(height=300)
        st.plotly_chart(fig, use_container_width=True)

        # Record this analysis and persist the updated history
        history_entry = {
            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'text': user_input,
            'prediction': final_prediction,
            'prediction_value': probability,
        }
        st.session_state.history.append(history_entry)
        save_history()
# History Section
# Shows the 20 most recent analyses (newest first) in a collapsible table.
# NOTE(review): the icon at the start of the expander label looks
# mis-encoded; the intended emoji cannot be determined from this file.
if st.session_state.history:
    with st.expander("π Analysis History"):
        history_df = pd.DataFrame(st.session_state.history)
        newest_first = history_df.sort_values('timestamp', ascending=False)
        history_df_output = newest_first.head(20)

        # Display names for the columns; the value is shown with 2 decimals
        value_column = st.column_config.NumberColumn(
            "Prediction Value",
            format="%.2f",
        )
        column_labels = {
            "timestamp": "Time",
            "text": "Input Text",
            "prediction": "Prediction",
            "prediction_value": value_column,
        }
        st.dataframe(
            history_df_output,
            column_config=column_labels,
            hide_index=True,
        )
# Footer
# Horizontal rule followed by a centred credit line (raw HTML, hence
# unsafe_allow_html=True).
st.markdown("---")
st.markdown("""
<div style='text-align: center'>
<p>Developed with ❤️ by Louis Le Pogam</p>
</div>
""", unsafe_allow_html=True)
|