|
import streamlit as st |
|
import pandas as pd |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
import numpy as np |
|
import requests |
|
from datetime import datetime |
|
import time |
|
import os |
|
import boto3 |
|
|
|
|
|
|
|
# Configure the page before emitting any other Streamlit output.
st.set_page_config(
    layout="wide",
    page_icon="⚠️",
    page_title="Offensive Speech Recognition",
)
|
|
|
|
|
|
|
|
|
# AWS credentials come from the environment; if the variables are unset the
# values passed here are None (presumably boto3 then falls back to its
# default credential chain — confirm in deployment).
session = boto3.Session(
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)

# S3 bucket used by save_history()/load_history() to persist the analysis
# history between app sessions.
s3 = session.resource("s3")
bucket_name = 'llepogam-app-history'
bucket = s3.Bucket(bucket_name)

# NOTE(review): public HTTPS URL of the history object. It appears unused in
# this file — all reads/writes go through `bucket` above. TODO confirm it is
# not referenced elsewhere before removing.
HISTORY_FILE = "https://llepogam-app-history.s3.eu-north-1.amazonaws.com/history.csv"
|
|
|
|
|
|
|
def save_history():
    """Persist st.session_state.history to S3 as history.csv.

    Serializes the in-session history to a temporary local CSV, uploads it,
    and reports any failure in the UI instead of raising. The temp file is
    removed even when serialization or the upload fails (the original left
    it behind on error).
    """
    tmp_path = "/tmp/temp_history.csv"
    try:
        history_df = pd.DataFrame(st.session_state.history)
        history_df.to_csv(tmp_path, index=False)
        bucket.upload_file(tmp_path, "history.csv")
    except Exception as e:
        # Best-effort persistence: surface the problem but keep the app running.
        st.error(f"Error saving history to S3: {str(e)}")
    finally:
        # Always clean up the scratch file, including on upload failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
|
|
def load_history():
    """Load the persisted analysis history from S3.

    Returns a list of record dicts (one per past analysis), or an empty list
    on any failure. The temp file is removed even when parsing fails (the
    original left it behind if read_csv raised).

    NOTE(review): a missing history.csv (e.g. first ever run) also lands in
    the except branch and is shown to the user as an error — consider
    handling that case silently.
    """
    tmp_path = "/tmp/temp_history.csv"
    try:
        bucket.download_file("history.csv", tmp_path)
        history_df = pd.read_csv(tmp_path)
        return history_df.to_dict('records')
    except Exception as e:
        st.error(f"Error loading history from S3: {str(e)}")
        return []
    finally:
        # Always clean up the scratch file, including on download/parse failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
|
|
# Seed the per-session history once from the persisted S3 copy; later reruns
# reuse the session-state value so S3 is not re-read on every interaction.
if 'history' not in st.session_state:
    st.session_state.history = load_history()
|
|
|
|
|
# Styling for the result card: red border/tint for high severity, orange for
# medium, green for low — the classes returned by get_severity_class().
_SEVERITY_BOX_CSS = """
<style>
.prediction-box {
    padding: 20px;
    border-radius: 5px;
    margin: 10px 0;
}
.high-severity {
    background-color: rgba(255, 0, 0, 0.1);
    border: 1px solid red;
}
.medium-severity {
    background-color: rgba(255, 165, 0, 0.1);
    border: 1px solid orange;
}
.low-severity {
    background-color: rgba(0, 255, 0, 0.1);
    border: 1px solid green;
}
</style>
"""
st.markdown(_SEVERITY_BOX_CSS, unsafe_allow_html=True)
|
|
|
|
|
def hate_speech_detection(text):
    """Send *text* to the hosted detection API.

    Returns a ``(payload, error)`` pair: on success ``payload`` is the
    decoded JSON response and ``error`` is None; on failure ``payload`` is
    None and ``error`` is a user-displayable message.
    """
    endpoint = "https://llepogam-hate-speech-detection-api.hf.space/predict"
    request_headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }

    try:
        api_response = requests.post(
            endpoint,
            headers=request_headers,
            json={"Text": text},
            timeout=200
        )
        api_response.raise_for_status()
        # json() stays inside the try so a malformed body is reported too.
        return api_response.json(), None
    except requests.exceptions.Timeout:
        return None, "API request timed out. Please try again."
    except requests.exceptions.RequestException as e:
        return None, f"API error: {str(e)}"
    except Exception as e:
        return None, f"Unexpected error: {str(e)}"
|
|
|
def get_severity_class(probability):
    """Map a model score to the CSS class used for the result card.

    Strictly above 0.7 is high severity, strictly above 0.4 is medium,
    everything else (including exactly 0.4) is low.
    """
    thresholds = (
        (0.7, "high-severity"),
        (0.4, "medium-severity"),
    )
    for cutoff, css_class in thresholds:
        if probability > cutoff:
            return css_class
    return "low-severity"
|
|
|
|
|
# Page header and user-facing explanation of the workflow.
# Fixes grammar defects in the displayed text ("an user", "The results is").
st.title("🚫 Offensive Speech Detection")
st.markdown("""
This application helps identify potentially offensive content in text provided by a user.

It uses a trained neural network to analyze text and determine if it contains offensive speech.

**How it works:**
1. Enter your text in the input box below
2. The model will analyze the content and provide a prediction
3. Results show both the classification and the value predicted by the model
4. The result is saved in the prediction history
""")
|
|
|
|
|
|
|
# FAQ panel. Fixes user-facing text defects: the score range was stated
# backwards ("between 1 and 0"), plus several grammar errors ("What type of
# model it is?", "means than prediction are correct", "targeted values").
with st.expander("❓ Frequently Asked Questions"):
    st.markdown("""
    **Q: What is considered offensive speech?**
    - A: The model uses a dataset of tweets, which were tagged as offensive or not. More information on the dataset can be found here : https://huggingface.co/datasets/christophsonntag/OLID

    **Q: What type of model is it?**
    - A: It is a neural network with an initial preprocessing step, a vectorization layer, an embedding layer and GRU layers

    **Q: How is the prediction done?**
    - A: The model predicts a value between 0 and 1. The closer it is to 1, the more offensive the text is considered. When the prediction is higher than 0.5, the text is considered as offensive

    **Q: How accurate is the detection?**
    - A: The model has an accuracy of 73.1%, which means that predictions are correct almost three times out of four. When the predicted value is below 0.3 or higher than 0.7, there is a high level of confidence in the prediction
    """)
|
|
|
|
|
|
|
# Clear the text area by resetting its backing session-state value. This
# runs before the text_area widget below is created, so (per Streamlit's
# rerun model) the cleared value takes effect on the same rerun.
if st.button("Clear Input", key="clear_button"):
    st.session_state.user_input = ""
|
|
|
|
|
# Bounded free-text input; `user_input` is consumed by the analysis section
# below, and the caption gives live feedback on the remaining budget.
input_char_limit = 500
user_input = st.text_area(
    "Enter text to analyze:",
    height=100,
    key="user_input",
    help="Enter the text you want to analyze for offensive content. Maximum 500 characters.",
    max_chars=input_char_limit
)

st.caption(f"Characters remaining: {input_char_limit - len(user_input)}")
|
|
|
|
|
|
|
|
|
# Main analysis flow: call the API, render the verdict card and gauge, then
# append the result to the session history and persist it to S3. Note that
# any Streamlit rerun with the same text re-queries the API and appends a
# fresh history row.
if user_input:
    if len(user_input.strip()) == 0:
        st.warning("Please enter some text to analyze.")
    else:
        with st.spinner("Analyzing text..."):
            result, error = hate_speech_detection(user_input)

        if error:
            st.error(f"Error: {error}")
        else:
            probability = result['probability']
            severity_class = get_severity_class(result['probability'])

            # Human-readable label from the API's binary classification.
            final_prediction = (
                "Offensive" if result['prediction'] == 'offensive'
                else "Not Offensive"
            )

            # Colored summary card (style classes injected earlier in the page).
            st.markdown(f"""
            <div class="prediction-box {severity_class}">
                <h3>Analysis Results</h3>
                <p><strong>Prediction:</strong> {final_prediction}</p>
                <p><strong>Prediction Value:</strong> {probability:.2f}</p>
            </div>
            """, unsafe_allow_html=True)

            # Gauge mirroring the confidence bands: green below 0.3,
            # orange in the middle, red above 0.7.
            gauge_fig = go.Figure(go.Indicator(
                mode="gauge+number",
                value=probability,
                title={'text': "Confidence Level"},
                number={'valueformat': '.2f'},
                gauge={
                    'axis': {'range': [0, 1]},
                    'bar': {'color': "darkblue"},
                    'steps': [
                        {'range': [0, 0.3], 'color': "lightgreen"},
                        {'range': [0.3, 0.7], 'color': "orange"},
                        {'range': [0.7, 1], 'color': "red"}
                    ]
                }
            ))
            gauge_fig.update_layout(height=300)
            st.plotly_chart(gauge_fig, use_container_width=True)

            # Record this analysis and persist the whole history to S3.
            st.session_state.history.append({
                'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'text': user_input,
                'prediction': final_prediction,
                'prediction_value': probability
            })
            save_history()
|
|
|
|
|
# Collapsible table of the 20 most recent analyses, newest first.
if st.session_state.history:
    with st.expander("📜 Analysis History"):
        full_history = pd.DataFrame(st.session_state.history)
        recent_history = full_history.sort_values(
            'timestamp', ascending=False
        ).head(20)
        st.dataframe(
            recent_history,
            column_config={
                "timestamp": "Time",
                "text": "Input Text",
                "prediction": "Prediction",
                "prediction_value": st.column_config.NumberColumn(
                    "Prediction Value",
                    format="%.2f"
                )
            },
            hide_index=True
        )
|
|
|
|
|
|
|
|
|
# Page footer.
st.markdown("---")
_FOOTER_HTML = """
<div style='text-align: center'>
    <p>Developed with ❤️ by Louis Le Pogam</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)
|
|
|
|