Spaces:

llepogam
/

hate-speech-recognition-app

Sleeping

File size: 8,740 Bytes

43ecbbe
 
 
 
 
 
58ef32f
 
76ff9fc
d75f942
76ff9fc
43ecbbe
 
 
945f037
43ecbbe
 
 
 
d75f942
 
 
 
486f34e
 
d75f942
 
 
 
 
 
 
 
76ff9fc
 
 
d75f942
76ff9fc
 
d75f942
76ff9fc
 
d75f942
 
 
 
 
 
76ff9fc
d75f942
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58ef32f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43ecbbe
 
58ef32f
43ecbbe
 
 
 
 
 
58ef32f
 
 
 
 
0a98e8e
58ef32f
 
 
 
 
 
 
 
 
43ecbbe
58ef32f
 
 
 
 
 
 
43ecbbe
58ef32f
 
 
465defd
b802176
ce9d6b8
b802176
43ecbbe
58ef32f
 
ce9d6b8
 
465defd
58ef32f
43ecbbe
 
58ef32f
 
 
 
b802176
 
ce9d6b8
 
 
b802176
 
43ecbbe
58ef32f
b802176
58ef32f
 
 
da2e635
 
 
 
 
58ef32f
465defd
58ef32f
 
 
 
465defd
58ef32f
 
43ecbbe
58ef32f
 
 
 
da2e635
58ef32f
 
 
 
 
 
 
 
 
 
 
 
 
77b66e4
58ef32f
 
 
 
77b66e4
 
 
 
 
58ef32f
 
 
77b66e4
029fde9
58ef32f
 
 
 
 
 
77b66e4
58ef32f
029fde9
58ef32f
77b66e4
58ef32f
 
b802176
 
 
58ef32f
 
 
 
 
 
 
 
 
 
77b66e4
76ff9fc
58ef32f
76ff9fc
43ecbbe
58ef32f
 
 
 
21af307
 
 
58ef32f
21af307
58ef32f
 
 
 
c30652a
 
029fde9
58ef32f
 
 
 
 
76ff9fc
43ecbbe
58ef32f
 
 
 
b802176
58ef32f
 
43ecbbe

import io
import os
import time
from datetime import datetime

import boto3
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st


### Config
# Page title, icon and layout handed to Streamlit's page configuration.
_PAGE_SETTINGS = {
    "page_title": "Offensive Speech Recognition",
    "page_icon": "⚠️",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)



# Build the boto3 session from the AWS credentials exposed as environment
# variables (provided through Hugging Face secrets).
session = boto3.Session(
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
)

# Handle on the S3 bucket that stores the prediction history
bucket_name = 'llepogam-app-history'
s3 = session.resource("s3")
bucket = s3.Bucket(bucket_name)


# URL of the history object in the bucket.
# NOTE(review): not referenced anywhere in the visible code; reads and writes
# go through `bucket` with the key "history.csv".
HISTORY_FILE = "https://llepogam-app-history.s3.eu-north-1.amazonaws.com/history.csv"

   

def save_history():
    """Upload the session's prediction history to S3 as ``history.csv``.

    The entries in ``st.session_state.history`` are serialised to CSV in
    memory and sent in one request. Nothing is written to local disk, so
    there is no fixed temp-file path that concurrent runs could overwrite
    and no leftover file to clean up when the upload fails.

    Any exception is reported to the user through ``st.error`` rather than
    raised.
    """
    try:
        history_df = pd.DataFrame(st.session_state.history)
        # Without a path argument, to_csv returns the CSV text itself
        csv_bytes = history_df.to_csv(index=False).encode("utf-8")
        bucket.put_object(Key="history.csv", Body=csv_bytes)
    except Exception as e:
        st.error(f"Error saving history to S3: {str(e)}")

def load_history():
    """Download ``history.csv`` from S3 and return its rows.

    The object is streamed into an in-memory buffer instead of a fixed
    temp-file path, so concurrent runs cannot read each other's partially
    written file and nothing is left on disk after a failure.

    Returns:
        A list with one dict per CSV row (``DataFrame.to_dict('records')``).
        If downloading or parsing fails, the error is shown with
        ``st.error`` and an empty list is returned.
    """
    try:
        buffer = io.BytesIO()
        bucket.download_fileobj("history.csv", buffer)
        buffer.seek(0)  # rewind so read_csv starts at the first byte
        # keep_default_na=False: analysed texts such as "NA", "null" or
        # "None" must come back as the strings the user typed, not as NaN.
        history_df = pd.read_csv(buffer, keep_default_na=False)
        return history_df.to_dict('records')
    except Exception as e:
        st.error(f"Error loading history from S3: {str(e)}")
        return []
    
# Fetch the stored history only when this session has no 'history' entry yet
if "history" not in st.session_state:
    st.session_state["history"] = load_history()

# Custom CSS
# Styles for the result box and its three severity variants
# (class names are the ones returned by get_severity_class).
_SEVERITY_CSS = """
    <style>
    .prediction-box {
        padding: 20px;
        border-radius: 5px;
        margin: 10px 0;
    }
    .high-severity {
        background-color: rgba(255, 0, 0, 0.1);
        border: 1px solid red;
    }
    .medium-severity {
        background-color: rgba(255, 165, 0, 0.1);
        border: 1px solid orange;
    }
    .low-severity {
        background-color: rgba(0, 255, 0, 0.1);
        border: 1px solid green;
    }
    </style>
"""
st.markdown(_SEVERITY_CSS, unsafe_allow_html=True)


def hate_speech_detection(text):
    """POST *text* to the prediction endpoint and return ``(result, error)``.

    On success ``result`` is the decoded JSON body and ``error`` is ``None``.
    On failure ``result`` is ``None`` and ``error`` is a message suitable for
    showing to the user; no exception escapes this function.
    """
    endpoint = "https://llepogam-hate-speech-detection-api.hf.space/predict"
    request_options = {
        "headers": {
            "accept": "application/json",
            "Content-Type": "application/json",
        },
        "json": {"Text": text},
        "timeout": 200,
    }

    try:
        reply = requests.post(endpoint, **request_options)
        reply.raise_for_status()
        payload = reply.json()
    except requests.exceptions.Timeout:
        # Must be tested before RequestException, which is its base class
        return None, "API request timed out. Please try again."
    except requests.exceptions.RequestException as exc:
        return None, f"API error: {str(exc)}"
    except Exception as exc:
        return None, f"Unexpected error: {str(exc)}"
    return payload, None

def get_severity_class(probability):
    """Return the CSS class of the result box for a prediction value.

    The cut-offs follow the colour bands of the gauge drawn for the same
    value in this file (green up to 0.3, orange up to 0.7, red above), so
    the box and the gauge never disagree about the colour.

    Args:
        probability: Prediction value, expected between 0 and 1.

    Returns:
        ``"high-severity"`` above 0.7, ``"medium-severity"`` above 0.3,
        otherwise ``"low-severity"``.
    """
    if probability > 0.7:
        return "high-severity"
    if probability > 0.3:
        return "medium-severity"
    return "low-severity"

# Header Section
st.title("🚫 Offensive Speech Detection")

# Introductory text rendered directly under the title
_INTRO_MARKDOWN = """
This application helps identify potentially offensive content in text provided by an user. 

It uses a trained neural network to analyze text and determine if it contains offensive speech. 


**How it works:**
1. Enter your text in the input box below
2. The model will analyze the content and provide a prediction based on the model
3. Results show both the classification and value predicted by the model
4. The results is saved in the prediction history
"""
st.markdown(_INTRO_MARKDOWN)


# FAQ Section
# Question/answer text shown inside the collapsible FAQ panel
_FAQ_MARKDOWN = """
    **Q: What is considered offensive speech?**
    - A: The model is using a dataset of tweets, which were tagged as offensive or not. More information on the dataset can be found here : https://huggingface.co/datasets/christophsonntag/OLID

    **Q: What type of model it is?**
    - A: It is a neural network with an initial preprocessing, a vectorization, an embedding layers and GRU layers 
                
    **Q: How is the prediction done?**
    - A: The model predicts a value between 1 and 0. The closer it is to 1, the more offensive is the prediction.  When the prediction is higher than 0.5, the text is considered as offensive

    **Q: How accurate is the detection?**
    - A: The model created has an accuracy of 73.1%, which means than prediction are correct almost 3 times out of four. When the targeted values is below 0.3 or higher than 0.7, it means than there is a high level of confidence in the prediction 

    """
with st.expander("❓ Frequently Asked Questions"):
    st.markdown(_FAQ_MARKDOWN)


# The reset must run before the text_area sharing the "user_input" key is
# created further down.
clear_requested = st.button("Clear Input", key="clear_button")
if clear_requested:
    st.session_state["user_input"] = ""

# Text Input Section
max_chars = 500
text_area_options = {
    "height": 100,
    "key": "user_input",
    "help": "Enter the text you want to analyze for offensive content. Maximum 500 characters.",
    "max_chars": max_chars,
}
user_input = st.text_area("Enter text to analyze:", **text_area_options)

# Character counter: how much of the limit is still available
chars_remaining = max_chars - len(user_input)
st.caption(f"Characters remaining: {chars_remaining}")



# Process input
# Runs whenever the text area holds something: calls the prediction API,
# renders the result box and gauge, then appends the result to the history.
if user_input:
    if not user_input.strip():
        # Whitespace-only input: nothing meaningful to send to the API
        st.warning("Please enter some text to analyze.")
    else:
        with st.spinner("Analyzing text..."):
            result, error = hate_speech_detection(user_input)

            # Validate the payload before rendering anything. A missing key
            # or a non-numeric probability is routed through the normal
            # error message instead of surfacing as a traceback.
            probability = None
            final_prediction = None
            if not error:
                try:
                    probability = float(result['probability'])
                    if result['prediction'] == 'offensive':
                        final_prediction = "Offensive"
                    else:
                        final_prediction = "Not Offensive"
                except (KeyError, TypeError, ValueError):
                    error = "Unexpected response format from the prediction API."

            if error:
                st.error(f"Error: {error}")
            else:
                # CSS class that colours the result box
                severity_class = get_severity_class(probability)

                st.markdown(f"""
                <div class="prediction-box {severity_class}">
                    <h3>Analysis Results</h3>
                    <p><strong>Prediction:</strong> {final_prediction}</p>
                    <p><strong>Prediction Value:</strong> {probability:.2f}</p>
                </div>
                """, unsafe_allow_html=True)
                
                # Gauge showing the prediction value on a 0-1 axis
                fig = go.Figure(go.Indicator(
                    mode = "gauge+number",
                    value = probability,
                    title = {'text': "Confidence Level"},
                    number = {'valueformat': '.2f'}, 
                    gauge = {
                        'axis': {'range': [0, 1]},
                        'bar': {'color': "darkblue"},
                        'steps': [
                            {'range': [0, 0.3], 'color': "lightgreen"},
                            {'range': [0.3, 0.7], 'color': "orange"},
                            {'range': [0.7, 1], 'color': "red"}
                        ]
                    }
                ))
                fig.update_layout(height=300)
                st.plotly_chart(fig, use_container_width=True)
                
                # Add to history
                st.session_state.history.append({
                    'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    'text': user_input,
                    'prediction': final_prediction,
                    'prediction_value': probability
                })
                # NOTE(review): this uploads this session's copy of the
                # history; entries written by other sessions after this one
                # loaded its copy would be overwritten - confirm acceptable.
                save_history()

# History Section
# Rendered only when the session history is non-empty; the table shows the
# 20 rows with the greatest timestamp values, newest first.
if st.session_state.history:
    history_columns = {
        "timestamp": "Time",
        "text": "Input Text",
        "prediction": "Prediction",
        "prediction_value": st.column_config.NumberColumn(
            "Prediction Value",
            format="%.2f",
        ),
    }
    with st.expander("📜 Analysis History"):
        history_df = pd.DataFrame(st.session_state.history)
        newest_first = history_df.sort_values('timestamp', ascending=False)
        history_df_output = newest_first.head(20)
        st.dataframe(
            history_df_output,
            column_config=history_columns,
            hide_index=True,
        )
        


# Footer
# Horizontal rule followed by a centred credit line
_FOOTER_HTML = """
    <div style='text-align: center'>
        <p>Developed with ❤️ by Louis Le Pogam</p>
    </div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)