import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import requests
from datetime import datetime
import time
import os
import io
import boto3
from botocore.exceptions import ClientError

### Config
st.set_page_config(
    page_title="Offensive Speech Recognition",
    page_icon="⚠️",
    layout="wide"
)

# Initialize AWS session with credentials from Hugging Face secrets
session = boto3.Session(
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)

# Initialize S3 resource
s3 = session.resource("s3")
bucket_name = 'llepogam-app-history'
bucket = s3.Bucket(bucket_name)

# S3 object key under which the prediction history is stored.
HISTORY_KEY = "history.csv"
# NOTE: unused — public URL of the same object, kept for reference only.
HISTORY_FILE = "https://llepogam-app-history.s3.eu-north-1.amazonaws.com/history.csv"


def save_history():
    """Persist the in-session prediction history to S3 as a CSV file.

    The dataframe is serialized in memory and uploaded directly under
    HISTORY_KEY — no temp file on disk, so a failed upload leaves
    nothing behind and concurrent sessions cannot clobber a shared
    /tmp path. Errors are reported in the UI but never crash the app.
    """
    try:
        history_df = pd.DataFrame(st.session_state.history)
        csv_bytes = history_df.to_csv(index=False).encode("utf-8")
        bucket.put_object(Key=HISTORY_KEY, Body=csv_bytes)
    except Exception as e:
        st.error(f"Error saving history to S3: {str(e)}")


def load_history():
    """Load the prediction history from S3.

    Returns:
        list[dict]: one record per past prediction; an empty list when
        the history object does not exist yet (first run) or on error.
    """
    try:
        body = bucket.Object(HISTORY_KEY).get()["Body"].read()
        history_df = pd.read_csv(io.BytesIO(body))
        return history_df.to_dict('records')
    except ClientError as e:
        # A missing object just means no history was saved yet — this
        # is the normal first-run state, not an error to surface.
        if e.response.get("Error", {}).get("Code") in ("NoSuchKey", "404"):
            return []
        st.error(f"Error loading history from S3: {str(e)}")
        return []
    except Exception as e:
        st.error(f"Error loading history from S3: {str(e)}")
        return []


# Load the persisted history once per browser session.
if 'history' not in st.session_state:
    st.session_state.history = load_history()

# Custom CSS
st.markdown("""
""", unsafe_allow_html=True)


def hate_speech_detection(text):
    """Call the hosted detection API on `text`.

    Args:
        text: raw user input to classify.

    Returns:
        tuple: (response_json, None) on success or (None, error_message)
        on failure, so the caller never has to catch exceptions itself.
    """
    url = "https://llepogam-hate-speech-detection-api.hf.space/predict"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }
    try:
        # Generous timeout: the backing HF Space may need a cold start.
        response = requests.post(
            url,
            headers=headers,
            json={"Text": text},
            timeout=200
        )
        response.raise_for_status()
        return response.json(), None
    except requests.exceptions.Timeout:
        return None, "API request timed out. Please try again."
    except requests.exceptions.RequestException as e:
        return None, f"API error: {str(e)}"
    except Exception as e:
        return None, f"Unexpected error: {str(e)}"


def get_severity_class(probability):
    """Map a prediction value in [0, 1] to a CSS severity class name."""
    if probability > 0.7:
        return "high-severity"
    elif probability > 0.4:
        return "medium-severity"
    return "low-severity"


# Header Section
st.title("🚫 Offensive Speech Detection")
st.markdown("""
This application helps identify potentially offensive content in text provided by a user.
It uses a trained neural network to analyze text and determine if it contains offensive speech.

**How it works:**
1. Enter your text in the input box below
2. The model will analyze the content and provide a prediction based on the model
3. Results show both the classification and the value predicted by the model
4. The result is saved in the prediction history
""")

# FAQ Section
with st.expander("❓ Frequently Asked Questions"):
    st.markdown("""
    **Q: What is considered offensive speech?**
    - A: The model is using a dataset of tweets, which were tagged as offensive or not. More information on the dataset can be found here : https://huggingface.co/datasets/christophsonntag/OLID

    **Q: What type of model is it?**
    - A: It is a neural network with an initial preprocessing, a vectorization, an embedding layer and GRU layers

    **Q: How is the prediction done?**
    - A: The model predicts a value between 0 and 1. The closer it is to 1, the more offensive the prediction is. When the prediction is higher than 0.5, the text is considered as offensive

    **Q: How accurate is the detection?**
    - A: The model created has an accuracy of 73.1%, which means that predictions are correct almost 3 times out of four.
    When the predicted value is below 0.3 or higher than 0.7, there is a high level of confidence in the prediction
    """)

# Clear button - must come BEFORE the text_area widget so that the
# reset value is already in session_state when the widget is created
# on the same rerun.
if st.button("Clear Input", key="clear_button"):
    st.session_state.user_input = ""

# Text Input Section
max_chars = 500
user_input = st.text_area(
    "Enter text to analyze:",
    height=100,
    key="user_input",
    help="Enter the text you want to analyze for offensive content. Maximum 500 characters.",
    max_chars=max_chars
)

# Character counter
chars_remaining = max_chars - len(user_input)
st.caption(f"Characters remaining: {chars_remaining}")

# Process input
if user_input:
    if len(user_input.strip()) == 0:
        st.warning("Please enter some text to analyze.")
    else:
        with st.spinner("Analyzing text..."):
            result, error = hate_speech_detection(user_input)

        if error:
            st.error(f"Error: {error}")
        else:
            probability = result['probability']
            # CSS class (defined in the custom-CSS block above) that
            # colors the result box by severity.
            severity_class = get_severity_class(probability)

            if result['prediction'] == 'offensive':
                final_prediction = "Offensive"
            else:
                final_prediction = "Not Offensive"

            st.markdown(f"""
            <div class="{severity_class}">
                <h3>Analysis Results</h3>
                <p><strong>Prediction:</strong> {final_prediction}</p>
                <p><strong>Prediction Value:</strong> {probability:.2f}</p>
            </div>
            """, unsafe_allow_html=True)

            # Confidence meter using Plotly: green/orange/red bands
            # mirror the confidence thresholds described in the FAQ.
            fig = go.Figure(go.Indicator(
                mode="gauge+number",
                value=probability,
                title={'text': "Confidence Level"},
                number={'valueformat': '.2f'},
                gauge={
                    'axis': {'range': [0, 1]},
                    'bar': {'color': "darkblue"},
                    'steps': [
                        {'range': [0, 0.3], 'color': "lightgreen"},
                        {'range': [0.3, 0.7], 'color': "orange"},
                        {'range': [0.7, 1], 'color': "red"}
                    ]
                }
            ))
            fig.update_layout(height=300)
            st.plotly_chart(fig, use_container_width=True)

            # Record the analysis and persist immediately so the
            # history survives the session.
            st.session_state.history.append({
                'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'text': user_input,
                'prediction': final_prediction,
                'prediction_value': probability
            })
            save_history()

# History Section
if st.session_state.history:
    with st.expander("📜 Analysis History"):
        history_df = pd.DataFrame(st.session_state.history)
        # Show only the 20 most recent analyses, newest first.
        history_df_output = (history_df
                             .sort_values('timestamp', ascending=False)
                             .head(20))
        st.dataframe(
            history_df_output,
            column_config={
                "timestamp": "Time",
                "text": "Input Text",
                "prediction": "Prediction",
                "prediction_value": st.column_config.NumberColumn(
                    "Prediction Value",
                    format="%.2f"
                )
            },
            hide_index=True
        )

# Footer
st.markdown("---")
st.markdown("""
<div style="text-align: center;">
Developed with ❤️ by Louis Le Pogam
</div>
""", unsafe_allow_html=True)