Spaces:

rkihacker
/

Multimodal-Moderation-Demo

Running

App Files Files Community

Multimodal-Moderation-Demo / app.py

rkihacker

Update app.py

6f48975 verified 17 days ago

raw

history blame

11.1 kB

	# === Gradio Demo App: app.py ===
	# This script creates a user-friendly web interface to demonstrate the
	# multimodal moderation capabilities of the main FastAPI server.
	#
	# It interacts with the /v3/moderations endpoint.
	# --------------------------------------------------------------------

	import base64
	import os
	import json
	import logging
	import time

	import gradio as gr
	import httpx
	from dotenv import load_dotenv

	# --- Configuration ---
	logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
	load_dotenv()

	# Base URL of the running FastAPI moderation server, read from the environment
	# (set it in your .env file for deployment). Trailing slashes are stripped so
	# that a value such as "http://host:8000/" does not produce the malformed
	# endpoint "http://host:8000//v3/moderations" when the path is appended below.
	API_BASE_URL = os.environ.get("API_BASE_URL", "").rstrip("/")
	MODERATION_ENDPOINT = f"{API_BASE_URL}/v3/moderations"

	# --- Languages offered in the audio-language dropdown ---
	# Maps a human-readable language name to the language code sent to the API.
	# Most codes are two-letter ISO 639-1 codes; a few (e.g. "haw", "jw") are not.
	WHISPER_LANGUAGES = {
	    "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es", "Russian": "ru",
	    "Korean": "ko", "French": "fr", "Japanese": "ja", "Portuguese": "pt", "Turkish": "tr",
	    "Polish": "pl", "Catalan": "ca", "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
	    "Italian": "it", "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
	    "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms", "Czech": "cs",
	    "Romanian": "ro", "Danish": "da", "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
	    "Thai": "th", "Urdu": "ur", "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt",
	    "Latin": "la", "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk",
	    "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn", "Serbian": "sr",
	    "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn", "Estonian": "et", "Macedonian": "mk",
	    "Breton": "br", "Basque": "eu", "Icelandic": "is", "Armenian": "hy", "Nepali": "ne",
	    "Mongolian": "mn", "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
	    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sinhala": "si", "Khmer": "km",
	    "Shona": "sn", "Yoruba": "yo", "Somali": "so", "Afrikaans": "af", "Occitan": "oc",
	    "Georgian": "ka", "Belarusian": "be", "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu",
	    "Amharic": "am", "Yiddish": "yi", "Lao": "lo", "Uzbek": "uz", "Faroese": "fo",
	    "Haitian Creole": "ht", "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn", "Maltese": "mt",
	    "Sanskrit": "sa", "Luxembourgish": "lb", "Myanmar (Burmese)": "my", "Tibetan": "bo",
	    "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as", "Tatar": "tt", "Hawaiian": "haw",
	    "Lingala": "ln", "Hausa": "ha", "Bashkir": "ba", "Javanese": "jw", "Sundanese": "su",
	}
	# Same mapping, re-keyed in alphabetical order of the display name so the
	# dropdown lists languages A-Z.
	SORTED_LANGUAGES = {name: WHISPER_LANGUAGES[name] for name in sorted(WHISPER_LANGUAGES)}


	# --- Helper Function ---
	def file_to_base64(filepath: "str | None") -> "str | None":
	    """Read a file and return its contents as a base64-encoded string.

	    Args:
	        filepath: Path of the file to encode. A falsy value (``None`` or
	            ``""``) means that no file was supplied.

	    Returns:
	        The base64 text of the file's bytes, or ``None`` when no path was
	        given or the file could not be read/encoded.
	    """
	    if not filepath:
	        return None
	    try:
	        with open(filepath, "rb") as f:
	            return base64.b64encode(f.read()).decode("utf-8")
	    except Exception as e:
	        # Deliberately best-effort: any failure is reported to the caller as
	        # None. The traceback is logged so the cause is not lost.
	        logging.error("Failed to convert file %s to base64: %s", filepath, e, exc_info=True)
	        return None

	# --- Core Logic ---
	def moderate_content(text_input, image_input, video_input, audio_input, language_full_name):
	    """Build the moderation payload, call the API, and format the response.

	    Args:
	        text_input: Text to moderate, or a falsy value when absent.
	        image_input: Path of an image file, or a falsy value when absent.
	        video_input: Path of a video file, or a falsy value when absent.
	        audio_input: Path of an audio file, or a falsy value when absent.
	        language_full_name: Display name of the audio language (a key of
	            ``SORTED_LANGUAGES``); unknown names fall back to ``"en"``.
	            Only used when audio is supplied.

	    Returns:
	        A ``(summary, details)`` tuple: ``summary`` is a Markdown string for
	        the user, ``details`` is the raw API response or an error dictionary
	        (``None`` when no input was provided at all).
	    """
	    if not any([text_input, image_input, video_input, audio_input]):
	        return "Please provide at least one input (text, image, video, or audio).", None

	    logging.info("Preparing payload for moderation API...")
	    payload = {"model": "nai-moderation-latest"}
	    if text_input:
	        payload["input"] = text_input

	    # Encode each supplied media file. A file that was supplied but could not
	    # be encoded must not be dropped silently: the API would then judge only
	    # the remaining inputs and could report "SAFE" for content it never saw.
	    unreadable = []
	    media_inputs = (
	        ("image", "image", image_input),
	        ("video", "video", video_input),
	        ("audio", "voice", audio_input),
	    )
	    for label, payload_key, media_path in media_inputs:
	        if not media_path:
	            continue
	        encoded = file_to_base64(media_path)
	        if encoded:
	            payload[payload_key] = encoded
	        else:
	            unreadable.append(label)

	    if unreadable:
	        failed = ", ".join(unreadable)
	        logging.error(f"Aborting moderation request: could not read uploaded {failed} input(s).")
	        return (
	            f"Could not read the uploaded {failed} file(s) (missing, unreadable, or empty). "
	            "Nothing was sent for moderation. Please upload the file(s) again and retry.",
	            {"error": "Input Read Error", "inputs": unreadable},
	        )

	    if "voice" in payload:
	        language_code = SORTED_LANGUAGES.get(language_full_name, "en")
	        payload["language"] = language_code
	        logging.info(f"Audio detected. Using language: {language_full_name} ({language_code})")

	    logging.info(f"Sending request to {MODERATION_ENDPOINT} with inputs: {list(payload.keys())}")

	    summary_output = "An error occurred. Please check the logs."
	    full_response_output = {}
	    latency_ms = None
	    # Set just before the request is sent; stays None if we never get that far.
	    start_time = None

	    try:
	        with httpx.Client(timeout=180.0) as client:
	            start_time = time.monotonic()
	            response = client.post(MODERATION_ENDPOINT, json=payload)
	            latency_ms = (time.monotonic() - start_time) * 1000
	        logging.info(f"API response received in {latency_ms:.2f} ms with status code {response.status_code}")

	        response.raise_for_status()

	        data = response.json()
	        full_response_output = data

	        if not data.get("results"):
	            summary_output = "API returned an empty result. This might happen if media processing fails (e.g., a video with no frames)."
	            return summary_output, full_response_output

	        result = data["results"][0]

	        # "flagged" is intentionally a hard lookup: a response without it is
	        # reported as an error rather than being shown as safe.
	        status = "🚨 FLAGGED 🚨" if result["flagged"] else "✅ SAFE ✅"
	        reason = result.get("reason") or "N/A"
	        transcribed = result.get("transcribed_text") or "N/A"
	        flagged_categories = [cat for cat, flagged in result.get("categories", {}).items() if flagged]
	        categories_str = ", ".join(flagged_categories) if flagged_categories else "None"

	        summary_output = f"""
	API Latency: {latency_ms:.2f} ms
	---
	Moderation Status: {status}
	---
	Reason: {reason}
	---
	Flagged Categories: {categories_str}
	---
	Transcribed Text (from audio):
	{transcribed}
	"""
	        logging.info("Successfully parsed moderation response.")

	    except httpx.HTTPStatusError as e:
	        user_message = "The moderation service returned an error."
	        latency_str = f"API Latency: {latency_ms:.2f} ms" if latency_ms is not None else ""

	        try:
	            error_json = e.response.json()
	            detail = error_json.get("detail", "No specific error detail provided.")
	            error_details = f"Reason: {detail}"
	            full_response_output = {"error": "Backend API Error", "status_code": e.response.status_code, "details": error_json}
	        except (ValueError, AttributeError):
	            # ValueError covers json.JSONDecodeError and decoding errors of a
	            # malformed body; AttributeError covers a JSON body that is not an
	            # object (no .get). Either way, fall back to the raw text.
	            error_details = f"Raw Server Response:\n```\n{e.response.text}\n```"
	            full_response_output = {"error": "Backend API Error", "status_code": e.response.status_code, "details": e.response.text}

	        summary_output = f"""
	🚫 Error from Moderation Service (HTTP {e.response.status_code})
	---
	{latency_str}

	{user_message}

	{error_details}
	"""
	        logging.error(f"HTTP Status Error: {e.response.status_code} - Response: {e.response.text}")

	    except httpx.RequestError as e:
	        if latency_ms is None:
	            # The request failed before a response arrived; report the time
	            # spent so far, or 0 if it never started.
	            latency_ms = (time.monotonic() - start_time) * 1000 if start_time is not None else 0

	        summary_output = f"""
	🔌 Connection Error
	---
	Could not connect to the API server at `{API_BASE_URL}`. The request failed after {latency_ms:.0f} ms.

	Please ensure the backend server is running and the URL is configured correctly in your `.env` file.
	"""
	        full_response_output = {"error": "Connection Error", "url": API_BASE_URL, "details": str(e)}
	        logging.error(f"Request Error: Could not connect to {API_BASE_URL}. Details: {e}")

	    except Exception as e:
	        # Top-level boundary for the UI callback: show a message in the
	        # interface instead of letting the exception escape to Gradio.
	        summary_output = f"""
	💥 An Unexpected Application Error Occurred
	---
	An error happened within the Gradio application itself.
	Please check the application logs for more details.

	Error Type: `{type(e).__name__}`
	"""
	        full_response_output = {"error": "Gradio App Internal Error", "type": type(e).__name__, "details": str(e)}
	        logging.error(f"Unexpected Error in Gradio App: {e}", exc_info=True)

	    return summary_output, full_response_output

	# --- Gradio Interface ---
	# Build the demo UI: a Blocks app using the Soft theme, with the page footer
	# hidden through custom CSS. The resulting app object is bound to `demo`.
	# NOTE(review): indentation is not preserved in this copy of the file, so the
	# exact nesting of the statements below (in particular whether the click
	# wiring and gr.Examples sit at Blocks level or inside a Row/Column) should be
	# confirmed against the original source.
	with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
	# Introductory heading and description.
	gr.Markdown(
	"""
	# 🤖 Multimodal Content Moderation Demo
	This demo uses a custom API server to perform advanced content moderation.
	You can provide any combination of text, image, video, and audio. The system will analyze all inputs together.
	"""
	)

	with gr.Row():
	# Input column: text, image, video, audio, audio language, submit button.
	with gr.Column(scale=1):
	gr.Markdown("### 1. Provide Your Content")
	text_input = gr.Textbox(label="Text Input", lines=4, placeholder="Enter any text here...")
	# Image and audio are requested as file paths (type="filepath").
	image_input = gr.Image(label="Image Input", type="filepath")
	video_input = gr.Video(label="Video Input")
	audio_input = gr.Audio(label="Voice/Audio Input", type="filepath")

	# Choices are the display names from SORTED_LANGUAGES; defaults to English.
	language_input = gr.Dropdown(
	label="Audio Language (if providing audio)",
	choices=list(SORTED_LANGUAGES.keys()),
	value="English",
	interactive=True
	)

	submit_button = gr.Button("Moderate Content", variant="primary")

	# Output column: Markdown summary plus the full response as JSON.
	with gr.Column(scale=2):
	gr.Markdown("### 2. See the Results")
	result_output = gr.Markdown(label="Moderation Summary")
	full_response_output = gr.JSON(label="Full API Response")

	# Clicking the button calls moderate_content with the five inputs (in the
	# order of its parameters) and writes its two return values to the outputs.
	submit_button.click(
	fn=moderate_content,
	inputs=[text_input, image_input, video_input, audio_input, language_input],
	outputs=[result_output, full_response_output]
	)

	# Two text-only examples (no image, video, or audio), both with "English".
	gr.Examples(
	examples=[
	["This is a test of the system with safe text.", None, None, None, "English"],
	["I am going to kill the process on my computer.", None, None, None, "English"],
	],
	inputs=[text_input, image_input, video_input, audio_input, language_input],
	outputs=[result_output, full_response_output],
	fn=moderate_content
	)

	if __name__ == "__main__":
	    logging.info(f"Connecting to API server at: {API_BASE_URL}")
	    if not API_BASE_URL:
	        # With no base URL the endpoint is the scheme-less path
	        # "/v3/moderations", so every moderation request will fail.
	        logging.warning("API_BASE_URL is not set. Moderation requests will fail until it is configured in your environment or .env file.")
	    elif API_BASE_URL == "http://127.0.0.1:8000":
	        logging.warning("API_BASE_URL is set to the local address http://127.0.0.1:8000. Make sure this is correct or set it in your .env file.")
	    # Listen on all interfaces on port 7860.
	    demo.launch(server_name="0.0.0.0", server_port=7860)