# === Gradio Demo App: gradio_app.py ===
# This script creates a user-friendly web interface to demonstrate the
# multimodal moderation capabilities of the main FastAPI server.
#
# It interacts with the /v3/moderations endpoint.
# --------------------------------------------------------------------

import base64
import json
import logging
import os
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
import httpx
from dotenv import load_dotenv

# --- Configuration ---
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
load_dotenv()

# The URL of your running FastAPI server.
# It's crucial to set this in your .env file for deployment.
# An unset or empty value falls back to the local development address:
# httpx cannot send a request to a relative URL such as "/v3/moderations".
DEFAULT_API_BASE_URL = "http://127.0.0.1:8000"
API_BASE_URL = os.environ.get("API_BASE_URL", "").strip().rstrip("/") or DEFAULT_API_BASE_URL
MODERATION_ENDPOINT = f"{API_BASE_URL}/v3/moderations"

# Seconds to wait for the moderation API before giving up.
REQUEST_TIMEOUT_SECONDS = 180.0

# Model name sent in every request payload.
MODERATION_MODEL = "nai-moderation-latest"

# Language code used when the dropdown value is not in the mapping.
DEFAULT_LANGUAGE_CODE = "en"

# --- Languages offered in the "Audio Language" dropdown ---
# Maps the user-friendly name shown in the UI to the language code that is
# sent to the API in the "language" field.
WHISPER_LANGUAGES = {
    "English": "en",
    "Chinese": "zh",
    "German": "de",
    "Spanish": "es",
    "Russian": "ru",
    "Korean": "ko",
    "French": "fr",
    "Japanese": "ja",
    "Portuguese": "pt",
    "Turkish": "tr",
    "Polish": "pl",
    "Catalan": "ca",
    "Dutch": "nl",
    "Arabic": "ar",
    "Swedish": "sv",
    "Italian": "it",
    "Indonesian": "id",
    "Hindi": "hi",
    "Finnish": "fi",
    "Vietnamese": "vi",
    "Hebrew": "he",
    "Ukrainian": "uk",
    "Greek": "el",
    "Malay": "ms",
    "Czech": "cs",
    "Romanian": "ro",
    "Danish": "da",
    "Hungarian": "hu",
    "Tamil": "ta",
    "Norwegian": "no",
    "Thai": "th",
    "Urdu": "ur",
    "Croatian": "hr",
    "Bulgarian": "bg",
    "Lithuanian": "lt",
    "Latin": "la",
    "Maori": "mi",
    "Malayalam": "ml",
    "Welsh": "cy",
    "Slovak": "sk",
    "Telugu": "te",
    "Persian": "fa",
    "Latvian": "lv",
    "Bengali": "bn",
    "Serbian": "sr",
    "Azerbaijani": "az",
    "Slovenian": "sl",
    "Kannada": "kn",
    "Estonian": "et",
    "Macedonian": "mk",
    "Breton": "br",
    "Basque": "eu",
    "Icelandic": "is",
    "Armenian": "hy",
    "Nepali": "ne",
    "Mongolian": "mn",
    "Bosnian": "bs",
    "Kazakh": "kk",
    "Albanian": "sq",
    "Swahili": "sw",
    "Galician": "gl",
    "Marathi": "mr",
    "Punjabi": "pa",
    "Sinhala": "si",
    "Khmer": "km",
    "Shona": "sn",
    "Yoruba": "yo",
    "Somali": "so",
    "Afrikaans": "af",
    "Occitan": "oc",
    "Georgian": "ka",
    "Belarusian": "be",
    "Tajik": "tg",
    "Sindhi": "sd",
    "Gujarati": "gu",
    "Amharic": "am",
    "Yiddish": "yi",
    "Lao": "lo",
    "Uzbek": "uz",
    "Faroese": "fo",
    "Haitian Creole": "ht",
    "Pashto": "ps",
    "Turkmen": "tk",
    "Nynorsk": "nn",
    "Maltese": "mt",
    "Sanskrit": "sa",
    "Luxembourgish": "lb",
    "Myanmar (Burmese)": "my",
    "Tibetan": "bo",
    "Tagalog": "tl",
    "Malagasy": "mg",
    "Assamese": "as",
    "Tatar": "tt",
    "Hawaiian": "haw",
    "Lingala": "ln",
    "Hausa": "ha",
    "Bashkir": "ba",
    "Javanese": "jw",
    "Sundanese": "su",
}

# Sort languages alphabetically for the dropdown
SORTED_LANGUAGES = dict(sorted(WHISPER_LANGUAGES.items()))


# --- Helper Functions ---
def file_to_base64(filepath: str) -> Optional[str]:
    """Read a file and return its contents as a base64-encoded string.

    Args:
        filepath: Path of the file to read. A falsy value (``None`` or
            ``""``) means "no file provided".

    Returns:
        The base64 text, or ``None`` when no path was given or the file
        could not be read (the failure is logged, not raised).
    """
    if not filepath:
        return None
    try:
        with open(filepath, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")
    except (OSError, TypeError, ValueError) as e:
        # OSError: missing/unreadable file; TypeError/ValueError: a value
        # that is not usable as a path. Best effort: report and carry on.
        logger.error("Failed to convert file %s to base64: %s", filepath, e)
        return None


def _build_payload(
    text_input, image_input, video_input, audio_input, language_full_name
) -> Tuple[Dict[str, Any], List[str]]:
    """Assemble the request payload and list the media inputs that were dropped.

    Returns:
        ``(payload, skipped)`` where ``skipped`` holds the labels ("image",
        "video", "audio") of inputs that were provided but yielded no data.
    """
    payload: Dict[str, Any] = {"model": MODERATION_MODEL}
    skipped: List[str] = []

    if text_input:
        payload["input"] = text_input

    # Gradio provides file paths; we need to convert them to base64.
    # Note the audio clip is sent under the "voice" key.
    media_inputs = (
        ("image", "image", image_input),
        ("video", "video", video_input),
        ("audio", "voice", audio_input),
    )
    for label, payload_key, filepath in media_inputs:
        if not filepath:
            continue
        encoded = file_to_base64(filepath)
        if encoded:
            payload[payload_key] = encoded
        else:
            skipped.append(label)

    # The language only matters when there is audio to transcribe.
    if "voice" in payload:
        language_code = SORTED_LANGUAGES.get(language_full_name, DEFAULT_LANGUAGE_CODE)
        payload["language"] = language_code
        logger.info("Audio detected. Using language: %s (%s)", language_full_name, language_code)

    return payload, skipped


def _format_summary(result: Dict[str, Any]) -> str:
    """Render one moderation result as a Markdown summary.

    Raises:
        KeyError: if ``result`` has no "flagged" field. A malformed response
            must not be reported as safe, so this is deliberately strict.
    """
    status = "🚨 FLAGGED 🚨" if result["flagged"] else "✅ SAFE ✅"
    reason = result.get("reason") or "N/A"
    transcribed = result.get("transcribed_text") or "N/A"

    # Create a clean list of flagged categories ("categories" may be null).
    categories = result.get("categories") or {}
    flagged_categories = [cat for cat, flagged in categories.items() if flagged]
    categories_str = ", ".join(flagged_categories) if flagged_categories else "None"

    # Blank lines around "---" make Markdown render a horizontal rule rather
    # than turning the preceding line into a heading.
    return "\n\n".join(
        [
            f"**Moderation Status:** {status}",
            "---",
            f"**Reason:** {reason}",
            "---",
            f"**Flagged Categories:** {categories_str}",
            "---",
            f"**Transcribed Text (from audio):** {transcribed}",
        ]
    )


# --- Core Logic ---
def moderate_content(text_input, image_input, video_input, audio_input, language_full_name):
    """Prepare the payload, call the moderation API, and format the response.

    Args:
        text_input: Text to moderate, or a falsy value for none.
        image_input: Path of an image file, or a falsy value for none.
        video_input: Path of a video file, or a falsy value for none.
        audio_input: Path of an audio file, or a falsy value for none.
        language_full_name: Dropdown label of the audio language; labels not
            present in ``SORTED_LANGUAGES`` fall back to "en".

    Returns:
        ``(summary, full_response)``: a Markdown/plain-text summary for the
        UI and the parsed JSON response. ``full_response`` is ``None`` when
        no request was sent and ``{}`` when the request failed.
    """
    if not any([text_input, image_input, video_input, audio_input]):
        return "Please provide at least one input (text, image, video, or audio).", None

    logger.info("Preparing payload for moderation API...")
    payload, skipped = _build_payload(
        text_input, image_input, video_input, audio_input, language_full_name
    )

    # Tell the user explicitly when part of their submission was dropped, so a
    # verdict is never mistaken for covering media that was not analysed.
    skipped_note = ""
    if skipped:
        skipped_note = (
            "⚠️ The following inputs could not be read (or were empty) and were "
            f"NOT analyzed: {', '.join(skipped)}.\n\n"
        )

    has_content = any(key in payload for key in ("input", "image", "video", "voice"))
    if not has_content:
        return skipped_note + "Nothing was sent to the moderation service.", None

    logger.info("Sending request to %s with inputs: %s", MODERATION_ENDPOINT, list(payload.keys()))

    summary_output = "An error occurred. Please check the logs."
    full_response_output = {}

    try:
        # Using a synchronous client is simpler for this Gradio function
        with httpx.Client(timeout=REQUEST_TIMEOUT_SECONDS) as client:
            response = client.post(MODERATION_ENDPOINT, json=payload)
            response.raise_for_status()  # Raises an exception for 4xx/5xx errors

        data = response.json()
        full_response_output = data

        if not data.get("results"):
            summary_output = (
                "API returned an empty result. This might happen if media "
                "processing fails (e.g., a video with no frames)."
            )
        else:
            # The v3 endpoint returns a single aggregated result
            summary_output = _format_summary(data["results"][0])
            logger.info("Successfully received and parsed moderation response.")

    except httpx.HTTPStatusError as e:
        error_details = e.response.text
        summary_output = (
            f"HTTP Error: {e.response.status_code}\n\n"
            "Could not connect to the moderation service or the service returned an error.\n\n"
            f"Details:\n{error_details}"
        )
        logger.error("HTTP Status Error: %s", error_details)
    except httpx.RequestError as e:
        summary_output = (
            f"Request Error: Could not connect to the API server at {API_BASE_URL}.\n"
            "Please ensure the server is running and the URL is correct."
        )
        logger.error("Request Error: %s", e)
    except Exception as e:
        # UI boundary: show the failure in the page instead of crashing the
        # Gradio callback; the traceback goes to the log.
        summary_output = f"An unexpected error occurred: {str(e)}"
        logger.exception("Unexpected Error: %s", e)

    return skipped_note + summary_output, full_response_output


# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
    gr.Markdown(
        """
        # 🤖 Multimodal Content Moderation Demo
        This demo uses a custom API server to perform advanced content moderation.
        You can provide any combination of text, image, video, and audio.
        The system will analyze all inputs together.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Provide Your Content")
            text_input = gr.Textbox(label="Text Input", lines=4, placeholder="Enter any text here...")
            image_input = gr.Image(label="Image Input", type="filepath")
            video_input = gr.Video(label="Video Input")
            audio_input = gr.Audio(label="Voice/Audio Input", type="filepath")

            # Language selection dropdown (only used when audio is provided)
            language_input = gr.Dropdown(
                label="Audio Language (if providing audio)",
                choices=list(SORTED_LANGUAGES.keys()),
                value="English",
                interactive=True,
            )

            submit_button = gr.Button("Moderate Content", variant="primary")

        with gr.Column(scale=2):
            gr.Markdown("### 2. See the Results")
            result_output = gr.Markdown(label="Moderation Summary")
            full_response_output = gr.JSON(label="Full API Response")

    # The order of `inputs` must match the parameter order of moderate_content.
    submit_button.click(
        fn=moderate_content,
        inputs=[text_input, image_input, video_input, audio_input, language_input],
        outputs=[result_output, full_response_output],
    )

    gr.Examples(
        examples=[
            ["This is a test of the system with safe text.", None, None, None, "English"],
            ["I am going to kill the process on my computer.", None, None, None, "English"],
        ],
        inputs=[text_input, image_input, video_input, audio_input, language_input],
        outputs=[result_output, full_response_output],
        fn=moderate_content,
    )


if __name__ == "__main__":
    logger.info("Connecting to API server at: %s", API_BASE_URL)
    if API_BASE_URL == DEFAULT_API_BASE_URL:
        logger.warning(
            "API_BASE_URL is set to the default local address. "
            "Make sure this is correct or set it in your .env file."
        )
    demo.launch(server_name="0.0.0.0", server_port=7860)