|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import base64 |
|
import os |
|
import json |
|
import logging |
|
import time |
|
|
|
import gradio as gr |
|
import httpx |
|
from dotenv import load_dotenv |
|
|
|
|
|
# Configure root logging once for the whole app: timestamp, level, message.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Pull settings from a local .env file into the environment before reading them.
load_dotenv()

# Base URL of the moderation backend; an empty string when the variable is unset.
API_BASE_URL = os.environ.get("API_BASE_URL", "")

# Strip trailing slashes so a value such as "http://host/" does not produce
# "http://host//v3/moderations".
MODERATION_ENDPOINT = API_BASE_URL.rstrip("/") + "/v3/moderations"
|
|
|
|
|
|
|
# Display name -> language code sent to the API alongside audio input.
WHISPER_LANGUAGES = {
    "English": "en",
    "Chinese": "zh",
    "German": "de",
    "Spanish": "es",
    "Russian": "ru",
    "Korean": "ko",
    "French": "fr",
    "Japanese": "ja",
    "Portuguese": "pt",
    "Turkish": "tr",
    "Polish": "pl",
    "Catalan": "ca",
    "Dutch": "nl",
    "Arabic": "ar",
    "Swedish": "sv",
    "Italian": "it",
    "Indonesian": "id",
    "Hindi": "hi",
    "Finnish": "fi",
    "Vietnamese": "vi",
    "Hebrew": "he",
    "Ukrainian": "uk",
    "Greek": "el",
    "Malay": "ms",
    "Czech": "cs",
    "Romanian": "ro",
    "Danish": "da",
    "Hungarian": "hu",
    "Tamil": "ta",
    "Norwegian": "no",
    "Thai": "th",
    "Urdu": "ur",
    "Croatian": "hr",
    "Bulgarian": "bg",
    "Lithuanian": "lt",
    "Latin": "la",
    "Maori": "mi",
    "Malayalam": "ml",
    "Welsh": "cy",
    "Slovak": "sk",
    "Telugu": "te",
    "Persian": "fa",
    "Latvian": "lv",
    "Bengali": "bn",
    "Serbian": "sr",
    "Azerbaijani": "az",
    "Slovenian": "sl",
    "Kannada": "kn",
    "Estonian": "et",
    "Macedonian": "mk",
    "Breton": "br",
    "Basque": "eu",
    "Icelandic": "is",
    "Armenian": "hy",
    "Nepali": "ne",
    "Mongolian": "mn",
    "Bosnian": "bs",
    "Kazakh": "kk",
    "Albanian": "sq",
    "Swahili": "sw",
    "Galician": "gl",
    "Marathi": "mr",
    "Punjabi": "pa",
    "Sinhala": "si",
    "Khmer": "km",
    "Shona": "sn",
    "Yoruba": "yo",
    "Somali": "so",
    "Afrikaans": "af",
    "Occitan": "oc",
    "Georgian": "ka",
    "Belarusian": "be",
    "Tajik": "tg",
    "Sindhi": "sd",
    "Gujarati": "gu",
    "Amharic": "am",
    "Yiddish": "yi",
    "Lao": "lo",
    "Uzbek": "uz",
    "Faroese": "fo",
    "Haitian Creole": "ht",
    "Pashto": "ps",
    "Turkmen": "tk",
    "Nynorsk": "nn",
    "Maltese": "mt",
    "Sanskrit": "sa",
    "Luxembourgish": "lb",
    "Myanmar (Burmese)": "my",
    "Tibetan": "bo",
    "Tagalog": "tl",
    "Malagasy": "mg",
    "Assamese": "as",
    "Tatar": "tt",
    "Hawaiian": "haw",
    "Lingala": "ln",
    "Hausa": "ha",
    "Bashkir": "ba",
    "Javanese": "jw",
    "Sundanese": "su",
}

# Same mapping, re-keyed in alphabetical order of the display name.
SORTED_LANGUAGES = {name: WHISPER_LANGUAGES[name] for name in sorted(WHISPER_LANGUAGES)}
|
|
|
|
|
|
|
def file_to_base64(filepath: "str | None") -> "str | None":
    """Read a file and return its contents as a base64-encoded string.

    Args:
        filepath: Path of the file to encode. A falsy value (``None`` or
            ``""``) means "no file supplied".

    Returns:
        The base64 text of the file's bytes, or ``None`` when no path was
        given or the file could not be read. A read failure is logged rather
        than raised.
    """
    if not filepath:
        return None
    try:
        with open(filepath, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")
    except Exception as e:
        # Deliberately broad: a bad upload should become a logged failure
        # that the caller can report, not an unhandled exception.
        logging.error(f"Failed to convert file {filepath} to base64: {e}")
        return None
|
|
|
|
|
def _join_markdown(*paragraphs):
    """Join the non-empty paragraphs with blank lines so each renders as its own Markdown block."""
    return "\n\n".join(part for part in paragraphs if part)


def _encode_media(payload, media_inputs):
    """Base64-encode each supplied media file into ``payload``; return the labels that failed."""
    unreadable = []
    for field, label, path in media_inputs:
        if not path:
            continue
        encoded = file_to_base64(path)
        if encoded:
            payload[field] = encoded
        else:
            # None (read failure) or "" (empty file): nothing usable to send.
            unreadable.append(label)
    return unreadable


def _format_result_summary(result, latency_ms):
    """Render the first moderation result as the Markdown summary shown to the user."""
    # Index "flagged" strictly: a response without a verdict must surface as
    # an error instead of being displayed as SAFE.
    status = "🚨 FLAGGED 🚨" if result["flagged"] else "✅ SAFE ✅"
    reason = result.get("reason") or "N/A"
    transcribed = result.get("transcribed_text") or "N/A"
    categories = result.get("categories") or {}
    flagged_categories = [name for name, hit in categories.items() if hit]
    categories_str = ", ".join(flagged_categories) if flagged_categories else "None"
    return _join_markdown(
        f"**API Latency:** {latency_ms:.2f} ms",
        "---",
        f"**Moderation Status:** {status}",
        "---",
        f"**Reason:** {reason}",
        "---",
        f"**Flagged Categories:** {categories_str}",
        "---",
        "**Transcribed Text (from audio):**",
        transcribed,
    )


def _format_http_error(response, latency_ms):
    """Build the (summary, details) pair for a non-2xx reply from the moderation service."""
    try:
        error_json = response.json()
        # AttributeError covers a JSON body that is not an object (e.g. a list).
        detail = error_json.get("detail", "No specific error detail provided.")
        error_details = f"**Reason:** {detail}"
        details = error_json
    except (ValueError, AttributeError):
        # ValueError covers json.JSONDecodeError and undecodable bodies.
        error_details = f"**Raw Server Response:**\n```\n{response.text}\n```"
        details = response.text

    latency_str = f"**API Latency:** {latency_ms:.2f} ms" if latency_ms is not None else ""
    summary = _join_markdown(
        f"**🚫 Error from Moderation Service (HTTP {response.status_code})**",
        "---",
        latency_str,
        "The moderation service returned an error.",
        error_details,
    )
    full_response = {
        "error": "Backend API Error",
        "status_code": response.status_code,
        "details": details,
    }
    return summary, full_response


def moderate_content(text_input, image_input, video_input, audio_input, language_full_name):
    """Prepare the payload, call the moderation API, and format the response.

    Args:
        text_input: Text to moderate, or a falsy value for none.
        image_input: Path of an image file, or a falsy value for none.
        video_input: Path of a video file, or a falsy value for none.
        audio_input: Path of an audio file, or a falsy value for none.
        language_full_name: Display name of the audio language; looked up in
            ``SORTED_LANGUAGES`` and sent only when audio is supplied. Unknown
            names fall back to ``"en"``.

    Returns:
        A ``(summary, details)`` tuple: ``summary`` is Markdown for the
        result panel, ``details`` is the JSON-serialisable API response or
        error description (``None`` when no input was provided).
    """
    if not any([text_input, image_input, video_input, audio_input]):
        return "Please provide at least one input (text, image, video, or audio).", None

    logging.info("Preparing payload for moderation API...")
    payload = {"model": "nai-moderation-latest"}
    if text_input:
        payload["input"] = text_input

    unreadable = _encode_media(
        payload,
        (
            ("image", "image", image_input),
            ("video", "video", video_input),
            ("voice", "audio", audio_input),
        ),
    )
    if unreadable:
        # Never moderate a partial request: dropping an input silently could
        # report content as safe that was never actually checked.
        names = ", ".join(unreadable)
        logging.error("Aborting moderation request; unreadable input file(s): %s", names)
        return (
            f"Could not read the provided {names} file(s). Please upload the content again.",
            {"error": "Input File Error", "unreadable_inputs": unreadable},
        )

    if "voice" in payload:
        language_code = SORTED_LANGUAGES.get(language_full_name, "en")
        payload["language"] = language_code
        logging.info(f"Audio detected. Using language: {language_full_name} ({language_code})")

    logging.info(f"Sending request to {MODERATION_ENDPOINT} with inputs: {list(payload.keys())}")

    summary_output = "An error occurred. Please check the logs."
    full_response_output = {}
    latency_ms = None
    start_time = None  # set just before the request so failures can report elapsed time

    try:
        with httpx.Client(timeout=180.0) as client:
            start_time = time.monotonic()
            response = client.post(MODERATION_ENDPOINT, json=payload)
            latency_ms = (time.monotonic() - start_time) * 1000
        logging.info(f"API response received in {latency_ms:.2f} ms with status code {response.status_code}")

        response.raise_for_status()

        data = response.json()
        full_response_output = data

        if not data.get("results"):
            summary_output = "API returned an empty result. This might happen if media processing fails (e.g., a video with no frames)."
            return summary_output, full_response_output

        summary_output = _format_result_summary(data["results"][0], latency_ms)
        logging.info("Successfully parsed moderation response.")

    except httpx.HTTPStatusError as e:
        summary_output, full_response_output = _format_http_error(e.response, latency_ms)
        logging.error(f"HTTP Status Error: {e.response.status_code} - Response: {e.response.text}")

    except httpx.RequestError as e:
        if latency_ms is None:
            latency_ms = (time.monotonic() - start_time) * 1000 if start_time is not None else 0

        summary_output = _join_markdown(
            "**🔌 Connection Error**",
            "---",
            f"Could not connect to the API server at `{API_BASE_URL}`. The request failed after {latency_ms:.0f} ms.",
            "Please ensure the backend server is running and the URL is configured correctly in your `.env` file.",
        )
        full_response_output = {"error": "Connection Error", "url": API_BASE_URL, "details": str(e)}
        logging.error(f"Request Error: Could not connect to {API_BASE_URL}. Details: {e}")

    except Exception as e:
        # Last-resort boundary so the UI always gets a response; the full
        # traceback goes to the log.
        summary_output = _join_markdown(
            "**💥 An Unexpected Application Error Occurred**",
            "---",
            "An error happened within the Gradio application itself.",
            "Please check the application logs for more details.",
            f"**Error Type:** `{type(e).__name__}`",
        )
        full_response_output = {"error": "Gradio App Internal Error", "type": type(e).__name__, "details": str(e)}
        logging.error(f"Unexpected Error in Gradio App: {e}", exc_info=True)

    return summary_output, full_response_output
|
|
|
|
|
# Build the demo UI: content inputs in the left column, moderation results on the right.
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
    # Assembled from plain string pieces so the rendered Markdown does not
    # depend on how this source file is indented.
    gr.Markdown(
        "# 🤖 Multimodal Content Moderation Demo\n\n"
        "This demo uses a custom API server to perform advanced content moderation.\n\n"
        "You can provide any combination of text, image, video, and audio. "
        "The system will analyze all inputs together."
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Provide Your Content")
            text_input = gr.Textbox(label="Text Input", lines=4, placeholder="Enter any text here...")
            # Media components are passed to moderate_content, which reads
            # each one from disk via file_to_base64.
            image_input = gr.Image(label="Image Input", type="filepath")
            video_input = gr.Video(label="Video Input")
            audio_input = gr.Audio(label="Voice/Audio Input", type="filepath")

            language_input = gr.Dropdown(
                label="Audio Language (if providing audio)",
                choices=list(SORTED_LANGUAGES.keys()),
                value="English",
                interactive=True,
            )

            submit_button = gr.Button("Moderate Content", variant="primary")

        with gr.Column(scale=2):
            gr.Markdown("### 2. See the Results")
            result_output = gr.Markdown(label="Moderation Summary")
            full_response_output = gr.JSON(label="Full API Response")

    # Input order must match moderate_content's positional parameters.
    submit_button.click(
        fn=moderate_content,
        inputs=[text_input, image_input, video_input, audio_input, language_input],
        outputs=[result_output, full_response_output],
    )

    gr.Examples(
        examples=[
            ["This is a test of the system with safe text.", None, None, None, "English"],
            ["I am going to kill the process on my computer.", None, None, None, "English"],
        ],
        inputs=[text_input, image_input, video_input, audio_input, language_input],
        outputs=[result_output, full_response_output],
        fn=moderate_content,
    )
|
|
|
if __name__ == "__main__":
    if not API_BASE_URL:
        # The configured default is an empty string, which cannot be reached.
        logging.warning("API_BASE_URL is not set. Moderation requests will fail until it is configured in your .env file.")
    else:
        logging.info(f"Connecting to API server at: {API_BASE_URL}")
        if API_BASE_URL == "http://127.0.0.1:8000":
            logging.warning("API_BASE_URL points at the local address http://127.0.0.1:8000. Make sure this is correct or set it in your .env file.")
    # 0.0.0.0 binds every network interface, so the demo is reachable from other hosts.
    demo.launch(server_name="0.0.0.0", server_port=7860)