# app_gradio.py
import gradio as gr
import numpy as np
import os
import yaml
from dotenv import load_dotenv
import io
from scipy.io.wavfile import read as read_wav

# Correctly import from the drive_paddy package structure
from src.detection.factory import get_detector
from src.alerting.alert_system import get_alerter

# --- Load Configuration and Environment Variables ---
# This part is the same as our Streamlit app.
load_dotenv()
config_path = 'config.yaml'
with open(config_path, 'r') as f:
    config = yaml.safe_load(f)
secrets = {
    "gemini_api_key": os.getenv("GEMINI_API_KEY"),
}

# --- Initialize Backend Components ---
# We create these once and reuse them across frames.
detector = get_detector(config)
alerter = get_alerter(config, secrets["gemini_api_key"])

# --- Audio Processing for Gradio ---
# Gradio's gr.Audio component needs a specific format: (sample_rate, numpy_array).
def process_audio_for_gradio(audio_bytes):
    """Converts in-memory audio bytes to a format Gradio can play."""
    byte_io = io.BytesIO(audio_bytes)
    # gTTS produces MP3, but scipy.io.wavfile's read() expects WAV, so we
    # first transcode the MP3 bytes to WAV in memory. This requires pydub
    # (which in turn needs ffmpeg available for MP3 decoding).
    try:
        from pydub import AudioSegment
        audio = AudioSegment.from_mp3(byte_io)
        wav_byte_io = io.BytesIO()
        audio.export(wav_byte_io, format="wav")
        wav_byte_io.seek(0)
        sample_rate, data = read_wav(wav_byte_io)
        return (sample_rate, data)
    except Exception as e:
        print(f"Could not process audio for Gradio: {e}")
        return None

# --- Main Processing Function for Gradio ---
# This function is the core of the app. It takes a webcam frame and returns
# updates for all of the output components.
def process_live_frame(frame):
    """
    Takes a single frame from the Gradio webcam input, processes it,
    and returns the processed frame, status text, and any audio alert.
    """
    if frame is None:
        # Return placeholder values until the webcam delivers a frame.
        blank_image = np.zeros((480, 640, 3), dtype=np.uint8)
        return blank_image, "Status: Inactive", None

    # Process the frame using our existing detector.
    processed_frame, indicators, _ = detector.process_frame(frame)
    drowsiness_level = indicators.get("drowsiness_level", "Awake")
    lighting = indicators.get("lighting", "Good")
    score = indicators.get("details", {}).get("Score", 0)

    # Build the status text.
    status_text = f"Lighting: {lighting}\n"
    if lighting == "Low":
        status_text += "Detection paused due to low light."
    else:
        status_text += f"Status: {drowsiness_level}\nScore: {score:.2f}"

    # Handle alerts: trigger on any non-awake level, reset once awake again.
    audio_output = None
    if drowsiness_level != "Awake":
        audio_data = alerter.trigger_alert(level=drowsiness_level)
        if audio_data:
            audio_output = process_audio_for_gradio(audio_data)
    else:
        alerter.reset_alert()

    # Return all the values needed to update the UI.
    return processed_frame, status_text, audio_output
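# For reference, a minimal sketch of the `indicators` dict that
# process_live_frame reads. Only the keys used above are shown; the full
# schema is whatever our detector in src/detection actually emits, so treat
# this as an illustration, not a contract:
#
#   {
#       "drowsiness_level": "Awake",  # any other value triggers an audio alert
#       "lighting": "Good",           # "Low" pauses detection in the status text
#       "details": {"Score": 0.0},    # running drowsiness score shown in the UI
#   }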
# --- Gradio UI Definition ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="blue")) as app:
    gr.Markdown("# 🚗 Drive Paddy - Drowsiness Detection (Gradio)")
    gr.Markdown("A live test using Gradio's webcam component. This can be more stable than WebRTC in some environments.")

    with gr.Row():
        with gr.Column():
            # Input: live webcam feed.
            webcam_input = gr.Image(sources=["webcam"], streaming=True, label="Live Camera Feed")
        with gr.Column():
            # Output 1: processed video feed.
            processed_output = gr.Image(label="Processed Feed")
            # Output 2: live status text.
            status_output = gr.Textbox(label="Live Status", lines=3, interactive=False)
            # Output 3: hidden audio player for alerts.
            audio_alert_output = gr.Audio(autoplay=True, visible=False)

    # Link the webcam input to the processing function, and the function to the outputs.
    webcam_input.stream(
        fn=process_live_frame,
        inputs=[webcam_input],
        outputs=[processed_output, status_output, audio_alert_output]
    )

# --- Launch the App ---
# REMOVED: the 'if __name__ == "__main__":' block. Hugging Face runs this file
# as a module and needs to find the 'app' object at import time.
app.launch(debug=True)
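# Note on launching: `debug=True` surfaces tracebacks in the Space logs. The
# same call works for local testing; if you need a temporary public URL while
# debugging outside Hugging Face, gr.Blocks.launch() also accepts the standard
# `share=True` flag, e.g.:
#
#   app.launch(debug=True, share=True)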