# app_gradio.py
import gradio as gr
import numpy as np
import os
import yaml
from dotenv import load_dotenv
import io
from scipy.io.wavfile import read as read_wav

# Import the detector and alerter factories from the drive_paddy package structure
from src.detection.factory import get_detector
from src.alerting.alert_system import get_alerter

# --- Load Configuration and Environment Variables ---
# This part is the same as our Streamlit app
load_dotenv()
config_path = 'config.yaml'
with open(config_path, 'r') as f:
    config = yaml.safe_load(f)
secrets = {
    "gemini_api_key": os.getenv("GEMINI_API_KEY"),
}
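
# For illustration only, a hypothetical shape for config.yaml -- these keys
# are assumptions, not the project's real schema; the actual fields are
# whatever get_detector() and get_alerter() read:
#
#   detection:
#     strategy: geometric        # hypothetical key
#   alerting:
#     cooldown_seconds: 10       # hypothetical key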

# --- Initialize Backend Components ---
# We create these once and reuse them.
detector = get_detector(config)
alerter = get_alerter(config, secrets["gemini_api_key"])
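# Note: os.getenv returns None when GEMINI_API_KEY is unset; whether the
# alerter degrades gracefully in that case depends on get_alerter's
# implementation.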

# --- Audio Processing for Gradio ---
# Gradio's gr.Audio component needs a specific format: (sample_rate, numpy_array)
def process_audio_for_gradio(audio_bytes):
    """Converts in-memory audio bytes to a format Gradio can play."""
    # gTTS creates MP3, so we read it as such
    byte_io = io.BytesIO(audio_bytes)
    # The 'read' function from scipy.io.wavfile expects a WAV file.
    # We need to first convert the MP3 bytes from gTTS to WAV bytes.
    # This requires pydub.
    try:
        from pydub import AudioSegment  # lazy import: a missing dependency only disables audio alerts
        audio = AudioSegment.from_mp3(byte_io)
        wav_byte_io = io.BytesIO()
        audio.export(wav_byte_io, format="wav")
        wav_byte_io.seek(0)
        
        sample_rate, data = read_wav(wav_byte_io)
        return (sample_rate, data)
    except Exception as e:
        print(f"Could not process audio for Gradio: {e}")
        return None
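
# Hedged usage sketch for the helper above, commented out so it never runs on
# import. It assumes gTTS is installed (the alerter already relies on it for
# speech synthesis):
#
#   from gtts import gTTS
#   buf = io.BytesIO()
#   gTTS("Please stay alert!").write_to_fp(buf)
#   audio = process_audio_for_gradio(buf.getvalue())
#   # -> (sample_rate, numpy_array) on success, or None if pydub/ffmpeg is missing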

# --- Main Processing Function for Gradio ---
# This function is the core of the app. It takes a webcam frame and returns
# updates for all the output components.
def process_live_frame(frame):
    """
    Takes a single frame from the Gradio webcam input, processes it,
    and returns the processed frame, status text, and any audio alerts.
    """
    if frame is None:
        # The stream can deliver None (e.g., before the webcam starts), so
        # return placeholder outputs instead of crashing.
        blank_image = np.zeros((480, 640, 3), dtype=np.uint8)
        return blank_image, "Status: Inactive", None

    # Process the frame using our existing detector
    processed_frame, indicators, _ = detector.process_frame(frame)
    drowsiness_level = indicators.get("drowsiness_level", "Awake")
    lighting = indicators.get("lighting", "Good")
    score = indicators.get("details", {}).get("Score", 0)
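    # Inferred from the .get() calls above (not from the detector's docs),
    # `indicators` has roughly this shape:
    #   {"drowsiness_level": "Awake" | ..., "lighting": "Good" | "Low",
    #    "details": {"Score": <float>}}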

    # Build the status text
    status_text = f"Lighting: {lighting}\n"
    if lighting == "Low":
        status_text += "Detection paused due to low light."
    else:
        status_text += f"Status: {drowsiness_level}\nScore: {score:.2f}"

    # Handle alerts
    audio_output = None
    if drowsiness_level != "Awake":
        audio_data = alerter.trigger_alert(level=drowsiness_level)
        if audio_data:
            audio_output = process_audio_for_gradio(audio_data)
    else:
        alerter.reset_alert()

    # Return all the values needed to update the UI
    return processed_frame, status_text, audio_output

# --- Gradio UI Definition ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="blue")) as app:
    gr.Markdown("# πŸš— Drive Paddy - Drowsiness Detection (Gradio)")
    gr.Markdown("A live test using Gradio's webcam component. This can be more stable than WebRTC in some environments.")

    with gr.Row():
        with gr.Column():
            # Input: Live webcam feed
            webcam_input = gr.Image(sources=["webcam"], streaming=True, label="Live Camera Feed")
        with gr.Column():
            # Output 1: Processed video feed
            processed_output = gr.Image(label="Processed Feed")
            # Output 2: Live status text
            status_output = gr.Textbox(label="Live Status", lines=3, interactive=False)
            # Output 3: Hidden audio player for alerts
            audio_alert_output = gr.Audio(autoplay=True, visible=False)

    # Wire the webcam stream into the processing function; the outputs list
    # order must match the tuple returned by process_live_frame.
    webcam_input.stream(
        fn=process_live_frame,
        inputs=[webcam_input],
        outputs=[processed_output, status_output, audio_alert_output]
    )

# --- Launch the App ---
# Note: there is deliberately no `if __name__ == "__main__":` guard here.
# Hugging Face Spaces runs this file directly and needs the `app` object
# launched at module level.
app.launch(debug=True)