File size: 1,594 Bytes
95e28b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import tensorflow as tf
import tensorflow_io as tfio
import gradio as gr

# Pre-trained Keras classifier, loaded once at import time and shared by
# every prediction request.
MODEL_PATH = 'capuchin_bird_audio.h5'
model = tf.keras.models.load_model(MODEL_PATH)

# Read a WAV file from disk as a single-channel 16 kHz waveform
def load_wav_mono(filename):
    """Decode a WAV file into a mono waveform resampled to 16 kHz.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        A 1-D tensor holding the resampled audio samples.
    """
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 asks the decoder for a single channel
    samples, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    # Drop the trailing channel axis so only the sample axis remains
    mono = tf.squeeze(samples, axis=-1)
    # The source rate is handed to the resampler as int64
    return tfio.audio.resample(
        mono,
        rate_in=tf.cast(source_rate, dtype=tf.int64),
        rate_out=16000,
    )

# Turn an audio file into the spectrogram batch the model consumes
def test_preprocess_1(file_path):
    """Build a single-example spectrogram batch from an audio file.

    The waveform is cut to at most 48000 samples and, when shorter, padded
    with zeros at the front so the clip always has exactly 48000 samples.

    Args:
        file_path: Path of the WAV file to preprocess.

    Returns:
        The magnitude STFT of the clip with a trailing channel axis and a
        leading batch axis added.
    """
    clip_length = 48000

    clip = load_wav_mono(file_path)[:clip_length]
    # Zeros go in front of the audio, not after it
    leading_zeros = tf.zeros([clip_length] - tf.shape(clip), dtype=tf.float32)
    padded = tf.concat([leading_zeros, clip], 0)

    magnitude = tf.abs(tf.signal.stft(padded, frame_length=320, frame_step=32))

    # Channel axis last, then batch axis first
    with_channel = tf.expand_dims(magnitude, axis=2)
    return tf.expand_dims(with_channel, axis=0)

# Function to make predictions
def predict_audio(file_path):
    """Classify an audio file as a Capuchin bird call or not.

    Args:
        file_path: Path of the audio file, or an object exposing that path
            through a ``name`` attribute (upload widgets may hand over a
            temp-file wrapper instead of a string). ``None`` means no file
            was supplied.

    Returns:
        "Capuchin bird voice" when the model score is above 0.5,
        "Not Capuchin bird voice" otherwise, or "No audio file provided"
        when ``file_path`` is ``None``.
    """
    import os

    # A live interface re-runs the function when the input is cleared, so
    # there may be nothing to read.
    if file_path is None:
        return "No audio file provided"

    # Path-like values are used as they are; anything else is assumed to be
    # a file wrapper whose `.name` holds the path on disk.
    if isinstance(file_path, (str, os.PathLike)):
        path = os.fspath(file_path)
    else:
        path = getattr(file_path, "name", file_path)

    input_data = test_preprocess_1(path)
    prediction = model.predict(input_data)

    # Compare one scalar against the threshold instead of relying on the
    # truth value of a whole array.
    # NOTE(review): assumes the model emits a single score per clip - confirm.
    score = float(tf.reshape(prediction, [-1])[0])

    # Threshold logic
    if score > 0.5:
        return "Capuchin bird voice"
    return "Not Capuchin bird voice"

# Gradio Interface
# The upload widget is an Audio component in "filepath" mode, so the
# prediction function receives the path of the uploaded file on disk.
# (The File component's `type` only selects how the file is handed over and
# has no "audio" mode.)
# No `interpretation` argument is passed: the output is plain text, which
# gives the default interpreter no label or number to score against.
iface = gr.Interface(
    fn=predict_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs="text",
    live=True,
)

# Launch the interface on localhost
iface.launch()