Spaces:
Sleeping
Sleeping
File size: 1,594 Bytes
95e28b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import tensorflow as tf
import tensorflow_io as tfio
import gradio as gr
# Load the pre-trained Keras model once, at import time, so that every call to
# predict_audio reuses the same in-memory model. The path is relative, so it
# is resolved against the process's current working directory.
model = tf.keras.models.load_model('capuchin_bird_audio.h5')
# Function to load and preprocess audio file
def load_wav_mono(filename):
    """Read a WAV file and return it as a mono waveform resampled to 16 kHz.

    Args:
        filename: Path of the WAV file; handed straight to ``tf.io.read_file``.

    Returns:
        A 1-D tensor of audio samples (channel axis removed) at 16000 Hz.
    """
    raw_bytes = tf.io.read_file(filename)
    # Ask the decoder for exactly one channel, then drop that channel axis.
    samples, native_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    mono = tf.squeeze(samples, axis=-1)
    # The resampler is given the input rate as int64.
    native_rate = tf.cast(native_rate, dtype=tf.int64)
    return tfio.audio.resample(mono, rate_in=native_rate, rate_out=16000)
# Function to preprocess input for the model
def test_preprocess_1(file_path):
    """Turn an audio file into the spectrogram tensor fed to the model.

    The waveform from ``load_wav_mono`` is cut to at most 48000 samples
    (3 seconds at the 16 kHz rate that loader resamples to), left-padded with
    zeros up to exactly 48000 samples, and converted to a magnitude STFT.

    Args:
        file_path: Path of the audio file, forwarded to ``load_wav_mono``.

    Returns:
        The magnitude spectrogram with one extra trailing axis and one extra
        leading axis (presumably the channel and batch axes the model
        expects - confirm against the model's input shape).
    """
    clip = load_wav_mono(file_path)[:48000]
    # Shorter clips are padded at the FRONT so the audio sits at the end.
    leading_zeros = tf.zeros([48000] - tf.shape(clip), dtype=tf.float32)
    fixed_length = tf.concat([leading_zeros, clip], 0)
    magnitudes = tf.abs(
        tf.signal.stft(fixed_length, frame_length=320, frame_step=32)
    )
    with_trailing_axis = tf.expand_dims(magnitudes, axis=2)
    return tf.expand_dims(with_trailing_axis, axis=0)
# Function to make predictions
def predict_audio(file_path):
    """Classify an audio file as a Capuchin bird call or not.

    Args:
        file_path: Path of the audio file to classify, or ``None`` when no
            file has been supplied.

    Returns:
        ``"Capuchin bird voice"`` when the model's score is above 0.5,
        ``"Not Capuchin bird voice"`` otherwise, and
        ``"No audio file provided"`` when ``file_path`` is ``None``.

    Raises:
        ValueError: If the model output does not contain exactly one value.
    """
    # A live interface may invoke the callback with no file (for example
    # after the input is cleared); bail out before touching TensorFlow
    # instead of failing inside tf.io.read_file.
    if file_path is None:
        return "No audio file provided"

    input_data = test_preprocess_1(file_path)
    prediction = model.predict(input_data)

    # Pull the single score out explicitly rather than relying on the truth
    # value of an array comparison; .item() raises if there is not exactly
    # one element, so a mis-shaped output still fails loudly.
    score = prediction.item()

    # Threshold logic
    if score > 0.5:
        return "Capuchin bird voice"
    return "Not Capuchin bird voice"
# Gradio Interface
iface = gr.Interface(
    fn=predict_audio,
    # "audio" is not an accepted `type` for gr.File. gr.Audio with
    # type="filepath" hands predict_audio a plain path string, which is what
    # tf.io.read_file needs.
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs="text",
    # Re-run the prediction whenever the input changes, without a submit click.
    live=True,
    interpretation="default",
)
# Launch the interface on localhost
iface.launch()
|