import numpy as np import librosa import tensorflow as tf import streamlit as st import plotly.express as px import pandas as pd import soundfile as sf from pydub import AudioSegment import matplotlib.pyplot as plt import time # Parameters for audio processing window_length = 0.02 # 20ms hop_length = 0.0025 # 2.5ms sample_rate = 22050 global inference_time inference_time=1 # Load TFLite model interpreter = tf.lite.Interpreter(model_path=r"model_breath_logspec_mfcc_cnn.tflite") interpreter.allocate_tensors() input_details, output_details = interpreter.get_input_details(), interpreter.get_output_details() def convert_mp3_to_wav(mp3_path): audio = AudioSegment.from_mp3(mp3_path) wav_path = mp3_path.replace(".mp3", ".wav") audio.export(wav_path, format="wav") return wav_path def extract_breath_features(y, sr): frame_length = int(window_length * sr) hop_length_samples = int(hop_length * sr) zcr = librosa.feature.zero_crossing_rate(y=y, frame_length=frame_length, hop_length=hop_length_samples) rmse = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length_samples) breaths = (zcr.flatten() > 0.1) & (rmse.flatten() > 0.1) return np.where(breaths, 1, 0) def extract_features(y, sr, n_mels=128, n_mfcc=13): try: # Extract MFCC & Log-Mel Spectrogram mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc) logspec = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)) breath_feature = extract_breath_features(y, sr) # Dynamically adjust length based on the shortest feature min_len = min(mfcc.shape[1], logspec.shape[1], len(breath_feature)) # Resize instead of fix_length for better visualization mfcc = librosa.util.fix_length(mfcc, size=min_len, axis=1) logspec = librosa.util.fix_length(logspec, size=min_len, axis=1) breath_feature = librosa.util.fix_length(breath_feature, size=min_len) return np.vstack((mfcc, logspec, breath_feature)) except Exception as e: st.error(f"Error processing") return None def prepare_single_data(features, max_len=500): features = librosa.util.fix_length(features, size=max_len, axis=1) return features[np.newaxis, ..., np.newaxis].astype(np.float32) def predict_audio(features): global inference_time start_time=time.time() prepared_features = prepare_single_data(features) interpreter.set_tensor(input_details[0]['index'], prepared_features) interpreter.invoke() prediction = interpreter.get_tensor(output_details[0]['index']) end_time = time.time() inference_time=start_time-end_time return np.argmax(prediction, axis=1)[0], prediction[0] def plot_waveform(y, sr, start_time, end_time): start_sample, end_sample = int(start_time * sr), int(end_time * sr) y_trimmed = y[start_sample:end_sample] times = np.linspace(start_time, end_time, num=len(y_trimmed)) df = pd.DataFrame({"Time (s)": times, "Amplitude": y_trimmed}) st.plotly_chart(px.line(df, x="Time (s)", y="Amplitude", title="Waveform"), use_container_width=True) return y_trimmed, sr def visualize_features(features, duration): time_axis = np.linspace(0, duration, features.shape[1]) df_breath = pd.DataFrame({"Time (s)": time_axis, "Breath Feature": features[-1]}) st.plotly_chart(px.line(df_breath, x="Time (s)", y="Breath Feature", title="Breath Feature Over Time"), use_container_width=True) def plot_mfcc_and_logspec(y, sr): mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13) logspec = librosa.amplitude_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)) #logspec = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)) with st.sidebar: st.write("### MFCC and Log-Spectrogram") # Plot MFCC fig, ax = plt.subplots(figsize=(5, 3)) librosa.display.specshow(mfcc, sr=sr, x_axis='time') plt.colorbar() plt.title("MFCC") st.pyplot(fig) # Plot Log-Spectrogram fig, ax = plt.subplots(figsize=(5, 3)) librosa.display.specshow(logspec, sr=sr, x_axis='time') plt.colorbar() plt.title("Log-Mel Spectrogram") st.pyplot(fig) st.title('Audio Integrity Net') st.subheader('Documentation to be added') uploaded_file = st.file_uploader('Upload an audio file', type=['wav', 'mp3']) if uploaded_file: with open('temp_audio.wav', 'wb') as f: f.write(uploaded_file.getbuffer()) y, sr = librosa.load('temp_audio.wav', sr=sample_rate) duration = librosa.get_duration(y=y, sr=sr) start_time, end_time = st.slider("Select time range", 0.0, duration, (0.0, duration)) y_trimmed, sr = plot_waveform(y, sr, start_time, end_time) if st.sidebar.button("Show MFCC & Log-Spectrogram"): plot_mfcc_and_logspec(y_trimmed, sr) features = extract_features(y_trimmed, sr) if features is not None: st.success("Feature Extraction Completed!") visualize_features(features, end_time - start_time) prediction, probability = predict_audio(features) if prediction==0: st.subheader(f' Predicted class is Real ') else: st.subheader(f'Predicted class is Fake') st.write(f'Probability of being real: {probability[0] * 100:.2f}%') st.write(f'Probability of being fake: {probability[1] * 100:.2f}%') inference_time=abs(inference_time) st.write(f"Inference Time: {inference_time:.6f} seconds")