Update app.py
app.py
CHANGED
@@ -1,109 +1,134 @@
-import numpy as np
-import librosa
-import tensorflow as tf
-import streamlit as st
+import numpy as np
+import librosa
+import librosa.display
+import tensorflow as tf
+import streamlit as st
+import plotly.express as px
+import pandas as pd
+import soundfile as sf
+from pydub import AudioSegment
+import matplotlib.pyplot as plt
+import time
+
+# Parameters for audio processing
+window_length = 0.02  # 20 ms analysis window
+hop_length = 0.0025  # 2.5 ms hop between frames
+sample_rate = 22050
+
+# Updated by predict_audio(); shown in the UI after prediction
+inference_time = 0.0
+
+# Load TFLite model
+interpreter = tf.lite.Interpreter(model_path="model_breath_logspec_mfcc_cnn.tflite")
+interpreter.allocate_tensors()
+input_details, output_details = interpreter.get_input_details(), interpreter.get_output_details()
+
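+# The model is assumed to take one (1, 142, 500, 1) float32 input
+# (13 MFCC rows + 128 log-mel rows + 1 breath row, padded to 500 frames,
+# see prepare_single_data below) and to return two softmax scores [real, fake].
+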
+def convert_mp3_to_wav(mp3_path):
+    # Convert an uploaded MP3 to WAV (pydub decodes MP3 via ffmpeg)
+    audio = AudioSegment.from_mp3(mp3_path)
+    wav_path = mp3_path.replace(".mp3", ".wav")
+    audio.export(wav_path, format="wav")
+    return wav_path
+
+def extract_breath_features(y, sr):
+    # Flag frames whose zero-crossing rate and RMS energy both exceed 0.1
+    # as likely breath activity; returns a 1/0 vector with one entry per frame
+    frame_length = int(window_length * sr)
+    hop_length_samples = int(hop_length * sr)
+    zcr = librosa.feature.zero_crossing_rate(y=y, frame_length=frame_length, hop_length=hop_length_samples)
+    rmse = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length_samples)
+    breaths = (zcr.flatten() > 0.1) & (rmse.flatten() > 0.1)
+    return np.where(breaths, 1, 0)
+
+def extract_features(y, sr, n_mels=128, n_mfcc=13):
+    try:
+        # Extract MFCCs and a log-mel spectrogram, plus the breath indicator
+        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
+        logspec = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels))
+        breath_feature = extract_breath_features(y, sr)
+
+        # Trim every feature to the shortest one so they can be stacked
+        min_len = min(mfcc.shape[1], logspec.shape[1], len(breath_feature))
+        mfcc = librosa.util.fix_length(mfcc, size=min_len, axis=1)
+        logspec = librosa.util.fix_length(logspec, size=min_len, axis=1)
+        breath_feature = librosa.util.fix_length(breath_feature, size=min_len)
+
+        # Stack into a single (n_mfcc + n_mels + 1, min_len) matrix
+        return np.vstack((mfcc, logspec, breath_feature))
+    except Exception as e:
+        st.error(f"Error processing audio: {e}")
+        return None
+
+def prepare_single_data(features, max_len=500):
+    # Pad/truncate to a fixed width and add batch and channel axes
+    features = librosa.util.fix_length(features, size=max_len, axis=1)
+    return features[np.newaxis, ..., np.newaxis].astype(np.float32)
+
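+# For example, a (142, 371) feature matrix (hypothetical frame count) becomes
+# a (1, 142, 500, 1) float32 tensor matching the interpreter input above.
+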
+def predict_audio(features):
+    global inference_time
+    start_time = time.time()
+    prepared_features = prepare_single_data(features)
+    interpreter.set_tensor(input_details[0]['index'], prepared_features)
+    interpreter.invoke()
+    prediction = interpreter.get_tensor(output_details[0]['index'])
+    end_time = time.time()
+    inference_time = end_time - start_time
+    return np.argmax(prediction, axis=1)[0], prediction[0]
+
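+# predict_audio returns (argmax class index, softmax vector); the UI below
+# interprets index 0 as "Real" and index 1 as "Fake".
+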
+def plot_waveform(y, sr, start_time, end_time):
+    start_sample, end_sample = int(start_time * sr), int(end_time * sr)
+    y_trimmed = y[start_sample:end_sample]
+    times = np.linspace(start_time, end_time, num=len(y_trimmed))
+    df = pd.DataFrame({"Time (s)": times, "Amplitude": y_trimmed})
+    st.plotly_chart(px.line(df, x="Time (s)", y="Amplitude", title="Waveform"), use_container_width=True)
+    return y_trimmed, sr
+
+def visualize_features(features, duration):
+    # The breath indicator is the last row of the stacked feature matrix
+    time_axis = np.linspace(0, duration, features.shape[1])
+    df_breath = pd.DataFrame({"Time (s)": time_axis, "Breath Feature": features[-1]})
+    st.plotly_chart(px.line(df_breath, x="Time (s)", y="Breath Feature", title="Breath Feature Over Time"), use_container_width=True)
+
+def plot_mfcc_and_logspec(y, sr):
+    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
+    # melspectrogram returns a power spectrogram, so convert with power_to_db
+    logspec = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128))
+    with st.sidebar:
+        st.write("### MFCC and Log-Spectrogram")
+
+        # Plot MFCC
+        fig, ax = plt.subplots(figsize=(5, 3))
+        img = librosa.display.specshow(mfcc, sr=sr, x_axis='time', ax=ax)
+        fig.colorbar(img, ax=ax)
+        ax.set_title("MFCC")
+        st.pyplot(fig)
+
+        # Plot Log-Mel Spectrogram
+        fig, ax = plt.subplots(figsize=(5, 3))
+        img = librosa.display.specshow(logspec, sr=sr, x_axis='time', ax=ax)
+        fig.colorbar(img, ax=ax)
+        ax.set_title("Log-Mel Spectrogram")
+        st.pyplot(fig)
+
+st.title('Audio Integrity Net')
+st.subheader('Documentation to be added')
+uploaded_file = st.file_uploader('Upload an audio file', type=['wav', 'mp3'])
+
+if uploaded_file:
+    # Save the upload; convert MP3s to WAV before loading
+    if uploaded_file.name.lower().endswith('.mp3'):
+        with open('temp_audio.mp3', 'wb') as f:
+            f.write(uploaded_file.getbuffer())
+        audio_path = convert_mp3_to_wav('temp_audio.mp3')
+    else:
+        audio_path = 'temp_audio.wav'
+        with open(audio_path, 'wb') as f:
+            f.write(uploaded_file.getbuffer())
+
+    y, sr = librosa.load(audio_path, sr=sample_rate)
+    duration = librosa.get_duration(y=y, sr=sr)
+    start_time, end_time = st.slider("Select time range", 0.0, duration, (0.0, duration))
+    y_trimmed, sr = plot_waveform(y, sr, start_time, end_time)
+
+    if st.sidebar.button("Show MFCC & Log-Spectrogram"):
+        plot_mfcc_and_logspec(y_trimmed, sr)
+
+    features = extract_features(y_trimmed, sr)
+    if features is not None:
+        st.success("Feature Extraction Completed!")
+        visualize_features(features, end_time - start_time)
+        prediction, probability = predict_audio(features)
+        if prediction == 0:
+            st.subheader('Predicted class is Real')
+        else:
+            st.subheader('Predicted class is Fake')
+        st.write(f'Probability of being real: {probability[0] * 100:.2f}%')
+        st.write(f'Probability of being fake: {probability[1] * 100:.2f}%')
+        st.write(f"Inference Time: {inference_time:.6f} seconds")
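
For a quick smoke test outside the Streamlit UI, a minimal sketch could look like the following (assuming this file is saved as app.py next to the .tflite model; smoke_test.py and sample.wav are hypothetical names). Importing app executes its module-level Streamlit calls, which only emit warnings when run outside streamlit run:

    # smoke_test.py: hypothetical helper, not part of this commit
    import librosa
    from app import extract_features, predict_audio, sample_rate

    y, sr = librosa.load("sample.wav", sr=sample_rate)
    features = extract_features(y, sr)
    if features is not None:
        label, probs = predict_audio(features)
        print("Real" if label == 0 else "Fake", probs)

The app itself is launched with streamlit run app.py; handling MP3 uploads additionally requires ffmpeg on the host, since pydub shells out to it for decoding.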