2000prasanth committed
Commit 51c17e6 · verified · 1 Parent(s): dc7d5e0

Update app.py

Files changed (1)
  1. app.py +134 -109
app.py CHANGED
@@ -1,109 +1,134 @@
- import numpy as np
- import librosa
- import tensorflow as tf
- import streamlit as st
-
- window_length = 0.02  # 20ms window length
- hop_length = 0.0025  # 2.5ms hop length
- sample_rate = 22050  # Standard audio sample rate
- n_mels = 128  # Number of mel filter banks
- threshold_zcr = 0.1  # Adjust this threshold to detect breath based on ZCR
- threshold_rmse = 0.1  # Adjust this threshold to detect breath based on RMSE
-
- def extract_breath_features(y, sr):
-     frame_length = int(window_length * sr)
-     hop_length_samples = int(hop_length * sr)
-
-     zcr = librosa.feature.zero_crossing_rate(y=y, frame_length=frame_length, hop_length=hop_length_samples)
-     rmse = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length_samples)
-
-     zcr = zcr.T.flatten()
-     rmse = rmse.T.flatten()
-
-     # Calculate breath events
-     breaths = (zcr > threshold_zcr) & (rmse > threshold_rmse)
-
-     # Create a breath feature: 1 if breath is present, else 0
-     breath_feature = np.where(breaths, 1, 0)
-
-     return breath_feature
-
- def extract_features(file_path, n_mels=128, n_cqt=84, max_len=500, n_mfcc=13):
-     try:
-         y, sr = librosa.load(file_path, sr=None)
-
-         # Compute MFCC
-         mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
-         mfcc = librosa.util.fix_length(mfcc, size=max_len, axis=1)  # Fix length
-
-         # Compute log-mel spectrogram
-         logspec = librosa.amplitude_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels))
-         logspec = librosa.util.fix_length(logspec, size=max_len, axis=1)  # Fix length
-
-         # Extract breath features
-         breath_feature = extract_breath_features(y, sr)
-         breath_feature = librosa.util.fix_length(breath_feature, size=max_len)  # Fix length
-
-         # Stack features vertically
-         return np.vstack((mfcc, logspec, breath_feature))
-     except Exception as e:
-         print(f"Error loading {file_path}: {e}")
-         return None
-
- # Function to prepare the features for prediction
- def prepare_single_data(features, max_len=500):
-     features = librosa.util.fix_length(features, size=max_len, axis=1)
-     features = features[np.newaxis, ..., np.newaxis]  # Add batch and channel dimensions
-     return features
-
- # Load the saved TensorFlow Lite model
- interpreter = tf.lite.Interpreter(model_path=r"model_breath_logspec_mfcc_cnn.tflite")
- interpreter.allocate_tensors()
-
- # Get input and output details
- input_details = interpreter.get_input_details()
- output_details = interpreter.get_output_details()
-
- # Function to predict audio class
- def predict_audio(file_path):
-     features = extract_features(file_path)
-     if features is not None:
-         prepared_features = prepare_single_data(features)
-         # Ensure the prepared features are of type FLOAT32
-         prepared_features = prepared_features.astype(np.float32)  # Convert to FLOAT32
-         # Set the tensor to the prepared input data
-         interpreter.set_tensor(input_details[0]['index'], prepared_features)
-         interpreter.invoke()
-         # Get the prediction result
-         prediction = interpreter.get_tensor(output_details[0]['index'])
-         predicted_class = np.argmax(prediction, axis=1)
-         predicted_prob = prediction[0]  # Get the probabilities for EER calculation
-         return predicted_class[0], predicted_prob  # Return class index and probabilities
-     else:
-         return None, None
-
- # Streamlit app
- st.title('Audio Classification: Real vs Fake')
- st.write('Upload an audio file to classify it as real or fake.')
-
- # File uploader
- uploaded_file = st.file_uploader('Choose an audio file', type=['wav', 'mp3'])
-
- if uploaded_file is not None:
-     # Save the uploaded file temporarily
-     with open('temp_audio_file.wav', 'wb') as f:
-         f.write(uploaded_file.getbuffer())
-
-     # Predict using the loaded model
-     prediction, probablity = predict_audio('temp_audio_file.wav')
-     st.write(f'Predicted class is {prediction} \n')
-     st.write(f'Probability of being real: {probablity[0]*100:.2f}% \n')
-     st.write(f'Probability of being fake: {probablity[1]*100:.2f}% \n')
+ import numpy as np
+ import librosa
+ import librosa.display  # needed for librosa.display.specshow below
+ import tensorflow as tf
+ import streamlit as st
+ import plotly.express as px
+ import pandas as pd
+ from pydub import AudioSegment
+ import matplotlib.pyplot as plt
+ import time
+
+ # Parameters for audio processing
+ window_length = 0.02  # 20 ms
+ hop_length = 0.0025  # 2.5 ms
+ sample_rate = 22050
+ inference_time = 0.0  # set by predict_audio()
+
+ # Load TFLite model
+ interpreter = tf.lite.Interpreter(model_path="model_breath_logspec_mfcc_cnn.tflite")
+ interpreter.allocate_tensors()
+ input_details, output_details = interpreter.get_input_details(), interpreter.get_output_details()
+
+ def convert_mp3_to_wav(mp3_path):
+     audio = AudioSegment.from_mp3(mp3_path)
+     wav_path = mp3_path.replace(".mp3", ".wav")
+     audio.export(wav_path, format="wav")
+     return wav_path
+
+ def extract_breath_features(y, sr):
+     frame_length = int(window_length * sr)
+     hop_length_samples = int(hop_length * sr)
+     zcr = librosa.feature.zero_crossing_rate(y=y, frame_length=frame_length, hop_length=hop_length_samples)
+     rmse = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length_samples)
+     # Flag frames where both ZCR and RMSE exceed their empirical breath thresholds
+     breaths = (zcr.flatten() > 0.1) & (rmse.flatten() > 0.1)
+     return np.where(breaths, 1, 0)
+
+ def extract_features(y, sr, n_mels=128, n_mfcc=13):
+     try:
+         # Extract MFCC & log-mel spectrogram
+         mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
+         logspec = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels))
+         breath_feature = extract_breath_features(y, sr)
+
+         # Trim every feature to the shortest one so they can be stacked
+         min_len = min(mfcc.shape[1], logspec.shape[1], len(breath_feature))
+         mfcc = librosa.util.fix_length(mfcc, size=min_len, axis=1)
+         logspec = librosa.util.fix_length(logspec, size=min_len, axis=1)
+         breath_feature = librosa.util.fix_length(breath_feature, size=min_len)
+
+         return np.vstack((mfcc, logspec, breath_feature))
+     except Exception as e:
+         st.error(f"Error processing audio: {e}")
+         return None
+
+ def prepare_single_data(features, max_len=500):
+     features = librosa.util.fix_length(features, size=max_len, axis=1)
+     return features[np.newaxis, ..., np.newaxis].astype(np.float32)
+
+ def predict_audio(features):
+     global inference_time
+     start_time = time.time()
+     prepared_features = prepare_single_data(features)
+     interpreter.set_tensor(input_details[0]['index'], prepared_features)
+     interpreter.invoke()
+     prediction = interpreter.get_tensor(output_details[0]['index'])
+     inference_time = time.time() - start_time  # elapsed wall-clock time
+     return np.argmax(prediction, axis=1)[0], prediction[0]
+
+ def plot_waveform(y, sr, start_time, end_time):
+     start_sample, end_sample = int(start_time * sr), int(end_time * sr)
+     y_trimmed = y[start_sample:end_sample]
+     times = np.linspace(start_time, end_time, num=len(y_trimmed))
+     df = pd.DataFrame({"Time (s)": times, "Amplitude": y_trimmed})
+     st.plotly_chart(px.line(df, x="Time (s)", y="Amplitude", title="Waveform"), use_container_width=True)
+     return y_trimmed, sr
+
+ def visualize_features(features, duration):
+     time_axis = np.linspace(0, duration, features.shape[1])
+     df_breath = pd.DataFrame({"Time (s)": time_axis, "Breath Feature": features[-1]})
+     st.plotly_chart(px.line(df_breath, x="Time (s)", y="Breath Feature", title="Breath Feature Over Time"), use_container_width=True)
+
+ def plot_mfcc_and_logspec(y, sr):
+     mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
+     # melspectrogram returns power, so power_to_db matches extract_features above
+     logspec = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128))
+     with st.sidebar:
+         st.write("### MFCC and Log-Spectrogram")
+
+         # Plot MFCC
+         fig, ax = plt.subplots(figsize=(5, 3))
+         img = librosa.display.specshow(mfcc, sr=sr, x_axis='time', ax=ax)
+         fig.colorbar(img, ax=ax)
+         ax.set_title("MFCC")
+         st.pyplot(fig)
+
+         # Plot log-mel spectrogram
+         fig, ax = plt.subplots(figsize=(5, 3))
+         img = librosa.display.specshow(logspec, sr=sr, x_axis='time', ax=ax)
+         fig.colorbar(img, ax=ax)
+         ax.set_title("Log-Mel Spectrogram")
+         st.pyplot(fig)
+
+ st.title('Audio Integrity Net')
+ st.subheader('Documentation to be added')
+ uploaded_file = st.file_uploader('Upload an audio file', type=['wav', 'mp3'])
+
+ if uploaded_file:
+     # Save the upload under its real extension; convert MP3 to WAV so librosa can decode it
+     suffix = '.mp3' if uploaded_file.name.lower().endswith('.mp3') else '.wav'
+     temp_path = f'temp_audio{suffix}'
+     with open(temp_path, 'wb') as f:
+         f.write(uploaded_file.getbuffer())
+     if suffix == '.mp3':
+         temp_path = convert_mp3_to_wav(temp_path)
+
+     y, sr = librosa.load(temp_path, sr=sample_rate)
+     duration = librosa.get_duration(y=y, sr=sr)
+     start_time, end_time = st.slider("Select time range", 0.0, duration, (0.0, duration))
+     y_trimmed, sr = plot_waveform(y, sr, start_time, end_time)
+
+     if st.sidebar.button("Show MFCC & Log-Spectrogram"):
+         plot_mfcc_and_logspec(y_trimmed, sr)
+
+     features = extract_features(y_trimmed, sr)
+     if features is not None:
+         st.success("Feature Extraction Completed!")
+         visualize_features(features, end_time - start_time)
+         prediction, probability = predict_audio(features)
+         if prediction == 0:
+             st.subheader('Predicted class is Real')
+         else:
+             st.subheader('Predicted class is Fake')
+         st.write(f'Probability of being real: {probability[0] * 100:.2f}%')
+         st.write(f'Probability of being fake: {probability[1] * 100:.2f}%')
+         st.write(f"Inference Time: {inference_time:.6f} seconds")
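A quick offline sanity check of the new pipeline (a minimal sketch, not part of the commit: it assumes app.py and the .tflite model sit in the working directory, and that Streamlit merely warns when its calls run outside `streamlit run`, leaving the `if uploaded_file:` branch idle):

import numpy as np
import app  # executes app.py's top-level code; UI calls only emit warnings here

# Three seconds of random noise standing in for an uploaded clip
y = np.random.uniform(-0.5, 0.5, size=3 * app.sample_rate).astype(np.float32)

features = app.extract_features(y, app.sample_rate)  # (13 + 128 + 1) x n_frames stack
pred_class, probs = app.predict_audio(features)      # class 0 = Real, 1 = Fake
print("class:", "Real" if pred_class == 0 else "Fake", "probs:", probs)
print(f"inference took {app.inference_time:.6f} s")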