2000prasanth committed on
Commit 5ff3539 · verified · 1 Parent(s): d00a071

Upload 4 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ temp_audio_file.wav filter=lfs diff=lfs merge=lfs -text
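The LFS rule added above is the kind of line the Git LFS CLI generates; a minimal sketch of the equivalent commands:

    git lfs track "temp_audio_file.wav"
    git add .gitattributes temp_audio_file.wav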
model_breath_logspec_mfcc_cnn.tflite ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fea718318cecbaaded6f0061747c58e61006ec133550626e3466478f5203c97
+ size 137115984
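The .tflite entry above is a Git LFS pointer, so the 137 MB model must be fetched before streamlit_App.py can load it. A minimal sketch using huggingface_hub, where the repo_id is a placeholder assumption (substitute the actual repository):

    from huggingface_hub import hf_hub_download

    # repo_id is a hypothetical placeholder, not taken from this commit
    model_path = hf_hub_download(
        repo_id="2000prasanth/<repo-name>",
        filename="model_breath_logspec_mfcc_cnn.tflite",
    )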
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ numpy==1.26.0
+ librosa==0.10.1
+ tensorflow==2.19.0
+ streamlit==1.43.2
+
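With the pinned versions above, the environment can be set up before launching the app:

    pip install -r requirements.txt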
streamlit_App.py ADDED
@@ -0,0 +1,109 @@
+ import numpy as np
+ import librosa
+ import tensorflow as tf
+ import streamlit as st
+
+ window_length = 0.02    # 20 ms analysis window
+ hop_length = 0.0025     # 2.5 ms hop between frames
+ sample_rate = 22050     # standard audio sample rate
+ n_mels = 128            # number of mel filter banks
+ threshold_zcr = 0.1     # ZCR threshold for breath detection (tunable)
+ threshold_rmse = 0.1    # RMSE threshold for breath detection (tunable)
+
+ def extract_breath_features(y, sr):
+     frame_length = int(window_length * sr)
+     hop_length_samples = int(hop_length * sr)
+
+     zcr = librosa.feature.zero_crossing_rate(y=y, frame_length=frame_length, hop_length=hop_length_samples)
+     rmse = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length_samples)
+
+     zcr = zcr.T.flatten()
+     rmse = rmse.T.flatten()
+
+     # A frame counts as a breath event when both ZCR and RMSE exceed their thresholds
+     breaths = (zcr > threshold_zcr) & (rmse > threshold_rmse)
+
+     # Binary breath feature: 1 if a breath is present in the frame, else 0
+     breath_feature = np.where(breaths, 1, 0)
+
+     return breath_feature
+
+ def extract_features(file_path, n_mels=128, n_cqt=84, max_len=500, n_mfcc=13):
+     # n_cqt is currently unused
+     try:
+         y, sr = librosa.load(file_path, sr=None)
+
+         # Compute MFCCs and pad/truncate to a fixed number of frames
+         mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
+         mfcc = librosa.util.fix_length(mfcc, size=max_len, axis=1)
+
+         # Compute the log-mel spectrogram and fix its length
+         logspec = librosa.amplitude_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels))
+         logspec = librosa.util.fix_length(logspec, size=max_len, axis=1)
+
+         # Extract the frame-level breath feature and fix its length
+         breath_feature = extract_breath_features(y, sr)
+         breath_feature = librosa.util.fix_length(breath_feature, size=max_len)
+
+         # Stack the features vertically: shape (n_mfcc + n_mels + 1, max_len)
+         return np.vstack((mfcc, logspec, breath_feature))
+     except Exception as e:
+         print(f"Error loading {file_path}: {e}")
+         return None
+
+ # Prepare the stacked features for prediction
+ def prepare_single_data(features, max_len=500):
+     features = librosa.util.fix_length(features, size=max_len, axis=1)
+     features = features[np.newaxis, ..., np.newaxis]  # Add batch and channel dimensions
+     return features
+
+ # Load the saved TensorFlow Lite model
+ interpreter = tf.lite.Interpreter(model_path="model_breath_logspec_mfcc_cnn.tflite")
+ interpreter.allocate_tensors()
+
+ # Get input and output tensor details
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+
+ # Predict the class of an audio file
+ def predict_audio(file_path):
+     features = extract_features(file_path)
+     if features is not None:
+         prepared_features = prepare_single_data(features)
+         # The TFLite input tensor expects FLOAT32
+         prepared_features = prepared_features.astype(np.float32)
+         # Set the input tensor and run inference
+         interpreter.set_tensor(input_details[0]['index'], prepared_features)
+         interpreter.invoke()
+         # Read back the prediction
+         prediction = interpreter.get_tensor(output_details[0]['index'])
+         predicted_class = np.argmax(prediction, axis=1)
+         predicted_prob = prediction[0]  # Class probabilities
+         return predicted_class[0], predicted_prob
+     else:
+         return None, None
+
+ # Streamlit app
+ st.title('Audio Classification: Real vs Fake')
+ st.write('Upload an audio file to classify it as real or fake.')
+
+ # File uploader
+ uploaded_file = st.file_uploader('Choose an audio file', type=['wav', 'mp3'])
+
+ if uploaded_file is not None:
+     # Save the uploaded file temporarily
+     with open('temp_audio_file.wav', 'wb') as f:
+         f.write(uploaded_file.getbuffer())
+
+     # Predict using the loaded model
+     prediction, probability = predict_audio('temp_audio_file.wav')
+     st.write(f'Predicted class is {prediction}\n')
+     st.write(f'Probability of being real: {probability[0]*100:.2f}%\n')
+     st.write(f'Probability of being fake: {probability[1]*100:.2f}%\n')
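With the model file and dependencies in place, the app is served locally via the Streamlit CLI:

    streamlit run streamlit_App.py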
temp_audio_file.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:160a44a876905d90490a048202b70ca8e5685375fa14ed69280948157486e475
+ size 2044844