Update app.py
Browse files
app.py
CHANGED
@@ -95,6 +95,7 @@ print(f"✅ Loaded {len(transcripts)} transcripts.")
|
|
95 |
def load_and_process_audio(audio_path):
|
96 |
waveform, sample_rate = torchaudio.load(audio_path)
|
97 |
waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
|
|
|
98 |
input_features = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_features[0]
|
99 |
return input_features
|
100 |
|
@@ -119,9 +120,10 @@ st.sidebar.title("π§ Fine-Tuning Hyperparameters")
|
|
119 |
num_epochs = st.sidebar.slider("Epochs", min_value=1, max_value=10, value=3)
|
120 |
learning_rate = st.sidebar.select_slider("Learning Rate", options=[5e-4, 1e-4, 5e-5, 1e-5], value=5e-5)
|
121 |
batch_size = st.sidebar.select_slider("Batch Size", options=[2, 4, 8, 16], value=8)
|
|
|
122 |
|
123 |
# ================================
|
124 |
-
# 7️⃣ Streamlit ASR Web App (Fast Decoding &
|
125 |
# ================================
|
126 |
st.title("ποΈ Speech-to-Text ASR Model with Security Features πΆ")
|
127 |
|
@@ -134,9 +136,9 @@ if audio_file:
|
|
134 |
|
135 |
waveform, sample_rate = torchaudio.load(audio_path)
|
136 |
waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
|
|
|
137 |
|
138 |
# Simulate an adversarial attack by injecting random noise
|
139 |
-
attack_strength = st.sidebar.slider("Attack Strength", 0.0, 0.1, 0.2, 0.5, 0.7,0.9)
|
140 |
adversarial_waveform = waveform + (attack_strength * torch.randn_like(waveform))
|
141 |
adversarial_waveform = torch.clamp(adversarial_waveform, -1.0, 1.0)
|
142 |
|
@@ -147,7 +149,7 @@ if audio_file:
|
|
147 |
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
148 |
|
149 |
if attack_strength > 0.1:
|
150 |
-
st.warning("⚠️ Adversarial attack detected! Transcription
|
151 |
|
152 |
st.success("π Secure Transcription:")
|
153 |
st.write(transcription)
|
|
|
95 |
def load_and_process_audio(audio_path):
|
96 |
waveform, sample_rate = torchaudio.load(audio_path)
|
97 |
waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
|
98 |
+
waveform = waveform.to(dtype=torch.float32)
|
99 |
input_features = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_features[0]
|
100 |
return input_features
|
101 |
|
|
|
120 |
num_epochs = st.sidebar.slider("Epochs", min_value=1, max_value=10, value=3)
|
121 |
learning_rate = st.sidebar.select_slider("Learning Rate", options=[5e-4, 1e-4, 5e-5, 1e-5], value=5e-5)
|
122 |
batch_size = st.sidebar.select_slider("Batch Size", options=[2, 4, 8, 16], value=8)
|
123 |
+
attack_strength = st.sidebar.slider("Attack Strength", 0.0, 0.9, 0.1)
|
124 |
|
125 |
# ================================
|
126 |
+
# 7️⃣ Streamlit ASR Web App (Fast Decoding & Security Features)
|
127 |
# ================================
|
128 |
st.title("ποΈ Speech-to-Text ASR Model with Security Features πΆ")
|
129 |
|
|
|
136 |
|
137 |
waveform, sample_rate = torchaudio.load(audio_path)
|
138 |
waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
|
139 |
+
waveform = waveform.to(dtype=torch.float32)
|
140 |
|
141 |
# Simulate an adversarial attack by injecting random noise
|
|
|
142 |
adversarial_waveform = waveform + (attack_strength * torch.randn_like(waveform))
|
143 |
adversarial_waveform = torch.clamp(adversarial_waveform, -1.0, 1.0)
|
144 |
|
|
|
149 |
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
150 |
|
151 |
if attack_strength > 0.1:
|
152 |
+
st.warning("⚠️ Adversarial attack detected! Transcription may be affected.")
|
153 |
|
154 |
st.success("π Secure Transcription:")
|
155 |
st.write(transcription)
|