tahirsher committed on
Commit
dfe80a0
·
verified ·
1 Parent(s): f6dc6c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -95,6 +95,7 @@ print(f"✅ Loaded {len(transcripts)} transcripts.")
95
  def load_and_process_audio(audio_path):
96
  waveform, sample_rate = torchaudio.load(audio_path)
97
  waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
 
98
  input_features = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_features[0]
99
  return input_features
100
 
@@ -119,9 +120,10 @@ st.sidebar.title("🔧 Fine-Tuning Hyperparameters")
119
  num_epochs = st.sidebar.slider("Epochs", min_value=1, max_value=10, value=3)
120
  learning_rate = st.sidebar.select_slider("Learning Rate", options=[5e-4, 1e-4, 5e-5, 1e-5], value=5e-5)
121
  batch_size = st.sidebar.select_slider("Batch Size", options=[2, 4, 8, 16], value=8)
 
122
 
123
  # ================================
124
- # 7️⃣ Streamlit ASR Web App (Fast Decoding & Adversarial Attack Detection)
125
  # ================================
126
  st.title("🎙️ Speech-to-Text ASR Model with Security Features 🎢")
127
 
@@ -134,9 +136,9 @@ if audio_file:
134
 
135
  waveform, sample_rate = torchaudio.load(audio_path)
136
  waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
 
137
 
138
  # Simulate an adversarial attack by injecting random noise
139
- attack_strength = st.sidebar.slider("Attack Strength", 0.0, 0.1, 0.2, 0.5, 0.7,0.9)
140
  adversarial_waveform = waveform + (attack_strength * torch.randn_like(waveform))
141
  adversarial_waveform = torch.clamp(adversarial_waveform, -1.0, 1.0)
142
 
@@ -147,7 +149,7 @@ if audio_file:
147
  transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
148
 
149
  if attack_strength > 0.1:
150
- st.warning("⚠️ Adversarial attack detected! Transcription secured.")
151
 
152
  st.success("📄 Secure Transcription:")
153
  st.write(transcription)
 
95
  def load_and_process_audio(audio_path):
96
  waveform, sample_rate = torchaudio.load(audio_path)
97
  waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
98
+ waveform = waveform.to(dtype=torch.float32)
99
  input_features = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_features[0]
100
  return input_features
101
 
 
120
  num_epochs = st.sidebar.slider("Epochs", min_value=1, max_value=10, value=3)
121
  learning_rate = st.sidebar.select_slider("Learning Rate", options=[5e-4, 1e-4, 5e-5, 1e-5], value=5e-5)
122
  batch_size = st.sidebar.select_slider("Batch Size", options=[2, 4, 8, 16], value=8)
123
+ attack_strength = st.sidebar.slider("Attack Strength", 0.0, 0.9, 0.1)
124
 
125
  # ================================
126
+ # 7️⃣ Streamlit ASR Web App (Fast Decoding & Security Features)
127
  # ================================
128
  st.title("🎙️ Speech-to-Text ASR Model with Security Features 🎢")
129
 
 
136
 
137
  waveform, sample_rate = torchaudio.load(audio_path)
138
  waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
139
+ waveform = waveform.to(dtype=torch.float32)
140
 
141
  # Simulate an adversarial attack by injecting random noise
 
142
  adversarial_waveform = waveform + (attack_strength * torch.randn_like(waveform))
143
  adversarial_waveform = torch.clamp(adversarial_waveform, -1.0, 1.0)
144
 
 
149
  transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
150
 
151
  if attack_strength > 0.1:
152
+ st.warning("⚠️ Adversarial attack detected! Transcription may be affected.")
153
 
154
  st.success("📄 Secure Transcription:")
155
  st.write(transcription)