roychao19477 committed · Commit 678d466 · 1 Parent(s): 6e97a1b
Version revise
app.py
CHANGED
@@ -103,46 +103,13 @@ def run_avse_inference(video_path, audio_path):
 
 
     # Combine into input dict (match what model.enhance expects)
-
-
-
-
+    data = {
+        "noisy_audio": noisy,
+        "video_frames": bg_frames[np.newaxis, ...]
+    }
 
     with torch.no_grad():
-
-        #estimated = avse_model.enhance(data).reshape(-1)
-        # Version 2
-        chunk_sec = 6
-        sr = 16000
-        audio_chunk_len = chunk_sec * sr  # 48000
-        video_chunk_len = chunk_sec * 25  # 75
-
-        estimated_chunks = []
-
-        for i in range(0, len(noisy), audio_chunk_len):
-            audio_chunk = noisy[i:i+audio_chunk_len]
-            if len(audio_chunk) < audio_chunk_len:
-                pad = np.zeros(audio_chunk_len - len(audio_chunk), dtype=audio_chunk.dtype)
-                audio_chunk = np.concatenate([audio_chunk, pad])
-
-            vid_idx = i // sr * 25  # convert audio index to video frame index
-            #video_chunk = bg_frames[0, vid_idx:vid_idx+video_chunk_len, :, :]
-            video_chunk = bg_frames[vid_idx:vid_idx+video_chunk_len, :, :]
-            if video_chunk.shape[0] < video_chunk_len:
-                pad = np.zeros((video_chunk_len - video_chunk.shape[0], *video_chunk.shape[1:]), dtype=video_chunk.dtype)
-                video_chunk = np.concatenate([video_chunk, pad], axis=0)
-
-            data = {
-                "noisy_audio": audio_chunk,
-                "video_frames": video_chunk[np.newaxis, ...]
-            }
-
-            with torch.no_grad():
-                out = avse_model.enhance(data).reshape(-1)
-                estimated_chunks.append(out)
-
-        estimated = np.concatenate(estimated_chunks)[:len(noisy)]
-
+        estimated = avse_model.enhance(data).reshape(-1)
 
     # Save result
     tmp_wav = audio_path.replace(".wav", "_enhanced.wav")
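For context, below is a minimal standalone sketch of the full-clip inference path this commit switches to, assuming (per the surrounding code) that `noisy` is a 16 kHz mono waveform as a NumPy array and `bg_frames` is a stack of 25 fps frames of shape (T, H, W). The `DummyAVSE` stub, the 88x88 frame size, and the two-second dummy inputs are placeholders so the snippet runs on its own; in app.py the real loaded `avse_model` and the preprocessed audio/video take their place.

import numpy as np
import torch

class DummyAVSE:
    """Hypothetical stand-in for the Space's AVSE model; enhance() just echoes the audio."""
    def enhance(self, data):
        return np.asarray(data["noisy_audio"], dtype=np.float32)

avse_model = DummyAVSE()

# Two seconds of dummy inputs: 16 kHz audio and 25 fps grayscale frames (T, H, W).
sr, fps = 16000, 25
noisy = np.random.randn(2 * sr).astype(np.float32)
bg_frames = np.random.rand(2 * fps, 88, 88).astype(np.float32)

# Combine into the input dict the model's enhance() expects (as in the diff above).
data = {
    "noisy_audio": noisy,
    "video_frames": bg_frames[np.newaxis, ...],  # add a leading batch axis
}

with torch.no_grad():
    estimated = avse_model.enhance(data).reshape(-1)

print(estimated.shape)  # flattened enhanced waveform, same length as the noisy input here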