Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import math
|
|
7 |
import librosa
|
8 |
from PIL import Image, ImageSequence
|
9 |
from decord import VideoReader, cpu
|
|
|
10 |
from transformers import AutoModel, AutoTokenizer, AutoProcessor
|
11 |
|
12 |
# Variables
|
@@ -51,13 +52,15 @@ def uniform_sample(idxs, n):
|
|
51 |
return [idxs[int(i * gap + gap / 2)] for i in range(n)]
|
52 |
|
53 |
def build_omni_chunks(path, sr=16000, seconds_per_unit=1):
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
58 |
content = []
|
59 |
for i in range(total_units):
|
60 |
-
|
|
|
61 |
audio_chunk = audio_np[sr * i * seconds_per_unit : sr * (i + 1) * seconds_per_unit]
|
62 |
content.extend(["<unit>", frame, audio_chunk])
|
63 |
return content
|
|
|
7 |
import librosa
|
8 |
from PIL import Image, ImageSequence
|
9 |
from decord import VideoReader, cpu
|
10 |
+
from moviepy.editor import VideoFileClip
|
11 |
from transformers import AutoModel, AutoTokenizer, AutoProcessor
|
12 |
|
13 |
# Variables
|
|
|
52 |
return [idxs[int(i * gap + gap / 2)] for i in range(n)]
|
53 |
|
54 |
def build_omni_chunks(path, sr=16000, seconds_per_unit=1):
    """Split a video into interleaved (marker, frame, audio) units.

    The video at *path* is opened with moviepy. Its audio track is exported
    to a temporary WAV file and loaded back with librosa as mono audio
    resampled to *sr*. One frame and one audio slice are then taken for every
    ``seconds_per_unit`` seconds of the clip.

    Args:
        path: Path of the video file to read.
        sr: Sample rate used both for the WAV export and for loading.
        seconds_per_unit: Length of one unit in seconds. Fractional values
            are supported; slice boundaries are rounded to whole samples.

    Returns:
        A flat list of the form
        ``["<unit>", frame_0, audio_0, "<unit>", frame_1, audio_1, ...]``
        where each frame is a ``PIL.Image`` and each audio item is a slice of
        the loaded waveform. The last slice may be shorter than the others
        (or empty) when the audio ends before the video does.

    Raises:
        ValueError: If the video has no audio track.
    """
    import os  # local import: only needed to build the temp-file path

    clip = VideoFileClip(path)
    try:
        if clip.audio is None:
            raise ValueError(f"video has no audio track: {path!r}")

        # A temporary directory (rather than NamedTemporaryFile(delete=False))
        # guarantees the WAV is removed again and avoids writing to a path
        # whose handle is still open.
        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "audio.wav")
            clip.audio.write_audiofile(
                wav_path, fps=sr, codec="pcm_s16le", verbose=False, logger=None
            )
            audio_np, _ = librosa.load(wav_path, sr=sr, mono=True)

        duration = clip.duration
        total_units = math.ceil(duration / seconds_per_unit)
        # Stay just inside the clip so the final frame request is not past
        # the end, but never ask for a negative timestamp on very short clips.
        last_valid_t = max(0.0, duration - 1e-3)

        content = []
        for i in range(total_units):
            t = min(i * seconds_per_unit, last_valid_t)
            frame = Image.fromarray(clip.get_frame(t).astype("uint8"))
            # Round to whole samples so a fractional seconds_per_unit still
            # yields valid (integer) slice indices.
            start = int(round(sr * i * seconds_per_unit))
            stop = int(round(sr * (i + 1) * seconds_per_unit))
            audio_chunk = audio_np[start:stop]
            content.extend(["<unit>", frame, audio_chunk])
        return content
    finally:
        # Release the reader resources held by the clip.
        clip.close()
|