Spaces:

thecollabagepatch
/

magenta

Running

App Files Files Community

thecollabagepatch committed 5 days ago

Commit

241e975

1 Parent(s): f8b3793

use tail end of longer contexts

Browse files

Files changed (1) hide show

app.py +33 -2

app.py CHANGED Viewed

@@ -141,6 +141,23 @@ def apply_micro_fades(wav: au.Waveform, ms: int = 5) -> None:
         wav.samples[:n]  *= env
         wav.samples[-n:] *= env[::-1]
 # ----------------------------
 # Main generation (single combined style vector)
 # ----------------------------
@@ -156,9 +173,23 @@ def generate_loop_continuation_with_mrt(
     loudness_mode: str = "auto",        # "auto"|"lufs"|"rms"|"none"
     loudness_headroom_db: float = 1.0,  # for the peak guard
 ):
-    # Load loop & encode
     loop = au.Waveform.from_file(input_wav_path).resample(mrt.sample_rate).as_stereo()
-    tokens_full = mrt.codec.encode(loop).astype(np.int32)
     tokens = tokens_full[:, :mrt.config.decoder_codec_rvq_depth]
     # Context

         wav.samples[:n]  *= env
         wav.samples[-n:] *= env[::-1]
def take_bar_aligned_tail(wav: au.Waveform,
                          bpm: float,
                          beats_per_bar: int,
                          ctx_seconds: float) -> au.Waveform:
    """
    Return the trailing whole-bar span of `wav` closest in length to ctx_seconds.

    The span is anchored to the END of `wav` and is an exact multiple of the
    bar duration derived from `bpm` and `beats_per_bar`. The bar count is
    rounded to the nearest integer (minimum one bar), so the returned span may
    be slightly shorter or longer than `ctx_seconds`.

    Args:
        wav: Source waveform; `wav.samples` is sliced along its first axis
            (assumed to be the time axis -- confirm against au.Waveform).
        bpm: Tempo in beats per minute; must be positive.
        beats_per_bar: Beats in one bar; must be positive.
        ctx_seconds: Desired context duration in seconds.

    Returns:
        A new waveform holding the last N bars, or `wav` itself (same object,
        not bar-aligned) when the input is no longer than the requested tail.

    Raises:
        ValueError: If `bpm` or `beats_per_bar` is not positive.
    """
    # Reject bad tempo/meter up front. A zero would surface as an opaque
    # ZeroDivisionError, and a negative value would make `n` negative below,
    # so `samples[-n:]` would silently drop the head instead of taking the tail.
    # `not (x > 0)` also rejects NaN.
    if not (bpm > 0):
        raise ValueError(f"bpm must be positive, got {bpm!r}")
    if not (beats_per_bar > 0):
        raise ValueError(f"beats_per_bar must be positive, got {beats_per_bar!r}")

    seconds_per_bar = (60.0 / bpm) * beats_per_bar
    bars_needed = max(1, int(round(ctx_seconds / seconds_per_bar)))
    tail_seconds = bars_needed * seconds_per_bar      # exact multiple of bars
    n = int(round(tail_seconds * wav.sample_rate))

    # n >= length: input is shorter than the desired tail, so keep all of it
    # and leave any padding/tiling to the caller.
    # n <= 0: `samples[-0:]` would return the whole buffer anyway; make that
    # degenerate case explicit instead of relying on slice semantics.
    if n <= 0 or n >= wav.samples.shape[0]:
        return wav
    return au.Waveform(wav.samples[-n:], wav.sample_rate)
 # ----------------------------
 # Main generation (single combined style vector)
 # ----------------------------
     loudness_mode: str = "auto",        # "auto"|"lufs"|"rms"|"none"
     loudness_headroom_db: float = 1.0,  # for the peak guard
 ):
+    # Load loop & put into model SR/channels
     loop = au.Waveform.from_file(input_wav_path).resample(mrt.sample_rate).as_stereo()
+    # Compute the model's desired context seconds (e.g., 250 frames / 25 fps = 10s)
+    codec_fps = float(mrt.codec.frame_rate)
+    ctx_seconds = float(mrt.config.context_length_frames) / codec_fps   # typically 10.0s
+    # ✅ NEW: take bar-aligned TAIL for context, if input is long enough
+    loop_for_context = take_bar_aligned_tail(
+        wav=loop,
+        bpm=bpm,
+        beats_per_bar=beats_per_bar,
+        ctx_seconds=ctx_seconds
+    )
+    # Encode ONLY the tail (so we condition on recent audio)
+    tokens_full = mrt.codec.encode(loop_for_context).astype(np.int32)
     tokens = tokens_full[:, :mrt.config.decoder_codec_rvq_depth]
     # Context