Commit
·
b1bc032
1
Parent(s):
bf8ae4c
incremental change for bpm sync
Browse files- Dockerfile +2 -0
- app.py +2 -2
- jam_worker.py +68 -28
Dockerfile
CHANGED
@@ -128,7 +128,9 @@ RUN python -m pip install --no-cache-dir --force-reinstall "protobuf==4.25.3"
|
|
128 |
|
129 |
RUN python -m pip install gradio
|
130 |
|
|
|
131 |
|
|
|
132 |
|
133 |
# Switch to Spaces’ preferred user
|
134 |
# Switch to Spaces’ preferred user
|
|
|
128 |
|
129 |
RUN python -m pip install gradio
|
130 |
|
131 |
+
RUN python -m pip install soxr
|
132 |
|
133 |
+
RUN python -m pip install samplerate
|
134 |
|
135 |
# Switch to Spaces’ preferred user
|
136 |
# Switch to Spaces’ preferred user
|
app.py
CHANGED
@@ -308,7 +308,7 @@ def generate_loop_continuation_with_mrt(
|
|
308 |
|
309 |
# Bar-aligned token window (unchanged)
|
310 |
context_tokens = make_bar_aligned_context(
|
311 |
-
tokens, bpm=bpm, fps=
|
312 |
ctx_frames=mrt.config.context_length_frames, beats_per_bar=beats_per_bar
|
313 |
)
|
314 |
state = mrt.init_state()
|
@@ -441,7 +441,7 @@ def _mrt_warmup():
|
|
441 |
context_tokens = make_bar_aligned_context(
|
442 |
tokens,
|
443 |
bpm=bpm,
|
444 |
-
fps=
|
445 |
ctx_frames=mrt.config.context_length_frames,
|
446 |
beats_per_bar=beats_per_bar,
|
447 |
)
|
|
|
308 |
|
309 |
# Bar-aligned token window (unchanged)
|
310 |
context_tokens = make_bar_aligned_context(
|
311 |
+
tokens, bpm=bpm, fps=float(mrt.codec.frame_rate),
|
312 |
ctx_frames=mrt.config.context_length_frames, beats_per_bar=beats_per_bar
|
313 |
)
|
314 |
state = mrt.init_state()
|
|
|
441 |
context_tokens = make_bar_aligned_context(
|
442 |
tokens,
|
443 |
bpm=bpm,
|
444 |
+
fps=float(mrt.codec.frame_rate),
|
445 |
ctx_frames=mrt.config.context_length_frames,
|
446 |
beats_per_bar=beats_per_bar,
|
447 |
)
|
jam_worker.py
CHANGED
@@ -10,6 +10,7 @@ from utils import (
|
|
10 |
apply_micro_fades, make_bar_aligned_context, take_bar_aligned_tail,
|
11 |
resample_and_snap, wav_bytes_base64
|
12 |
)
|
|
|
13 |
|
14 |
@dataclass
|
15 |
class JamParams:
|
@@ -61,6 +62,10 @@ class JamWorker(threading.Thread):
|
|
61 |
self.last_chunk_started_at = None
|
62 |
self.last_chunk_completed_at = None
|
63 |
|
|
|
|
|
|
|
|
|
64 |
|
65 |
def _setup_context_from_combined_loop(self):
|
66 |
"""Set up MRT context tokens from the combined loop audio"""
|
@@ -382,19 +387,18 @@ class JamWorker(threading.Thread):
|
|
382 |
|
383 |
def reseed_splice(self, recent_wav, anchor_bars: float):
|
384 |
"""
|
385 |
-
Token-splice reseed
|
386 |
-
- original = the context we captured when the jam started
|
387 |
-
- recent = tokens from the provided recent waveform (usually Swift-combined mix)
|
388 |
-
- anchor_bars controls how much of the original vibe we re-inject
|
389 |
"""
|
390 |
with self._lock:
|
391 |
if not hasattr(self, "_original_context_tokens") or self._original_context_tokens is None:
|
392 |
-
# Fallback: if we somehow don’t have originals, treat current as originals
|
393 |
self._original_context_tokens = np.copy(self.state.context_tokens)
|
394 |
|
395 |
-
recent_tokens = self._make_recent_tokens_from_wave(recent_wav)
|
396 |
new_ctx = self._splice_context(self._original_context_tokens, recent_tokens, anchor_bars)
|
397 |
|
|
|
|
|
|
|
398 |
# install the new context window
|
399 |
new_state = self.mrt.init_state()
|
400 |
new_state.context_tokens = new_ctx
|
@@ -411,15 +415,31 @@ class JamWorker(threading.Thread):
|
|
411 |
chunk_secs = self.params.bars_per_chunk * spb
|
412 |
xfade = float(self.mrt.config.crossfade_length) # seconds
|
413 |
sr = int(self.mrt.sample_rate)
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
420 |
if first_chunk_extra:
|
421 |
-
#
|
422 |
-
|
|
|
|
|
423 |
return max(0, want - have)
|
424 |
|
425 |
def _mono_env(x: np.ndarray, sr: int, win_ms: float = 10.0) -> np.ndarray:
|
@@ -445,7 +465,6 @@ class JamWorker(threading.Thread):
|
|
445 |
return 0
|
446 |
|
447 |
# envelopes + z-score
|
448 |
-
import numpy as np
|
449 |
def _z(a):
|
450 |
m, s = float(a.mean()), float(a.std() or 1.0); return (a - m) / s
|
451 |
e_ref = _z(_mono_env(ref_tail, sr)).astype(np.float32)
|
@@ -500,22 +519,26 @@ class JamWorker(threading.Thread):
|
|
500 |
break
|
501 |
|
502 |
# 2) One-time: align the emit pointer to the groove
|
503 |
-
if self.idx == 0 and self.params.combined_loop is not None:
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
self.
|
|
|
515 |
|
516 |
# 3) Emit exactly bars_per_chunk × spb from the stream
|
517 |
start = self._next_emit_start
|
518 |
-
|
|
|
|
|
|
|
519 |
if end > self._stream.shape[0]:
|
520 |
# shouldn't happen often; generate a bit more and loop
|
521 |
continue
|
@@ -549,6 +572,23 @@ class JamWorker(threading.Thread):
|
|
549 |
cutoff = self._last_delivered_index - 5
|
550 |
self.outbox = [ch for ch in self.outbox if ch.index > cutoff]
|
551 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
552 |
print(f"✅ Completed chunk {self.idx}")
|
553 |
|
554 |
print("🛑 JamWorker stopped")
|
|
|
10 |
apply_micro_fades, make_bar_aligned_context, take_bar_aligned_tail,
|
11 |
resample_and_snap, wav_bytes_base64
|
12 |
)
|
13 |
+
from math import floor, ceil
|
14 |
|
15 |
@dataclass
|
16 |
class JamParams:
|
|
|
62 |
self.last_chunk_started_at = None
|
63 |
self.last_chunk_completed_at = None
|
64 |
|
65 |
+
self._pending_reseed = None # {"ctx": np.ndarray, "ref": au.Waveform|None}
|
66 |
+
self._needs_bar_realign = False # request a one-shot downbeat alignment
|
67 |
+
self._reseed_ref_loop = None # which loop to align against after reseed
|
68 |
+
|
69 |
|
70 |
def _setup_context_from_combined_loop(self):
|
71 |
"""Set up MRT context tokens from the combined loop audio"""
|
|
|
387 |
|
388 |
def reseed_splice(self, recent_wav, anchor_bars: float):
|
389 |
"""
|
390 |
+
Token-splice reseed queued for the next bar boundary between chunks.
|
|
|
|
|
|
|
391 |
"""
|
392 |
with self._lock:
|
393 |
if not hasattr(self, "_original_context_tokens") or self._original_context_tokens is None:
|
|
|
394 |
self._original_context_tokens = np.copy(self.state.context_tokens)
|
395 |
|
396 |
+
recent_tokens = self._make_recent_tokens_from_wave(recent_wav) # [T, depth]
|
397 |
new_ctx = self._splice_context(self._original_context_tokens, recent_tokens, anchor_bars)
|
398 |
|
399 |
+
# Queue it; the run loop will install right after we finish the current slice
|
400 |
+
self._pending_reseed = {"ctx": new_ctx, "ref": recent_wav}
|
401 |
+
|
402 |
# install the new context window
|
403 |
new_state = self.mrt.init_state()
|
404 |
new_state.context_tokens = new_ctx
|
|
|
415 |
chunk_secs = self.params.bars_per_chunk * spb
|
416 |
xfade = float(self.mrt.config.crossfade_length) # seconds
|
417 |
sr = int(self.mrt.sample_rate)
|
418 |
+
chunk_step_f = chunk_secs * sr # float samples per chunk
|
419 |
+
self._emit_phase = getattr(self, "_emit_phase", 0.0)
|
420 |
+
|
421 |
+
def _need(first_chunk_extra: bool = False) -> int:
|
422 |
+
"""
|
423 |
+
How many more samples we still need in the stream to emit the next slice.
|
424 |
+
Uses the fractional step (chunk_step_f) + current _emit_phase to compute
|
425 |
+
the *integer* number of samples required for the next chunk, without
|
426 |
+
mutating _emit_phase here.
|
427 |
+
"""
|
428 |
+
start = getattr(self, "_next_emit_start", 0)
|
429 |
+
total = 0 if getattr(self, "_stream", None) is None else self._stream.shape[0]
|
430 |
+
have = max(0, total - start)
|
431 |
+
|
432 |
+
# Compute the integer step we'd use for the next emit, non-mutating.
|
433 |
+
emit_phase = float(getattr(self, "_emit_phase", 0.0))
|
434 |
+
step_int = int(floor(chunk_step_f + emit_phase)) # matches the logic used when advancing
|
435 |
+
|
436 |
+
# How much we want available beyond 'start' for this emit.
|
437 |
+
want = step_int
|
438 |
if first_chunk_extra:
|
439 |
+
# Reserve two extra bars so the first-chunk onset alignment has material.
|
440 |
+
# Use ceil to be conservative so we don't under-request.
|
441 |
+
want += int(ceil(2.0 * spb * sr))
|
442 |
+
|
443 |
return max(0, want - have)
|
444 |
|
445 |
def _mono_env(x: np.ndarray, sr: int, win_ms: float = 10.0) -> np.ndarray:
|
|
|
465 |
return 0
|
466 |
|
467 |
# envelopes + z-score
|
|
|
468 |
def _z(a):
|
469 |
m, s = float(a.mean()), float(a.std() or 1.0); return (a - m) / s
|
470 |
e_ref = _z(_mono_env(ref_tail, sr)).astype(np.float32)
|
|
|
519 |
break
|
520 |
|
521 |
# 2) One-time: align the emit pointer to the groove
|
522 |
+
if (self.idx == 0 and self.params.combined_loop is not None) or self._needs_bar_realign:
|
523 |
+
ref_loop = self._reseed_ref_loop or self.params.combined_loop
|
524 |
+
if ref_loop is not None:
|
525 |
+
head_len = min(self._stream.shape[0] - self._next_emit_start, int(round(2 * spb * sr)))
|
526 |
+
seg = self._stream[self._next_emit_start : self._next_emit_start + head_len]
|
527 |
+
gen_head = au.Waveform(seg.astype(np.float32, copy=False), sr).as_stereo()
|
528 |
+
offs = _estimate_first_offset_samples(ref_loop, gen_head, sr, spb)
|
529 |
+
if offs != 0:
|
530 |
+
self._next_emit_start = max(0, self._next_emit_start + offs)
|
531 |
+
print(f"🎯 Offset compensation: {offs/sr:+.3f}s")
|
532 |
+
self._realign_emit_pointer_to_bar(sr)
|
533 |
+
self._needs_bar_realign = False
|
534 |
+
self._reseed_ref_loop = None
|
535 |
|
536 |
# 3) Emit exactly bars_per_chunk × spb from the stream
|
537 |
start = self._next_emit_start
|
538 |
+
step_total = chunk_step_f + self._emit_phase
|
539 |
+
step_int = int(np.floor(step_total))
|
540 |
+
self._emit_phase = float(step_total - step_int)
|
541 |
+
end = start + step_int
|
542 |
if end > self._stream.shape[0]:
|
543 |
# shouldn't happen often; generate a bit more and loop
|
544 |
continue
|
|
|
572 |
cutoff = self._last_delivered_index - 5
|
573 |
self.outbox = [ch for ch in self.outbox if ch.index > cutoff]
|
574 |
|
575 |
+
# 👉 If a reseed was requested, apply it *now*, between chunks
|
576 |
+
if self._pending_reseed is not None:
|
577 |
+
pkg = self._pending_reseed
|
578 |
+
self._pending_reseed = None
|
579 |
+
|
580 |
+
new_state = self.mrt.init_state()
|
581 |
+
new_state.context_tokens = pkg["ctx"] # exact (ctx_frames, depth)
|
582 |
+
self.state = new_state
|
583 |
+
|
584 |
+
# start a fresh stream and schedule one-time alignment
|
585 |
+
self._stream = None
|
586 |
+
self._next_emit_start = 0
|
587 |
+
self._reseed_ref_loop = pkg.get("ref") or self.params.combined_loop
|
588 |
+
self._needs_bar_realign = True
|
589 |
+
|
590 |
+
print("🔁 Reseed installed at bar boundary; will realign before next slice")
|
591 |
+
|
592 |
print(f"✅ Completed chunk {self.idx}")
|
593 |
|
594 |
print("🛑 JamWorker stopped")
|