Spaces:

thecollabagepatch
/

magenta

Running

App Files Files Community

thecollabagepatch committed 1 day ago

Commit

a8d3e47

1 Parent(s): 53cce5a

the next 8 bars not the last 8 bars

Browse files

Files changed (2) hide show

app.py +45 -24
jam_worker.py +90 -47

app.py CHANGED Viewed

@@ -323,25 +323,46 @@ def jam_start(
     return {"session_id": sid}
 @app.get("/jam/next")
-def jam_next(session_id: str, since: int = 0):
     with jam_lock:
         worker = jam_registry.get(session_id)
     if worker is None or not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
-    # drain outbox entries with index > since
-    items = []
-    with worker._lock:
-        for ch in worker.outbox:
-            if ch.index > since:
-                items.append({"index": ch.index, "audio_base64": ch.audio_base64, "metadata": ch.metadata})
-        # optional: truncate old items to keep memory bounded
-        if len(worker.outbox) > 32:
-            worker.outbox = worker.outbox[-16:]
-    if not items:
-        return Response(status_code=204)  # nothing yet
-    return {"chunks": items}
 @app.post("/jam/stop")
 def jam_stop(session_id: str = Body(..., embed=True)):
@@ -384,26 +405,26 @@ def jam_status(session_id: str):
     # Snapshot safely
     with worker._lock:
-        last_index = int(worker.idx)
         queued = len(worker.outbox)
         p = worker.params
         spb = p.beats_per_bar * (60.0 / p.bpm)
         chunk_secs = p.bars_per_chunk * spb
-        target_sr = p.target_sr
-        bars_per_chunk = p.bars_per_chunk
-        beats_per_bar = p.beats_per_bar
-        bpm = p.bpm
     return {
         "running": running,
-        "last_index": last_index,          # last finished chunk index (0 if none yet)
-        "queued_chunks": queued,           # how many not-yet-fetched chunks are in the outbox
-        "bpm": bpm,
-        "beats_per_bar": beats_per_bar,
-        "bars_per_chunk": bars_per_chunk,
         "seconds_per_bar": spb,
         "chunk_duration_seconds": chunk_secs,
-        "target_sample_rate": target_sr,
         "last_chunk_started_at": worker.last_chunk_started_at,
         "last_chunk_completed_at": worker.last_chunk_completed_at,
     }

     return {"session_id": sid}
 @app.get("/jam/next")
+def jam_next(session_id: str):
+    """
+    Get the next sequential chunk in the jam session.
+    This ensures chunks are delivered in order without gaps.
+    """
     with jam_lock:
         worker = jam_registry.get(session_id)
     if worker is None or not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
+    # Get the next sequential chunk (this blocks until ready)
+    chunk = worker.get_next_chunk()
+    if chunk is None:
+        raise HTTPException(status_code=408, detail="Chunk not ready within timeout")
+    return {
+        "chunk": {
+            "index": chunk.index,
+            "audio_base64": chunk.audio_base64,
+            "metadata": chunk.metadata
+        }
+    }
+@app.post("/jam/consume")
+def jam_consume(session_id: str = Form(...), chunk_index: int = Form(...)):
+    """
+    Mark a chunk as consumed by the frontend.
+    This helps the worker manage its buffer and generation flow.
+    """
+    with jam_lock:
+        worker = jam_registry.get(session_id)
+    if worker is None or not worker.is_alive():
+        raise HTTPException(status_code=404, detail="Session not found")
+    worker.mark_chunk_consumed(chunk_index)
+    return {"consumed": chunk_index}
 @app.post("/jam/stop")
 def jam_stop(session_id: str = Body(..., embed=True)):
     # Snapshot safely
     with worker._lock:
+        last_generated = int(worker.idx)
+        last_delivered = int(worker._last_delivered_index)
         queued = len(worker.outbox)
+        buffer_ahead = last_generated - last_delivered
         p = worker.params
         spb = p.beats_per_bar * (60.0 / p.bpm)
         chunk_secs = p.bars_per_chunk * spb
     return {
         "running": running,
+        "last_generated_index": last_generated,       # Last chunk that finished generating
+        "last_delivered_index": last_delivered,       # Last chunk sent to frontend
+        "buffer_ahead": buffer_ahead,                  # How many chunks ahead we are
+        "queued_chunks": queued,                       # Total chunks in outbox
+        "bpm": p.bpm,
+        "beats_per_bar": p.beats_per_bar,
+        "bars_per_chunk": p.bars_per_chunk,
         "seconds_per_bar": spb,
         "chunk_duration_seconds": chunk_secs,
+        "target_sample_rate": p.target_sr,
         "last_chunk_started_at": worker.last_chunk_started_at,
         "last_chunk_completed_at": worker.last_chunk_completed_at,
     }

jam_worker.py CHANGED Viewed

@@ -1,17 +1,14 @@
-# jam_worker.py
 import threading, time, base64, io, uuid
 from dataclasses import dataclass, field
 import numpy as np
 import soundfile as sf
-# Pull in your helpers from app.py or refactor them into a shared utils module.
 from utils import (
     match_loudness_to_reference, stitch_generated, hard_trim_seconds,
     apply_micro_fades, make_bar_aligned_context, take_bar_aligned_tail,
     resample_and_snap, wav_bytes_base64
 )
-from scipy.signal import resample_poly
-from math import gcd
 @dataclass
 class JamParams:
@@ -22,8 +19,8 @@ class JamParams:
     loudness_mode: str = "auto"
     headroom_db: float = 1.0
     style_vec: np.ndarray | None = None
-    ref_loop: any = None                  # au.Waveform at model SR for 1st-chunk loudness
-    combined_loop: any = None             # NEW: Full combined audio for context setup
     guidance_weight: float = 1.1
     temperature: float = 1.1
     topk: int = 40
@@ -39,15 +36,20 @@ class JamWorker(threading.Thread):
         super().__init__(daemon=True)
         self.mrt = mrt
         self.params = params
-        # Initialize fresh state
         self.state = mrt.init_state()
-        # CRITICAL: Set up fresh context from the new combined audio
         if params.combined_loop is not None:
             self._setup_context_from_combined_loop()
         self.idx = 0
         self.outbox: list[JamChunk] = []
         self._stop_event = threading.Event()
         self.last_chunk_started_at = None
         self.last_chunk_completed_at = None
         self._lock = threading.Lock()
@@ -55,14 +57,11 @@ class JamWorker(threading.Thread):
     def _setup_context_from_combined_loop(self):
         """Set up MRT context tokens from the combined loop audio"""
         try:
-            # Import the utility functions (same as used in main generation)
             from utils import make_bar_aligned_context, take_bar_aligned_tail
-            # Extract context from combined loop (same logic as generate_loop_continuation_with_mrt)
             codec_fps = float(self.mrt.codec.frame_rate)
             ctx_seconds = float(self.mrt.config.context_length_frames) / codec_fps
-            # Take tail portion for context (matches main generation)
             loop_for_context = take_bar_aligned_tail(
                 self.params.combined_loop,
                 self.params.bpm,
@@ -70,11 +69,9 @@ class JamWorker(threading.Thread):
                 ctx_seconds
             )
-            # Encode to tokens
             tokens_full = self.mrt.codec.encode(loop_for_context).astype(np.int32)
             tokens = tokens_full[:, :self.mrt.config.decoder_codec_rvq_depth]
-            # Create bar-aligned context
             context_tokens = make_bar_aligned_context(
                 tokens,
                 bpm=self.params.bpm,
@@ -83,30 +80,58 @@ class JamWorker(threading.Thread):
                 beats_per_bar=self.params.beats_per_bar
             )
-            # Set context on state - this is the key fix!
             self.state.context_tokens = context_tokens
             print(f"✅ JamWorker: Set up fresh context from combined loop")
-            print(f"   Context shape: {context_tokens.shape if context_tokens is not None else None}")
         except Exception as e:
             print(f"❌ Failed to setup context from combined loop: {e}")
-            # Continue without context rather than crashing
     def stop(self):
         self._stop_event.set()
-    def update_style(self, new_style_vec: np.ndarray | None):
-        with self._lock:
-            if new_style_vec is not None:
-                self.params.style_vec = new_style_vec
     def update_knobs(self, *, guidance_weight=None, temperature=None, topk=None):
         with self._lock:
             if guidance_weight is not None: self.params.guidance_weight = float(guidance_weight)
             if temperature is not None:     self.params.temperature     = float(temperature)
             if topk is not None:            self.params.topk            = int(topk)
     def _seconds_per_bar(self) -> float:
         return self.params.beats_per_bar * (60.0 / self.params.bpm)
@@ -131,58 +156,76 @@ class JamWorker(threading.Thread):
         return b64, meta
     def run(self):
         spb = self._seconds_per_bar()
         chunk_secs = self.params.bars_per_chunk * spb
         xfade = self.mrt.config.crossfade_length
-        # Prime: set initial context on state (caller should have done this; safe to re-set here)
-        # NOTE: We assume caller passed a style_vec computed from tail/whole/blend.
         while not self._stop_event.is_set():
-            # honor live knob updates atomically
             with self._lock:
                 style_vec = self.params.style_vec
-                # Temporarily override MRT knobs (thread-local overrides)
                 self.mrt.guidance_weight = self.params.guidance_weight
                 self.mrt.temperature = self.params.temperature
                 self.mrt.topk = self.params.topk
-            # 1) generate enough model chunks to cover chunk_secs
             need = chunk_secs
             chunks = []
             self.last_chunk_started_at = time.time()
             while need > 0 and not self._stop_event.is_set():
                 wav, self.state = self.mrt.generate_chunk(state=self.state, style=style_vec)
                 chunks.append(wav)
-                # model chunk length (seconds) at model SR
                 need -= (wav.samples.shape[0] / float(self.mrt.sample_rate))
             if self._stop_event.is_set():
                 break
-            # 2) stitch and trim to exact seconds at model SR
             y = stitch_generated(chunks, self.mrt.sample_rate, xfade).as_stereo()
             y = hard_trim_seconds(y, chunk_secs)
-            # 3) post-process
-            if self.idx == 0 and self.params.ref_loop is not None:
-                y, _ = match_loudness_to_reference(self.params.ref_loop, y,
-                                                   method=self.params.loudness_mode,
-                                                   headroom_db=self.params.headroom_db)
             else:
                 apply_micro_fades(y, 3)
-            # 4) resample + snap + b64
-            b64, meta = self._snap_and_encode(y, seconds=chunk_secs,
-                                              target_sr=self.params.target_sr,
-                                              bars=self.params.bars_per_chunk)
-            # 5) enqueue
             with self._lock:
-                self.idx += 1
-                self.outbox.append(JamChunk(index=self.idx, audio_base64=b64, metadata=meta))
-                self.last_chunk_completed_at = time.time()
-        # optional: cleanup here if needed

+# jam_worker.py - SIMPLE FIX VERSION
 import threading, time, base64, io, uuid
 from dataclasses import dataclass, field
 import numpy as np
 import soundfile as sf
 from utils import (
     match_loudness_to_reference, stitch_generated, hard_trim_seconds,
     apply_micro_fades, make_bar_aligned_context, take_bar_aligned_tail,
     resample_and_snap, wav_bytes_base64
 )
 @dataclass
 class JamParams:
     loudness_mode: str = "auto"
     headroom_db: float = 1.0
     style_vec: np.ndarray | None = None
+    ref_loop: any = None
+    combined_loop: any = None
     guidance_weight: float = 1.1
     temperature: float = 1.1
     topk: int = 40
         super().__init__(daemon=True)
         self.mrt = mrt
         self.params = params
         self.state = mrt.init_state()
         if params.combined_loop is not None:
             self._setup_context_from_combined_loop()
         self.idx = 0
         self.outbox: list[JamChunk] = []
         self._stop_event = threading.Event()
+        # NEW: Track delivery state
+        self._last_delivered_index = 0
+        self._max_buffer_ahead = 5  # Cap on how many chunks generation may run ahead of delivery
+        # Timing info
         self.last_chunk_started_at = None
         self.last_chunk_completed_at = None
         self._lock = threading.Lock()
     def _setup_context_from_combined_loop(self):
         """Set up MRT context tokens from the combined loop audio"""
         try:
             from utils import make_bar_aligned_context, take_bar_aligned_tail
             codec_fps = float(self.mrt.codec.frame_rate)
             ctx_seconds = float(self.mrt.config.context_length_frames) / codec_fps
             loop_for_context = take_bar_aligned_tail(
                 self.params.combined_loop,
                 self.params.bpm,
                 ctx_seconds
             )
             tokens_full = self.mrt.codec.encode(loop_for_context).astype(np.int32)
             tokens = tokens_full[:, :self.mrt.config.decoder_codec_rvq_depth]
             context_tokens = make_bar_aligned_context(
                 tokens,
                 bpm=self.params.bpm,
                 beats_per_bar=self.params.beats_per_bar
             )
             self.state.context_tokens = context_tokens
             print(f"✅ JamWorker: Set up fresh context from combined loop")
         except Exception as e:
             print(f"❌ Failed to setup context from combined loop: {e}")
     def stop(self):
         """Signal the worker to stop by setting its stop event."""
         self._stop_event.set()
     def update_knobs(self, *, guidance_weight=None, temperature=None, topk=None):
         """
         Update sampling parameters on self.params while holding the worker lock.

         Every argument is keyword-only; one left as None is ignored.
         guidance_weight and temperature are coerced to float, topk to int.
         """
         with self._lock:
             if guidance_weight is not None: self.params.guidance_weight = float(guidance_weight)
             if temperature is not None:     self.params.temperature     = float(temperature)
             if topk is not None:            self.params.topk            = int(topk)
+    def get_next_chunk(self) -> JamChunk | None:
+        """Get the next sequential chunk (blocks/waits if not ready)"""
+        target_index = self._last_delivered_index + 1
+        # Wait for the target chunk to be ready (with timeout)
+        max_wait = 30.0  # seconds
+        start_time = time.time()
+        while time.time() - start_time < max_wait and not self._stop_event.is_set():
+            with self._lock:
+                # Look for the exact chunk we need
+                for chunk in self.outbox:
+                    if chunk.index == target_index:
+                        self._last_delivered_index = target_index
+                        print(f"📦 Delivered chunk {target_index}")
+                        return chunk
+            # Not ready yet, wait a bit
+            time.sleep(0.1)
+        # Timeout or stopped
+        return None
+    def mark_chunk_consumed(self, chunk_index: int):
+        """Mark a chunk as consumed by the frontend"""
+        with self._lock:
+            self._last_delivered_index = max(self._last_delivered_index, chunk_index)
+            print(f"✅ Chunk {chunk_index} consumed")
+    def _should_generate_next_chunk(self) -> bool:
+        """Check if we should generate the next chunk (don't get too far ahead)"""
+        with self._lock:
+            # Don't generate if we're already too far ahead
+            if self.idx > self._last_delivered_index + self._max_buffer_ahead:
+                return False
+            return True
     def _seconds_per_bar(self) -> float:
         """Return the duration of one bar in seconds: beats_per_bar * (60 / bpm)."""
         return self.params.beats_per_bar * (60.0 / self.params.bpm)
         return b64, meta
     def run(self):
+        """
+        Main worker loop: generate chunks until stopped, throttled by delivery.
+
+        Each iteration either sleeps (when _should_generate_next_chunk() says
+        the buffer is full) or generates one chunk of
+        bars_per_chunk * seconds-per-bar seconds, post-processes it, encodes
+        it and appends it to the outbox under the lock.
+        """
         spb = self._seconds_per_bar()
         chunk_secs = self.params.bars_per_chunk * spb
+        # Crossfade length from the model config, handed to stitch_generated() below
         xfade = self.mrt.config.crossfade_length
+        print("🚀 JamWorker started with flow control...")
         while not self._stop_event.is_set():
+            # Flow control: skip generation while we are far enough ahead of delivery
+            if not self._should_generate_next_chunk():
+                # We're ahead enough, wait a bit for frontend to catch up
+                print(f"⏸️  Buffer full, waiting for consumption...")
+                time.sleep(0.5)
+                continue
+            # Snapshot the style vector and push the current knob values onto the model
+            # NOTE(review): these assignments mutate self.mrt; if the same mrt object
+            # is shared with other workers they would see the values too — confirm
             with self._lock:
                 style_vec = self.params.style_vec
                 self.mrt.guidance_weight = self.params.guidance_weight
                 self.mrt.temperature = self.params.temperature
                 self.mrt.topk = self.params.topk
+                next_idx = self.idx + 1
+            print(f"🎹 Generating chunk {next_idx}...")
+            # Generate enough model chunks to cover chunk_secs
             need = chunk_secs
             chunks = []
+            # Written without holding self._lock
             self.last_chunk_started_at = time.time()
             while need > 0 and not self._stop_event.is_set():
                 wav, self.state = self.mrt.generate_chunk(state=self.state, style=style_vec)
                 chunks.append(wav)
+                # Subtract this model chunk's duration in seconds
+                # (assumes axis 0 of wav.samples is the sample axis — TODO confirm)
                 need -= (wav.samples.shape[0] / float(self.mrt.sample_rate))
+            # A stop request during generation discards the partial chunk
             if self._stop_event.is_set():
                 break
+            # Stitch and trim to exact seconds at model SR
             y = stitch_generated(chunks, self.mrt.sample_rate, xfade).as_stereo()
             y = hard_trim_seconds(y, chunk_secs)
+            # Post-process: chunk 1 is loudness-matched to ref_loop when one was
+            # supplied; every other case takes the micro-fade branch instead
+            if next_idx == 1 and self.params.ref_loop is not None:
+                y, _ = match_loudness_to_reference(
+                    self.params.ref_loop, y,
+                    method=self.params.loudness_mode,
+                    headroom_db=self.params.headroom_db
+                )
             else:
+                # Return value is ignored; presumably modifies y in place — confirm
                 apply_micro_fades(y, 3)
+            # Resample + snap + b64
+            b64, meta = self._snap_and_encode(
+                y, seconds=chunk_secs,
+                target_sr=self.params.target_sr,
+                bars=self.params.bars_per_chunk
+            )
+            # Publish the completed chunk and advance idx atomically
             with self._lock:
+                self.idx = next_idx
+                self.outbox.append(JamChunk(index=next_idx, audio_base64=b64, metadata=meta))
+                # Keep outbox bounded: pruning only runs once it holds more than 10 chunks
+                if len(self.outbox) > 10:
+                    # Keep chunks whose index is greater than the delivery point minus 5
+                    self.outbox = [ch for ch in self.outbox if ch.index > self._last_delivered_index - 5]
+            # Written after the lock has been released
+            self.last_chunk_completed_at = time.time()
+            print(f"✅ Completed chunk {next_idx}")
+        print("🛑 JamWorker stopped")