Update app.py
app.py CHANGED
@@ -5,6 +5,7 @@ import tempfile
 import logging
 import asyncio
 from typing import List, Optional, Dict, Any
+import traceback  # For detailed error logging
 
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException, BackgroundTasks, Query
 from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
@@ -16,344 +17,292 @@ from pydub import AudioSegment
 from pydub.exceptions import CouldntDecodeError
 
 # --- AI & Advanced Audio Imports ---
+# Add extra logging around imports
+logger_init = logging.getLogger("AppInit")
+logger_init.setLevel(logging.INFO)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+# Create console handler and set level to info
+ch = logging.StreamHandler()
+ch.setLevel(logging.INFO)
+ch.setFormatter(formatter)
+logger_init.addHandler(ch)
+
 try:
+    logger_init.info("Importing torch...")
     import torch
-
-    # from transformers import pipeline
+    logger_init.info("Importing soundfile...")
     import soundfile as sf
+    logger_init.info("Importing numpy...")
     import numpy as np
+    logger_init.info("Importing librosa...")
     import librosa
-
-    # Specific Model Libraries
+    logger_init.info("Importing speechbrain...")
     import speechbrain.pretrained
+    logger_init.info("Importing demucs...")
     import demucs.separate
     import demucs.apply
-
-
+    logger_init.info("AI and advanced audio libraries imported successfully.")
+    AI_LIBS_AVAILABLE = True
 except ImportError as e:
-
-
-
+    logger_init.error(f"CRITICAL: Error importing AI/Audio libraries: {e}", exc_info=True)
+    logger_init.error("Ensure torch, soundfile, librosa, speechbrain, demucs are in requirements.txt and installed.")
+    logger_init.error("AI features will be unavailable.")
     torch = None
     sf = None
    np = None
     librosa = None
     speechbrain = None
     demucs = None
+    AI_LIBS_AVAILABLE = False
 
 # --- Configuration & Setup ---
 TEMP_DIR = tempfile.gettempdir()
 os.makedirs(TEMP_DIR, exist_ok=True)
 
-logging
-logger = logging.getLogger(__name__)
+# Configure main app logging (use the root logger setup by FastAPI/Uvicorn)
+logger = logging.getLogger(__name__)  # Will inherit root logger settings
 
 # --- Global Variables for Loaded Models ---
-# Use consistent keys for storing/retrieving models
 ENHANCEMENT_MODEL_KEY = "speechbrain_sepformer"
-
-SEPARATION_MODEL_KEY = "htdemucs"  # Or use "mdx_extra_q" for a faster quantized one
+SEPARATION_MODEL_KEY = "htdemucs"
 
 enhancement_models: Dict[str, Any] = {}
 separation_models: Dict[str, Any] = {}
 
-
-
-DEMUCS_SR = 44100  # Demucs default is 44.1kHz
+ENHANCEMENT_SR = 16000
+DEMUCS_SR = 44100
 
 # --- Device Selection ---
 if torch:
     DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-
+    logger_init.info(f"Using device: {DEVICE}")
 else:
-    DEVICE = "cpu"
+    DEVICE = "cpu"
+    logger_init.info("Torch not available, defaulting device to CPU.")
 
-# --- Helper Functions ---
+# --- Helper Functions (cleanup_file, save_upload_file - same as before) ---
 def cleanup_file(file_path: str):
     """Safely remove a file."""
     try:
         if file_path and os.path.exists(file_path):
             os.remove(file_path)
-            logger.info(f"Cleaned up temporary file: {file_path}")
+            # logger.info(f"Cleaned up temporary file: {file_path}")  # Reduce log noise
     except Exception as e:
         logger.error(f"Error cleaning up file {file_path}: {e}", exc_info=False)
 
 async def save_upload_file(upload_file: UploadFile, prefix: str = "upload_") -> str:
     """Saves an uploaded file to a temporary location and returns the path."""
     _, file_extension = os.path.splitext(upload_file.filename)
-    # Default to .wav if no extension, as it's widely compatible for loading
     if not file_extension: file_extension = ".wav"
     temp_file_path = os.path.join(TEMP_DIR, f"{prefix}{uuid.uuid4().hex}{file_extension}")
     try:
         with open(temp_file_path, "wb") as buffer:
-
-            while content := await upload_file.read(1024 * 1024):  # 1MB chunks
-                buffer.write(content)
+            while content := await upload_file.read(1024 * 1024): buffer.write(content)
         logger.info(f"Saved uploaded file '{upload_file.filename}' to temp path: {temp_file_path}")
         return temp_file_path
     except Exception as e:
         logger.error(f"Failed to save uploaded file {upload_file.filename}: {e}", exc_info=True)
-        cleanup_file(temp_file_path)
+        cleanup_file(temp_file_path)
         raise HTTPException(status_code=500, detail=f"Could not save uploaded file: {upload_file.filename}")
     finally:
-        await upload_file.close()
+        await upload_file.close()
 
-# --- Audio Loading/Saving
+# --- Audio Loading/Saving Functions (same as before) ---
 def load_audio_for_hf(file_path: str, target_sr: Optional[int] = None) -> tuple[torch.Tensor, int]:
     """Loads audio, converts to mono float32 Torch tensor, optionally resamples."""
+    # ... (Function definition remains the same) ...
     try:
         audio, orig_sr = sf.read(file_path, dtype='float32', always_2d=False)
-        logger.
-
-
-
-
-            audio = audio[0
-
-
-
-            audio = np.mean(audio, axis=1)
-
-        # Convert numpy array to torch tensor
-        audio_tensor = torch.from_numpy(audio).float()
-
+        # logger.debug(...)  # Keep debug logs if needed
+        if audio.ndim > 1:  # Ensure mono
+            if audio.shape[0] < audio.shape[1] and audio.shape[0] < 10:  # Check if first dim is likely channels
+                audio = audio[0, :]
+            elif audio.shape[1] < audio.shape[0] and audio.shape[1] < 10:  # Check if second dim is likely channels
+                audio = audio[:, 0]
+            else:  # Fallback: Average if dims are ambiguous or many channels
+                logger.warning(f"Ambiguous audio shape {audio.shape}, averaging channels to mono.")
+                audio = np.mean(audio, axis=1 if audio.shape[1] < audio.shape[0] else 0)
+
+        audio_tensor = torch.from_numpy(audio).float()
         if target_sr and orig_sr != target_sr:
-            if librosa is None: raise RuntimeError("Librosa
-            logger.info(f"Resampling from {orig_sr} Hz to {target_sr} Hz...")
-            # Librosa works on numpy
+            if librosa is None: raise RuntimeError("Librosa missing")
+            logger.info(f"Resampling from {orig_sr} Hz to {target_sr} Hz for {os.path.basename(file_path)}...")
             audio_np = audio_tensor.numpy()
             resampled_audio_np = librosa.resample(audio_np, orig_sr=orig_sr, target_sr=target_sr)
             audio_tensor = torch.from_numpy(resampled_audio_np).float()
             current_sr = target_sr
-            logger.info(f"Resampled audio tensor shape: {audio_tensor.shape}")
         else:
             current_sr = orig_sr
-
-        # Ensure tensor is on the correct device
         return audio_tensor.to(DEVICE), current_sr
-
     except Exception as e:
         logger.error(f"Error loading/processing audio file {file_path} for HF: {e}", exc_info=True)
-        # Clean up the potentially corrupted saved file if loading failed
         cleanup_file(file_path)
-        raise HTTPException(status_code=415, detail=f"Could not load
+        raise HTTPException(status_code=415, detail=f"Could not load/process audio file: {os.path.basename(file_path)}. Check format.")
+
 
 def save_hf_audio(audio_data: Any, sampling_rate: int, output_format: str = "wav") -> str:
     """Saves audio data (Tensor or NumPy array) to a temporary file."""
+    # ... (Function definition remains the same) ...
     output_filename = f"ai_output_{uuid.uuid4().hex}.{output_format.lower()}"
     output_path = os.path.join(TEMP_DIR, output_filename)
     try:
-        logger.
-
-        # Convert tensor to numpy array if needed
+        # logger.debug(...)  # Keep debug logs if needed
         if isinstance(audio_data, torch.Tensor):
-            logger.debug("Converting output tensor to NumPy array.")
-            # Ensure tensor is on CPU before converting to numpy
             audio_np = audio_data.detach().cpu().numpy()
         elif isinstance(audio_data, np.ndarray):
             audio_np = audio_data
         else:
-            raise TypeError(f"Unsupported audio data type
+            raise TypeError(f"Unsupported audio data type: {type(audio_data)}")
 
-
-        if audio_np.dtype != np.float32:
-            logger.warning(f"Output audio dtype is {audio_np.dtype}, converting to float32 for saving.")
-            audio_np = audio_np.astype(np.float32)
-
-        # Clip values to avoid potential issues with formats expecting [-1, 1]
+        if audio_np.dtype != np.float32: audio_np = audio_np.astype(np.float32)
         audio_np = np.clip(audio_np, -1.0, 1.0)
 
-        # Use soundfile (preferred for wav/flac)
         if output_format.lower() in ['wav', 'flac']:
             sf.write(output_path, audio_np, sampling_rate, format=output_format.upper())
         else:
-
-
-            # Scale float32 [-1, 1] to int16 for pydub
-            # Ensure audio_np is 1D (mono) before scaling and converting
-            if audio_np.ndim > 1:
-                logger.warning(f"Audio data has {audio_np.ndim} dimensions, taking first dimension for pydub export.")
-                audio_np_mono = audio_np[0] if audio_np.shape[0] < audio_np.shape[1] else audio_np[:, 0]  # Basic mono conversion attempt
-            else:
-                audio_np_mono = audio_np
-
+            if audio_np.ndim > 1: audio_np_mono = np.mean(audio_np, axis=0 if audio_np.shape[0] < audio_np.shape[1] else 1)  # Basic mono conversion
+            else: audio_np_mono = audio_np
             audio_int16 = (audio_np_mono * 32767).astype(np.int16)
-            segment = AudioSegment(
-                audio_int16.tobytes(),
-                frame_rate=sampling_rate,
-                sample_width=audio_int16.dtype.itemsize,
-                channels=1  # Assuming mono
-            )
+            segment = AudioSegment(audio_int16.tobytes(), frame_rate=sampling_rate, sample_width=audio_int16.dtype.itemsize, channels=1)
             segment.export(output_path, format=output_format)
-
         return output_path
     except Exception as e:
         logger.error(f"Error saving AI processed audio to {output_path}: {e}", exc_info=True)
-        cleanup_file(output_path)
+        cleanup_file(output_path)
         raise HTTPException(status_code=500, detail="Failed to save processed audio.")
 
-
-# --- Pydub Loading (for basic edits) ---
+# --- Pydub Loading/Exporting (for basic edits - same as before) ---
 def load_audio_pydub(file_path: str) -> AudioSegment:
-
+    # ... (Function definition remains the same) ...
     try:
         audio = AudioSegment.from_file(file_path)
-        logger.info(f"Loaded audio using pydub from: {file_path}")
         return audio
-    except CouldntDecodeError:
-
-        raise HTTPException(status_code=415, detail=f"Unsupported audio format or corrupted file (pydub): {os.path.basename(file_path)}")
-    except FileNotFoundError:
-        logger.error(f"Audio file not found after saving (pydub): {file_path}")
-        raise HTTPException(status_code=500, detail="Internal error: Audio file disappeared.")
-    except Exception as e:
-        logger.error(f"Error loading audio file {file_path} with pydub: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=f"Error processing audio file (pydub): {os.path.basename(file_path)}")
+    except CouldntDecodeError: raise HTTPException(status_code=415, detail=f"Unsupported audio format (pydub): {os.path.basename(file_path)}")
+    except Exception as e: raise HTTPException(status_code=500, detail=f"Error processing audio (pydub): {os.path.basename(file_path)}")
 
 def export_audio_pydub(audio: AudioSegment, format: str) -> str:
-
+    # ... (Function definition remains the same) ...
     output_filename = f"edited_{uuid.uuid4().hex}.{format.lower()}"
     output_path = os.path.join(TEMP_DIR, output_filename)
     try:
-        logger.info(f"Exporting audio using pydub to format '{format}' at {output_path}")
         audio.export(output_path, format=format.lower())
         return output_path
-    except Exception as e:
-        logger.error(f"Error exporting audio with pydub to format {format}: {e}", exc_info=True)
-        cleanup_file(output_path)  # Cleanup if export failed
-        raise HTTPException(status_code=500, detail=f"Failed to export audio to format '{format}' using pydub.")
+    except Exception as e: raise HTTPException(status_code=500, detail=f"Failed to export audio (pydub): {format}")
 
 
-# --- Synchronous AI Inference Functions ---
-
+# --- Synchronous AI Inference Functions (same as before) ---
 def _run_enhancement_sync(model: Any, audio_tensor: torch.Tensor, sampling_rate: int) -> torch.Tensor:
-
+    # ... (Function definition remains the same) ...
     if not model: raise ValueError("Enhancement model not loaded")
     try:
-        logger.info(f"Running
-
-        if audio_tensor.
-
-
-        # Move tensor to the same device as the model
-        model_device = next(model.parameters()).device  # Check model's current device
-        if audio_tensor.device != model_device:
-            audio_tensor = audio_tensor.to(model_device)
-
+        logger.info(f"Running enhancement (input shape: {audio_tensor.shape}, SR: {sampling_rate}, Device: {DEVICE})...")
+        model_device = next(model.parameters()).device
+        if audio_tensor.device != model_device: audio_tensor = audio_tensor.to(model_device)
+        if audio_tensor.ndim == 1: audio_tensor = audio_tensor.unsqueeze(0)
         with torch.no_grad():
             enhanced_tensor = model.enhance_batch(audio_tensor, lengths=torch.tensor([audio_tensor.shape[1]]).to(model_device))
-
-        # Remove batch dimension from output before returning, move back to CPU
         enhanced_audio = enhanced_tensor.squeeze(0).cpu()
         logger.info(f"Enhancement complete (output shape: {enhanced_audio.shape})")
         return enhanced_audio
-    except Exception as e:
-        logger.error(f"Error during synchronous enhancement inference: {e}", exc_info=True)
-        raise
+    except Exception as e: logger.error(f"Sync enhancement error: {e}", exc_info=True); raise
 
 def _run_separation_sync(model: Any, audio_tensor: torch.Tensor, sampling_rate: int) -> Dict[str, torch.Tensor]:
-
+    # ... (Function definition remains the same) ...
     if not model: raise ValueError("Separation model not loaded")
-    if not demucs: raise RuntimeError("Demucs library
+    if not demucs: raise RuntimeError("Demucs library missing")
     try:
-        logger.info(f"Running
-
-        # Move tensor to the same device as the model
+        logger.info(f"Running separation (input shape: {audio_tensor.shape}, SR: {sampling_rate}, Device: {DEVICE})...")
         model_device = next(model.parameters()).device
-        if audio_tensor.device != model_device:
-
-
-        # Add batch and channel dimensions if mono (expects batch, channels, samples)
-        if audio_tensor.ndim == 1:
-            audio_tensor = audio_tensor.unsqueeze(0).unsqueeze(0)  # (1, 1, N)
-        elif audio_tensor.ndim == 2:  # Should be rare if loader works
-            audio_tensor = audio_tensor.unsqueeze(1)  # (B, 1, N)
-
-        # Repeat channel if model expects stereo but input is mono
+        if audio_tensor.device != model_device: audio_tensor = audio_tensor.to(model_device)
+        if audio_tensor.ndim == 1: audio_tensor = audio_tensor.unsqueeze(0).unsqueeze(0)
+        elif audio_tensor.ndim == 2: audio_tensor = audio_tensor.unsqueeze(1)
         if audio_tensor.shape[1] != model.audio_channels:
-            if audio_tensor.shape[1] == 1:
-
-                audio_tensor = audio_tensor.repeat(1, model.audio_channels, 1)
-            else:
-                # Cannot automatically handle other channel mismatches
-                raise ValueError(f"Input audio has {audio_tensor.shape[1]} channels, but Demucs model expects {model.audio_channels}.")
-
-        logger.debug(f"Input tensor shape for Demucs: {audio_tensor.shape}")
-
+            if audio_tensor.shape[1] == 1: audio_tensor = audio_tensor.repeat(1, model.audio_channels, 1)
+            else: raise ValueError(f"Input channels ({audio_tensor.shape[1]}) mismatch model ({model.audio_channels})")
         with torch.no_grad():
-
-            # apply_model expects a tensor of shape (channels, samples)
-            # We process one batch item at a time if needed, but typically process the whole file
-            audio_to_process = audio_tensor.squeeze(0)  # Remove batch dim -> (channels, samples)
+            audio_to_process = audio_tensor.squeeze(0)
             out = demucs.apply.apply_model(model, audio_to_process, device=model_device, shifts=1, split=True, overlap=0.25)
-            # Output shape (stems, channels, samples)
-
-        logger.debug(f"Raw separated sources tensor shape: {out.shape}")
-
-        # Map stems based on the model's sources list
         stem_map = {name: out[i] for i, name in enumerate(model.sources)}
-
-
-        output_stems = {}
-        for name, data in stem_map.items():
-            # Average channels, detach, move to CPU
-            output_stems[name] = data.mean(dim=0).detach().cpu()
-
-        logger.info(f"Separation complete. Found stems: {list(output_stems.keys())}")
+        output_stems = {name: data.mean(dim=0).detach().cpu() for name, data in stem_map.items()}
+        logger.info(f"Separation complete. Stems: {list(output_stems.keys())}")
         return output_stems
 
-    except Exception as e:
-        logger.error(f"Error during synchronous separation inference: {e}", exc_info=True)
-        raise
+    except Exception as e: logger.error(f"Sync separation error: {e}", exc_info=True); raise
 
-# --- Model Loading Function ---
+# --- Model Loading Function (Enhanced Logging) ---
 def load_hf_models():
     """Loads AI models at startup using correct libraries."""
+    logger_load = logging.getLogger("ModelLoader")  # Use specific logger
+    logger_load.setLevel(logging.INFO)
+    if not logger_load.handlers: logger_load.addHandler(ch)  # Add handler if not already present
+
     global enhancement_models, separation_models
-    if
-
+    if not AI_LIBS_AVAILABLE:
+        logger_load.error("Core AI libraries not available. Cannot load AI models.")
         return
 
-    # --- Load Enhancement Model
+    # --- Load Enhancement Model ---
     enhancement_model_hparams = "speechbrain/sepformer-whamr-enhancement"
+    logger_load.info(f"--- Attempting to load Enhancement Model: {enhancement_model_hparams} ---")
     try:
-
+        # Log device before loading
+        logger_load.info(f"Attempting load on device: {DEVICE}")
         enhancer = speechbrain.pretrained.SepformerEnhancement.from_hparams(
             source=enhancement_model_hparams,
             run_opts={"device": DEVICE}
         )
+        # Check model device after loading
+        model_device = next(enhancer.parameters()).device
         enhancement_models[ENHANCEMENT_MODEL_KEY] = enhancer
-
+        logger_load.info(f"SUCCESS: Enhancement model '{ENHANCEMENT_MODEL_KEY}' loaded successfully on {model_device}.")
     except Exception as e:
-
+        logger_load.error(f"FAILED to load enhancement model '{enhancement_model_hparams}'. Error:", exc_info=False)  # Log only message
+        logger_load.error(f"Traceback: {traceback.format_exc()}")  # Log full traceback separately
+        logger_load.warning("Enhancement features will be unavailable.")
 
-    # --- Load Separation Model
+    # --- Load Separation Model ---
     separation_model_name = SEPARATION_MODEL_KEY  # e.g., "htdemucs"
+    logger_load.info(f"--- Attempting to load Separation Model: {separation_model_name} ---")
     try:
-
+        logger_load.info(f"Attempting load on device: {DEVICE}")
+        # This automatically handles downloading the model checkpoint
         separator = demucs.apply.load_model(name=separation_model_name, device=DEVICE)
+        model_device = next(separator.parameters()).device
         separation_models[SEPARATION_MODEL_KEY] = separator
-
-
+        logger_load.info(f"SUCCESS: Separation model '{SEPARATION_MODEL_KEY}' loaded successfully on {model_device}.")
+        logger_load.info(f"Separation model sources: {separator.sources}")
     except Exception as e:
-
-
+        logger_load.error(f"FAILED to load separation model '{separation_model_name}'. Error:", exc_info=False)
+        logger_load.error(f"Traceback: {traceback.format_exc()}")
+        logger_load.warning("Ensure the 'demucs' package is installed correctly and the model name is valid (e.g., htdemucs).")
+        logger_load.warning("Separation features will be unavailable.")
+
+    logger_load.info(f"--- Model loading attempts finished ---")
+    logger_load.info(f"Loaded Enhancement Models: {list(enhancement_models.keys())}")
+    logger_load.info(f"Loaded Separation Models: {list(separation_models.keys())}")
 
 
 # --- FastAPI App ---
 app = FastAPI(
     title="AI Audio Editor API",
-    description="API for basic audio editing and AI-powered enhancement & separation. Requires FFmpeg and specific AI libraries
-    version="2.1.
+    description="API for basic audio editing and AI-powered enhancement & separation. Requires FFmpeg and specific AI libraries.",
+    version="2.1.1",  # Incremented version
 )
 
 @app.on_event("startup")
 async def startup_event():
-    logger
-
-
+    # Use the init logger for startup messages
+    logger_init.info("--- FastAPI Application Startup ---")
+    if AI_LIBS_AVAILABLE:
+        logger_init.info("AI Libraries appear to be available. Proceeding to load models in background thread...")
+        # Run blocking model load in thread
+        await asyncio.to_thread(load_hf_models)
+        logger_init.info("Background model loading task finished (check ModelLoader logs for details).")
+    else:
+        logger_init.error("AI Libraries failed to import. AI features will be disabled.")
+    logger_init.info("--- Startup complete ---")
 
 # --- API Endpoints ---
 
@@ -362,185 +311,98 @@ def read_root():
     """Root endpoint providing a welcome message and available features."""
     features = ["/trim", "/concat", "/volume", "/convert"]
     ai_features = []
+    # Check loaded models dictionary status
     if enhancement_models: ai_features.append(f"/enhance (model: {ENHANCEMENT_MODEL_KEY})")
-    if separation_models:
+    if separation_models:
+        model = separation_models.get(SEPARATION_MODEL_KEY)
+        sources_str = ', '.join(model.sources) if model else 'N/A'
+        ai_features.append(f"/separate (model: {SEPARATION_MODEL_KEY}, sources: {sources_str})")
 
     return {
         "message": "Welcome to the AI Audio Editor API.",
+        "status": "AI Libraries Available" if AI_LIBS_AVAILABLE else "AI Libraries Import Failed",
+        "loaded_enhancement_models": list(enhancement_models.keys()),
+        "loaded_separation_models": list(separation_models.keys()),
         "basic_features": features,
         "ai_features": ai_features if ai_features else "None available (check startup logs)",
-        "notes": "Requires FFmpeg. AI features require
+        "notes": "Requires FFmpeg. AI features require models to load successfully at startup."
     }
 
-# --- Basic Editing Endpoints ---
 
+# --- Basic Editing Endpoints ---
+# (Add /trim, /concat, /volume, /convert endpoints here - unchanged)
 @app.post("/trim", tags=["Basic Editing"])
-async def trim_audio(
-
-
-
-    end_ms: int = Form(..., description="End time in milliseconds.")
-):
-    """Trims an audio file to the specified start and end times (in milliseconds). Uses Pydub."""
-    if start_ms < 0 or end_ms <= start_ms:
-        raise HTTPException(status_code=422, detail="Invalid start/end times. Ensure start_ms >= 0 and end_ms > start_ms.")
-
-    logger.info(f"Trim request: file='{file.filename}', start={start_ms}ms, end={end_ms}ms")
-    input_path = await save_upload_file(file, prefix="trim_in_")
-    background_tasks.add_task(cleanup_file, input_path)  # Schedule input cleanup
-    output_path = None
-
+async def trim_audio(background_tasks: BackgroundTasks, file: UploadFile = File(...), start_ms: int = Form(...), end_ms: int = Form(...)):
+    if start_ms < 0 or end_ms <= start_ms: raise HTTPException(422, "Invalid start/end times.")
+    input_path = await save_upload_file(file, "trim_in_")
+    background_tasks.add_task(cleanup_file, input_path); output_path = None
     try:
         audio = load_audio_pydub(input_path)
         trimmed_audio = audio[start_ms:end_ms]
-
-
-
-
-
-        output_path = export_audio_pydub(trimmed_audio, original_format)
-        background_tasks.add_task(cleanup_file, output_path)  # Schedule output cleanup
-
-        output_filename=f"trimmed_{start_ms}-{end_ms}_{os.path.splitext(file.filename)[0]}.{original_format}"
-
-        return FileResponse(
-            path=output_path,
-            media_type=f"audio/{original_format}",
-            filename=output_filename
-        )
+        fmt = os.path.splitext(file.filename)[1][1:].lower() or "mp3"
+        output_path = export_audio_pydub(trimmed_audio, fmt)
+        background_tasks.add_task(cleanup_file, output_path)
+        fname = f"trimmed_{start_ms}-{end_ms}_{os.path.splitext(file.filename)[0]}.{fmt}"
+        return FileResponse(output_path, media_type=f"audio/{fmt}", filename=fname)
     except Exception as e:
-        logger.error(f"Error during trim operation: {e}", exc_info=True)
         if output_path: cleanup_file(output_path)
-        if isinstance(e, HTTPException): raise e
-        else: raise HTTPException(status_code=500, detail=f"An unexpected error occurred during trimming: {str(e)}")
+        if isinstance(e, HTTPException): raise e
+        else: raise HTTPException(500, f"Trim error: {e}")
 
 @app.post("/concat", tags=["Basic Editing"])
-async def concatenate_audio(
-
-
-    output_format: str = Form("mp3", description="Desired output format (e.g., 'mp3', 'wav', 'ogg').")
-):
-    """Concatenates two or more audio files sequentially using Pydub."""
-    if len(files) < 2:
-        raise HTTPException(status_code=422, detail="Please upload at least two files to concatenate.")
-
-    logger.info(f"Concatenate request: {len(files)} files, output_format='{output_format}'")
-    input_paths = []
-    loaded_audios = []
-    output_path = None
-
+async def concatenate_audio(background_tasks: BackgroundTasks, files: List[UploadFile] = File(...), output_format: str = Form("mp3")):
+    if len(files) < 2: raise HTTPException(422, "Need at least two files.")
+    input_paths, loaded_audios, output_path = [], [], None
    try:
+        combined = None
         for file in files:
-
-            input_paths.append(
-
-
-
-        if not loaded_audios: raise HTTPException(status_code=500, detail="No audio segments were loaded successfully.")
-
-        combined_audio = loaded_audios[0]
-        logger.info(f"Starting concatenation with first segment ({len(combined_audio)}ms)")
-        for i in range(1, len(loaded_audios)):
-            logger.info(f"Adding segment {i+1} ({len(loaded_audios[i])}ms)")
-            combined_audio += loaded_audios[i]
-
-        logger.info(f"Concatenated audio length: {len(combined_audio)}ms")
-
-        output_path = export_audio_pydub(combined_audio, output_format)
+            ip = await save_upload_file(file, "concat_in_")
+            input_paths.append(ip); background_tasks.add_task(cleanup_file, ip)
+            audio = load_audio_pydub(ip)
+            combined = audio if combined is None else combined + audio
+        if not combined: raise ValueError("No audio loaded.")
+        output_path = export_audio_pydub(combined, output_format)
         background_tasks.add_task(cleanup_file, output_path)
-
-
-        output_filename = f"concat_{first_filename_base}_and_{len(files)-1}_others.{output_format}"
-
-        return FileResponse(
-            path=output_path,
-            media_type=f"audio/{output_format}",
-            filename=output_filename
-        )
+        fname = f"concat_{os.path.splitext(files[0].filename)[0]}_{len(files)-1}_others.{output_format}"
+        return FileResponse(output_path, media_type=f"audio/{output_format}", filename=fname)
    except Exception as e:
-
-        # Cleanup intermediate files if error occurs
-        for path in input_paths: cleanup_file(path)
+        for p in input_paths: cleanup_file(p)
         if output_path: cleanup_file(output_path)
-        if isinstance(e, HTTPException): raise e
-        else: raise HTTPException(status_code=500, detail=f"An unexpected error occurred during concatenation: {str(e)}")
+        if isinstance(e, HTTPException): raise e
+        else: raise HTTPException(500, f"Concat error: {e}")
 
 @app.post("/volume", tags=["Basic Editing"])
-async def change_volume(
-
-
-    change_db: float = Form(..., description="Volume change in decibels (dB). Positive increases, negative decreases.")
-):
-    """Adjusts the volume of an audio file by a specified decibel amount using Pydub."""
-    logger.info(f"Volume request: file='{file.filename}', change_db={change_db}dB")
-    input_path = await save_upload_file(file, prefix="volume_in_")
-    background_tasks.add_task(cleanup_file, input_path)
-    output_path = None
-
+async def change_volume(background_tasks: BackgroundTasks, file: UploadFile = File(...), change_db: float = Form(...)):
+    input_path = await save_upload_file(file, "volume_in_")
+    background_tasks.add_task(cleanup_file, input_path); output_path = None
     try:
         audio = load_audio_pydub(input_path)
-
-
-
-        original_format = os.path.splitext(file.filename)[1][1:].lower() or "mp3"
-        if not original_format or original_format == "tmp": original_format = "mp3"
-
-        output_path = export_audio_pydub(adjusted_audio, original_format)
+        adjusted = audio + change_db
+        fmt = os.path.splitext(file.filename)[1][1:].lower() or "mp3"
+        output_path = export_audio_pydub(adjusted, fmt)
         background_tasks.add_task(cleanup_file, output_path)
-
-
-
-        return FileResponse(
-            path=output_path,
-            media_type=f"audio/{original_format}",
-            filename=output_filename
-        )
+        fname = f"volume_{change_db}dB_{os.path.splitext(file.filename)[0]}.{fmt}"
+        return FileResponse(output_path, media_type=f"audio/{fmt}", filename=fname)
     except Exception as e:
-        logger.error(f"Error during volume operation: {e}", exc_info=True)
         if output_path: cleanup_file(output_path)
-        if isinstance(e, HTTPException): raise e
-        else: raise HTTPException(status_code=500, detail=f"An unexpected error occurred during volume adjustment: {str(e)}")
+        if isinstance(e, HTTPException): raise e
+        else: raise HTTPException(500, f"Volume error: {e}")
 
 @app.post("/convert", tags=["Basic Editing"])
-async def convert_format(
-
-
-
-)
-    """Converts an audio file to a different format using Pydub."""
-    allowed_formats = {'mp3', 'wav', 'ogg', 'flac', 'aac', 'm4a', 'opus'}  # Common formats
-    if output_format.lower() not in allowed_formats:
-        raise HTTPException(status_code=422, detail=f"Invalid output format. Allowed formats: {', '.join(allowed_formats)}")
-
-    logger.info(f"Convert request: file='{file.filename}', output_format='{output_format}'")
-    input_path = await save_upload_file(file, prefix="convert_in_")
-    background_tasks.add_task(cleanup_file, input_path)
-    output_path = None
-
+async def convert_format(background_tasks: BackgroundTasks, file: UploadFile = File(...), output_format: str = Form(...)):
+    allowed = {'mp3', 'wav', 'ogg', 'flac', 'aac', 'm4a', 'opus'}
+    if output_format.lower() not in allowed: raise HTTPException(422, f"Invalid format. Allowed: {allowed}")
+    input_path = await save_upload_file(file, "convert_in_")
+    background_tasks.add_task(cleanup_file, input_path); output_path = None
     try:
-        # Load using pydub, which handles many input formats
         audio = load_audio_pydub(input_path)
-
         output_path = export_audio_pydub(audio, output_format.lower())
         background_tasks.add_task(cleanup_file, output_path)
-
-
-        output_filename = f"{filename_base}_converted.{output_format.lower()}"
-
-        return FileResponse(
-            path=output_path,
-            media_type=f"audio/{output_format.lower()}",  # Media type might need refinement for opus/aac/m4a
-            filename=output_filename
-        )
+        fname = f"{os.path.splitext(file.filename)[0]}_converted.{output_format.lower()}"
+        return FileResponse(output_path, media_type=f"audio/{output_format.lower()}", filename=fname)
     except Exception as e:
-        logger.error(f"Error during convert operation: {e}", exc_info=True)
         if output_path: cleanup_file(output_path)
-        if isinstance(e, HTTPException): raise e
-        else: raise HTTPException(status_code=500, detail=f"An unexpected error occurred during format conversion: {str(e)}")
+        if isinstance(e, HTTPException): raise e
+        else: raise HTTPException(500, f"Convert error: {e}")
 
 
-# --- AI Endpoints (
+# --- AI Endpoints (Unchanged Functionality, relies on successful loading) ---
 
 @app.post("/enhance", tags=["AI Editing"])
 async def enhance_speech(
@@ -550,44 +412,31 @@ async def enhance_speech(
     output_format: str = Form("wav", description="Output format (wav, flac recommended).")
 ):
     """Enhances speech audio using a pre-loaded SpeechBrain model."""
-    if
-        raise HTTPException(status_code=501, detail="AI processing libraries (torch, speechbrain) not available.")
+    if not AI_LIBS_AVAILABLE: raise HTTPException(501, "AI libraries not available.")
     if model_key not in enhancement_models:
         logger.error(f"Enhancement model key '{model_key}' requested but model not loaded.")
-        raise HTTPException(status_code=503, detail=f"Enhancement model '{model_key}' is not loaded or available. Check server logs.")
+        raise HTTPException(status_code=503, detail=f"Enhancement model '{model_key}' is not loaded or available. Check server startup logs.")
 
     loaded_model = enhancement_models[model_key]
-
     logger.info(f"Enhance request: file='{file.filename}', model='{model_key}', format='{output_format}'")
     input_path = await save_upload_file(file, prefix="enhance_in_")
     background_tasks.add_task(cleanup_file, input_path)
     output_path = None
-
     try:
-        # Load audio as tensor, ensure correct SR (16kHz)
         audio_tensor, current_sr = load_audio_for_hf(input_path, target_sr=ENHANCEMENT_SR)
-
         logger.info("Submitting enhancement task to background thread...")
         enhanced_audio_tensor = await asyncio.to_thread(
             _run_enhancement_sync, loaded_model, audio_tensor, current_sr
        )
        logger.info("Enhancement task completed.")
-
-        # Save the result (tensor output from enhancer at 16kHz)
        output_path = save_hf_audio(enhanced_audio_tensor, ENHANCEMENT_SR, output_format)
        background_tasks.add_task(cleanup_file, output_path)
-
        output_filename=f"enhanced_{os.path.splitext(file.filename)[0]}.{output_format}"
-        return FileResponse(
-            path=output_path,
-            media_type=f"audio/{output_format}",
-            filename=output_filename
-        )
+        return FileResponse(path=output_path, media_type=f"audio/{output_format}", filename=output_filename)
    except Exception as e:
        logger.error(f"Error during enhancement operation: {e}", exc_info=True)
-        if output_path: cleanup_file(output_path)
-        if isinstance(e, HTTPException): raise e
-        else: raise HTTPException(status_code=500, detail=f"An unexpected error occurred during enhancement: {str(e)}")
+        if output_path: cleanup_file(output_path)
+        if isinstance(e, HTTPException): raise e
+        else: raise HTTPException(500, f"Enhancement error: {e}")
 
 
 @app.post("/separate", tags=["AI Editing"])
@@ -599,55 +448,43 @@ async def separate_sources(
     output_format: str = Form("wav", description="Output format for the stems (wav, flac recommended).")
 ):
     """Separates music into stems using a pre-loaded Demucs model. Returns a ZIP archive."""
-    if
-        raise HTTPException(status_code=501, detail="AI processing libraries (torch, demucs) not available.")
+    if not AI_LIBS_AVAILABLE: raise HTTPException(501, "AI libraries not available.")
     if model_key not in separation_models:
         logger.error(f"Separation model key '{model_key}' requested but model not loaded.")
-        raise HTTPException(status_code=503, detail=f"Separation model '{model_key}' is not loaded or available. Check server logs.")
+        raise HTTPException(status_code=503, detail=f"Separation model '{model_key}' is not loaded or available. Check server startup logs.")
 
     loaded_model = separation_models[model_key]
-    valid_stems = set(loaded_model.sources)
+    valid_stems = set(loaded_model.sources)
     requested_stems = set(s.lower() for s in stems)
     if not requested_stems.issubset(valid_stems):
-        raise HTTPException(
+        raise HTTPException(422, f"Invalid stem(s). Model '{model_key}' provides: {valid_stems}")
 
     logger.info(f"Separate request: file='{file.filename}', model='{model_key}', stems={requested_stems}, format='{output_format}'")
     input_path = await save_upload_file(file, prefix="separate_in_")
     background_tasks.add_task(cleanup_file, input_path)
     stem_output_paths: Dict[str, str] = {}
-    zip_buffer = None
+    zip_buffer = io.BytesIO(); zipf = None  # Define zipf here
 
     try:
-        # Load audio as tensor, ensure correct SR (Demucs default 44.1kHz)
         audio_tensor, current_sr = load_audio_for_hf(input_path, target_sr=DEMUCS_SR)
-
        logger.info("Submitting separation task to background thread...")
        all_separated_stems_tensors = await asyncio.to_thread(
            _run_separation_sync, loaded_model, audio_tensor, current_sr
        )
        logger.info("Separation task completed.")
 
-
-
-
-
-
-
-
-
-
-
-
-                background_tasks.add_task(cleanup_file, stem_path)
-
-                # Use a simpler archive name within the zip
-                archive_name = f"{stem_name}.{output_format}"
-                zipf.write(stem_path, arcname=archive_name)
-                logger.info(f"Added '{archive_name}' to ZIP.")
-            else:
-                logger.warning(f"Requested stem '{stem_name}' not found in model output (should not happen here due to validation).")
-
+        zipf = zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED)
+        for stem_name in requested_stems:
+            if stem_name in all_separated_stems_tensors:
+                stem_tensor = all_separated_stems_tensors[stem_name]
+                stem_path = save_hf_audio(stem_tensor, DEMUCS_SR, output_format)
+                stem_output_paths[stem_name] = stem_path
+                background_tasks.add_task(cleanup_file, stem_path)  # Cleanup after response sent
+                archive_name = f"{stem_name}.{output_format}"
+                zipf.write(stem_path, arcname=archive_name)
+                logger.info(f"Added '{archive_name}' to ZIP.")
+
+        zipf.close()  # Close zip file BEFORE seeking/reading
         zip_buffer.seek(0)
 
         zip_filename = f"separated_{model_key}_{os.path.splitext(file.filename)[0]}.zip"
@@ -658,23 +495,11 @@ async def separate_sources(
         )
     except Exception as e:
         logger.error(f"Error during separation operation: {e}", exc_info=True)
-        #
+        # Ensure buffer/zipfile are closed and temp files cleaned up on error
+        if zipf: zipf.close()  # Ensure zipfile is closed
+        if zip_buffer: zip_buffer.close()
         for path in stem_output_paths.values(): cleanup_file(path)
-        if zip_buffer: zip_buffer.close()  # Ensure buffer is closed on error
         if isinstance(e, HTTPException): raise e
-        else: raise HTTPException(
-
-
-# --- How to Run ---
-# 1. Ensure FFmpeg is installed and accessible in your PATH.
-# 2. Save this code as `app.py`.
-# 3. Create `requirements.txt` (as shown in previous responses, including fastapi, uvicorn, pydub, torch, soundfile, librosa, speechbrain, demucs).
-# 4. Install dependencies: `pip install -r requirements.txt` (This can take significant time and disk space!).
-# 5. Run the FastAPI server: `uvicorn app:app --host 0.0.0.0 --port 7860` (Using --host 0.0.0.0 and port 7860 common for HF Spaces)
-#    Remove --reload for production/stable deployment.
-#
-# --- WARNING ---
-# - AI models require SIGNIFICANT RAM and CPU/GPU. Inference can be SLOW.
-# - The first run will download models, which can take a long time and lots of disk space.
-# - Ensure the specific model IDs/names used (e.g., "htdemucs") are correct and compatible.
-# - Monitor startup logs carefully for model loading success or failure.
+        else: raise HTTPException(500, f"Separation error: {e}")
+
+# --- (How to Run instructions remain the same) ---
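
As a quick smoke test of the endpoints this commit touches, a minimal client along the following lines can be used. This is a sketch, not part of the commit: it assumes the server is running locally on port 7860 (as in the How to Run notes), that the `requests` package is installed, and that `input.wav` is a placeholder for a real audio file on disk.

# smoke_test.py - hypothetical client sketch; filenames and port are assumptions.
import requests

BASE = "http://localhost:7860"  # assumed local uvicorn instance

# Check which AI models actually loaded at startup (root endpoint reports status).
print(requests.get(f"{BASE}/").json())

# Basic edit: trim the first two seconds out of an upload.
with open("input.wav", "rb") as f:
    r = requests.post(
        f"{BASE}/trim",
        files={"file": ("input.wav", f, "audio/wav")},
        data={"start_ms": 0, "end_ms": 2000},
    )
r.raise_for_status()
with open("trimmed.wav", "wb") as out:
    out.write(r.content)

# AI enhancement: the endpoint returns 501/503 if libraries or the model are unavailable.
with open("input.wav", "rb") as f:
    r = requests.post(
        f"{BASE}/enhance",
        files={"file": ("input.wav", f, "audio/wav")},
        data={"output_format": "wav"},
    )
if r.ok:
    with open("enhanced.wav", "wb") as out:
        out.write(r.content)
else:
    print("Enhancement unavailable:", r.status_code, r.text)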