shukdevdatta123 committed on
Commit
99ef324
·
verified ·
1 Parent(s): 4ee577e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -37,27 +37,31 @@ def preprocess_audio_to_npz(audio_path):
37
  # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
38
  audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
39
 
40
- # Ensure audio is a float32 array
41
  audio = audio.astype(np.float32)
42
 
43
  with torch.device("cpu"):
44
- # Generate dummy semantic tokens using generate_text_semantic
45
  dummy_text = "Dummy text for history prompt generation."
46
  semantic_tokens = generate_text_semantic(
47
  text=dummy_text,
 
48
  temp=0.7,
49
  silent=True
50
  )
51
 
52
- # Ensure semantic_tokens is a numpy array with correct shape
53
  semantic_tokens = np.array(semantic_tokens, dtype=np.int64)
54
- if semantic_tokens.ndim == 0:
55
- semantic_tokens = semantic_tokens.reshape(-1)
56
 
57
- # Coarse and fine prompts are derived from semantic tokens
58
- # Bark often uses similar tokens for coarse and fine prompts
59
- coarse_tokens = semantic_tokens # Simplified assumption
60
- fine_tokens = semantic_tokens # Simplified assumption
 
 
 
61
 
62
  # Create history prompt dictionary
63
  history_prompt = {
 
37
  # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
38
  audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
39
 
40
+ # Ensure audio is a float32 array (for potential future use)
41
  audio = audio.astype(np.float32)
42
 
43
  with torch.device("cpu"):
44
+ # Generate semantic tokens using generate_text_semantic
45
  dummy_text = "Dummy text for history prompt generation."
46
  semantic_tokens = generate_text_semantic(
47
  text=dummy_text,
48
+ max_gen_len=512,
49
  temp=0.7,
50
  silent=True
51
  )
52
 
53
+ # Ensure semantic_tokens is a 1D numpy array of int64
54
  semantic_tokens = np.array(semantic_tokens, dtype=np.int64)
55
+ if semantic_tokens.ndim != 1:
56
+ semantic_tokens = semantic_tokens.flatten()
57
 
58
+ # Simulate coarse tokens (typically shorter or quantized version of semantic tokens)
59
+ coarse_tokens = semantic_tokens[:256] # Truncate to simulate coarse quantization
60
+ coarse_tokens = np.array(coarse_tokens, dtype=np.int64)
61
+
62
+ # Simulate fine tokens (often similar to coarse tokens in Bark)
63
+ fine_tokens = coarse_tokens.copy() # Simplified assumption
64
+ fine_tokens = np.array(fine_tokens, dtype=np.int64)
65
 
66
  # Create history prompt dictionary
67
  history_prompt = {