shukdevdatta123 committed on
Commit
7666acf
·
verified ·
1 Parent(s): e25f277

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -34
app.py CHANGED
@@ -34,40 +34,42 @@ def preprocess_audio_to_npz(audio_path):
34
  Returns:
35
  str: Path to the generated .npz file.
36
  """
37
- # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
38
- audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
39
-
40
- # Ensure audio is a float32 array
41
- audio = audio.astype(np.float32)
42
-
43
- # Tokenize and process through HuBERT for semantic tokens
44
- hubert_manager = load_model(model_type="hubert", device="cpu")
45
- hubert_tokenizer = load_model(model_type="hubert_tokenizer", device="cpu")
46
-
47
- # Generate semantic tokens
48
- tokens = _tokenize(audio, hubert_manager, hubert_tokenizer)
49
- semantic_tokens = tokens[0] # Extract semantic tokens
50
-
51
- # Load coarse model for coarse tokens
52
- coarse_model = load_model(model_type="coarse", device="cpu")
53
-
54
- # Generate coarse tokens
55
- coarse_tokens = generate_text_semantic(
56
- semantic_tokens=semantic_tokens,
57
- model=coarse_model,
58
- max_gen_len=512
59
- )
60
-
61
- # Create history prompt dictionary
62
- history_prompt = {
63
- "semantic_prompt": semantic_tokens,
64
- "coarse_prompt": coarse_tokens
65
- }
66
-
67
- # Save to temporary .npz file
68
- with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as temp_file:
69
- np.savez(temp_file.name, **history_prompt)
70
- npz_path = temp_file.name
 
 
71
 
72
  return npz_path
73
 
 
34
  Returns:
35
  str: Path to the generated .npz file.
36
  """
37
+ # Set device to CPU
38
+ with torch.device("cpu"):
39
+ # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
40
+ audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
41
+
42
+ # Ensure audio is a float32 array
43
+ audio = audio.astype(np.float32)
44
+
45
+ # Load HuBERT models for semantic token extraction
46
+ hubert_manager = load_model(model_type="hubert")
47
+ hubert_tokenizer = load_model(model_type="hubert_tokenizer")
48
+
49
+ # Generate semantic tokens
50
+ tokens = _tokenize(audio, hubert_manager, hubert_tokenizer)
51
+ semantic_tokens = tokens[0] # Extract semantic tokens
52
+
53
+ # Load coarse model for coarse tokens
54
+ coarse_model = load_model(model_type="coarse")
55
+
56
+ # Generate coarse tokens
57
+ coarse_tokens = generate_text_semantic(
58
+ semantic_tokens=semantic_tokens,
59
+ model=coarse_model,
60
+ max_gen_len=512
61
+ )
62
+
63
+ # Create history prompt dictionary
64
+ history_prompt = {
65
+ "semantic_prompt": semantic_tokens,
66
+ "coarse_prompt": coarse_tokens
67
+ }
68
+
69
+ # Save to temporary .npz file
70
+ with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as temp_file:
71
+ np.savez(temp_file.name, **history_prompt)
72
+ npz_path = temp_file.name
73
 
74
  return npz_path
75