File size: 1,043 Bytes
7105a54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Build dataset manifests, drop silent files, and dump HuBERT features for
# one split.
#
# Steps, in order:
#   1. activate the `contentvec` conda environment
#   2. generate wav2vec manifests for ./dataset into ./feature
#   3. run remove_silence_files.py on the manifests and copy the filtered
#      manifests into ./feature/lab
#   4. replace fairseq's dump_hubert_feature.py with the local copy
#   5. run speaker.py, then the feature extraction itself
#
# All paths are relative, so run this from the directory that contains
# fairseq/, dataset/, and the helper Python scripts. Requires bash (conda.sh
# is sourced).
#
# Environment:
#   CONDA_ROOT  optional; conda installation prefix
#               (default: /home/<current user>/miniconda3)

# Abort on the first failing step so later steps never operate on missing or
# stale manifests. `-u` is enabled only after conda activation because
# activation hooks may read unset variables.
set -eo pipefail

CONDA_ROOT="${CONDA_ROOT:-/home/$(whoami)/miniconda3}"

# shellcheck source=/dev/null
source "${CONDA_ROOT}/etc/profile.d/conda.sh"
conda activate contentvec
set -u

mkdir -p feature/lab

# Generate manifest files
python3 fairseq/examples/wav2vec/wav2vec_manifest.py dataset --dest feature --valid-percent 0.1

# Filter out files with silence and update manifests
python remove_silence_files.py feature/train.tsv feature/valid.tsv feature/filtered

cp feature/filtered/train.tsv feature/lab/train.tsv
cp feature/filtered/valid.tsv feature/lab/valid.tsv

# Continue with feature extraction: swap in the local dump_hubert_feature.py
# in place of the one shipped in the fairseq checkout.
rm -rf -- fairseq/examples/hubert/simple_kmeans/dump_hubert_feature.py
cp dump_hubert_feature.py fairseq/examples/hubert/simple_kmeans/dump_hubert_feature.py

# Positional arguments passed to dump_hubert_feature.py below.
tsv_dir="feature/lab"
split="train"
ckpt_path="checkpoint_best_legacy_500.pt"
layer=12
nshard=1
rank=0
feat_dir="feature"

# NOTE(review): the next three variables are not used anywhere in this script;
# presumably they are meant for a later k-means / labelling stage -- confirm
# before removing.
km_path="feature/${split}.km"
lab_dir="feature/lab"
n_clusters=100

python speaker.py

# Extract features
python fairseq/examples/hubert/simple_kmeans/dump_hubert_feature.py \
  "$tsv_dir" "$split" "$ckpt_path" "$layer" "$nshard" "$rank" "$feat_dir"