Spaces:

clr
/

pce

Sleeping

App Files Files Community

catiR committed on Oct 19, 2023

Commit

e2901c5

1 Parent(s): 0d95ee8

run clustering

Browse files

Files changed (1) hide show

scripts/runSQ.py +8 -36

scripts/runSQ.py CHANGED Viewed

@@ -192,12 +192,19 @@ def f0_tts(sentence, voices, ttsdir, reaper_path = "REAPER/build/reaper"):
         fpath = f'{ttsdir}{dpath}/{v}.f0'
         if not os.path.exists(fpath):
             no_f0.append(v)
     if no_f0:
         print(f'Need to estimate pitch for {len(no_f0)} voices')
         for v in voices:
             wav_path = f'{ttsdir}{dpath}/{v}.wav'
             fpath = f'{ttsdir}{dpath}/{v}.f0'
             f0_data = estimate_pitch(wav_path, reaper_path)
             save_pitch(f0_data,fpath)
@@ -251,41 +258,6 @@ def localtest():
 # https://colab.research.google.com/drive/1RApnJEocx3-mqdQC2h5SH8vucDkSlQYt?authuser=1#scrollTo=410ecd91fa29bc73
-# CLUSTER the humans
-# - read energy and pitch, to alignments
-# - dtw based with selected chunking ? code should exist.
-# ... experimental variants?
-# ** 1 dimension at a time vs 2 on top of each other
-# ** 25 points resampling (euclidean, kmeans, i guess....) vs all points dtw kmedoids
-#  +/or maybe some intermediate parts of that??? like 25 points dtw medoids particularly **
-# --different normings for pitch? different settings for energy (tbqh i hope not too much?)
-# TODO '''replacement with a constant low value''' ********
-# errrrrrrrm duration?
-#   duration feature vector will have a different length than the others, BUT,
- # besides the single clustering,,
- # i SUPPOSE one could TRY assigning the phone's 'speech rate' value to every frame of the phone, so it doesn't change while the other 2 values do change.... like it would still VAGUELY represent that 2 people elongating the same vowel/syllable are doing similar things with duration while someone eliding that vowel is doing a different durational thing right there?
- # might want to z-score this dimension across ALL speakers tho not within a speaker
-  # try doing it both ways at least. bc not sure to what extent i want absolute vs. relative rate info here.
-   #(note - unless chengs dur metric is of a kind where only rel makes sense in the first place. idr.)
-# GRAPH the humans.
-# - probably modify this code a bit to centre on boundary.
-# - idk.
-# TEST each TTS
-# - structure its features
-# - find its avg dist for each human cluster
-# - find the lowest dist cluster
-# - report the dist for i guess this and all clusters
-# - GRAPH the tts with its best cluster
 # EVALUATION
 # - of the tts
 # - of the method: consistency? coherency / interpretability of 'best' voice across different features; alt. ability to recover good & problematic features from a combined method if that is chosen as the best?

         fpath = f'{ttsdir}{dpath}/{v}.f0'
         if not os.path.exists(fpath):
             no_f0.append(v)
+    ttt = subprocess.run(["ls", "-la", "ttsdir"], capture_output=True, text=True)
+    print('LS::', ttt.stdout)
     if no_f0:
         print(f'Need to estimate pitch for {len(no_f0)} voices')
         for v in voices:
             wav_path = f'{ttsdir}{dpath}/{v}.wav'
             fpath = f'{ttsdir}{dpath}/{v}.f0'
+            print(wav_path)
+            print(fpath)
             f0_data = estimate_pitch(wav_path, reaper_path)
             save_pitch(f0_data,fpath)
 # https://colab.research.google.com/drive/1RApnJEocx3-mqdQC2h5SH8vucDkSlQYt?authuser=1#scrollTo=410ecd91fa29bc73
 # EVALUATION
 # - of the tts
 # - of the method: consistency? coherency / interpretability of 'best' voice across different features; alt. ability to recover good & problematic features from a combined method if that is chosen as the best?