Sin2pi committed on
Commit
606e3d4
·
verified ·
1 Parent(s): f7847b1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +16 -1
README.md CHANGED
@@ -21,7 +21,7 @@ tags:
21
 
22
  ---
23
 
24
- ASR model
25
 
26
  <img width="1363" height="732" alt="pitch_spectrogram" src="https://github.com/user-attachments/assets/ceb65e94-7df4-41b7-aa3d-c4aa4c6c0717" />
27
 
@@ -89,8 +89,22 @@ Reference: [PyTorch Documentation - torch.polar](https://pytorch.org/docs/stable/generated/torch.polar.html)
89
  # freqs = pos / (self.theta ** (dim / self.head_dim))
90
  # dim = torch.arange(0, self.head_dim, 2, dtype=torch.float32, device=device)
91
  ```
 
 
 
 
 
 
 
 
 
 
 
92
 
 
 
93
 
 
94
  def _apply_radii(self, freqs, f0, ctx):
95
  if self.radii and f0 is not None:
96
  radius = f0.to(device, dtype)
@@ -239,3 +253,4 @@ The Complex Frequency Result:
239
 
240
 
241
 
 
 
21
 
22
  ---
23
 
24
+ ASR model
25
 
26
  <img width="1363" height="732" alt="pitch_spectrogram" src="https://github.com/user-attachments/assets/ceb65e94-7df4-41b7-aa3d-c4aa4c6c0717" />
27
 
 
89
  # freqs = pos / (self.theta ** (dim / self.head_dim))
90
  # dim = torch.arange(0, self.head_dim, 2, dtype=torch.float32, device=device)
91
  ```
92
+ # 200Hz - 4000Hz (covers 95% of speech content)
93
+ freqs = (self.theta / 220.0) * 200 * (torch.pow(10, torch.linspace(0, 2595 * torch.log10(torch.tensor(1 + 4000/200)), self.head_dim // 2, device=device, dtype=dtype) / 2595) - 1) / 1000
94
+
95
+ # 150Hz - 6000Hz (covers speech + some emotion/intonation)
96
+ freqs = (self.theta / 220.0) * 150 * (torch.pow(10, torch.linspace(0, 2595 * torch.log10(torch.tensor(1 + 6000/150)), self.head_dim // 2, device=device, dtype=dtype) / 2595) - 1) / 1000
97
+
98
+ # 80Hz - 2000Hz (focus on fundamental frequencies + first few harmonics)
99
+ freqs = (self.theta / 220.0) * 80 * (torch.pow(10, torch.linspace(0, 2595 * torch.log10(torch.tensor(1 + 2000/80)), self.head_dim // 2, device=device, dtype=dtype) / 2595) - 1) / 1000
100
+
101
+ # original: mel-scale spacing with a 700Hz break frequency, up to 8000Hz
102
+ freqs = (self.theta / 220.0) * 700 * (torch.pow(10, torch.linspace(0, 2595 * torch.log10(torch.tensor(1 + 8000/700)), self.head_dim // 2, device=device, dtype=dtype) / 2595) - 1) / 1000
103
 
104
+ Standard RoPE: 1, 0.1, 0.01, 0.001... (arbitrary geometric)
105
+ This RoPE: 80Hz, 100Hz, 140Hz... (perceptually meaningful)
106
 
107
+ ----
108
  def _apply_radii(self, freqs, f0, ctx):
109
  if self.radii and f0 is not None:
110
  radius = f0.to(device, dtype)
 
253
 
254
 
255
 
256
+