Update README.md
Browse files
README.md
CHANGED
@@ -21,7 +21,7 @@ tags:
|
|
21 |
|
22 |
---
|
23 |
|
24 |
-
ASR model
|
25 |
|
26 |
<img width="1363" height="732" alt="pitch_spectrogram" src="https://github.com/user-attachments/assets/ceb65e94-7df4-41b7-aa3d-c4aa4c6c0717" />
|
27 |
|
@@ -89,8 +89,22 @@ Reference: [PyTorch Documentation - torch.polar](https://pytorch.org/docs/stable/generated/torch.polar.html)
|
|
89 |
# freqs = pos / (self.theta ** (dim / self.head_dim))
|
90 |
# dim = torch.arange(0, self.head_dim, 2, dtype=torch.float32, device=device)
|
91 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
|
|
|
|
93 |
|
|
|
94 |
def _apply_radii(self, freqs, f0, ctx):
|
95 |
if self.radii and f0 is not None:
|
96 |
radius = f0.to(device, dtype)
|
@@ -239,3 +253,4 @@ The Complex Frequency Result:
|
|
239 |
|
240 |
|
241 |
|
|
|
|
21 |
|
22 |
---
|
23 |
|
24 |
+
ASR model
|
25 |
|
26 |
<img width="1363" height="732" alt="pitch_spectrogram" src="https://github.com/user-attachments/assets/ceb65e94-7df4-41b7-aa3d-c4aa4c6c0717" />
|
27 |
|
|
|
89 |
# freqs = pos / (self.theta ** (dim / self.head_dim))
|
90 |
# dim = torch.arange(0, self.head_dim, 2, dtype=torch.float32, device=device)
|
91 |
```
|
92 |
+
# 200Hz - 4000Hz (covers 95% of speech content)
|
93 |
+
freqs = (self.theta / 220.0) * 200 * (torch.pow(10, torch.linspace(0, 2595 * torch.log10(torch.tensor(1 + 4000/200)), self.head_dim // 2, device=device, dtype=dtype) / 2595) - 1) / 1000
|
94 |
+
|
95 |
+
# 150Hz - 6000Hz (covers speech + some emotion/intonation)
|
96 |
+
freqs = (self.theta / 220.0) * 150 * (torch.pow(10, torch.linspace(0, 2595 * torch.log10(torch.tensor(1 + 6000/150)), self.head_dim // 2, device=device, dtype=dtype) / 2595) - 1) / 1000
|
97 |
+
|
98 |
+
# 80Hz - 2000Hz (focus on fundamental frequencies + first few harmonics)
|
99 |
+
freqs = (self.theta / 220.0) * 80 * (torch.pow(10, torch.linspace(0, 2595 * torch.log10(torch.tensor(1 + 2000/80)), self.head_dim // 2, device=device, dtype=dtype) / 2595) - 1) / 1000
|
100 |
+
|
101 |
+
# original: standard mel scale (700 Hz corner frequency, up to 8000 Hz)
|
102 |
+
freqs = (self.theta / 220.0) * 700 * (torch.pow(10, torch.linspace(0, 2595 * torch.log10(torch.tensor(1 + 8000/700)), self.head_dim // 2, device=device, dtype=dtype) / 2595) - 1) / 1000
|
103 |
|
104 |
+
Standard RoPE frequencies: 1, 0.1, 0.01, 0.001, ... (an arbitrary geometric progression)
|
105 |
+
This RoPE's frequencies: 80 Hz, 100 Hz, 140 Hz, ... (perceptually meaningful spacing)
|
106 |
|
107 |
+
----
|
108 |
def _apply_radii(self, freqs, f0, ctx):
|
109 |
if self.radii and f0 is not None:
|
110 |
radius = f0.to(device, dtype)
|
|
|
253 |
|
254 |
|
255 |
|
256 |
+
|