Update js-esm/text_to_arpa.js
Files changed: js-esm/text_to_arpa.js (+91 -91)
js-esm/text_to_arpa.js
CHANGED
|
@@ -1,92 +1,92 @@
|
|
| 1 |
-
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
// Lazily-created promise for the shared grapheme-to-phoneme pipeline, so the
// model is instantiated once and reused by every later call.
let g2pGeneratorPromise;

/**
 * Returns the shared 'mini-bart-g2p' text2text-generation pipeline, creating
 * it on first use. A failed load is not cached, so a later call can retry.
 *
 * @returns {Promise<Function>} Promise resolving to the pipeline callable.
 */
function loadG2pGenerator() {
  if (g2pGeneratorPromise === undefined) {
    g2pGeneratorPromise = pipeline('text2text-generation', 'mini-bart-g2p', { quantized: false })
      .catch((error) => {
        g2pGeneratorPromise = undefined;
        throw error;
      });
  }
  return g2pGeneratorPromise;
}

/**
 * Runs the 'mini-bart-g2p' text2text-generation pipeline over the given words.
 *
 * @param {string[]} words - Inputs passed to the pipeline unchanged.
 * @param {boolean} [convert_ipa=false] - When true, each generated text is
 *   passed through `arpa_to_ipa` and stripped of all whitespace.
 * @returns {Promise<Array>} With `convert_ipa` false, the raw pipeline outputs
 *   (objects exposing `generated_text`); otherwise an array of converted strings.
 * @throws {ReferenceError} When `convert_ipa` is true but no `arpa_to_ipa`
 *   function is in scope.
 */
async function text2text_generation(words, convert_ipa = false) {
  // `arpa_to_ipa` is neither defined nor imported in this module; fail fast
  // with a clear message instead of after the (expensive) model load.
  // NOTE(review): presumably it is meant to be supplied as a global — confirm.
  if (convert_ipa && typeof arpa_to_ipa !== 'function') {
    throw new ReferenceError('arpa_to_ipa is not defined; it is required when convert_ipa is true');
  }

  const generator = await loadG2pGenerator();
  const options = { max_new_tokens: 100 };
  const outputs = await generator(words, options);

  if (!convert_ipa) {
    return outputs; //arpa
  }
  return outputs.map((output) => arpa_to_ipa(output.generated_text).replace(/\s/g, ''));
}
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
/**
 * Converts free text into a single space-separated phoneme string.
 *
 * Words found in `cmudict` use the dictionary entry; words that are missing
 * are sent to `text2text_generation` and their generated text is substituted
 * for the "@WORD" placeholders produced by `wordsToArpa`.
 *
 * @param {Object} cmudict - Pronunciation dictionary keyed by upper-case word.
 * @param {string} text - Input text.
 * @param {boolean} [replace_questions=false] - When true, "!" and "?" are
 *   replaced by "." before tokenizing.
 * @returns {Promise<string>} The tokens joined with single spaces.
 */
async function textToArpa(cmudict, text, replace_questions = false) {
  const normalized = replace_questions
    ? text.replaceAll("!", ".").replaceAll("?", ".")
    : text;
  // Keep only ASCII letters, digits, spaces and the punctuation , . ! ?
  const cleanedString = normalized.replace(/[^a-zA-Z0-9.,!? ]/g, '');

  const { result, non_converted } = wordsToArpa(cmudict, cleanedString);
  let arpa_text = result.join(" ");
  if (non_converted.length === 0) {
    return arpa_text;
  }

  console.log("non_converted length = " + non_converted.length);
  const arpas = await text2text_generation(non_converted);
  console.log(arpas);

  non_converted.forEach((word, index) => {
    const arpa = arpas[index].generated_text;
    console.log("@" + word, arpa);
    // A replacer function inserts the generated text literally; a plain
    // string replacement would interpret "$&", "$1", ... inside it.
    arpa_text = arpa_text.replace("@" + word, () => arpa);
  });
  return arpa_text;
}
|
| 54 |
-
|
| 55 |
-
/**
 * Looks up a word in the pronunciation dictionary.
 *
 * @param {Object} cmudict - Dictionary keyed by upper-case word.
 * @param {string} word - Word in any letter case; it is upper-cased for the lookup.
 * @returns {*} The stored entry, or `undefined` when the word is absent.
 */
function get_arpa(cmudict, word) {
  return cmudict[word.toUpperCase()];
}

/**
 * Tokenizes text and maps each token to its dictionary entry.
 *
 * The punctuation marks , . ! ? become stand-alone tokens and are kept as-is.
 * A word with no dictionary entry is emitted as the placeholder "@WORD"
 * (upper-cased) and also collected in `non_converted`, in order of appearance.
 *
 * @param {Object} cmudict - Dictionary keyed by upper-case word.
 * @param {string} text - Input text.
 * @returns {{result: Array, non_converted: string[]}} The token list and the
 *   upper-cased words that had no dictionary entry.
 */
function wordsToArpa(cmudict, text) {
  const keep_words = [",", ".", "!", "?"];

  // Pad punctuation with spaces so it splits off as its own token.
  let inputText = text.toUpperCase();
  for (const mark of keep_words) {
    inputText = inputText.replaceAll(mark, ` ${mark} `);
  }

  const result = [];
  const non_converted = [];
  // Split on any whitespace run so tabs and newlines also separate words.
  for (const word of inputText.split(/\s+/)) {
    if (word === "") {
      continue; // produced by leading/trailing whitespace
    }
    if (keep_words.includes(word)) {
      result.push(word);
      continue;
    }
    const arpa = get_arpa(cmudict, word);
    if (arpa === undefined) {
      result.push("@" + word);
      non_converted.push(word);
    } else {
      result.push(arpa);
    }
  }

  return { result, non_converted };
}
|
| 91 |
-
|
| 92 |
export{env,textToArpa}
|
|
|
|
| 1 |
+
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
|
| 2 |
+
// Set the ONNX backend's log severity level to 3 at module load.
// NOTE(review): presumably this limits ONNX Runtime console output to errors
// and above — confirm against the onnxruntime-web `env.logSeverityLevel` docs.
env.backends.onnx.logSeverityLevel = 3
|
| 3 |
+
|
| 4 |
+
// Lazily-created promise for the shared grapheme-to-phoneme pipeline, so the
// model is instantiated once and reused by every later call.
let g2pGeneratorPromise;

/**
 * Returns the shared 'mini-bart-g2p' text2text-generation pipeline, creating
 * it on first use. A failed load is not cached, so a later call can retry.
 *
 * @returns {Promise<Function>} Promise resolving to the pipeline callable.
 */
function loadG2pGenerator() {
  if (g2pGeneratorPromise === undefined) {
    g2pGeneratorPromise = pipeline('text2text-generation', 'mini-bart-g2p', { quantized: false })
      .catch((error) => {
        g2pGeneratorPromise = undefined;
        throw error;
      });
  }
  return g2pGeneratorPromise;
}

/**
 * Runs the 'mini-bart-g2p' text2text-generation pipeline over the given words.
 *
 * @param {string[]} words - Inputs passed to the pipeline unchanged.
 * @param {boolean} [convert_ipa=false] - When true, each generated text is
 *   passed through `arpa_to_ipa` and stripped of all whitespace.
 * @returns {Promise<Array>} With `convert_ipa` false, the raw pipeline outputs
 *   (objects exposing `generated_text`); otherwise an array of converted strings.
 * @throws {ReferenceError} When `convert_ipa` is true but no `arpa_to_ipa`
 *   function is in scope.
 */
async function text2text_generation(words, convert_ipa = false) {
  // `arpa_to_ipa` is neither defined nor imported in this module; fail fast
  // with a clear message instead of after the (expensive) model load.
  // NOTE(review): presumably it is meant to be supplied as a global — confirm.
  if (convert_ipa && typeof arpa_to_ipa !== 'function') {
    throw new ReferenceError('arpa_to_ipa is not defined; it is required when convert_ipa is true');
  }

  const generator = await loadG2pGenerator();
  const options = { max_new_tokens: 100 };
  const outputs = await generator(words, options);

  if (!convert_ipa) {
    return outputs; //arpa
  }
  return outputs.map((output) => arpa_to_ipa(output.generated_text).replace(/\s/g, ''));
}
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
/**
 * Converts free text into a single space-separated phoneme string.
 *
 * Words found in `cmudict` use the dictionary entry; words that are missing
 * are sent to `text2text_generation` and their generated text is substituted
 * for the "@WORD" placeholders produced by `wordsToArpa`.
 *
 * @param {Object} cmudict - Pronunciation dictionary keyed by upper-case word.
 * @param {string} text - Input text.
 * @param {boolean} [replace_questions=false] - When true, "!" and "?" are
 *   replaced by "." before tokenizing.
 * @returns {Promise<string>} The tokens joined with single spaces.
 */
async function textToArpa(cmudict, text, replace_questions = false) {
  const normalized = replace_questions
    ? text.replaceAll("!", ".").replaceAll("?", ".")
    : text;
  // Keep only ASCII letters, digits, spaces and the punctuation , . ! ?
  const cleanedString = normalized.replace(/[^a-zA-Z0-9.,!? ]/g, '');

  const { result, non_converted } = wordsToArpa(cmudict, cleanedString);
  let arpa_text = result.join(" ");
  if (non_converted.length === 0) {
    return arpa_text;
  }

  console.log("non_converted length = " + non_converted.length);
  const arpas = await text2text_generation(non_converted);
  console.log(arpas);

  non_converted.forEach((word, index) => {
    const arpa = arpas[index].generated_text;
    console.log("@" + word, arpa);
    // A replacer function inserts the generated text literally; a plain
    // string replacement would interpret "$&", "$1", ... inside it.
    arpa_text = arpa_text.replace("@" + word, () => arpa);
  });
  return arpa_text;
}
|
| 54 |
+
|
| 55 |
+
/**
 * Looks up a word in the pronunciation dictionary.
 *
 * @param {Object} cmudict - Dictionary keyed by upper-case word.
 * @param {string} word - Word in any letter case; it is upper-cased for the lookup.
 * @returns {*} The stored entry, or `undefined` when the word is absent.
 */
function get_arpa(cmudict, word) {
  return cmudict[word.toUpperCase()];
}

/**
 * Tokenizes text and maps each token to its dictionary entry.
 *
 * The punctuation marks , . ! ? become stand-alone tokens and are kept as-is.
 * A word with no dictionary entry is emitted as the placeholder "@WORD"
 * (upper-cased) and also collected in `non_converted`, in order of appearance.
 *
 * @param {Object} cmudict - Dictionary keyed by upper-case word.
 * @param {string} text - Input text.
 * @returns {{result: Array, non_converted: string[]}} The token list and the
 *   upper-cased words that had no dictionary entry.
 */
function wordsToArpa(cmudict, text) {
  const keep_words = [",", ".", "!", "?"];

  // Pad punctuation with spaces so it splits off as its own token.
  let inputText = text.toUpperCase();
  for (const mark of keep_words) {
    inputText = inputText.replaceAll(mark, ` ${mark} `);
  }

  const result = [];
  const non_converted = [];
  // Split on any whitespace run so tabs and newlines also separate words.
  for (const word of inputText.split(/\s+/)) {
    if (word === "") {
      continue; // produced by leading/trailing whitespace
    }
    if (keep_words.includes(word)) {
      result.push(word);
      continue;
    }
    const arpa = get_arpa(cmudict, word);
    if (arpa === undefined) {
      result.push("@" + word);
      non_converted.push(word);
    } else {
      result.push(arpa);
    }
  }

  return { result, non_converted };
}
|
| 91 |
+
|
| 92 |
export{env,textToArpa}
|