Spaces:
Runtime error
Runtime error
File size: 3,772 Bytes
a013c5c c95a8ea db6d318 0db47ae 2b29a37 145304e c95a8ea 2b29a37 da9bcd4 c95a8ea bc0875e 905833b 12dfc24 905833b 24c5e5a 905833b 24c5e5a c95a8ea bba97f2 c95a8ea 7c012d1 c95a8ea 79fe79b 49de2b2 0db47ae 49de2b2 f6ae8b8 0db47ae 0e45975 c95a8ea 5105682 c95a8ea 1651e6e b33e08e 619b882 817d838 c95a8ea 67938d2 817d838 b33e08e 67938d2 49de2b2 c95a8ea 383b08c 0ba8db9 73db0ba 5105682 0ba8db9 4045b93 2b29a37 44f705d 2bd2657 b33e08e 44f705d 2bd2657 1526a67 410f826 c95a8ea 2bd2657 44f705d a013c5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import streamlit as st
import epitran
import langcodes
from langcodes import LanguageTagError
from pathlib import Path
from operator import itemgetter
from collections import defaultdict
# TODO: reverse transliterate?
def get_lang_description_from_mapping_name(string_to_check):
    """Build a Markdown description for an Epitran mapping name.

    Mapping names are dash-separated, e.g. ``swa-Latn`` or ``tur-Latn-bab``.
    Only the first two parts (ISO 639-3 language and ISO 15924 script) are
    passed to ``langcodes``; a trailing ``red`` part marks a reduced mapping.

    Args:
        string_to_check: The Epitran mapping name to describe.

    Returns:
        ``"Generic Latin Script"`` for ``generic-Latn``; otherwise a
        comma-separated ``key: value`` string built from
        ``lang.describe()``, in which the language name is a Markdown link
        to its ISO 639-3 page, with `` (reduced)`` appended for ``-red``
        mappings. Returns ``None`` when the name is shorter than two
        characters or cannot be resolved to a language.
    """
    if string_to_check == "generic-Latn":
        return "Generic Latin Script"

    if len(string_to_check) < 2:
        return None

    parts = string_to_check.split("-")

    # Record the "reduced" marker BEFORE truncating: after keeping only the
    # first two parts, the last element is always the script code, so a
    # check made afterwards can never see "red".
    is_reduced = len(parts) > 2 and parts[-1] == "red"

    # first two are ISO 639-3 language, and ISO 15924 script
    language_tag = "-".join(parts[:2])

    try:
        lang = langcodes.get(language_tag)
    except LanguageTagError:
        # Treat an unparseable tag like any other unrecognised name.
        return None
    if not lang:
        return None

    items = []
    for key, value in lang.describe().items():
        if key == "language":
            # Turn the language name into a link to its ISO 639-3 entry.
            iso_code = lang.to_alpha3()
            value = f"[{value}](https://iso639-3.sil.org/code/{iso_code})"
        items.append(f"{key}: {value}")

    description = ", ".join(items)
    if is_reduced:
        description += " (reduced)"
    return description
@st.cache
def get_valid_epitran_mappings_list():
    """Return the Epitran mapping names offered in the language selector.

    The names are the stems of the files matching ``*.*`` in the installed
    ``epitran`` package's ``data/map`` directory, plus the special-case
    ``cmn-Hans``, minus a fixed list of mappings known to cause problems.

    Returns:
        A list of mapping-name strings, in the order the files were found,
        with ``cmn-Hans`` appended at the end.
    """
    # Mappings excluded because of
    # https://github.com/dmort27/epitran/issues/98
    excluded = (
        'generic-Latn',
        'tur-Latn-bab',
        'ood-Latn-sax',
        'vie-Latn-so',
        'vie-Latn-ce',
        'vie-Latn-no',
        'kaz-Cyrl-bab',
    )

    map_dir = Path(epitran.__path__[0]).joinpath("data", "map")

    candidates = []
    for entry in map_dir.glob("*.*"):
        candidates.append(entry.stem)
    candidates.append("cmn-Hans")  # special case

    return [name for name in candidates if name not in excluded]
## TODO: Fix unhashable type in return
#@st.cache
#def get_epitran(selected_mapping):
# if selected_mapping == "cmn-Hans":
## st.info("Chinese requires a special dictionary. Downloading now")
# epitran.download.cedict()
#
# epi = epitran.Epitran(selected_mapping)
# return epi
# --- Page header -----------------------------------------------------------
st.write("# Phonemize your text with [Epitran](https://github.com/dmort27/epitran)!")
st.write("Epitran is a library and tool for transliterating orthographic text as IPA (International Phonetic Alphabet), by Mortensen, David R. and Dalmia, Siddharth and Littell, Patrick.")

# --- Language/script selection ----------------------------------------------
valid_epitran_mappings = get_valid_epitran_mappings_list()

# Pre-select Swahili when it is available; fall back to the first entry
# rather than letting list.index() raise ValueError if it is missing.
default_mapping = "swa-Latn"
if default_mapping in valid_epitran_mappings:
    index_of_default_language = valid_epitran_mappings.index(default_mapping)
else:
    index_of_default_language = 0

st.write(f"It supports converting many writing systems to IPA symbols, including approximately {len(valid_epitran_mappings)} languages/scripts, listed below:")
#st.write(valid_epitran_mappings)
selected_mapping = st.selectbox("Select input language/script:", valid_epitran_mappings, index=index_of_default_language)

# The description helper can return None; show the raw mapping name in that
# case instead of the literal text "None".
description = get_lang_description_from_mapping_name(selected_mapping)
st.write(f"Selected input language/script: {description or selected_mapping}")

# --- Epitran instantiation --------------------------------------------------
st.info("attempting to instantiate Epitran for your language/script")
if selected_mapping == "cmn-Hans":
    st.info("Chinese requires a special dictionary. Downloading now")
    # NOTE(review): a plain `import epitran` is not guaranteed to load the
    # `download` submodule, so import it explicitly before using it.
    import epitran.download
    epitran.download.cedict()

epi = epitran.Epitran(selected_mapping)

# --- Input text and transliteration -----------------------------------------
# Example text per mapping; any mapping without a specific example gets a
# generic prompt via the defaultdict factory.
examples = defaultdict(lambda: 'Try typing some words in the language you chose, and they will be transliterated.')
examples['cmn-Hans'] = '太初有道,道与神同在,道就是神。'
examples['swa-Latn'] = 'Mwanzoni Kabla ulimwengu haujaumbwa alikuwepo Neno Huyo Neno alikuwa pamoja na Mungu, na Neno alikuwa Mungu.'

input_text = st.text_area(label="Whatever you type here will be transliterated!", value=examples[selected_mapping])

st.info(f"transliterating `{input_text}`\n\tusing {selected_mapping}...")
transliteration = epi.transliterate(input_text)

output = {
    "original": input_text,
    "transliteration": transliteration,
}
st.write(output)
|