File size: 3,047 Bytes
f03ee05 73dfaab f03ee05 8e03cef f03ee05 73dfaab f03ee05 277e947 21363bd 277e947 f03ee05 73dfaab 896d88b 73dfaab f03ee05 8e03cef f03ee05 d536a72 e97a1f1 5b98a9c 8e03cef ce13d97 8e03cef d7da2d0 8e03cef 0b739e4 fc28430 8e03cef f03ee05 742d18f f03ee05 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import os
import time
import requests
import streamlit as st
# Endpoint the translation requests are posted to; the model id forms the
# last part of the URL path.
_MODEL_ID = "pere/nb-nn-translation"
API_URL = f"https://api-inference.huggingface.co/models/{_MODEL_ID}"
def translate(text, wait=True):
    """Send ``text`` to the model at ``API_URL`` and return the translation.

    The request is authorised with the token stored in the ``BEARER``
    environment variable.

    Args:
        text: The text to translate.
        wait: Controls the retry behaviour when the API answers with an
            ``error`` plus an ``estimated_time``. With ``wait=True`` (the
            first attempt) a progress bar runs for the estimated time and
            the request is retried once with ``wait=False``. With
            ``wait=False`` the same answer is reported as a failure.

    Returns:
        The translated text on success, otherwise a human-readable error
        message. Failures are always reported as strings so the caller can
        display the result directly.

    Raises:
        KeyError: If the ``BEARER`` environment variable is not set.
    """
    headers = {"Authorization": f"Bearer {os.environ['BEARER']}"}
    payload = {
        "inputs": text,
        "options": {
            # Deliberately the inverse of ``wait``: the first attempt sends
            # False, the single retry sends True.
            # NOTE(review): presumably True makes the server hold the request
            # until the model is loaded -- confirm against the API docs.
            "wait_for_model": not wait,
        },
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload)
    except requests.exceptions.RequestException as error:
        # Connection problems are reported like every other failure instead
        # of crashing the page with a traceback.
        return f"Oops, something went wrong: {error}"

    try:
        json_response = response.json()
    except ValueError:
        # The body was not JSON (JSON decode errors subclass ValueError).
        return (
            "Oops, something went wrong: "
            f"HTTP {response.status_code}: {response.text}"
        )

    model_is_loading = (
        isinstance(json_response, dict)
        and "error" in json_response
        and "estimated_time" in json_response
    )
    if model_is_loading:
        st.write(json_response)
        if not wait:
            return "We could not load the model"
        with st.spinner(json_response["error"]):
            bar = st.progress(0)
            # Round the estimate up so the bar never finishes early.
            time_to_load = int(json_response["estimated_time"]) + 1
            for progress in range(time_to_load):
                bar.progress(progress / time_to_load)
                time.sleep(1)
            bar.empty()
            # Retry while the spinner is still shown so the page keeps
            # giving feedback during the second request.
            return translate(text, wait=False)

    # Guard against an empty list or a non-dict first element before
    # indexing; both previously raised instead of returning a message.
    if (
        isinstance(json_response, list)
        and json_response
        and isinstance(json_response[0], dict)
        and "translation_text" in json_response[0]
    ):
        return json_response[0]["translation_text"]

    return f"Oops, something went wrong: {str(json_response)}"
# Page metadata: title and icon passed to Streamlit's page configuration.
_PAGE_SETTINGS = {
    "page_title": "Norwegian Bokmål to Nynorsk",
    "page_icon": "translator-icon.png",
}
st.set_page_config(**_PAGE_SETTINGS)

# Main heading of the page.
st.title("Bokmål ⇔ Nynorsk")

# Sidebar heading followed by the introduction to the sample texts.
_SIDEBAR_INTRO = """
Here are some sample texts in Norwegian Bokmål and Norwegian Nynorsk that you can try to translate. They are here presented in pairs (Bokmål, Nynorsk, Bokmål...). This way you can also see a suggested translation of the text. The model will automatically understand if the input is in Nynorsk or Bokmål, and switch the translation.
"""
st.sidebar.title("Translation Demo")
st.sidebar.write(_SIDEBAR_INTRO)
# Sample sentences offered in the sidebar selector, listed as consecutive
# (Bokmål, Nynorsk) pairs. Every entry needs its own trailing comma: adjacent
# string literals without one are silently concatenated into a single entry.
masked_texts = [
    "Hvordan kan man lære maskinen å oversette?",
    "Korleis kan ein lære maskinen å omsetje?",
    "Hun vil ikke gi bort sine personlige data.",
    "Ho vil ikkje gi bort dei personlege dataa sine.",
    "Bilen er rød. Den er rask, og hun kjører den veldig raskt.",
    "Bilen er raud. Han er rask, og ho køyrer han veldig raskt..",
    "De ventes å ankomme ganske snart.",
    "Dei blir venta å komme ganske snart.",
]
# Sample picker in the sidebar; the chosen sentence is used as the initial
# content of the text area below.
input_text = st.sidebar.selectbox("Select a Text", options=masked_texts)

# Background note shown in the sidebar under the picker.
_SIDEBAR_NOTE = """
As you can see there are a lot of similarities between the languages. Since there also are some grammatical differences, simple dictionary replacements do not give a good result. A finetuned model on top of a pretrained t5-base from a balanced corpus, seem to solve the task with a SACREBLEU-score of 88.17.
"""
st.sidebar.write(_SIDEBAR_NOTE)

# Text box holding the text to translate (labelled with a single space).
_text_area_options = dict(
    height=None,
    max_chars=None,
    key=None,
    help="Enter your text here",
)
text = st.text_area(" ", input_text, **_text_area_options)
# When the button is pressed, translate the text area content, or warn if
# it contains only whitespace.
if st.button("Translate"):
    has_content = bool(str(text).strip())
    if has_content:
        st.info(str(translate(text)))
    else:
        st.warning("Please **enter text** for translation")
|