File size: 3,047 Bytes
f03ee05
73dfaab
f03ee05
 
 
 
8e03cef
f03ee05
73dfaab
 
f03ee05
 
 
277e947
21363bd
277e947
f03ee05
73dfaab
 
 
 
 
 
 
 
 
896d88b
 
 
73dfaab
 
 
 
 
 
 
 
 
 
f03ee05
 
 
8e03cef
f03ee05
 
d536a72
e97a1f1
5b98a9c
8e03cef
 
ce13d97
8e03cef
 
 
 
 
 
 
 
 
 
 
d7da2d0
8e03cef
 
0b739e4
 
fc28430
8e03cef
f03ee05
 
 
 
 
 
742d18f
 
 
 
 
f03ee05
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import time

import requests
import streamlit as st

# Hugging Face Inference API endpoint of the pere/nb-nn-translation model.
API_URL = "https://api-inference.huggingface.co/models/pere/nb-nn-translation"


def translate(text, wait=True, timeout=120):
    """Translate ``text`` through the hosted model at ``API_URL``.

    The function never raises for API or network problems; it returns a
    human-readable message instead so the result can always be displayed.

    Args:
        text: The text to send to the model as ``inputs``.
        wait: When true (the first attempt), a "model is loading" reply is
            answered with a progress bar for the estimated loading time,
            followed by one retry. When false (the retry), a "model is
            loading" reply gives up with an error message.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The translated text on success, otherwise an error message string.

    Raises:
        KeyError: If the ``BEARER`` environment variable is not set.
    """
    headers = {"Authorization": f"Bearer {os.environ['BEARER']}"}
    payload = {
        "inputs": text,
        "options": {
            # Deliberately the opposite of ``wait``: the first attempt asks
            # the API not to block, so a loading model is reported back
            # right away (with an estimated time that drives the progress
            # bar); the retry asks the API to block until the model is ready.
            "wait_for_model": not wait,
        },
    }

    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
    except requests.exceptions.RequestException as error:
        # Connection problems and timeouts become a message, not a crash.
        return f"Oops, something went wrong: {error}"

    try:
        json_response = response.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML gateway error page).
        return (
            "Oops, something went wrong: "
            f"HTTP {response.status_code}: {response.text[:200]}"
        )

    model_is_loading = (
        isinstance(json_response, dict)
        and "error" in json_response
        and "estimated_time" in json_response
    )
    if model_is_loading:
        st.write(json_response)
        if not wait:
            return "We could not load the model"
        with st.spinner(json_response["error"]):
            bar = st.progress(0)
            time_to_load = int(json_response["estimated_time"]) + 1
            for progress in range(time_to_load):
                bar.progress(progress / time_to_load)
                time.sleep(1)
            bar.empty()
        return translate(text, wait=False, timeout=timeout)

    # Guard against an empty list or non-dict items before indexing.
    if isinstance(json_response, list) and json_response:
        first_result = json_response[0]
        if isinstance(first_result, dict) and "translation_text" in first_result:
            return first_result["translation_text"]

    return f"Oops, something went wrong: {str(json_response)}"


# Page chrome: browser-tab title/icon, main heading and the sidebar intro.
_PAGE_TITLE = 'Norwegian Bokmål to Nynorsk'
_PAGE_ICON = 'translator-icon.png'
_SIDEBAR_INTRO = """
Here are some sample texts in Norwegian Bokmål and Norwegian Nynorsk that you can try to translate. They are here presented in pairs (Bokmål, Nynorsk, Bokmål...). This way you can also see a suggested translation of the text. The model will automatically understand if the input is in Nynorsk or Bokmål, and switch the translation.
"""

st.set_page_config(page_title=_PAGE_TITLE, page_icon=_PAGE_ICON)
st.title("Bokmål ⇔ Nynorsk")
st.sidebar.title("Translation Demo")
st.sidebar.write(_SIDEBAR_INTRO)

# Sample sentences offered in the sidebar, listed as alternating
# Bokmål / Nynorsk pairs. Every entry needs its own trailing comma:
# adjacent string literals are silently concatenated into one entry.
masked_texts = [
    "Hvordan kan man lære maskinen å oversette?",
    "Korleis kan ein lære maskinen å omsetje?",
    "Hun vil ikke gi bort sine personlige data.",
    "Ho vil ikkje gi bort dei personlege dataa sine.",
    "Bilen er rød. Den er rask, og hun kjører den veldig raskt.",
    "Bilen er raud. Han er rask, og ho køyrer han veldig raskt..",
    "De ventes å ankomme ganske snart.",
    "Dei blir venta å komme ganske snart.",
]
# Sidebar picker; the chosen sample pre-fills the main text area below.
input_text = st.sidebar.selectbox(label="Select a Text", options=masked_texts)

st.sidebar.write(
    """
As you can see there are a lot of similarities between the languages. Since there also are some grammatical differences, simple dictionary replacements do not give a good result. A finetuned model on top of a pretrained t5-base from a balanced corpus, seem to solve the task with a SACREBLEU-score of 88.17.
"""
)

# Free-text input, initialised with the sample selected in the sidebar.
text = st.text_area(
    label=" ",
    value=input_text,
    height=None,
    max_chars=None,
    key=None,
    help="Enter your text here",
)

# Translate on click; blank or whitespace-only input gets a warning instead.
translate_clicked = st.button('Translate')
if translate_clicked:
    if str(text).strip():
        st.info(str(translate(text)))
    else:
        st.warning('Please **enter text** for translation')