File size: 2,868 Bytes
6b03889
 
 
 
964a180
6b03889
964a180
6b03889
 
f366264
4b0094d
 
6b03889
 
 
 
 
 
 
 
 
 
 
7431216
6b03889
 
4b0094d
 
 
 
 
12b70c7
4b0094d
e64b40a
4b0094d
 
6b03889
7431216
6b03889
 
7431216
 
 
 
 
 
6b03889
 
 
 
 
 
 
7431216
 
 
6b03889
 
7431216
6b03889
 
 
964a180
cc67ce1
e64b40a
cc67ce1
 
6b03889
 
 
 
 
 
 
f366264
6b03889
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import torch
import gradio as gr
import time
from transformers import AutoTokenizer, pipeline

# Meta model able to translate between more than 200 languages.
model = 'facebook/nllb-200-distilled-600M'
# The tokenizer is referenced by the same identifier as the base model.
tokenizer = model
# The base model translates poorly into Asturian, so this fine-tuned model,
# which translates from Spanish, is used for that target instead.
# It comes from the AINA project: https://huggingface.co/projecte-aina
model_ast = "projecte-aina/aina-translator-es-ast"

# Maps the language name shown in the UI to the language code handed to the
# translation pipelines as src_lang / tgt_lang.
flores_codes = {
    "Asturianu": "ast_Latn",
    "Castellano": "spa_Latn",
    "Català": "cat_Latn",
    "English": "eng_Latn",
    "Euskera": "eus_Latn",
    "Galego": "glg_Latn",
}

def translation(source, target, text):
    """Translate ``text`` from the ``source`` language to the ``target`` language.

    ``source`` and ``target`` are display names that must be keys of
    ``flores_codes``. Every target except Asturian is handled directly by the
    general model (``model``). Asturian is produced by the dedicated
    Spanish -> Asturian model (``model_ast``); when the source is not Spanish
    the text is first translated into Spanish with the general model and that
    Spanish text is then fed to the Asturian model.

    Args:
        source: Display name of the language ``text`` is written in.
        target: Display name of the language to translate into.
        text: The text to translate.

    Returns:
        The translated text, or an empty string when ``text`` is empty or
        contains only whitespace.

    Raises:
        KeyError: If ``source`` or ``target`` is not a key of ``flores_codes``
            (only checked when there is text to translate).
    """
    # Nothing to translate: avoid building a pipeline for empty input.
    if not text or not text.strip():
        return ""

    source = flores_codes[source]
    target = flores_codes[target]
    spanish = flores_codes["Castellano"]
    asturian = flores_codes["Asturianu"]

    if target == asturian:
        # The Asturian model expects Spanish input, so pivot through Spanish
        # whenever the source is any other language.
        spanish_text = text
        if source != spanish:
            to_spanish = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=source, tgt_lang=spanish)
            # The pipeline returns a list of dicts; keep only the translated string.
            spanish_text = to_spanish(text, max_length=400)[0]['translation_text']
        translator_ast = pipeline('translation', model=model_ast, tokenizer=tokenizer, src_lang=spanish, tgt_lang=asturian)
        output = translator_ast(spanish_text, max_length=400)
    else:
        translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=source, tgt_lang=target)
        output = translator(text, max_length=400)

    return output[0]['translation_text']

if __name__ == '__main__':
    # Startup notice printed before the UI is built.
    print('\tIniciando...')

    # Both language selectors offer the display names defined in flores_codes.
    language_names = list(flores_codes)

    # Input widgets, in the order translation() receives them: source, target, text.
    source_selector = gr.Dropdown(language_names, value='Castellano', label='Idioma original')
    target_selector = gr.Dropdown(language_names, value='Asturianu', label='Traducir al...')
    text_input = gr.Textbox(label="Texto a traducir")

    # Single output widget that shows the string returned by translation().
    translated_output = gr.Textbox(label="Texto traducido")

    # Description text passed to the interface (credits the models used).
    description = """
        Este traductor utiliza el siguiente modelo de lenguaje de Meta: https://github.com/facebookresearch/fairseq/tree/nllb\n
        Excepto para traducir al asturiano que usa el modelo del  proyecto AINA: https://huggingface.co/projecte-aina/aina-translator-es-ast\n
        Adaptado de: https://huggingface.co/spaces/Azwaw/Text_Translation_Multi-languages
        """

    # Build the Gradio interface around translation() and start serving it.
    demo = gr.Interface(
        fn=translation,
        inputs=[source_selector, target_selector, text_input],
        outputs=[translated_output],
        title="Traductor Multilingüe",
        description=description,
        submit_btn="Traducir",
    )
    demo.launch()