Spaces:
Sleeping
Sleeping
File size: 2,868 Bytes
6b03889 964a180 6b03889 964a180 6b03889 f366264 4b0094d 6b03889 7431216 6b03889 4b0094d 12b70c7 4b0094d e64b40a 4b0094d 6b03889 7431216 6b03889 7431216 6b03889 7431216 6b03889 7431216 6b03889 964a180 cc67ce1 e64b40a cc67ce1 6b03889 f366264 6b03889 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import os
import torch
import gradio as gr
import time
from transformers import AutoTokenizer, pipeline
# Meta's NLLB model, able to translate between more than 200 languages.
model = 'facebook/nllb-200-distilled-600M'
# The tokenizer is looked up under the same identifier as the model.
tokenizer = model
# NLLB translates poorly into Asturian, so a second, fine-tuned model that
# translates from Spanish is used for that target language.
# It comes from the AINA project: https://huggingface.co/projecte-aina
model_ast = "projecte-aina/aina-translator-es-ast"
# Display name shown in the UI -> language code handed to the translation
# pipelines as src_lang / tgt_lang. Insertion order is the dropdown order.
flores_codes = {
    "Asturianu": "ast_Latn",
    "Castellano": "spa_Latn",
    "Català": "cat_Latn",
    "English": "eng_Latn",
    "Euskera": "eus_Latn",
    "Galego": "glg_Latn",
}
def translation(source, target, text):
    """Translate ``text`` from the ``source`` language into the ``target`` language.

    Translations into Asturian are handled by the dedicated Spanish->Asturian
    model (``model_ast``). When the source language is not Spanish, the text
    is first translated into Spanish with the general model and that Spanish
    text is then translated into Asturian. Every other language pair is
    translated directly with the general model.

    Args:
        source: Display name of the source language; must be a key of
            ``flores_codes`` (e.g. ``"Castellano"``).
        target: Display name of the target language; must be a key of
            ``flores_codes`` (e.g. ``"Asturianu"``).
        text: The text to translate.

    Returns:
        The translated text, taken from the ``translation_text`` field of the
        first pipeline result.

    Raises:
        KeyError: If ``source`` or ``target`` is not a key of ``flores_codes``.
    """
    src_code = flores_codes[source]
    tgt_code = flores_codes[target]
    spanish_code = flores_codes["Castellano"]
    asturian_code = flores_codes["Asturianu"]

    # NOTE: a new pipeline is constructed on every call, exactly as before.
    if tgt_code == asturian_code:
        # The Asturian model only accepts Spanish input, so start from the
        # user's text and pivot through Spanish when it is in another language.
        spanish_text = text
        if src_code != spanish_code:
            to_spanish = pipeline(
                'translation',
                model=model,
                tokenizer=tokenizer,
                src_lang=src_code,
                tgt_lang=spanish_code,
            )
            # The pipeline returns a list of dicts; keep only the string so it
            # can be fed to the next model.
            spanish_text = to_spanish(text, max_length=400)[0]['translation_text']
        to_asturian = pipeline(
            'translation',
            model=model_ast,
            tokenizer=tokenizer,
            src_lang=spanish_code,
            tgt_lang=asturian_code,
        )
        output = to_asturian(spanish_text, max_length=400)
    else:
        translator = pipeline(
            'translation',
            model=model,
            tokenizer=tokenizer,
            src_lang=src_code,
            tgt_lang=tgt_code,
        )
        output = translator(text, max_length=400)

    return output[0]['translation_text']
if __name__ == '__main__':
    print('\tIniciando...')

    # Assemble the Gradio demo: two language dropdowns plus a text box in,
    # one text box out, all wired to translation().
    language_names = list(flores_codes)
    source_dropdown = gr.Dropdown(language_names, value='Castellano', label='Idioma original')
    target_dropdown = gr.Dropdown(language_names, value='Asturianu', label='Traducir al...')
    text_input = gr.Textbox(label="Texto a traducir")
    text_output = gr.Textbox(label="Texto traducido")

    # The description lines are kept flush-left so the string content matches
    # the source exactly.
    description = """
Este traductor utiliza el siguiente modelo de lenguaje de Meta: https://github.com/facebookresearch/fairseq/tree/nllb\n
Excepto para traducir al asturiano que usa el modelo del proyecto AINA: https://huggingface.co/projecte-aina/aina-translator-es-ast\n
Adaptado de: https://huggingface.co/spaces/Azwaw/Text_Translation_Multi-languages
"""

    demo = gr.Interface(
        fn=translation,
        inputs=[source_dropdown, target_dropdown, text_input],
        outputs=[text_output],
        title="Traductor Multilingüe",
        description=description,
        submit_btn="Traducir",
    )
    demo.launch()
|