Update goai_helpers/goai_traduction.py
Browse files
goai_helpers/goai_traduction.py
CHANGED
@@ -2,6 +2,7 @@ import torch
|
|
2 |
import spaces
|
3 |
import re
|
4 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
|
|
5 |
from huggingface_hub import login
|
6 |
import os
|
7 |
|
@@ -55,6 +56,13 @@ def goai_traduction(text, src_lang, tgt_lang, max_chunk_length=80):
|
|
55 |
|
56 |
def translate_chunk(text, src_lang, tgt_lang):
|
57 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
if src_lang == "mos_Latn" and tgt_lang == "fra_Latn":
|
60 |
model_id = "ArissBandoss/nllb-200-3.3B-mos2fr"
|
|
|
2 |
import spaces
|
3 |
import re
|
4 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
5 |
+
from goai_helpers.utils import MooreConverter
|
6 |
from huggingface_hub import login
|
7 |
import os
|
8 |
|
|
|
56 |
|
57 |
def translate_chunk(text, src_lang, tgt_lang):
|
58 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
59 |
+
|
60 |
+
number_converter = MooreConverter()
|
61 |
+
numbers = re.findall(r'\b\d+\b', text)
|
62 |
+
for number in numbers:
|
63 |
+
moore_number = number_converter.number_to_moore(int(number))
|
64 |
+
if moore_number: # Only replace if conversion succeeded
|
65 |
+
text = text.replace(number, moore_number)
|
66 |
|
67 |
if src_lang == "mos_Latn" and tgt_lang == "fra_Latn":
|
68 |
model_id = "ArissBandoss/nllb-200-3.3B-mos2fr"
|