ArissBandoss committed on
Commit
3cd8516
·
verified ·
1 Parent(s): 6f3d2cb

Update goai_helpers/goai_traduction.py

Browse files
Files changed (1) hide show
  1. goai_helpers/goai_traduction.py +12 -1
goai_helpers/goai_traduction.py CHANGED
@@ -2,7 +2,7 @@ import torch
2
  import spaces
3
  import re
4
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
5
- from goai_helpers.utils import MooreConverter
6
  from huggingface_hub import login
7
  import os
8
 
@@ -62,6 +62,8 @@ def translate_chunk(text, src_lang, tgt_lang):
62
  else:
63
  model_id = "ArissBandoss/nllb-200-3.3B-fr2mos"
64
  #model_id = "ArissBandoss/nllb-200-3.3B-mos-fr-bidirectional-peft"
 
 
65
 
66
  tokenizer = AutoTokenizer.from_pretrained(model_id, token=auth_token)
67
  model = AutoModelForSeq2SeqLM.from_pretrained(model_id, token=auth_token).to(device)
@@ -86,6 +88,15 @@ def translate_chunk(text, src_lang, tgt_lang):
86
  # Décodage
87
  translation = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
88
 
 
 
 
 
 
 
 
 
 
89
 
90
  return translation
91
 
 
2
  import spaces
3
  import re
4
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
5
+ from goai_helpers.utils import MooreConverter, mark_numbers, unmark_numbers
6
  from huggingface_hub import login
7
  import os
8
 
 
62
  else:
63
  model_id = "ArissBandoss/nllb-200-3.3B-fr2mos"
64
  #model_id = "ArissBandoss/nllb-200-3.3B-mos-fr-bidirectional-peft"
65
+
66
+ text = mark_numbers(text)
67
 
68
  tokenizer = AutoTokenizer.from_pretrained(model_id, token=auth_token)
69
  model = AutoModelForSeq2SeqLM.from_pretrained(model_id, token=auth_token).to(device)
 
88
  # Décodage
89
  translation = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
90
 
91
+ translation = unmark_numbers(translation)
92
+
93
+ number_converter = MooreConverter()
94
+ numbers = re.findall(r'\b\d+\b', translation)
95
+ for number in numbers:
96
+ moore_number = number_converter.number_to_moore(int(number))
97
+ if moore_number: # Only replace if conversion succeeded
98
+ translation = translation.replace(number, moore_number)
99
+
100
 
101
  return translation
102