Update goai_helpers/goai_traduction.py
Browse files
goai_helpers/goai_traduction.py
CHANGED
@@ -3,6 +3,7 @@ import spaces
|
|
3 |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
4 |
from peft import PeftModel, PeftConfig
|
5 |
import os
|
|
|
6 |
from huggingface_hub import login
|
7 |
|
8 |
max_length = 512
|
@@ -37,13 +38,16 @@ def goai_traduction(text, src_lang, tgt_lang):
|
|
37 |
outputs = model.generate(
|
38 |
**inputs,
|
39 |
forced_bos_token_id=tgt_lang_id,
|
40 |
-
max_new_tokens=1024
|
|
|
|
|
41 |
)
|
42 |
|
43 |
# Décodage de la sortie
|
44 |
translation = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
45 |
print("ici translation")
|
46 |
print(translation)
|
|
|
47 |
return translation
|
48 |
|
49 |
|
|
|
3 |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
4 |
from peft import PeftModel, PeftConfig
|
5 |
import os
|
6 |
+
import unicodedata
|
7 |
from huggingface_hub import login
|
8 |
|
9 |
max_length = 512
|
|
|
38 |
outputs = model.generate(
|
39 |
**inputs,
|
40 |
forced_bos_token_id=tgt_lang_id,
|
41 |
+
max_new_tokens=1024,
|
42 |
+
bad_words_ids=None, # Évite de bloquer des tokens spécifiques
|
43 |
+
no_repeat_ngram_size=0 # Désactive la pénalité pour les répétitions
|
44 |
)
|
45 |
|
46 |
# Décodage de la sortie
|
47 |
translation = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
48 |
print("ici translation")
|
49 |
print(translation)
|
50 |
+
translation = unicodedata.normalize('NFC', translation)
|
51 |
return translation
|
52 |
|
53 |
|