Moore-Language-Space-ZeroGPU

Sleeping

ArissBandoss committed on May 18

Commit

b8808c9

verified ·

1 Parent(s): 3e462e2

Update goai_helpers/goai_traduction.py

Files changed (1) hide show

goai_helpers/goai_traduction.py CHANGED Viewed

@@ -34,12 +34,12 @@ def goai_traduction(text, src_lang, tgt_lang):
         # Instead of using the pipeline, do direct generation
         tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
         def translate(text, src_lang, tgt_lang, max_length=512):
             # Pre-commit version of the helper; lines prefixed with "-" are the
             # ones this commit removed.
             # Tokenize the text, explicitly truncating it to max_length tokens.
-            inputs = tokenizer(text, return_tensors="pt", max_length=max_length, truncation=True)
             # Move every tokenizer output onto `device` before generation.
             inputs = {k: v.to(device) for k, v in inputs.items()}
             generation_kwargs = {}
             # The forced first token is only set when both language codes are truthy.
             if src_lang and tgt_lang:
-                generation_kwargs["forced_bos_token_id"] = tokenizer.lang_code_to_id[tgt_lang]
             outputs = model.generate(**inputs, max_length=max_length, **generation_kwargs)
             # Decode the batch and return its first entry without special tokens.
             return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

         # Instead of using the pipeline, do direct generation
         tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
         def translate(text, src_lang, tgt_lang, max_length=512):
             """Translate ``text`` by calling ``model.generate`` directly.

             Args:
                 text: Source text handed to the tokenizer.
                 src_lang: Source language code; only checked for truthiness here.
                 tgt_lang: Target language code; when both codes are given, its
                     token id is forced as the first generated token.
                 max_length: Limit applied both to the tokenized input and to
                     the generated sequence.

             Returns:
                 The first decoded sequence with special tokens stripped.
             """
             # Request truncation explicitly so max_length is applied to the input
             # without relying on the tokenizer's implicit fallback.
             inputs = tokenizer(text, return_tensors="pt", max_length=max_length, truncation=True)
             # Move every tokenizer output onto `device` before generation.
             inputs = {k: v.to(device) for k, v in inputs.items()}
             generation_kwargs = {}
             if src_lang and tgt_lang:
                 # convert_tokens_to_ids is a method: it has to be called, not
                 # subscripted (subscripting a bound method raises TypeError).
                 generation_kwargs["forced_bos_token_id"] = tokenizer.convert_tokens_to_ids(tgt_lang)
             outputs = model.generate(**inputs, max_length=max_length, **generation_kwargs)
             return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]