rishiraj committed on
Commit
483ab43
·
1 Parent(s): 9bf8c04

Update alignment/data.py

Browse files
Files changed (1) hide show
  1. alignment/data.py +9 -0
alignment/data.py CHANGED
@@ -18,6 +18,7 @@ import re
18
  from typing import List, Literal, Optional
19
 
20
  from datasets import DatasetDict, concatenate_datasets, load_dataset
 
21
 
22
  from .configs import DataArguments
23
 
@@ -34,6 +35,14 @@ def apply_chat_template(
34
 
35
  if task in ["sft", "generation"]:
36
  messages = example["messages"]
 
 
 
 
 
 
 
 
37
  # We add an empty system message if there is none
38
  if messages[0]["role"] != "system":
39
  messages.insert(0, {"role": "system", "content": ""})
 
18
  from typing import List, Literal, Optional
19
 
20
  from datasets import DatasetDict, concatenate_datasets, load_dataset
21
+ from googletrans import Translator
22
 
23
  from .configs import DataArguments
24
 
 
35
 
36
  if task in ["sft", "generation"]:
37
  messages = example["messages"]
38
+ print("Message Length: ", len(messages))
39
+ for i in range(len(messages)):
40
+ try:
41
+ translator = Translator()
42
+ messages[i]["content"] = translator.translate(messages[i]["content"], dest='hi').text
43
+ except Exception as e:
44
+ print(e, messages[i]["content"])
45
+ messages[i]["content"] = ""
46
  # We add an empty system message if there is none
47
  if messages[0]["role"] != "system":
48
  messages.insert(0, {"role": "system", "content": ""})