Spaces:
Sleeping
Sleeping
Update alignment/data.py
Browse files
- alignment/data.py +9 -0
alignment/data.py
CHANGED
@@ -18,6 +18,7 @@ import re
|
|
18 |
from typing import List, Literal, Optional
|
19 |
|
20 |
from datasets import DatasetDict, concatenate_datasets, load_dataset
|
|
|
21 |
|
22 |
from .configs import DataArguments
|
23 |
|
@@ -34,6 +35,14 @@ def apply_chat_template(
|
|
34 |
|
35 |
if task in ["sft", "generation"]:
|
36 |
messages = example["messages"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
# We add an empty system message if there is none
|
38 |
if messages[0]["role"] != "system":
|
39 |
messages.insert(0, {"role": "system", "content": ""})
|
|
|
18 |
from typing import List, Literal, Optional
|
19 |
|
20 |
from datasets import DatasetDict, concatenate_datasets, load_dataset
|
21 |
+
from googletrans import Translator
|
22 |
|
23 |
from .configs import DataArguments
|
24 |
|
|
|
35 |
|
36 |
if task in ["sft", "generation"]:
|
37 |
messages = example["messages"]
|
38 |
+
print("Message Length: ", len(messages))
|
39 |
+
for i in range(len(messages)):
|
40 |
+
try:
|
41 |
+
translator = Translator()
|
42 |
+
messages[i]["content"] = translator.translate(messages[i]["content"], dest='hi').text
|
43 |
+
except Exception as e:
|
44 |
+
print(e, messages[i]["content"])
|
45 |
+
messages[i]["content"] = ""
|
46 |
# We add an empty system message if there is none
|
47 |
if messages[0]["role"] != "system":
|
48 |
messages.insert(0, {"role": "system", "content": ""})
|