Update tokenize_and_upload_mistral.py
Browse files
tokenize_and_upload_mistral.py
CHANGED
@@ -12,11 +12,11 @@ from fastapi import FastAPI
|
|
12 |
from fastapi.responses import JSONResponse
|
13 |
|
14 |
# === Sabitler ===
|
15 |
-
MODEL_NAME = "
|
16 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
17 |
-
SOURCE_DATASET_ID = "UcsTurkey/turkish-
|
18 |
-
TRAIN_TARGET_DATASET_ID = "UcsTurkey/turkish-
|
19 |
-
RAG_TARGET_DATASET_ID = "UcsTurkey/turkish-
|
20 |
BUFFER_SIZE = 5
|
21 |
START_CHUNK_NUMBER = 0
|
22 |
PROCESS_CHUNK_COUNT = 776
|
|
|
12 |
from fastapi.responses import JSONResponse
|
13 |
|
14 |
# === Sabitler ===
|
15 |
+
MODEL_NAME = "TURKCELL/Turkcell-LLM-7b-v1"
|
16 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
17 |
+
SOURCE_DATASET_ID = "UcsTurkey/turkish-train-chunks"
|
18 |
+
TRAIN_TARGET_DATASET_ID = "UcsTurkey/turkish-train-tokenized"
|
19 |
+
RAG_TARGET_DATASET_ID = "UcsTurkey/turkish-train-rag"
|
20 |
BUFFER_SIZE = 5
|
21 |
START_CHUNK_NUMBER = 0
|
22 |
PROCESS_CHUNK_COUNT = 776
|