ciyidogan committed on
Commit
df4528e
·
verified ·
1 Parent(s): e93d840

Update tokenize_and_upload_mistral.py

Browse files
Files changed (1) hide show
  1. tokenize_and_upload_mistral.py +4 -4
tokenize_and_upload_mistral.py CHANGED
@@ -12,11 +12,11 @@ from fastapi import FastAPI
12
  from fastapi.responses import JSONResponse
13
 
14
  # === Sabitler ===
15
- MODEL_NAME = "malhajar/Mistral-7B-Instruct-v0.2-turkish"
16
  HF_TOKEN = os.getenv("HF_TOKEN")
17
- SOURCE_DATASET_ID = "UcsTurkey/turkish-general-culture-chunks"
18
- TRAIN_TARGET_DATASET_ID = "UcsTurkey/turkish-general-culture-tokenized"
19
- RAG_TARGET_DATASET_ID = "UcsTurkey/turkish-general-culture-rag"
20
  BUFFER_SIZE = 5
21
  START_CHUNK_NUMBER = 0
22
  PROCESS_CHUNK_COUNT = 776
 
12
  from fastapi.responses import JSONResponse
13
 
14
  # === Sabitler ===
15
+ MODEL_NAME = "TURKCELL/Turkcell-LLM-7b-v1"
16
  HF_TOKEN = os.getenv("HF_TOKEN")
17
+ SOURCE_DATASET_ID = "UcsTurkey/turkish-train-chunks"
18
+ TRAIN_TARGET_DATASET_ID = "UcsTurkey/turkish-train-tokenized"
19
+ RAG_TARGET_DATASET_ID = "UcsTurkey/turkish-train-rag"
20
  BUFFER_SIZE = 5
21
  START_CHUNK_NUMBER = 0
22
  PROCESS_CHUNK_COUNT = 776