Itsme5 committed on
Commit
2b354eb
·
verified ·
1 Parent(s): 2dabd17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -1,8 +1,11 @@
1
- from fastapi import FastAPI
2
 
3
- # Δημιουργία FastAPI εφαρμογής
4
- app = FastAPI()
5
 
6
- @app.get("/")
7
- async def root():
8
- return {"message": "Welcome to your basic FastAPI application!"}
 
 
 
 
 
1
+ from tokenizers import models, trainers, Tokenizer
2
 
3
+ tokenizer = Tokenizer(model=models.WordPiece(unk_token="[UNK]"))
 
4
 
5
+ special_tokens = ["[UNK]", "[PAD]", "[CLS]", "[SEP]", "[MASK]"]
6
+ trainer = trainers.WordPieceTrainer(vocab_size=25000, special_tokens=special_tokens)
7
+
8
+ tokenizer.train(["wikitext-2.txt"], trainer=trainer)
9
+
10
+ encoding = tokenizer.encode("Let's test this tokenizer...", "on a pair of sentences.")
11
+ print(encoding.ids)