Azzan Dwi Riski
committed on
Commit
·
2af5ea1
1
Parent(s):
2e4a786
fix tokenizer issues
Browse files
- app.py +12 -12
- screenshots/.gitkeep +0 -0
app.py
CHANGED
@@ -26,18 +26,18 @@ print(f"Using device: {device}")
|
|
26 |
|
27 |
# Load tokenizer with proper error handling
|
28 |
try:
|
29 |
-
# Try to load from local tokenizer directory
|
30 |
-
tokenizer_path = '/app/tokenizers/indobert-base-p1'
|
31 |
-
if os.path.exists(tokenizer_path):
|
32 |
-
|
33 |
-
|
34 |
-
else:
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
except Exception as e:
|
42 |
print(f"Error loading tokenizer: {e}")
|
43 |
# Fallback to default BERT tokenizer if needed
|
|
|
26 |
|
27 |
# Load tokenizer with proper error handling
|
28 |
try:
|
29 |
+
# # Try to load from local tokenizer directory
|
30 |
+
# tokenizer_path = '/app/tokenizers/indobert-base-p1'
|
31 |
+
# if os.path.exists(tokenizer_path):
|
32 |
+
# print(f"Loading tokenizer from local path: {tokenizer_path}")
|
33 |
+
# tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
|
34 |
+
# else:
|
35 |
+
# # If local not available, try direct download with cache
|
36 |
+
# print("Local tokenizer not found, downloading from Hugging Face...")
|
37 |
+
# # tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1',
|
38 |
+
# # use_fast=True,
|
39 |
+
# # cache_dir='/app/tokenizers')
|
40 |
+
tokenizer = BertTokenizerFast.from_pretrained("indobenchmark/indobert-base-p1")
|
41 |
except Exception as e:
|
42 |
print(f"Error loading tokenizer: {e}")
|
43 |
# Fallback to default BERT tokenizer if needed
|
screenshots/.gitkeep
ADDED
File without changes
|