Azzan Dwi Riski committed on
Commit
2af5ea1
·
1 Parent(s): 2e4a786

fix tokenizer issues

Browse files
Files changed (2) hide show
  1. app.py +12 -12
  2. screenshots/.gitkeep +0 -0
app.py CHANGED
@@ -26,18 +26,18 @@ print(f"Using device: {device}")
26
 
27
  # Load tokenizer with proper error handling
28
  try:
29
- # Try to load from local tokenizer directory
30
- tokenizer_path = '/app/tokenizers/indobert-base-p1'
31
- if os.path.exists(tokenizer_path):
32
- print(f"Loading tokenizer from local path: {tokenizer_path}")
33
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
34
- else:
35
- # If local not available, try direct download with cache
36
- print("Local tokenizer not found, downloading from Hugging Face...")
37
- # tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1',
38
- # use_fast=True,
39
- # cache_dir='/app/tokenizers')
40
- tokenizer = BertTokenizerFast.from_pretrained("indobenchmark/indobert-base-p1")
41
  except Exception as e:
42
  print(f"Error loading tokenizer: {e}")
43
  # Fallback to default BERT tokenizer if needed
 
26
 
27
  # Load tokenizer with proper error handling
28
  try:
29
+ # # Try to load from local tokenizer directory
30
+ # tokenizer_path = '/app/tokenizers/indobert-base-p1'
31
+ # if os.path.exists(tokenizer_path):
32
+ # print(f"Loading tokenizer from local path: {tokenizer_path}")
33
+ # tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
34
+ # else:
35
+ # # If local not available, try direct download with cache
36
+ # print("Local tokenizer not found, downloading from Hugging Face...")
37
+ # # tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1',
38
+ # # use_fast=True,
39
+ # # cache_dir='/app/tokenizers')
40
+ tokenizer = BertTokenizerFast.from_pretrained("indobenchmark/indobert-base-p1")
41
  except Exception as e:
42
  print(f"Error loading tokenizer: {e}")
43
  # Fallback to default BERT tokenizer if needed
screenshots/.gitkeep ADDED
File without changes