nadhiffh committed on
Commit
9fc7e76
·
1 Parent(s): 6627e63

Fix tokenizer and import issues for T5 Indonesian model

Browse files
Files changed (2) hide show
  1. app.py +2 -3
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,13 +1,12 @@
1
  import streamlit as st
2
  from newspaper import Article
3
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
4
 
5
  # Load model from Hugging Face
6
  @st.cache_resource
7
  def load_summarizer():
8
  model_name = "cahya/t5-base-indonesian-summarization-cased"
9
-
10
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
11
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
12
 
13
  return pipeline("summarization", model=model, tokenizer=tokenizer)
 
1
  import streamlit as st
2
  from newspaper import Article
3
+ from transformers import pipeline, T5Tokenizer, AutoModelForSeq2SeqLM
4
 
5
  # Load model from Hugging Face
6
  @st.cache_resource
7
  def load_summarizer():
8
  model_name = "cahya/t5-base-indonesian-summarization-cased"
9
+ tokenizer = T5Tokenizer.from_pretrained(model_name)
 
10
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
11
 
12
  return pipeline("summarization", model=model, tokenizer=tokenizer)
requirements.txt CHANGED
@@ -1,5 +1,7 @@
1
  streamlit
2
  newspaper3k
3
- transformers
4
  torch
 
 
5
  lxml[html_clean]
 
1
  streamlit
2
  newspaper3k
3
+ transformers==4.36.2
4
  torch
5
+ tiktoken
6
+ sentencepiece
7
  lxml[html_clean]