Fix tokenizer and import issues for T5 Indonesian model
Browse files
- app.py: +2 -3
- requirements.txt: +3 -1
app.py
CHANGED
@@ -1,13 +1,12 @@
 import streamlit as st
 from newspaper import Article
-from transformers import pipeline,
+from transformers import pipeline, T5Tokenizer, AutoModelForSeq2SeqLM
 
 # Load model from Hugging Face
 @st.cache_resource
 def load_summarizer():
     model_name = "cahya/t5-base-indonesian-summarization-cased"
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+    tokenizer = T5Tokenizer.from_pretrained(model_name)
     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
     return pipeline("summarization", model=model, tokenizer=tokenizer)
requirements.txt
CHANGED
@@ -1,5 +1,7 @@
 streamlit
 newspaper3k
-transformers
+transformers==4.36.2
 torch
+tiktoken
+sentencepiece
 lxml[html_clean]