# Streamlit app that summarizes Khmer text with a fine-tuned mT5 checkpoint
# loaded from the Hugging Face Hub.
#
# NOTE(review): the emoji and the Khmer placeholder in the UI strings below
# were restored from mis-decoded UTF-8; confirm they match the intended
# glyphs. This file must be saved as UTF-8.

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Must be the first Streamlit command
st.set_page_config(
    page_title="Khmer Text Summarization",
    page_icon="📝",
    layout="wide",
)

MODEL_ID = "songhieng/khmer-mt5-summarization-duplicated"

# Upper bound on input tokens fed to the encoder. `truncation=True` alone only
# truncates to the tokenizer's configured `model_max_length`, which may be
# unset for some checkpoints.
# NOTE(review): 512 is an assumed value -- align it with the input length
# used when the model was fine-tuned.
MAX_INPUT_TOKENS = 512


@st.cache_resource
def load_model():
    """Load the tokenizer and seq2seq model for ``MODEL_ID``.

    Decorated with ``st.cache_resource`` so the returned objects are reused
    instead of being reloaded on every script run.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
    return tokenizer, model


def _summarize(text, tokenizer, model, max_length, min_length, num_beams):
    """Generate a summary of ``text`` and return it as a decoded string.

    Args:
        text: Source text to summarize.
        tokenizer: Tokenizer returned by ``load_model``.
        model: Seq2seq model returned by ``load_model``.
        max_length: Maximum length passed to ``model.generate``.
        min_length: Minimum length passed to ``model.generate``.
        num_beams: Number of beams; 1 disables beam search.

    Returns:
        The decoded summary with special tokens stripped.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        # Respect a smaller tokenizer limit if one is configured.
        max_length=min(tokenizer.model_max_length, MAX_INPUT_TOKENS),
    )

    generation_kwargs = {
        "max_length": max_length,
        "min_length": min_length,
        "num_beams": num_beams,
    }
    if num_beams > 1:
        # These options only apply to beam search; skip them otherwise.
        generation_kwargs["length_penalty"] = 2.0
        generation_kwargs["early_stopping"] = True

    # Inference only: no autograd bookkeeping is needed.
    with torch.inference_mode():
        summary_ids = model.generate(**inputs, **generation_kwargs)

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


tokenizer, model = load_model()

st.title("📝 Khmer Text Summarization")
st.markdown("Input Khmer text and get a concise summary powered by your fine-tuned mT5 model.")

# Sidebar controls
st.sidebar.header("Settings")
max_length = st.sidebar.slider("Max summary length", 50, 300, 150, step=10)
min_length = st.sidebar.slider("Min summary length", 10, 100, 30, step=5)
num_beams = st.sidebar.slider("Number of beams", 1, 10, 4)

# The slider ranges overlap (min up to 100, max down to 50). Keep the minimum
# strictly below the maximum so generation can still end before the hard cap.
if min_length >= max_length:
    min_length = max_length - 1
    st.sidebar.warning(
        f"Min summary length must be below max summary length; using {min_length}."
    )

# Input text
text = st.text_area(
    "✏️ Paste Khmer text below:",
    height=300,
    placeholder="សូមវាយអត្ថបទខ្មែរនៅទីនេះ…",
)

if st.button("🔍 Summarize"):
    if not text.strip():
        st.warning("⚠️ Please enter some text.")
    else:
        with st.spinner("Summarizing..."):
            summary = _summarize(
                text,
                tokenizer,
                model,
                max_length=max_length,
                min_length=min_length,
                num_beams=num_beams,
            )

        st.subheader("📄 Summary")
        st.success(summary)