# Minimal Streamlit demo serving a HuggingFace causal LM.
#
# Usage (assumes this file is saved as app.py):
#   pip install streamlit transformers torch
#   streamlit run app.py

import os

# Set cache locations *before* importing transformers, since huggingface_hub
# reads these environment variables at import time.
# Disable Streamlit usage stats to avoid permission issues.
os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false"
# Use /tmp, which is usually writable in containers.
os.environ["HF_HOME"] = "/tmp/huggingface"
# Legacy alias of HF_HOME, kept for older transformers versions.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"

import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

MODEL_ID = "tianzhechu/BookQA-7B-Instruct"


@st.cache_resource
def load_model():
    # Ensure the cache directory exists.
    cache_dir = "/tmp/model_cache"
    os.makedirs(cache_dir, exist_ok=True)
    # Load the tokenizer and model from the same checkpoint so the vocabulary
    # and special tokens match (the original mixed a Qwen tokenizer with a
    # BookQA model).
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, cache_dir=cache_dir)
    # torch_dtype="auto" loads the checkpoint in its native precision instead
    # of upcasting to float32, roughly halving memory for a 7B model.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID, cache_dir=cache_dir, torch_dtype="auto"
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


st.set_page_config(page_title="LLM Demo", layout="centered")
st.title("🚀 BookQA-7B-Instruct - HuggingFace Demo")

pipe = load_model()

user_input = st.text_area("Enter your instruction or question:", "")

if st.button("Generate Response"):
    if user_input.strip() == "":
        st.warning("Please enter some text.")
    else:
        with st.spinner("Generating..."):
            # return_full_text=False strips the echoed prompt so only the
            # model's continuation is shown.
            output = pipe(user_input, max_new_tokens=100, return_full_text=False)[0][
                "generated_text"
            ]
        st.markdown("### Response")
        st.write(output)
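
# Instruct-tuned checkpoints usually respond better when the prompt is wrapped
# in their chat template rather than passed as raw text. A minimal sketch,
# assuming this checkpoint's tokenizer defines a chat template; the generation
# call above could then become:
#
#   messages = [{"role": "user", "content": user_input}]
#   prompt = pipe.tokenizer.apply_chat_template(
#       messages, tokenize=False, add_generation_prompt=True
#   )
#   output = pipe(prompt, max_new_tokens=100,
#                 return_full_text=False)[0]["generated_text"]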