import os

# Cache locations must be set before transformers is imported, or they are ignored.
# Disable Streamlit usage stats to avoid permission issues.
os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false"
# Use /tmp, which is usually writable in containers.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"

import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
@st.cache_resource  # cache the pipeline so Streamlit reruns don't reload the model
def load_model():
    # Ensure the cache directory exists
    cache_dir = "/tmp/model_cache"
    os.makedirs(cache_dir, exist_ok=True)
    model_id = "tianzhechu/BookQA-7B-Instruct"
    # Load the tokenizer from the same checkpoint as the model;
    # a mismatched tokenizer produces garbled output
    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)
    # torch_dtype="auto" keeps the checkpoint's native precision; note that a
    # 7B model still needs on the order of 15 GB of RAM in half precision
    model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=cache_dir, torch_dtype="auto")
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
st.set_page_config(page_title="LLM Demo", layout="centered")
st.title("BookQA-7B-Instruct - Hugging Face Demo")

pipe = load_model()
user_input = st.text_area("Enter your instruction or question:", "")

if st.button("Generate Response"):
    if user_input.strip() == "":
        st.warning("Please enter some text.")
    else:
        with st.spinner("Generating..."):
            output = pipe(user_input, max_new_tokens=100)[0]["generated_text"]
        st.markdown("### Response:")
        st.write(output)
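One caveat: by default, transformers' text-generation pipeline returns the prompt concatenated with the completion, so the app above echoes the user's input at the top of the response. The pipeline accepts a return_full_text flag to return only the newly generated tokens; a minimal sketch of the changed call, with everything else left as-is:

output = pipe(user_input, max_new_tokens=100, return_full_text=False)[0]["generated_text"]

For the Space to build, its requirements.txt also needs to list at least streamlit, transformers, and torch.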