szeandlinkProject_Testing

Sleeping

App Files Files Community

Szeyu committed on Apr 30

Commit

64fd107

verified ·

1 Parent(s): 82a099b

Create app.py

Browse files

Files changed (1) hide show

app.py +146 -0

app.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import streamlit as st
+from transformers import pipeline
+from PIL import Image
+import io, textwrap, numpy as np, soundfile as sf
+# ------------------ Streamlit Page Configuration ------------------
+# Browser-tab title, a fun unicorn icon, and a centered layout.
+_PAGE_SETTINGS = {
+    "page_title": "Picture to Story Magic",
+    "page_icon": "🦄",
+    "layout": "centered",
+}
+st.set_page_config(**_PAGE_SETTINGS)
+# ------------------ Custom CSS for a Colorful Background ------------------
+# Raw <style> block; it is only honoured because unsafe_allow_html=True is passed below.
+_BACKGROUND_CSS = """
+    <style>
+    body {
+        background-color: #FDEBD0;  /* A soft pastel color */
+    }
+    </style>
+    """
+st.markdown(_BACKGROUND_CSS, unsafe_allow_html=True)
+# ------------------ Playful Header for Young Users ------------------
+# Centered pink title plus a short greeting aimed at young users.
+_HEADER_HTML = """
+    <h1 style='text-align: center; color: #ff66cc;'>Picture to Story Magic!</h1>
+    <p style='text-align: center; font-size: 24px;'>
+      Hi little artist! Upload your picture and let us create a fun story just for you! 🎉
+    </p>
+    """
+st.markdown(_HEADER_HTML, unsafe_allow_html=True)
+# ------------------ Lazy Model Loading ------------------
+@st.cache_resource(show_spinner=False)
+def _load_pipeline(task, model_name):
+    """Build one Hugging Face pipeline; cached so repeated calls reuse the same object."""
+    return pipeline(task, model=model_name)
+def load_models():
+    """
+    Make the three required pipelines available in ``st.session_state``.
+    Pipelines (session-state key -> task, model):
+      1. ``captioner`` -> image-to-text, Salesforce/blip-image-captioning-base
+         (the "base" variant, chosen for faster/cheaper captioning).
+      2. ``storyer``   -> text-generation, aspis/gpt2-genre-story-generation.
+      3. ``tts``       -> text-to-speech, facebook/mms-tts-eng.
+    Session state is per browser session, so storing the pipelines there alone
+    would rebuild every model for each new visitor. The pipelines are therefore
+    built through ``_load_pipeline`` (decorated with ``st.cache_resource``) and
+    only referenced from session state, which keeps the existing
+    ``st.session_state.<key>`` access pattern working for the rest of the script.
+    """
+    pipeline_specs = (
+        ("captioner", "image-to-text", "Salesforce/blip-image-captioning-base"),
+        ("storyer", "text-generation", "aspis/gpt2-genre-story-generation"),
+        ("tts", "text-to-speech", "facebook/mms-tts-eng"),
+    )
+    for state_key, task, model_name in pipeline_specs:
+        # Skip keys already populated in this session (same guard as before).
+        if state_key not in st.session_state:
+            st.session_state[state_key] = _load_pipeline(task, model_name)
+# ------------------ Caching Functions ------------------
+@st.cache_data(show_spinner=False)
+def get_caption(image_bytes):
+    """
+    Produce a text caption for an uploaded picture.
+    The raw bytes are decoded into an RGB image, shrunk in place to fit within
+    256x256 (to speed up captioning), and handed to the captioning pipeline held
+    in ``st.session_state.captioner``. The ``generated_text`` of the first
+    result is returned.
+    """
+    picture = Image.open(io.BytesIO(image_bytes))
+    picture = picture.convert("RGB")
+    # thumbnail() modifies the image in place rather than returning a new one.
+    picture.thumbnail((256, 256))
+    results = st.session_state.captioner(picture)
+    first_result = results[0]
+    return first_result["generated_text"]
+@st.cache_data(show_spinner=False)
+def get_story(caption):
+    """
+    Generate a humorous and engaging children's story from an image caption.
+    The caption is embedded in a prompt asking for a playful 50-100 word story,
+    and ``st.session_state.storyer`` generates up to 80 new tokens (kept low so
+    generation is faster). Only the newly generated text is used
+    (``return_full_text=False``).
+    Returns:
+        The story with whitespace collapsed to single spaces, capped at 100
+        words, and trimmed back to the last sentence-ending punctuation mark
+        (``.``, ``!`` or ``?``) when one exists, so it does not stop
+        mid-sentence. If no such mark is present the capped text is returned
+        unchanged.
+    """
+    prompt = (
+        "Write a funny, warm, and imaginative children's story for ages 3-10, 50-100 words, "
+        f"in third-person narrative, as if the author is playfully describing the scene in the image: {caption}. "
+        "Explicitly mention the exact venue or location (such as a park, school, or home), describe specific characters "
+        "(for example, a little girl named Lily or a boy named Jack), and detail the humorous actions they perform. "
+        "Ensure the story is playful, engaging, and ends with a complete sentence."
+    )
+    raw_story = st.session_state.storyer(
+        prompt,
+        max_new_tokens=80,   # Reduced token generation for faster response
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9,
+        return_full_text=False
+    )[0]["generated_text"].strip()
+    story = " ".join(raw_story.split()[:100])
+    # Generation stops at the token budget and the word cap slices blindly, so
+    # the text can end mid-sentence. Cut back to the last terminator.
+    last_stop = max(story.rfind(mark) for mark in ".!?")
+    if last_stop == -1:
+        return story
+    end = last_stop + 1
+    # Keep any closing quote/bracket that directly follows the terminator.
+    while end < len(story) and story[end] in "\"'”’)":
+        end += 1
+    return story[:end]
+@st.cache_data(show_spinner=False)
+def get_audio(story):
+    """
+    Convert the generated story text into audio.
+    The text is split into chunks of at most 300 characters, each chunk is
+    passed to ``st.session_state.tts``, the resulting audio arrays are
+    concatenated, and the result is written as WAV into an in-memory buffer
+    using the TTS model's configured sampling rate.
+    Returns:
+        An ``io.BytesIO`` positioned at the start of the WAV data.
+    Raises:
+        ValueError: if ``story`` is empty or whitespace-only, since there is
+            nothing to synthesize.
+    """
+    chunks = textwrap.wrap(story, width=300)
+    if not chunks:
+        # textwrap.wrap yields no chunks for empty/whitespace-only text; without
+        # this guard np.concatenate fails with an opaque "need at least one
+        # array" ValueError.
+        raise ValueError("Cannot generate audio: the story text is empty.")
+    tts = st.session_state.tts
+    audio_chunks = [tts(chunk)["audio"].squeeze() for chunk in chunks]
+    audio = np.concatenate(audio_chunks)
+    buffer = io.BytesIO()
+    sf.write(buffer, audio, tts.model.config.sampling_rate, format="WAV")
+    # Rewind so the consumer reads from the beginning of the WAV data.
+    buffer.seek(0)
+    return buffer
+# ------------------ Main App Logic ------------------
+def _centered_heading(text):
+    """Render ``text`` as a centered <h3> heading."""
+    st.markdown(f"<h3 style='text-align: center;'>{text}</h3>", unsafe_allow_html=True)
+def _tell_story(image_bytes):
+    """Run caption -> story -> audio for one picture, rendering each result as it is produced."""
+    with st.spinner("Generating caption..."):
+        caption = get_caption(image_bytes)
+    _centered_heading("Caption:")
+    st.write(caption)
+    with st.spinner("Generating story..."):
+        story = get_story(caption)
+    _centered_heading("Your Story:")
+    st.write(story)
+    with st.spinner("Generating audio..."):
+        audio_buffer = get_audio(story)
+    st.audio(audio_buffer, format="audio/wav", start_time=0)
+    st.markdown(
+        "<p style='text-align: center; font-weight: bold;'>Enjoy your magical story! 🎶</p>",
+        unsafe_allow_html=True
+    )
+uploaded_file = st.file_uploader("Choose a Picture...", type=["jpg", "jpeg", "png"])
+if uploaded_file is not None:
+    # Any failure below (model loading, decoding, generation) is reported to the
+    # user through st.error.
+    try:
+        load_models()  # Populate st.session_state with the pipelines
+        image_bytes = uploaded_file.getvalue()
+        # Show the uploaded picture back to the user
+        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        st.image(image, caption="Your Amazing Picture!", use_column_width=True)
+        _centered_heading("Ready for your story?")
+        if st.button("Story, Please!"):
+            _tell_story(image_bytes)
+    except Exception as e:
+        st.error("Oops! Something went wrong. Please try a different picture or check the file format!")
+        st.error(f"Error details: {e}")