szeandlink/Project_Testing

Sleeping

App Files Community

Szeyu committed on Apr 30

Commit

ed3e053

verified ·

1 Parent(s): 03cd04b

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -44

app.py CHANGED Viewed

@@ -2,11 +2,15 @@ import streamlit as st
 from transformers import pipeline
 from PIL import Image
 import io, textwrap, numpy as np, soundfile as sf
 # ------------------ Streamlit Page Configuration ------------------
 st.set_page_config(
-    page_title="Picture to Story Magic",  # App title on browser tab
-    page_icon="🦄",                       # Fun unicorn icon
     layout="centered"
 )
@@ -15,7 +19,7 @@ st.markdown(
     """
     <style>
     body {
-        background-color: #FDEBD0;  /* A soft pastel color */
     }
     </style>
     """,
@@ -37,11 +41,6 @@ st.markdown(
 def load_models():
     """
     Lazy-load the required pipelines and store them in session state.
-    Pipelines:
-      1. Captioner: Generates descriptive text from an image using a lighter model.
-      2. Storyer: Generates a humorous children's story using aspis/gpt2-genre-story-generation.
-      3. TTS: Converts text into audio.
     """
     if "captioner" not in st.session_state:
         st.session_state.captioner = pipeline(
@@ -49,10 +48,17 @@ def load_models():
             model="Salesforce/blip-image-captioning-large"
         )
     if "storyer" not in st.session_state:
-        st.session_state.storyer = pipeline(
-            "text-generation",
-            model="aspis/gpt2-genre-story-generation"
-        )
     if "tts" not in st.session_state:
         st.session_state.tts = pipeline(
             "text-to-speech",
@@ -75,46 +81,54 @@ def get_caption(image_bytes):
 def get_story(caption):
     """
     Generates a humorous and engaging children's story based on the caption.
-    Uses a prompt to instruct the model and limits token generation.
     """
     prompt = (
-        f"Write a funny, warm, and imaginative children's story for ages 3-10, 50-100 words, "
-        f"{caption}\nStory: in third-person narrative, as if the author is playfully describing the scene in the image."
-    )
-    result = st.session_state.storyer(
-        prompt,
-        max_new_tokens=120,    # Increased from 80 to 120 for more continuation space
-        do_sample=True,
-        temperature=0.7,
-        top_p=0.9,
-        return_full_text=False
     )
-    # Log the raw result for debugging (viewable in server logs)
-    print("Story generation raw result:", result)
-    raw_story = result[0].get("generated_text", "").strip()
-    # If the generated text starts with the prompt, remove it only if there is substantial extra content.
-    if raw_story.startswith(prompt):
-        # Compute the extra part after the prompt.
-        extra_text = raw_story[len(prompt):].strip()
-        # Only use the extra text if it is longer than a threshold (e.g. 20 characters).
-        if len(extra_text) > 20:
-            raw_story = extra_text
-        else:
-            # If not, use the full raw_story instead.
-            raw_story = raw_story
-    words = raw_story.split()
-    story = " ".join(words[:100])
-    return story
 @st.cache_data(show_spinner=False)
 def get_audio(story):
     """
     Converts the generated story text into audio.
     Splits the text into 300-character chunks to reduce repeated TTS calls.
-    Checks each chunk; if no valid audio is produced, creates 1 second of silence.
     """
     chunks = textwrap.wrap(story, width=300)
     audio_chunks = []
@@ -173,4 +187,4 @@ if uploaded_file is not None:
             )
     except Exception as e:
         st.error("Oops! Something went wrong. Please try a different picture or check the file format!")
-        st.error(f"Error details: {e}")

 from transformers import pipeline
 from PIL import Image
 import io, textwrap, numpy as np, soundfile as sf
+import logging
+# Set up logging for debugging
+logging.basicConfig(level=logging.INFO)
 # ------------------ Streamlit Page Configuration ------------------
 st.set_page_config(
+    page_title="Picture to Story Magic",
+    page_icon="🦄",
     layout="centered"
 )
     """
     <style>
     body {
+        background-color: #FDEBD0;
     }
     </style>
     """,
 def load_models():
     """
     Lazy-load the required pipelines and store them in session state.
     """
     if "captioner" not in st.session_state:
         st.session_state.captioner = pipeline(
             model="Salesforce/blip-image-captioning-large"
         )
     if "storyer" not in st.session_state:
+        try:
+            st.session_state.storyer = pipeline(
+                "text-generation",
+                model="aspis/gpt2-genre-story-generation"
+            )
+        except Exception as e:
+            logging.warning(f"Failed to load aspis/gpt2-genre-story-generation: {e}. Falling back to gpt2.")
+            st.session_state.storyer = pipeline(
+                "text-generation",
+                model="gpt2"
+            )
     if "tts" not in st.session_state:
         st.session_state.tts = pipeline(
             "text-to-speech",
 def get_story(caption):
     """
     Generates a humorous and engaging children's story based on the caption.
+    Uses a simplified prompt and robust output parsing.
     """
     prompt = (
+        f"Create a funny, warm children's story (50-100 words) for ages 3-10 based on: {caption}. "
+        f"Use third-person narrative, as if playfully describing the scene."
     )
+    try:
+        result = st.session_state.storyer(
+            prompt,
+            max_new_tokens=150,    # Increased to allow more room for story
+            do_sample=True,
+            temperature=0.8,       # Slightly higher for creativity
+            top_p=0.9,
+            return_full_text=False
+        )
+        logging.info(f"Story generation raw result: {result}")
+        # Extract generated text
+        raw_story = result[0].get("generated_text", "").strip()
+        # If no meaningful output, generate a fallback story
+        if not raw_story or len(raw_story.split()) < 10:
+            logging.warning("Generated story too short or empty. Using fallback.")
+            raw_story = (
+                f"Once upon a time, in a land of {caption}, a silly squirrel named Sammy "
+                f"found a shiny treasure! He danced with joy, but oh no! It was a magic acorn! "
+                f"It grew into a giant tree, and Sammy climbed to the top, giggling all the way. "
+                f"The tree sang funny songs, and all the animals joined in for a big party!"
+            )
+        # Truncate to 100 words
+        words = raw_story.split()
+        story = " ".join(words[:100])
+        return story
+    except Exception as e:
+        logging.error(f"Story generation failed: {e}")
+        # Fallback story in case of errors
+        return (
+            f"Once upon a time, in a land of {caption}, a silly squirrel named Sammy "
+            f"found a shiny treasure! He danced with joy, but oh no! It was a magic acorn! "
+            f"It grew into a giant tree, and Sammy climbed to the top, giggling all the way."
+        )
 @st.cache_data(show_spinner=False)
 def get_audio(story):
     """
     Converts the generated story text into audio.
     Splits the text into 300-character chunks to reduce repeated TTS calls.
     """
     chunks = textwrap.wrap(story, width=300)
     audio_chunks = []
             )
     except Exception as e:
         st.error("Oops! Something went wrong. Please try a different picture or check the file format!")
+        st.error(f"Error details: {e}")