Spaces:

BillyZ1129
/

Billy_Space

Sleeping

App Files Community

BillyZ1129 committed on Apr 30

Commit

c0da145

verified ·

1 Parent(s): 7d6fb10

Update app.py

Browse files

Files changed (1) hide show

app.py +155 -162

app.py CHANGED Viewed

@@ -4,200 +4,193 @@ import io
 import torch
 from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
 from gtts import gTTS
-import tempfile
 import os
 import base64
-import numpy as np
-# Set page config
 st.set_page_config(
-    page_title="StoryTime: Kids' Storyteller",
     page_icon="📚",
     layout="centered"
 )
-# Load and apply CSS
-def load_css(file_name):
-    with open(file_name) as f:
-        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
-try:
-    load_css("style.css")
-except:
-    st.warning("Style file not found. Using default styling.")
 # Function to load image captioning model
 @st.cache_resource
-def load_captioning_model():
-    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-    return processor, model
 # Function to load story generation model
 @st.cache_resource
-def load_story_generator():
-    return pipeline('text-generation', model='gpt2')
 # Function to generate caption from image
 def generate_caption(image, processor, model):
-    # 确保图像是RGB格式
-    if image.mode != 'RGB':
-        image = image.convert('RGB')
-    # 标准预处理：调整大小到BLIP模型期望的输入尺寸
-    image = image.resize((384, 384))
-    try:
-        # 使用处理器准备图像
-        inputs = processor(image, return_tensors="pt", padding=True)
-        # 生成caption
-        out = model.generate(**inputs, max_length=30)
-        caption = processor.decode(out[0], skip_special_tokens=True)
-        return caption
-    except Exception as e:
-        # 如果有错误，使用一个备用方法
-        st.warning(f"Caption generation error: {str(e)}. Using fallback method.")
-        # 转换图像为numpy数组
-        img_array = np.array(image)
-        # 手动准备图像为模型输入
-        pixel_values = processor.image_processor(images=img_array, return_tensors="pt").pixel_values
-        # 生成caption
-        generated_ids = model.generate(pixel_values=pixel_values, max_length=30)
-        caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        return caption
 # Function to generate story from caption
-def generate_story(caption, generator):
-    prompt = f"{caption} "
-    story = generator(
-        prompt,
-        max_length=150,
-        num_return_sequences=1,
-        temperature=0.8,
-        top_k=50
-    )[0]['generated_text']
-    # Clean up the story
-    story = story.replace('\n', ' ')
-    sentences = story.split('.')
-    if len(sentences) > 5:
-        story = '.'.join(sentences[:5]) + '.'
-    # Strictly control word count between 50-100 words
-    words = story.split()
-    word_count = len(words)
-    if word_count < 50:
-        # If story is too short, generate more content
-        additional_content = generator(
-            story + " Then, ",
-            max_length=100,
-            num_return_sequences=1,
-            temperature=0.8,
-            top_k=50
-        )[0]['generated_text']
-        # Add only what's needed to reach 50 words
-        additional_words = additional_content.split()[word_count:]
-        words_needed = 50 - word_count
-        story = ' '.join(words + additional_words[:words_needed])
-    if word_count > 100:
-        # If story is too long, truncate to exactly 100 words
-        story = ' '.join(words[:100])
-    # Ensure the story ends with a period
-    if not story.endswith('.'):
-        story += '.'
     return story
-# Function to convert text to speech
 def text_to_speech(text):
-    tts = gTTS(text=text, lang='en', slow=False)
-    fp = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
-    tts.save(fp.name)
-    return fp.name
-# Add background decorations
-def add_background_decorations():
-    st.markdown(
         """
-        <div style="position: fixed; top: 0; right: 0; z-index: -1; opacity: 0.3;">
-            <img src="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyMDAiIGhlaWdodD0iMjAwIiB2aWV3Qm94PSIwIDAgMjAwIDIwMCI+PHBhdGggZD0iTTEwMCAxOTBjNTEuNCAwIDkwLTQwLjYgOTAtOTBTMTUxLjQgMTAgMTAwIDEwIDEwIDUwLjYgMTAgMTAwczM4LjYgOTAgOTAgOTB6IiBmaWxsPSIjNzZiNWM1IiBvcGFjaXR5PSIwLjIiLz48cGF0aCBkPSJNMTgwIDEwMGMwIDQ0LjEtMzUuOSA4MC04MCA4MHMtODAtMzUuOS04MC04MCAzNS45LTgwIDgwLTgwIDgwIDM1LjkgODAgODB6IiBmaWxsPSIjM2Q4NWM2IiBvcGFjaXR5PSIwLjIiLz48L3N2Zz4=" width="200"/>
-        </div>
-        <div style="position: fixed; bottom: 0; left: 0; z-index: -1; opacity: 0.3;">
-            <img src="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyMDAiIGhlaWdodD0iMjAwIiB2aWV3Qm94PSIwIDAgMjAwIDIwMCI+PHBhdGggZD0iTTEwMCAxOTBjNTEuNCAwIDkwLTQwLjYgOTAtOTBTMTUxLjQgMTAgMTAwIDEwIDEwIDUwLjYgMTAgMTAwczM4LjYgOTAgOTAgOTB6IiBmaWxsPSIjNzZiNWM1IiBvcGFjaXR5PSIwLjIiLz48cGF0aCBkPSJNMTgwIDEwMGMwIDQ0LjEtMzUuOSA4MC04MCA4MHMtODAtMzUuOS04MC04MCAzNS45LTgwIDgwLTgwIDgwIDM1LjkgODAgODB6IiBmaWxsPSIjM2Q4NWM2IiBvcGFjaXR5PSIwLjIiLz48L3N2Zz4=" width="200"/>
-        </div>
-        """,
-        unsafe_allow_html=True
-    )
-# Main UI
-def main():
-    add_background_decorations()
-    st.title("📚 StoryTime: Kids' Storyteller")
-    st.markdown("### Upload a picture and listen to a magical story!")
-    # Load models
-    with st.spinner("Loading models... This might take a moment!"):
-        processor, caption_model = load_captioning_model()
-        story_generator = load_story_generator()
-    # Create columns for better layout
-    col1, col2 = st.columns([1, 1])
-    # Image upload
-    with col1:
-        uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
-    if uploaded_file is not None:
-        # Display the uploaded image
-        image_bytes = uploaded_file.getvalue()
-        original_image = Image.open(io.BytesIO(image_bytes))
-        # Display the image
         with col1:
-            st.image(original_image, caption='Your Magical Picture', use_column_width=True)
-        # Generate caption and story
-        with st.spinner("Looking at your picture and thinking of a story..."):
-            caption = generate_caption(original_image, processor, caption_model)
-            # 打印图片描述
-            st.info(f"Image caption: {caption}")
-            story = generate_story(caption, story_generator)
-        # Display the story and audio
-        with col2:
-            st.markdown("### Here's your story:")
-            st.write(story)
-            # Word count display
-            word_count = len(story.split())
-            st.caption(f"Story length: {word_count} words")
-            # Convert to speech and play
-            with st.spinner("Creating the storytelling voice..."):
-                audio_file = text_to_speech(story)
-            st.audio(audio_file, format='audio/mp3')
-            st.success("Story created! Click the play button to listen!")
-            # Add download button for audio
-            with open(audio_file, "rb") as f:
-                audio_bytes = f.read()
-            audio_b64 = base64.b64encode(audio_bytes).decode()
-            href = f'<a href="data:audio/mp3;base64,{audio_b64}" download="story.mp3">Download the story audio</a>'
-            st.markdown(href, unsafe_allow_html=True)
-            # Clean up the temp file
-            os.unlink(audio_file)
-if __name__ == "__main__":
-    main()

 import torch
 from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
 from gtts import gTTS
 import os
 import base64
+import time
+# Set page configuration
 st.set_page_config(
+    page_title="Storyteller for Kids",
     page_icon="📚",
     layout="centered"
 )
+# Custom CSS
+st.markdown("""
+<style>
+    .main {
+        background-color: #f5f7ff;
+    }
+    .stTitle {
+        color: #3366cc;
+        font-family: 'Comic Sans MS', cursive;
+    }
+    .stHeader {
+        font-family: 'Comic Sans MS', cursive;
+    }
+    .stImage {
+        border-radius: 15px;
+        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
+    }
+    .story-container {
+        background-color: #e6f2ff;
+        padding: 20px;
+        border-radius: 10px;
+        border: 2px dashed #3366cc;
+        font-size: 18px;
+        line-height: 1.6;
+    }
+</style>
+""", unsafe_allow_html=True)
+# Title and description
+st.title("🧸 Kid's Storyteller 🧸")
+st.markdown("### Upload an image and I'll tell you a magical story about it!")
 # Function to load image captioning model
 @st.cache_resource
+def load_caption_model():
+    try:
+        with st.spinner("Loading image captioning model... (This may take a minute)"):
+            processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+            model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+            return processor, model, None
+    except Exception as e:
+        return None, None, str(e)
 # Function to load story generation model
 @st.cache_resource
+def load_story_model():
+    try:
+        with st.spinner("Loading story generation model... (This may take a minute)"):
+            story_generator = pipeline("text-generation", model="gpt2")
+            return story_generator, None
+    except Exception as e:
+        return None, str(e)
 # Function to generate caption from image
 def generate_caption(image, processor, model):
+    inputs = processor(image, return_tensors="pt")
+    out = model.generate(**inputs, max_length=50)
+    caption = processor.decode(out[0], skip_special_tokens=True)
+    return caption
 # Function to generate story from caption
+def generate_story(caption, story_generator):
+    # Make the prompt child-friendly and whimsical
+    prompt = f"Once upon a time in a magical land, {caption}. The children were amazed when "
+    result = story_generator(prompt, max_length=150, num_return_sequences=1, temperature=0.8)
+    story = result[0]['generated_text']
+    # Make sure the story is between 50-100 words
+    story_words = story.split()
+    if len(story_words) > 100:
+        story = ' '.join(story_words[:100])
+        # Add a closing sentence
+        story += ". And they all lived happily ever after."
+    elif len(story_words) < 50:
+        # If too short, generate more
+        additional = story_generator(story, max_length=150, num_return_sequences=1)
+        story = additional[0]['generated_text']
+        story_words = story.split()
+        if len(story_words) > 100:
+            story = ' '.join(story_words[:100])
+        story += ". And they all lived happily ever after."
     return story
+# Function to convert text to speech and create audio player
 def text_to_speech(text):
+    try:
+        tts = gTTS(text=text, lang='en', slow=False)
+        audio_file = "story_audio.mp3"
+        tts.save(audio_file)
+        # Create audio player
+        with open(audio_file, "rb") as file:
+            audio_bytes = file.read()
+        audio_b64 = base64.b64encode(audio_bytes).decode()
+        audio_player = f"""
+            <audio controls autoplay>
+                <source src="data:audio/mp3;base64,{audio_b64}" type="audio/mp3">
+                Your browser does not support the audio element.
+            </audio>
         """
+        return audio_player, None
+    except Exception as e:
+        return None, str(e)
+# Main application flow
+try:
+    # Load models with status checks
+    with st.spinner("Loading AI models... This may take a moment the first time you run the app."):
+        caption_processor, caption_model, caption_error = load_caption_model()
+        story_model, story_error = load_story_model()
+        if caption_error:
+            st.error(f"Error loading caption model: {caption_error}")
+        if story_error:
+            st.error(f"Error loading story model: {story_error}")
+    # If models loaded successfully
+    if caption_processor and caption_model and story_model:
+        # Show example images for kids to understand
+        st.markdown("### 🌟 Examples of images you can upload:")
+        col1, col2, col3 = st.columns(3)
         with col1:
+            st.markdown("🐱 Pets")
+        with col2:
+            st.markdown("🏰 Places")
+        with col3:
+            st.markdown("🧩 Toys")
+        # File uploader
+        uploaded_file = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])
+        if uploaded_file is not None:
+            # Display the uploaded image
+            image_bytes = uploaded_file.getvalue()
+            image = Image.open(io.BytesIO(image_bytes))
+            st.image(image, caption='Uploaded Image', use_column_width=True, output_format="JPEG")
+            with st.spinner('Creating your story... 📝'):
+                # Generate caption
+                caption = generate_caption(image, caption_processor, caption_model)
+                # Generate story
+                story = generate_story(caption, story_model)
+                # Display the story with some styling
+                st.markdown("## 📖 Your Magical Story")
+                st.markdown(f"<div class='story-container'>{story}</div>",
+                            unsafe_allow_html=True)
+                # Convert to speech and play
+                st.markdown("## 🔊 Listen to the Story")
+                audio_player, audio_error = text_to_speech(story)
+                if audio_player:
+                    st.markdown(audio_player, unsafe_allow_html=True)
+                else:
+                    st.error(f"Could not generate audio: {audio_error}")
+                # Download options
+                st.download_button(
+                    label="Download Story (Text)",
+                    data=story,
+                    file_name="my_story.txt",
+                    mime="text/plain"
+                )
+    else:
+        st.warning("Some AI models didn't load correctly. Please refresh the page or try again later.")
+except Exception as e:
+    st.error(f"An error occurred: {e}")
+    st.markdown("Please try again with a different image.")
+# Footer
+st.markdown("---")
+st.markdown("Created for young storytellers aged 3-10 years old 🌈")
+st.markdown("Powered by Hugging Face Transformers 🤗")