# Hugging Face Space: smtsead/Assignment1

# Import necessary libraries
import hashlib
import os
import re
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline

# Function to convert image to text using Hugging Face's BLIP model
def img2text(url):
    """
    Converts an image to text using the Salesforce/blip-image-captioning-base model.
    
    Args:
        url (str): Path to the image file.
    
    Returns:
        str: Generated text caption from the image, without words like "illustration".
    """
    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    text = image_to_text_model(url)[0]["generated_text"]
    
    # Remove unwanted words like "illustration"
    unwanted_words = ["illustration", "drawing", "sketch", "picture", "dream", "imagination"]
    for word in unwanted_words:
        text = text.replace(word, "")
    
    return text.strip()

# Function to generate a kid-friendly superhero story from the text caption
def text2story(text):
    """
    Generates a kid-friendly superhero story from the text caption using the pranavpsv/gpt2-genre-story-generator model.
    
    Args:
        text (str): Text caption generated from the image.
    
    Returns:
        str: Generated superhero story suitable for kids aged 3-10, within 100 words.
    """
    # Load the text generation model
    story_generator = pipeline("text-generation", model="pranavpsv/gpt2-genre-story-generator")
    
    # Generate the story with the superhero genre
    prompt = f"<BOS> <superhero> {text}"
    story = story_generator(prompt, max_length=100, num_return_sequences=1)[0]['generated_text']
    
    # Remove <BOS> and <superhero> tags from the generated story
    story = story.replace("<BOS>", "").replace("<superhero>", "").strip()
    
    # Remove the input text (scenario) from the generated story
    if text in story:
        story = story.replace(text, "").strip()
    
    return story

# Function to convert text to audio using gTTS
def text2audio(story_text):
    """
    Converts the generated story text to audio using gTTS.
    
    Args:
        story_text (str): The generated story text.
    
    Returns:
        str: Path to the generated audio file.
    """
    # Convert text to speech
    tts = gTTS(text=story_text, lang='en')
    audio_file = "story_audio.mp3"
    tts.save(audio_file)
    
    return audio_file

# Main application
st.set_page_config(page_title="Picture Stories 🎨📖", page_icon="🦄")
st.title("Picture Stories 🎨📖")
st.markdown("### Turn your pictures into fun superhero stories and listen to them! 🎉")

# Instructions for kids
st.markdown("""
**How to use this app:**
1. **Upload a picture** of something fun, like your favorite toy, a park, or your pet.
2. Wait for the app to **create a superhero story** from your picture.
3. **Listen to the story** by clicking the "Play Audio" button.
4. Enjoy your fun superhero story! 🎧
""")

# Upload image
uploaded_file = st.file_uploader("📷 **Upload your picture here!**", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Save the uploaded file
    bytes_data = uploaded_file.getvalue()
    with open(uploaded_file.name, "wb") as file:
        file.write(bytes_data)

    # Display the uploaded image
    st.image(uploaded_file, caption="Your awesome picture!", use_container_width=True)

    # Stage 1: Image to Text
    st.text('✨ Turning your picture into words...')
    scenario = img2text(uploaded_file.name)
    st.write("**What we see:**", scenario)

    # Stage 2: Text to Story
    st.text('📖 Creating a fun superhero story for you...')
    story = text2story(scenario)
    st.write("**Your superhero story:**", story)

    # Stage 3: Story to Audio
    st.text('🎧 Turning your story into audio...')
    
    # Use session state to avoid regenerating audio on button click
    if 'audio_file' not in st.session_state:
        st.session_state.audio_file = text2audio(story)
    
    # Play button for the generated audio
    if st.button("🎵 **Play Audio**"):
        if os.path.exists(st.session_state.audio_file):
            st.audio(st.session_state.audio_file, format="audio/mp3")
        else:
            st.error("Audio file not found. Please try again.")

    # Clean up the generated audio file and uploaded image
    if os.path.exists(uploaded_file.name):
        os.remove(uploaded_file.name)