Spaces:
Sleeping
Sleeping
File size: 4,369 Bytes
9dd5dc1 4fddba4 3603f06 8de2446 4fddba4 3928fc8 4fddba4 a63c8c4 3928fc8 a63c8c4 02b4ede a63c8c4 3928fc8 3a25fa2 02b4ede 4fddba4 2bdfc3f 4fddba4 a63c8c4 2bdfc3f a63c8c4 2bdfc3f a63c8c4 4636e10 5ed01cd a63c8c4 2bdfc3f 5ed01cd 2bdfc3f 18423bf 1e96eb3 e282e9c 2bdfc3f 4fddba4 a63c8c4 4fddba4 a63c8c4 a107515 3603f06 796d097 8de2446 4fddba4 9dd5dc1 f9e85ad 2bdfc3f f9e85ad 2bdfc3f f9e85ad 2bdfc3f f9e85ad a63c8c4 f9e85ad 4fddba4 9dd5dc1 4fddba4 a107515 0daac1d 4e17b21 f9e85ad 4636e10 3a25fa2 2bdfc3f 4636e10 2bdfc3f 9323a68 a107515 f9e85ad 1e96eb3 a107515 f9e85ad e282e9c 8de2446 2bdfc3f 1e96eb3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# Import necessary libraries
import streamlit as st
from transformers import pipeline
from gtts import gTTS
import os
# Function to convert image to text using Hugging Face's BLIP model
def img2text(url):
"""
Converts an image to text using the Salesforce/blip-image-captioning-base model.
Args:
url (str): Path to the image file.
Returns:
str: Generated text caption from the image, without words like "illustration".
"""
image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
text = image_to_text_model(url)[0]["generated_text"]
# Remove unwanted words like "illustration"
unwanted_words = ["illustration", "drawing", "sketch", "picture", "dream", "imagination"]
for word in unwanted_words:
text = text.replace(word, "")
return text.strip()
# Function to generate a kid-friendly superhero story from the text caption
def text2story(text):
"""
Generates a kid-friendly superhero story from the text caption using the pranavpsv/gpt2-genre-story-generator model.
Args:
text (str): Text caption generated from the image.
Returns:
str: Generated superhero story suitable for kids aged 3-10, within 100 words.
"""
# Load the text generation model
story_generator = pipeline("text-generation", model="pranavpsv/gpt2-genre-story-generator")
# Generate the story with the superhero genre
prompt = f"<BOS> <superhero> {text}"
story = story_generator(prompt, max_length=100, num_return_sequences=1)[0]['generated_text']
# Remove <BOS> and <superhero> tags from the generated story
story = story.replace("<BOS>", "").replace("<superhero>", "").strip()
# Remove the input text (scenario) from the generated story
if text in story:
story = story.replace(text, "").strip()
return story
# Function to convert text to audio using gTTS
def text2audio(story_text):
"""
Converts the generated story text to audio using gTTS.
Args:
story_text (str): The generated story text.
Returns:
str: Path to the generated audio file.
"""
# Convert text to speech
tts = gTTS(text=story_text, lang='en')
audio_file = "story_audio.mp3"
tts.save(audio_file)
return audio_file
# Main application
st.set_page_config(page_title="Picture Stories π¨π", page_icon="π¦")
st.title("Picture Stories π¨π")
st.markdown("### Turn your pictures into fun superhero stories and listen to them! π")
# Instructions for kids
st.markdown("""
**How to use this app:**
1. **Upload a picture** of something fun, like your favorite toy, a park, or your pet.
2. Wait for the app to **create a superhero story** from your picture.
3. **Listen to the story** by clicking the "Play Audio" button.
4. Enjoy your fun superhero story! π§
""")
# Upload image
uploaded_file = st.file_uploader("π· **Upload your picture here!**", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
# Save the uploaded file
bytes_data = uploaded_file.getvalue()
with open(uploaded_file.name, "wb") as file:
file.write(bytes_data)
# Display the uploaded image
st.image(uploaded_file, caption="Your awesome picture!", use_container_width=True)
# Stage 1: Image to Text
st.text('β¨ Turning your picture into words...')
scenario = img2text(uploaded_file.name)
st.write("**What we see:**", scenario)
# Stage 2: Text to Story
st.text('π Creating a fun superhero story for you...')
story = text2story(scenario)
st.write("**Your superhero story:**", story)
# Stage 3: Story to Audio
st.text('π§ Turning your story into audio...')
# Use session state to avoid regenerating audio on button click
if 'audio_file' not in st.session_state:
st.session_state.audio_file = text2audio(story)
# Play button for the generated audio
if st.button("π΅ **Play Audio**"):
if os.path.exists(st.session_state.audio_file):
st.audio(st.session_state.audio_file, format="audio/mp3")
else:
st.error("Audio file not found. Please try again.")
# Clean up the generated audio file and uploaded image
if os.path.exists(uploaded_file.name):
os.remove(uploaded_file.name) |