"""Streamlit app that turns an uploaded image into a short kids' story with audio.

Pipeline: image -> caption (BLIP) -> story (T5) -> speech (gTTS).
"""

# Import necessary libraries
import hashlib
import os
import tempfile

import streamlit as st
from gtts import gTTS, gTTSError
from transformers import pipeline

# Words stripped from the generated caption before it is used as a story topic.
UNWANTED_WORDS = ("illustration", "painting", "drawing", "artwork")

# Upper bound used both for the generation length and the final word count.
MAX_STORY_WORDS = 95


@st.cache_resource
def _load_captioner():
    """Build the image-captioning pipeline once and reuse it across reruns."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


@st.cache_resource
def _load_story_generator():
    """Build the text2text-generation pipeline once and reuse it across reruns."""
    return pipeline("text2text-generation", model="t5-small")


# Function to convert image to text
def img2text(url: str) -> str:
    """Return a caption for the image at ``url`` with unwanted words removed.

    Args:
        url: Path or URL of the image, passed straight to the captioning
            pipeline.

    Returns:
        The generated caption with every entry of ``UNWANTED_WORDS`` removed
        and runs of whitespace collapsed to single spaces.
    """
    text = _load_captioner()(url)[0]["generated_text"]
    # Remove unwanted words like "illustration"
    for word in UNWANTED_WORDS:
        text = text.replace(word, "")
    # Removing a word leaves doubled spaces behind; normalise them.
    return " ".join(text.split())


# Function to generate a story from text using T5
def text2story(text: str) -> str:
    """Generate a short children's story about ``text``.

    Args:
        text: Topic of the story (here, the image caption).

    Returns:
        The generated story, truncated to at most ``MAX_STORY_WORDS`` words.
        May be an empty string if the model returns nothing but the prompt.
    """
    # Add a strong prompt to guide the model
    prompt = (
        f"Write a short, happy, and fun story for kids aged 3-10. "
        f"The story should be about: {text}. "
        "Make it cheerful, imaginative, and suitable for young children. "
        "Avoid any scary or sad elements. The story should be under 95 words."
    )
    generated = _load_story_generator()(
        prompt, max_length=MAX_STORY_WORDS, num_return_sequences=1
    )
    # Clean up the output to remove the prompt if the model echoed it back
    story = generated[0]["generated_text"].replace(prompt, "").strip()
    # Ensure the story stays under the word limit
    words = story.split()
    if len(words) > MAX_STORY_WORDS:
        story = " ".join(words[:MAX_STORY_WORDS])
    return story


# Function to convert text to audio using gTTS
def text2audio(story_text: str) -> str:
    """Synthesise ``story_text`` to speech and return the audio file path.

    The audio is written to a fresh temporary ``.mp3`` file (gTTS produces
    MP3 data), so concurrent sessions never share a file. The caller owns the
    returned file and is responsible for deleting it.

    Args:
        story_text: Text to speak; must contain at least one non-space
            character.

    Returns:
        Absolute path of the generated MP3 file.

    Raises:
        ValueError: If ``story_text`` is empty or whitespace only.
        gTTSError: If the text-to-speech request fails.
    """
    if not story_text or not story_text.strip():
        raise ValueError("story_text must not be empty")
    fd, audio_file = tempfile.mkstemp(prefix="story_", suffix=".mp3")
    os.close(fd)  # gTTS reopens the path itself
    try:
        gTTS(story_text, lang="en").save(audio_file)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        os.remove(audio_file)
        raise
    return audio_file


def _caption_uploaded_image(image_bytes: bytes, original_name: str) -> str:
    """Caption in-memory image bytes via a short-lived temporary file.

    Only the extension of ``original_name`` is reused; the client-supplied
    file name itself is never used as a path on disk.
    """
    suffix = os.path.splitext(original_name)[1]
    fd, image_path = tempfile.mkstemp(prefix="upload_", suffix=suffix)
    try:
        with os.fdopen(fd, "wb") as image_file:
            image_file.write(image_bytes)
        return img2text(image_path)
    finally:
        os.remove(image_path)


def _read_and_remove(path: str) -> bytes:
    """Return the contents of ``path`` and delete the file afterwards."""
    try:
        with open(path, "rb") as handle:
            return handle.read()
    finally:
        os.remove(path)


# Main application
st.set_page_config(page_title="Image to Story", page_icon="📖")
st.header("📖 Image to Story")
st.markdown("### Turn your image into a fun story!")

# Initialize session state. Results are kept here because Streamlit re-runs
# the whole script on every widget interaction.
for _key in ("image_digest", "scenario", "story", "audio_bytes"):
    if _key not in st.session_state:
        st.session_state[_key] = None

uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    bytes_data = uploaded_file.getvalue()
    st.image(uploaded_file, caption="Your Uploaded Image", use_column_width=True)

    # A different image invalidates everything derived from the previous one.
    image_digest = hashlib.sha256(bytes_data).hexdigest()
    if image_digest != st.session_state.image_digest:
        st.session_state.image_digest = image_digest
        st.session_state.scenario = None
        st.session_state.story = None
        st.session_state.audio_bytes = None

    # Stage 1: Image to Text (computed once per image, not once per rerun)
    if st.session_state.scenario is None:
        st.text('🖼️ Processing image...')
        st.session_state.scenario = _caption_uploaded_image(
            bytes_data, uploaded_file.name
        )
    scenario = st.session_state.scenario
    st.write("**What I see:**", scenario)

    # Stage 2: Text to Story
    # Evaluate the button first so it is rendered on every run.
    regenerate = st.button("🔄 Generate New Story")
    if regenerate or st.session_state.story is None:
        st.text('📝 Creating a story...')
        st.session_state.story = text2story(scenario)
        st.session_state.audio_bytes = None  # audio of the old story is stale
    st.write("**Your Story:**", st.session_state.story)

    # Stage 3: Story to Audio
    if not st.session_state.story:
        st.warning("The story came out empty. Please generate a new story.")
    elif st.session_state.audio_bytes is None:
        st.text('🎙️ Turning your story into audio...')
        try:
            audio_path = text2audio(st.session_state.story)
        except gTTSError as err:
            st.error(f"Could not create the audio: {err}")
        else:
            # Keep the bytes in session state and drop the file right away, so
            # playback on a later rerun does not depend on a file on disk.
            st.session_state.audio_bytes = _read_and_remove(audio_path)

    # Play button for audio
    if st.session_state.audio_bytes and st.button("🎧 Listen to the Story"):
        st.audio(st.session_state.audio_bytes, format="audio/mpeg")

# Add some fun prompts for kids
st.markdown("### 🎨 Tips for a Great Story!")
st.write("1. Upload a picture of your favorite animal, place, or toy!")
st.write("2. Imagine what's happening in the picture and let the story begin!")
st.write("3. Listen to your story and share it with your friends!")