Spaces:
Sleeping
Sleeping
File size: 2,101 Bytes
9dd5dc1 4fddba4 3603f06 8de2446 4fddba4 796d097 4fddba4 3a25fa2 a107515 4fddba4 5d3ae61 4fddba4 796d097 02c1ec5 5d3ae61 18423bf 9dd5dc1 4fddba4 796d097 4fddba4 796d097 a107515 3603f06 796d097 8de2446 4fddba4 9dd5dc1 a107515 9dd5dc1 4fddba4 9dd5dc1 4fddba4 a107515 4e17b21 a107515 4fddba4 796d097 3a25fa2 796d097 a107515 9323a68 a107515 9323a68 a107515 8de2446 9dd5dc1 a107515 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# Import necessary libraries
import streamlit as st
from transformers import pipeline
from gtts import gTTS
import os
# Function to convert image to text
def img2text(url):
    """Return a generated caption for the image located at *url*.

    A Hugging Face ``image-to-text`` pipeline backed by
    ``Salesforce/blip-image-captioning-base`` is built on every call, run on
    ``url``, and the ``generated_text`` field of the first result is returned.
    """
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    captions = captioner(url)
    return captions[0]["generated_text"]
# Function to convert text to a story
def text2story(text):
    """Extend *text* into a short story with a text-generation model.

    A ``text-generation`` pipeline using ``distilbert/distilgpt2`` is created
    on every call. The input text is used directly as the prompt, a single
    sequence is requested with ``max_length=95``, and its ``generated_text``
    is returned.
    """
    generator = pipeline("text-generation", model="distilbert/distilgpt2")
    outputs = generator(text, max_length=95, num_return_sequences=1)
    first_sequence = outputs[0]
    return first_sequence['generated_text']
# Function to convert text to audio
def text2audio(story_text):
    """Render *story_text* as English speech with gTTS and save it as MP3.

    The audio is always written to ``story_audio.mp3`` (relative to the
    current working directory); that path is returned to the caller.
    """
    speech = gTTS(text=story_text, lang='en')
    output_path = "story_audio.mp3"
    speech.save(output_path)
    return output_path
# Main application
#
# Page flow: upload an image -> caption it -> expand the caption into a
# story -> synthesize the story as speech -> offer a button that plays it.
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image into a Fun Audio Story!")
uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Keep only the final path component so the upload name can never make
    # us write outside the working directory.
    image_path = os.path.basename(uploaded_file.name)
    audio_file = None
    try:
        # Save the uploaded file; img2text() is given a path on disk.
        with open(image_path, "wb") as file:
            file.write(uploaded_file.getvalue())

        # Display the uploaded image
        st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

        # Stage 1: Image to Text
        st.text('Processing image to text...')
        scenario = img2text(image_path)
        st.write("**Scenario:**", scenario)

        # Stage 2: Text to Story
        st.text('Generating a fun story for kids...')
        story = text2story(scenario)
        st.write("**Story:**", story)

        # Stage 3: Story to Audio
        st.text('Converting story to audio...')
        audio_file = text2audio(story)

        # Load the audio into memory so the player does not depend on a
        # file that is deleted in the cleanup step below.
        with open(audio_file, "rb") as audio_stream:
            audio_bytes = audio_stream.read()

        # Play button for the generated audio.
        # NOTE: pressing a Streamlit button reruns the script, so the stages
        # above execute again before the player is rendered.
        if st.button("Play Audio"):
            st.audio(audio_bytes, format="audio/mp3")
    finally:
        # Always remove the temporary image and audio files, whether or not
        # the button was pressed and even if one of the stages failed.
        for temp_path in (image_path, audio_file):
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)