Spaces:
Sleeping
Sleeping
File size: 3,623 Bytes
4fddba4 4e17b21 4fddba4 4e17b21 3e426ea aa762d0 4e17b21 aa762d0 4e17b21 4fddba4 4e17b21 3e426ea aa762d0 abdb243 3e426ea aa762d0 4e17b21 aa762d0 4e17b21 4fddba4 4e17b21 3e426ea abdb243 4e17b21 aa762d0 4e17b21 4fddba4 abdb243 aa762d0 4fddba4 aa762d0 4e17b21 aa762d0 4fddba4 4e17b21 aa762d0 4e17b21 aa762d0 4e17b21 aa762d0 4e17b21 aa762d0 4e17b21 aa762d0 abdb243 4e17b21 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# import part
import logging
import os
import tempfile
import wave

import numpy as np
import streamlit as st
from transformers import pipeline
# function part
# img2text
def img2text(url, captioner=None):
    """Caption an image and wrap the caption in a cheerful sentence.

    Args:
        url: Image reference handed unchanged to the captioning pipeline.
        captioner: Optional pre-built image-to-text callable. It is called as
            ``captioner(url)`` and must return a list whose first item is a
            dict with a ``"generated_text"`` entry. When ``None`` a
            ``Salesforce/blip-image-captioning-base`` pipeline is created for
            this call; passing one in lets the caller load the model once and
            reuse it.

    Returns:
        The decorated caption string, or ``None`` if anything failed (an
        error message is shown through ``st.error`` in that case).
    """
    try:
        if captioner is None:
            captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
        text = captioner(url)[0]["generated_text"]
        # Remove unwanted words like "illustration", then collapse the
        # whitespace so the removal does not leave a doubled space behind.
        text = " ".join(text.replace("illustration", "").split())
        # Make the caption more fun and happy
        return f"π Wow! This picture shows {text.lower()}. Letβs turn it into a fun story! π"
    except Exception:
        # UI boundary: the user gets a friendly message, the traceback goes to
        # the log instead of being silently dropped.
        logging.getLogger(__name__).exception("Image captioning failed")
        st.error("Oops! Something went wrong while looking at your picture. Please try again! π")
        return None
# text2story
def text2story(text, generator=None):
    """Generate a short, upbeat story that starts from ``text``.

    Args:
        text: Scenario text inserted into the prompt
            ``"One sunny day, {text}. "``.
        generator: Optional pre-built text-generation callable. It is called
            as ``generator(prompt, max_length=100, num_return_sequences=1)``
            and must return a list whose first item is a dict with a
            ``"generated_text"`` entry. When ``None`` an
            ``EleutherAI/gpt-neo-125M`` pipeline is created for this call;
            passing one in lets the caller load the model once and reuse it.

    Returns:
        The generated text with words starting with ``@`` or ``http``
        removed, whitespace collapsed to single spaces, and a fixed happy
        ending appended; or ``None`` if anything failed (an error message is
        shown through ``st.error`` in that case).
    """
    try:
        if generator is None:
            # Use a better model for text generation (e.g., GPT-Neo)
            generator = pipeline("text-generation", model="EleutherAI/gpt-neo-125M")
        # Add a fun and happy prompt to guide the story generation
        prompt = f"One sunny day, {text}. "
        # NOTE(review): per the transformers generation docs max_length counts
        # the prompt tokens too, so a long scenario shortens the story.
        story = generator(prompt, max_length=100, num_return_sequences=1)[0]["generated_text"]
        # Remove any unwanted text (e.g., usernames, links)
        story = " ".join(
            word for word in story.split() if not word.startswith(("@", "http"))
        )
        # Make the story more fun by adding a happy ending
        return story + " And everyone had a big smile on their faces at the end of the day! ππ"
    except Exception:
        # UI boundary: the user gets a friendly message, the traceback goes to
        # the log instead of being silently dropped.
        logging.getLogger(__name__).exception("Story generation failed")
        st.error("Oops! Something went wrong while creating your story. Please try again! π")
        return None
# text2audio
def text2audio(story_text, synthesizer=None):
    """Synthesize speech for ``story_text`` and save it as ``story_audio.wav``.

    Args:
        story_text: Text to read aloud.
        synthesizer: Optional pre-built text-to-speech callable. It is called
            as ``synthesizer(story_text)`` and must return a dict with an
            ``"audio"`` array and a ``"sampling_rate"`` value. When ``None``
            a ``text-to-speech`` pipeline for
            ``espnet/kan-bayashi_ljspeech_vits`` is created for this call.

    Returns:
        The path of the written file (``"story_audio.wav"``), or ``None`` if
        anything failed (an error message is shown through ``st.error`` in
        that case).
    """
    try:
        if synthesizer is None:
            # NOTE(review): confirm this checkpoint can be loaded by the
            # transformers text-to-speech pipeline.
            synthesizer = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")
        audio_output = synthesizer(story_text)
        audio_file = "story_audio.wav"

        # The pipeline hands back raw samples, not an encoded file. Dumping
        # them straight to disk produces a headerless blob that players
        # reject, so encode them as 16-bit PCM inside a real WAV container.
        # Assumes mono float samples in [-1, 1] (a leading batch/channel axis
        # of size 1 is flattened away) - TODO confirm for the chosen model.
        samples = np.asarray(audio_output["audio"], dtype=np.float32).reshape(-1)
        pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype("<i2")

        # Save the audio file
        with wave.open(audio_file, "wb") as wav_out:
            wav_out.setnchannels(1)
            wav_out.setsampwidth(2)  # bytes per sample -> 16-bit
            wav_out.setframerate(int(audio_output["sampling_rate"]))
            wav_out.writeframes(pcm.tobytes())
        return audio_file
    except Exception:
        # UI boundary: the user gets a friendly message, the traceback goes to
        # the log instead of being silently dropped.
        logging.getLogger(__name__).exception("Speech synthesis failed")
        st.error("Oops! Something went wrong while turning your story into audio. Please try again! π")
        return None
# main part
st.set_page_config(page_title="Story Maker", page_icon="π")
st.header("π Story Maker: Turn Your Picture into a Happy Story! π")
uploaded_file = st.file_uploader("π· Choose a picture to create a fun story...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    bytes_data = uploaded_file.getvalue()
    st.image(uploaded_file, caption="Your fun picture!", use_container_width=True)

    # Streamlit re-executes this script on every widget interaction, including
    # a click on the "Listen" button below. Keep the results for the current
    # upload in session state so a click plays the story already on screen
    # instead of running all three models again.
    upload_key = (uploaded_file.name, hash(bytes_data))
    if st.session_state.get("story_upload_key") != upload_key:
        st.session_state["story_upload_key"] = upload_key
        st.session_state["story_results"] = {}
    results = st.session_state["story_results"]

    # Stage 1: Image to Text
    st.write("π Letβs see whatβs in your picture... π")
    if not results.get("scenario"):
        # img2text is given a file path, so spill the upload to a private
        # temp file (never to a name chosen by the uploader) and always
        # remove it afterwards. Failures are not cached, so a rerun retries.
        suffix = os.path.splitext(uploaded_file.name)[1]
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_image:
            tmp_image.write(bytes_data)
        try:
            results["scenario"] = img2text(tmp_image.name)
        finally:
            os.remove(tmp_image.name)
    scenario = results.get("scenario")

    if scenario:
        st.write(scenario)

        # Stage 2: Text to Story
        st.write("π Turning your picture into a fun story... π")
        if not results.get("story"):
            results["story"] = text2story(scenario)
        story = results.get("story")

        if story:
            st.write("π Hereβs your fun story:")
            st.write(story)

            # Stage 3: Story to Audio data
            st.write("π€ Getting ready to tell your story... π€")
            if not results.get("audio"):
                audio_file = text2audio(story)
                if audio_file:
                    # Load the clip into memory and delete the file right
                    # away, so it is cleaned up whether or not the button is
                    # ever pressed.
                    with open(audio_file, "rb") as audio_in:
                        results["audio"] = audio_in.read()
                    os.remove(audio_file)

            if results.get("audio"):
                # Play button
                if st.button("π§ Listen to Your Story!"):
                    st.audio(results["audio"], format="audio/wav")