File size: 4,070 Bytes
4b14b3d 29e42d5 37631cc f22469a 29e42d5 f22469a 29e42d5 e35a81f f0a6b70 e35a81f 29e42d5 e35a81f 29e42d5 ccd016d f22469a e35a81f 37631cc f22469a e35a81f 822643b 29e42d5 ccd016d e35a81f 29e42d5 ccd016d 29e42d5 f0a6b70 f22469a e35a81f 29e42d5 e35a81f 29e42d5 f22469a e35a81f f22469a 822643b 29e42d5 f22469a e35a81f 29e42d5 f22469a e35a81f 29e42d5 e35a81f 29e42d5 f22469a ccd016d 822643b 29e42d5 f22469a 822643b f22469a 822643b f22469a 822643b e35a81f 822643b e35a81f 822643b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# import part
import streamlit as st
from transformers import pipeline
import textwrap
import numpy as np
import soundfile as sf
import tempfile
import os
from PIL import Image
import string
# Initialize pipelines with caching
@st.cache_resource
def load_pipelines():
    """Create the captioning, story-generation and text-to-speech pipelines.

    The function is wrapped in ``st.cache_resource`` so the pipelines are
    initialised with caching rather than rebuilt on every call.

    Returns:
        A ``(captioner, storyer, tts)`` tuple of pipeline objects, in that order.
    """
    # (task, model) pairs, listed in the order the pipelines are returned.
    pipeline_specs = (
        ("image-to-text", "Salesforce/blip-image-captioning-large"),
        ("text-generation", "aspis/gpt2-genre-story-generation"),
        ("text-to-speech", "facebook/mms-tts-eng"),
    )
    caption_pipe, story_pipe, speech_pipe = [
        pipeline(task, model=model_name) for task, model_name in pipeline_specs
    ]
    return caption_pipe, story_pipe, speech_pipe
# Module-level pipeline handles; generate_content() reads these three names directly.
captioner, storyer, tts = load_pipelines()
# Function to extract keywords from caption
def extract_keywords(caption):
    """Return the content words of *caption*, lower-cased, in original order.

    Each whitespace-separated token has surrounding punctuation removed
    first; it is then kept only if it is longer than two characters and is
    not a stop word. Stripping before filtering matters: otherwise tokens
    such as ``"the,"`` slip past the stop-word check and pure-punctuation
    tokens such as ``"..."`` turn into empty keywords.

    Args:
        caption: The caption text to analyse.

    Returns:
        A list of keyword strings (possibly empty). Duplicates are kept.
    """
    stop_words = {'the', 'a', 'an', 'in', 'on', 'at', 'of', 'to', 'is', 'are', 'with', 'and'}
    keywords = []
    for token in caption.lower().split():
        word = token.strip(".,!?\"'")
        # Filter on the cleaned word, not on the raw token.
        if len(word) > 2 and word not in stop_words:
            keywords.append(word)
    return keywords
# Function to generate content from an image
# Characters kept when sanitising generated story text (drops symbols like * and ~).
_ALLOWED_STORY_CHARS = frozenset(string.ascii_letters + string.digits + " .,!?\"'-")


def _clean_story(raw, max_words=100):
    """Remove disallowed symbols from *raw* and keep at most *max_words* words."""
    # Newlines/tabs are turned into spaces instead of being dropped, so words
    # on either side of a line break are not glued together.
    kept = "".join(
        " " if ch.isspace() else ch
        for ch in raw
        if ch.isspace() or ch in _ALLOWED_STORY_CHARS
    )
    return " ".join(kept.split()[:max_words])


def _write_wav(audio, sample_rate):
    """Write *audio* to a new temporary .wav file and return the file's path."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        wav_path = temp_file.name
    # The handle is closed before soundfile reopens the path by name; writing
    # while it is still open fails on Windows.
    try:
        sf.write(wav_path, audio, sample_rate)
    except Exception:
        # Do not leave an orphaned temp file behind if the write fails.
        os.remove(wav_path)
        raise
    return wav_path


def generate_content(image):
    """Caption an image, write a short story about it and narrate the story.

    The caption and the story are also written to the Streamlit page as a
    side effect.

    Args:
        image: Anything ``PIL.Image.open`` accepts; the caller in this file
            passes the object returned by ``st.file_uploader``.

    Returns:
        A ``(caption, story, temp_file_path)`` tuple. ``temp_file_path`` is a
        .wav file created with ``delete=False``; the caller is responsible
        for removing it.
    """
    # Close the image once the caption has been produced.
    with Image.open(image) as pil_image:
        caption = captioner(pil_image)[0]["generated_text"]
    st.write("**๐ What's in the picture: ๐**")
    st.write(caption)

    # Extract keywords from the caption
    keywords_str = ", ".join(extract_keywords(caption))

    # Create prompt for story, ensuring keywords are included.
    # The word-count range uses a real en dash; the source text had it mis-encoded.
    prompt = (
        f"Write a funny, warm children's story for ages 3-10, 50–100 words, "
        f"in third-person narrative, that describes this scene exactly: {caption}. "
        f"Explicitly include these keywords from the caption in the story: {keywords_str}. "
        f"Mention the exact place, location, or venue within the scene, such as a park, pool, or gym."
    )

    # Generate raw story. do_sample=True is stated explicitly because
    # temperature/top_p only take effect when sampling is enabled.
    raw = storyer(
        prompt,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        no_repeat_ngram_size=2,
        return_full_text=False,
    )[0]["generated_text"].strip()

    # Fall back to the caption if cleaning leaves nothing to show or narrate.
    story = _clean_story(raw) or caption
    st.write("**๐ Your funny story: ๐**")
    st.write(story)

    # Generate audio from the story in chunks of at most 200 characters.
    waveforms = [
        tts(chunk)["audio"].squeeze()
        for chunk in textwrap.wrap(story, width=200)
    ]
    # np.concatenate raises on an empty list, so use an empty waveform instead.
    audio = np.concatenate(waveforms) if waveforms else np.zeros(0, dtype=np.float32)

    temp_file_path = _write_wav(audio, tts.model.config.sampling_rate)
    return caption, story, temp_file_path
# Streamlit UI
# Page-wide CSS override: radial gradient background on the app container.
_PAGE_CSS = """
<style>
.stApp {
background: radial-gradient(circle, #e6f3ff, #e6fff2);
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Heading and short instructions.
st.title("โจ Magic Story Maker โจ")
st.markdown("Upload a picture to make a funny story and hear it too! ๐ธ")

# Image upload widget; the result is None until the user picks a file.
uploaded_image = st.file_uploader("Choose your picture", type=["jpg", "jpeg", "png"])

# Preview the uploaded picture, or show the placeholder image when there is none.
if uploaded_image is not None:
    st.image(
        uploaded_image,
        caption="Your Picture ๐",
        use_column_width=True,
    )
else:
    st.image(
        "https://example.com/placeholder_image.jpg",
        caption="Upload your picture here! ๐ท",
        use_column_width=True,
    )
# Button handler: generate the caption, story and narration for the uploaded picture.
if st.button("โจ Make My Story! โจ"):
    if uploaded_image is None:
        st.warning("Please upload a picture first! ๐ธ")
    else:
        with st.spinner("๐ฎ Creating your magical story..."):
            # generate_content writes the caption and story to the page itself;
            # only the path of the narration file is needed here.
            _, _, audio_path = generate_content(uploaded_image)
        # Load the narration into memory and always delete the temporary file,
        # even if reading it fails, so no .wav files pile up in the temp dir.
        try:
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
        finally:
            os.remove(audio_path)
        st.success("๐ Your story is ready! ๐")
        # Passing bytes keeps playback independent of the already-deleted file.
        st.audio(audio_bytes, format="audio/wav")