"""Streamlit app that turns an uploaded picture into a short story with audio.

Flow: the uploaded image is captioned, the caption is used as a prompt for a
text-generation model, the generated text is cleaned up, and the cleaned story
is converted to speech and offered for playback.
"""

# import part
import os
import string
import tempfile
import textwrap

import numpy as np
import soundfile as sf
import streamlit as st
from PIL import Image
from transformers import pipeline

# Characters kept in the story text; everything else (digits, symbols such as
# * and ~, non-ASCII characters) is dropped before display and speech synthesis.
_ALLOWED_CHARS = frozenset(string.ascii_letters + " .,!?\"'-")

# Sentence appended once when the generated story is shorter than _MIN_WORDS.
_FILLER_SENTENCE = (
    "The children laughed and played happily, making new friends in the sunny park."
)
_MIN_WORDS = 50
_MAX_WORDS = 100

# Maximum characters per text-to-speech call.
_TTS_CHUNK_WIDTH = 200


# Initialize pipelines with caching
@st.cache_resource
def load_pipelines():
    """Build the captioning, story-generation and text-to-speech pipelines.

    Decorated with ``st.cache_resource`` so the pipelines are reused instead
    of being rebuilt every time the script runs.

    Returns:
        A ``(captioner, storyer, tts)`` tuple of transformers pipelines.
    """
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    storyer = pipeline("text-generation", model="aspis/gpt2-genre-story-generation")
    tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    return captioner, storyer, tts


captioner, storyer, tts = load_pipelines()


# Function part
def _tidy_story(raw):
    """Strip unwanted characters from *raw* and bound its length in words.

    Whitespace runs (including newlines and tabs) are collapsed to single
    spaces first so that words on either side of a line break are not glued
    together when the disallowed newline character is removed.

    If fewer than ``_MIN_WORDS`` words remain, ``_FILLER_SENTENCE`` is appended
    once; this does not guarantee the minimum is reached for very short input.
    The result is truncated to ``_MAX_WORDS`` words.
    """
    spaced = " ".join(raw.split())
    kept = "".join(ch for ch in spaced if ch in _ALLOWED_CHARS)
    words = kept.split()
    if len(words) < _MIN_WORDS:
        words.extend(_FILLER_SENTENCE.split())
    return " ".join(words[:_MAX_WORDS])


def _write_wav(audio, sampling_rate):
    """Write *audio* to a new temporary ``.wav`` file and return its path.

    The caller owns the returned file and is responsible for deleting it.
    """
    # Reserve a unique path, then close the handle before writing: reopening
    # a still-open NamedTemporaryFile by name is not possible on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        temp_file_path = temp_file.name
    try:
        sf.write(temp_file_path, audio, sampling_rate)
    except Exception:
        # Do not leave an orphaned temp file behind when encoding fails.
        os.remove(temp_file_path)
        raise
    return temp_file_path


# Function to generate content from an image
def generate_content(image):
    """Caption *image*, write a story about it, and synthesize the story.

    The caption and the story are also written to the Streamlit page as a
    side effect.

    Args:
        image: Anything ``PIL.Image.open`` accepts; the UI below passes the
            object returned by ``st.file_uploader``.

    Returns:
        ``(caption, story, temp_file_path)`` where ``temp_file_path`` is a
        temporary ``.wav`` file that the caller must delete.
    """
    pil_image = Image.open(image)

    # Generate caption
    caption = captioner(pil_image)[0]["generated_text"]
    st.write("**🌟 What's in the picture: 🌟**")
    st.write(caption)

    # Create prompt for story
    prompt = (
        f"Write a funny, warm children's story for ages 3-10, 50–100 words, "
        f"Completely and precisely centered on this scene {caption}\nStory:"
    )

    # Generate raw story; return_full_text=False keeps the prompt out of it.
    raw = storyer(
        prompt,
        max_new_tokens=150,
        temperature=0.7,
        top_p=0.9,
        no_repeat_ngram_size=2,
        return_full_text=False,
    )[0]["generated_text"].strip()

    story = _tidy_story(raw)
    st.write("**📖 Your funny story: 📖**")
    st.write(story)

    # Generate audio from the cleaned story, one bounded-length chunk at a
    # time, then stitch the waveforms together.
    chunks = textwrap.wrap(story, width=_TTS_CHUNK_WIDTH)
    audio = np.concatenate([tts(chunk)["audio"].squeeze() for chunk in chunks])

    # Save audio to temporary file
    temp_file_path = _write_wav(audio, tts.model.config.sampling_rate)
    return caption, story, temp_file_path


# Streamlit UI
st.title("✨ Magic Story Maker ✨")
st.markdown("Upload a picture to make a funny story and hear it too! 📸")

uploaded_image = st.file_uploader("Choose your picture", type=["jpg", "jpeg", "png"])

# NOTE(review): use_column_width is deprecated in recent Streamlit releases —
# confirm the target Streamlit version before switching to its replacement.
if uploaded_image is None:
    # NOTE(review): this URL looks like a placeholder — confirm it resolves.
    st.image(
        "https://example.com/placeholder_image.jpg",
        caption="Upload your picture here! 📷",
        use_column_width=True,
    )
else:
    st.image(uploaded_image, caption="Your Picture 🌟", use_column_width=True)

if st.button("✨ Make My Story! ✨"):
    if uploaded_image is None:
        st.warning("Please upload a picture first! 📸")
    else:
        with st.spinner("🔮 Creating your magical story..."):
            caption, story, audio_path = generate_content(uploaded_image)
        try:
            # Hand the player the bytes rather than the path so playback does
            # not depend on the temporary file still existing afterwards.
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
            st.success("🎉 Your story is ready! 🎉")
            st.audio(audio_bytes, format="audio/wav")
        finally:
            # Always clean up the temp file, even if rendering fails.
            os.remove(audio_path)