# Imports
import streamlit as st
from transformers import pipeline
import textwrap
import numpy as np
import soundfile as sf
import tempfile
import os
from PIL import Image
import string
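
# Assumed runtime dependencies (not pinned here): streamlit, transformers, numpy,
# soundfile, pillow, and a deep-learning backend for transformers (typically torch)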
# Initialize pipelines with caching
@st.cache_resource
def load_pipelines():
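    """Load and cache the captioning, story-generation, and TTS pipelines.

    @st.cache_resource keeps these Hugging Face models in memory across
    Streamlit reruns, so they are downloaded and initialized only once.
    """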
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    storyer = pipeline("text-generation", model="aspis/gpt2-genre-story-generation")
    tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    return captioner, storyer, tts
captioner, storyer, tts = load_pipelines()
# Functions
# Generate a caption, story, and narration audio from an uploaded image
def generate_content(image):
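    """Caption the uploaded image, generate a short story from the caption,
    and synthesize the story as speech.

    Returns (caption, story, path_to_temp_wav); the caller is responsible
    for deleting the temporary WAV file.
    """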
    pil_image = Image.open(image)

    # Generate a caption for the image
    caption = captioner(pil_image)[0]["generated_text"]
    st.write("**🔍 What's in the picture: 🔍**")
    st.write(caption)

    # Build the story prompt, ending with the "Story:" cue so generation continues from it
    prompt = (
        f"Write a funny, interesting children's story in third-person narrative, "
        f"centered precisely on this scene: {caption}. "
        f"Mention the exact place, location, or venue within {caption}.\nStory:"
    )

    # Generate the raw story
    raw = storyer(
        prompt,
        max_new_tokens=150,
        do_sample=True,  # enable sampling so temperature and top_p take effect
        temperature=0.7,
        top_p=0.9,
        no_repeat_ngram_size=2,
        return_full_text=False
    )[0]["generated_text"].strip()

    # Define the characters to keep (drops symbols such as * and ~)
    allowed_chars = string.ascii_letters + string.digits + " .,!?\"'-"

    # Clean the raw story by keeping only allowed characters
    clean_raw = ''.join(c for c in raw if c in allowed_chars)

    # Split into words and trim to 100 words
    words = clean_raw.split()
    story = " ".join(words[:100])
    st.write("**📖 Your funny story: 📖**")
    st.write(story)

    # Generate audio from the cleaned story, chunking long text for the TTS model
    chunks = textwrap.wrap(story, width=200)
    audio = np.concatenate([tts(chunk)["audio"].squeeze() for chunk in chunks])

    # Save the audio to a temporary WAV file
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        sf.write(temp_file.name, audio, tts.model.config.sampling_rate)
        temp_file_path = temp_file.name

    return caption, story, temp_file_path
# Streamlit UI
st.title("✨ Magic Story Maker ✨")
st.markdown("Upload a picture to make a funny story and hear it too! 📸")
uploaded_image = st.file_uploader("Choose your picture", type=["jpg", "jpeg", "png"])
# Image display: show a placeholder until the user uploads a picture
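# NOTE: example.com below is only a placeholder; point st.image at a real hosted image or a local file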
if uploaded_image is None:
    st.image("https://example.com/placeholder_image.jpg", caption="Upload your picture here! 📷", use_container_width=True)
else:
    st.image(uploaded_image, caption="Your Picture 🌟", use_container_width=True)

if st.button("✨ Make My Story! ✨"):
    if uploaded_image is not None:
        with st.spinner("🔮 Creating your magical story..."):
            caption, story, audio_path = generate_content(uploaded_image)
        st.success("🎉 Your story is ready! 🎉")
        st.audio(audio_path, format="audio/wav")
        # Remove the temporary WAV file once the audio player has been rendered
        os.remove(audio_path)
    else:
        st.warning("Please upload a picture first! 📸")