szeandlinkProject_Testing

Sleeping

App Files Files Community

szeandlinkProject_Testing / app.py

Szeyu

Update app.py

b95df49 verified 4 months ago

raw

history blame

3.27 kB

	# import part
	import streamlit as st
	from transformers import pipeline
	import textwrap
	import numpy as np
	import soundfile as sf
	import tempfile
	import os
	from PIL import Image
	import string

	# Initialize pipelines with caching
	@st.cache_resource
	def load_pipelines():
	    """Create the caption, story and speech pipelines used by the app.

	    The function is wrapped in ``st.cache_resource`` so the pipelines are
	    cached by Streamlit rather than rebuilt on each call.

	    Returns:
	        A ``(captioner, storyer, tts)`` tuple of ``transformers`` pipelines,
	        in that order.
	    """
	    # (task, model id) pairs, listed in the order the caller unpacks them.
	    model_specs = (
	        ("image-to-text", "Salesforce/blip-image-captioning-large"),
	        ("text-generation", "aspis/gpt2-genre-story-generation"),
	        ("text-to-speech", "facebook/mms-tts-eng"),
	    )
	    return tuple(pipeline(task, model=model_id) for task, model_id in model_specs)

	captioner, storyer, tts = load_pipelines()

	# Function part
	# Function to generate content from an image
	def _finalize_story(raw_text):
	    """Clean raw generator output and force it into the 50-100 word range.

	    Characters other than ASCII letters, spaces and ``. , ! ? " ' -`` are
	    dropped (this removes digits and symbols such as ``*`` and ``~``). The
	    result is padded with a stock sentence until it has at least 50 words
	    and then cut to at most 100 words.
	    """
	    allowed_chars = frozenset(string.ascii_letters + " .,!?\"'-")
	    words = "".join(ch for ch in raw_text if ch in allowed_chars).split()

	    filler = "The children laughed and played happily, making new friends in the sunny park.".split()
	    # The filler sentence is only 13 words long, so appending it once does
	    # not guarantee the 50-word minimum; repeat until the minimum is met.
	    while len(words) < 50:
	        words.extend(filler)

	    return " ".join(words[:100])

	def _write_story_audio(story):
	    """Synthesize ``story`` with the ``tts`` pipeline and save it as a WAV file.

	    The text is split into pieces of at most 200 characters, each piece is
	    synthesized separately, and the resulting arrays are concatenated.

	    Returns:
	        Path of the temporary ``.wav`` file. The file is not deleted
	        automatically; the caller is responsible for removing it.
	    """
	    chunks = textwrap.wrap(story, width=200)
	    audio = np.concatenate([tts(chunk)["audio"].squeeze() for chunk in chunks])

	    # Only reserve a unique file name here; the handle is closed before
	    # soundfile opens the path itself.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
	        temp_file_path = temp_file.name

	    try:
	        sf.write(temp_file_path, audio, tts.model.config.sampling_rate)
	    except Exception:
	        # delete=False means nobody else will clean this up if the write fails.
	        os.remove(temp_file_path)
	        raise

	    return temp_file_path

	def generate_content(image):
	    """Caption an image, generate a children's story about it, and narrate it.

	    The caption and the story are also written to the Streamlit page as a
	    side effect.

	    Args:
	        image: Anything ``PIL.Image.open`` accepts; the UI passes the
	            object returned by ``st.file_uploader``.

	    Returns:
	        A ``(caption, story, audio_path)`` tuple, where ``audio_path`` is a
	        temporary ``.wav`` file that the caller must delete when done.
	    """
	    pil_image = Image.open(image)

	    # Generate caption
	    caption = captioner(pil_image)[0]["generated_text"]
	    st.write("🌟 What's in the picture: 🌟")
	    st.write(caption)

	    # Create prompt for story
	    prompt = (
	        f"Write a funny, warm children's story for ages 3-10, 50–100 words, "
	        f"Completely and precisely centered on this scene {caption}\nStory:"
	    )

	    # Generate raw story. temperature/top_p are sampling parameters, so
	    # sampling is requested explicitly instead of relying on the model's
	    # default generation config.
	    raw = storyer(
	        prompt,
	        max_new_tokens=150,
	        do_sample=True,
	        temperature=0.7,
	        top_p=0.9,
	        no_repeat_ngram_size=2,
	        return_full_text=False,
	    )[0]["generated_text"].strip()

	    story = _finalize_story(raw)

	    st.write("📖 Your funny story: 📖")
	    st.write(story)

	    # Generate audio from cleaned story and save it to a temporary file
	    temp_file_path = _write_story_audio(story)

	    return caption, story, temp_file_path

	# Streamlit UI
	st.title("✨ Magic Story Maker ✨")
	st.markdown("Upload a picture to make a funny story and hear it too! 📸")

	uploaded_image = st.file_uploader("Choose your picture", type=["jpg", "jpeg", "png"])

	# Show a placeholder until the user has uploaded something, then the upload itself.
	if uploaded_image is None:
	    st.image("https://example.com/placeholder_image.jpg", caption="Upload your picture here! 📷", use_column_width=True)
	else:
	    st.image(uploaded_image, caption="Your Picture 🌟", use_column_width=True)

	if st.button("✨ Make My Story! ✨"):
	    if uploaded_image is None:
	        st.warning("Please upload a picture first! 📸")
	    else:
	        with st.spinner("🔮 Creating your magical story..."):
	            # generate_content writes the caption and story to the page itself;
	            # only the audio path is needed here.
	            _, _, audio_path = generate_content(uploaded_image)
	        try:
	            st.success("🎉 Your story is ready! 🎉")
	            st.audio(audio_path, format="audio/wav")
	        finally:
	            # Always remove the temporary WAV, even if rendering fails.
	            # NOTE(review): this presumes st.audio has already loaded the file
	            # contents by the time it returns — confirm for the Streamlit
	            # version in use.
	            os.remove(audio_path)