Spaces:

koey811
/

assignment1

Sleeping

App Files Files Community

assignment1 / app.py

koey811

Update app.py

06ff161 verified 9 months ago

raw

history blame

3.05 kB

	import streamlit as st
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
	from gtts import gTTS
	import io
	from PIL import Image

	# Install PyTorch
	try:
	import torch
	except ImportError:
	st.warning("PyTorch is not installed. Installing PyTorch...")
	import subprocess
	subprocess.run(["pip", "install", "torch"])
	st.success("PyTorch has been successfully installed!")
	import torch

	# Load the image captioning model
	caption_model = pipeline("image-to-text", model="unography/blip-large-long-cap")

	story_generator = pipeline("text-generation", model="distilbert/distilgpt2")

	def generate_caption(image):
	# Generate the caption for the uploaded image
	caption = caption_model(image)[0]["generated_text"]
	return caption

	def generate_story(caption):
	# Generate the story based on the caption using the GPT-2 model
	prompt = f"Startig with 'Once upon a time...', also, based on the image described as '{caption}', here is a short, simple, and engaging story for children aged 3-10. The story should be easy to understand, use age-appropriate language, and convey a positive message. Focus on the main elements in the image and create a story that sparks their imagination:\n\n"
	story = story_generator(prompt, max_length=500, num_return_sequences=1)[0]["generated_text"]

	# Extract the story text from the generated output
	story = story.split("\n\n")[1].strip()

	# Post-process the story (example: remove inappropriate words)
	inappropriate_words = ["violence", "horror", "scary"]
	for word in inappropriate_words:
	story = story.replace(word, "")

	# Limit the story to approximately 100 words
	words = story.split()
	if len(words) > 100:
	story = " ".join(words[:100]) + "..."

	return story

	def convert_to_audio(story):
	# Convert the story to audio using gTTS
	tts = gTTS(text=story, lang="en")
	audio_bytes = io.BytesIO()
	tts.write_to_fp(audio_bytes)
	audio_bytes.seek(0)
	return audio_bytes

	def main():
	st.title("Storytelling Application")

	# File uploader for the image (restricted to JPG)
	uploaded_image = st.file_uploader("Upload an image", type=["jpg"])

	if uploaded_image is not None:
	# Convert the uploaded image to PIL image
	image = Image.open(uploaded_image)

	# Display the uploaded image
	st.image(image, caption="Uploaded Image", use_container_width=True)

	# Generate the caption for the image
	caption = generate_caption(image)
	st.subheader("Generated Caption:")
	st.write(caption)

	# Generate the story based on the caption using the GPT-2 model
	story = generate_story(caption)
	st.subheader("Generated Story:")
	st.write(story)

	# Convert the story to audio
	audio_bytes = convert_to_audio(story)

	# Display the audio player
	st.audio(audio_bytes, format="audio/mp3")

	if __name__ == "__main__":
	main()