Spaces:

koey811
/

assignment1

Sleeping

File size: 2,598 Bytes

478e2ea
dc29be0
478e2ea
dc29be0
224b704
dc29be0
4f0c03a
 
 
 
 
 
 
 
 
 
dc29be0
f976398
dc29be0
 
 
 
478e2ea
 
 
 
 
 
 
 
1f0f5c4
 
 
dc29be0
478e2ea
 
 
 
 
dc29be0
 
 
 
478e2ea
 
 
 
224b704
 
478e2ea
 
224b704
 
 
478e2ea
0f63834
478e2ea
 
224b704
478e2ea
 
 
 
 
 
 
 
 
dc29be0
478e2ea

import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from gtts import gTTS
import io
from PIL import Image

# Make sure PyTorch is importable; the story generator requests PyTorch
# tensors (return_tensors="pt"), so the app cannot work without it.
try:
    import torch
except ImportError:
    import importlib
    import subprocess
    import sys

    st.warning("PyTorch is not installed. Installing PyTorch...")
    # Run pip through the interpreter executing this script so the package
    # lands in the environment we import from. check=True raises
    # CalledProcessError on failure instead of reporting a false success.
    subprocess.run([sys.executable, "-m", "pip", "install", "torch"], check=True)
    # Let the import system notice the files that were just installed.
    importlib.invalidate_caches()
    st.success("PyTorch has been successfully installed!")
    import torch

@st.cache_resource
def _load_caption_model():
    """Build the image-to-text pipeline used to caption uploaded images."""
    # NOTE(review): the model id suggests a chest X-ray findings model -
    # confirm it is the intended captioner for a storytelling app.
    return pipeline("image-to-text", model="IAMJB/chexpert-mimic-cxr-findings-baseline")


@st.cache_resource
def _load_text_generation_model():
    """Load the GPT-2 causal language model together with its tokenizer."""
    gpt2_model = AutoModelForCausalLM.from_pretrained("gpt2")
    gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    return gpt2_model, gpt2_tokenizer


# Streamlit re-executes this script on every interaction; the cached loaders
# above make sure the models are only built once per server process.

# Load the image captioning model
caption_model = _load_caption_model()

# Load the text generation model
text_generation_model, tokenizer = _load_text_generation_model()

def generate_caption(image):
    """Return the text the captioning pipeline generates for ``image``.

    The pipeline result is indexed as a sequence of dicts; the
    ``"generated_text"`` entry of the first element is returned.
    """
    predictions = caption_model(image)
    first_prediction = predictions[0]
    return first_prediction["generated_text"]

def generate_story(caption, max_new_tokens=150):
    """Generate a short children's story from an image caption.

    Args:
        caption: Text describing the image; it is embedded in the prompt.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        The generated continuation with the prompt removed. If the model
        produces no visible continuation, the full decoded output (prompt
        included) is returned so callers always receive non-empty text.
    """
    prompt = f"Imagine you are a storyteller for young children. Based on the image described as '{caption}', create a short and interesting story for children aged 3-10. Keep it positive and happy in tone."
    # Calling the tokenizer (instead of .encode) also yields the attention
    # mask, which generate() would otherwise have to guess.
    encoded = tokenizer(prompt, return_tensors="pt")
    prompt_length = encoded["input_ids"].shape[1]
    output = text_generation_model.generate(
        **encoded,
        # Budget only the continuation: max_length also counts the prompt
        # tokens, so a long caption could leave no room for the story.
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        # The tokenizer's EOS token is passed explicitly as the padding id.
        pad_token_id=tokenizer.eos_token_id,
    )
    generated = output[0]
    # The returned sequence starts with the prompt tokens; decode only what
    # follows so the instructions are not shown (or spoken) as the story.
    story = tokenizer.decode(generated[prompt_length:], skip_special_tokens=True).strip()
    if not story:
        story = tokenizer.decode(generated, skip_special_tokens=True)
    return story

def convert_to_audio(story):
    """Synthesize ``story`` as English speech with gTTS.

    Returns an in-memory ``io.BytesIO`` holding the audio, rewound to the
    start so it can be read immediately.
    """
    speech = gTTS(text=story, lang="en")
    buffer = io.BytesIO()
    speech.write_to_fp(buffer)
    # Rewind: writing leaves the cursor at the end of the stream.
    buffer.seek(0)
    return buffer

def main():
    """Render the page: upload an image, then show caption, story and audio."""
    st.title("Storytelling Application")

    # The picker is limited to JPG files.
    upload = st.file_uploader("Upload an image", type=["jpg"])
    if upload is None:
        # Nothing has been uploaded yet, so there is nothing more to render.
        return

    # Open the upload as a PIL image and echo it back to the user.
    picture = Image.open(upload)
    st.image(picture, caption="Uploaded Image", use_container_width=True)

    # Step 1: describe the image.
    description = generate_caption(picture)
    st.subheader("Generated Caption:")
    st.write(description)

    # Step 2: turn the description into a story.
    tale = generate_story(description)
    st.subheader("Generated Story:")
    st.write(tale)

    # Step 3: synthesize the story and expose it through an audio player.
    narration = convert_to_audio(tale)
    st.audio(narration, format="audio/mp3")

# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()