Szeyu committed on
Commit
f22469a
·
verified ·
1 Parent(s): f3f1d20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -35
app.py CHANGED
@@ -6,39 +6,35 @@ import soundfile as sf
6
  import tempfile
7
  import os
8
  from PIL import Image
 
9
 
10
- # Initialize pipelines with caching to avoid reloading
11
  @st.cache_resource
12
  def load_pipelines():
13
- # Load pipeline for generating captions from images
14
  captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
15
- # Load pipeline for generating stories from text prompts
16
  storyer = pipeline("text-generation", model="aspis/gpt2-genre-story-generation")
17
- # Load pipeline for converting text to speech
18
  tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
19
  return captioner, storyer, tts
20
 
21
- # Load the pipelines once and reuse them
22
  captioner, storyer, tts = load_pipelines()
23
 
24
- # Function to generate caption, story, and audio from an uploaded image
25
  def generate_content(image):
26
- # Convert the uploaded image to a PIL image format
27
  pil_image = Image.open(image)
28
 
29
- # Generate a caption based on the image content
30
  caption = captioner(pil_image)[0]["generated_text"]
31
  st.write("**🌟 What's in the picture: 🌟**")
32
  st.write(caption)
33
 
34
- # Create a prompt for generating a children's story
35
  prompt = (
36
  f"Write a funny, warm children's story for ages 3-10, 50–100 words, "
37
  f"in third-person narrative, that describes this scene exactly: {caption} "
38
  f"mention the exact place, location or venue within {caption}"
39
  )
40
 
41
- # Generate the story based on the prompt
42
  raw = storyer(
43
  prompt,
44
  max_new_tokens=150,
@@ -48,52 +44,42 @@ def generate_content(image):
48
  return_full_text=False
49
  )[0]["generated_text"].strip()
50
 
51
- # Trim the generated story to a maximum of 100 words
52
- words = raw.split()
 
 
 
 
 
 
53
  story = " ".join(words[:100])
 
54
  st.write("**📖 Your funny story: 📖**")
55
  st.write(story)
56
 
57
- # Split the story into chunks of 200 characters for text-to-speech processing
58
  chunks = textwrap.wrap(story, width=200)
59
-
60
- # Generate and concatenate audio for each text chunk
61
  audio = np.concatenate([tts(chunk)["audio"].squeeze() for chunk in chunks])
62
 
63
- # Save the concatenated audio to a temporary WAV file
64
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
65
  sf.write(temp_file.name, audio, tts.model.config.sampling_rate)
66
  temp_file_path = temp_file.name
67
 
68
  return caption, story, temp_file_path
69
 
70
- # Streamlit UI for the application
71
- st.markdown(
72
- """
73
- <style>
74
- .stApp {
75
- background: linear-gradient(to right, #e0f7fa, #b2ebf2);
76
- }
77
- </style>
78
- """,
79
- unsafe_allow_html=True
80
- )
81
-
82
  st.title("✨ Magic Story Maker ✨")
83
  st.markdown("Upload a picture to make a funny story and hear it too! 📸")
84
 
85
- # File uploader for image input
86
- uploaded_image = st.file_uploader("Choose your picture", type=["jpg", "jpeg", "png"], help="Pick a photo to start the magic!")
87
-
88
- # Placeholder image URL (replace with an actual URL of a child-friendly image)
89
- placeholder_url = "https://example.com/placeholder_image.jpg"
90
 
91
  if uploaded_image is None:
92
- st.image(placeholder_url, caption="Upload your picture here! 📷", use_column_width=True)
93
  else:
94
  st.image(uploaded_image, caption="Your Picture 🌟", use_column_width=True)
95
 
96
- if st.button("✨ Make My Story! ✨", help="Click to create your magic story"):
97
  if uploaded_image is not None:
98
  with st.spinner("🔮 Creating your magical story..."):
99
  caption, story, audio_path = generate_content(uploaded_image)
 
6
  import tempfile
7
  import os
8
  from PIL import Image
9
+ import string
10
 
11
+ # Initialize pipelines with caching
12
  @st.cache_resource
13
  def load_pipelines():
 
14
  captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
 
15
  storyer = pipeline("text-generation", model="aspis/gpt2-genre-story-generation")
 
16
  tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
17
  return captioner, storyer, tts
18
 
 
19
  captioner, storyer, tts = load_pipelines()
20
 
21
+ # Function to generate content from an image
22
  def generate_content(image):
 
23
  pil_image = Image.open(image)
24
 
25
+ # Generate caption
26
  caption = captioner(pil_image)[0]["generated_text"]
27
  st.write("**🌟 What's in the picture: 🌟**")
28
  st.write(caption)
29
 
30
+ # Create prompt for story
31
  prompt = (
32
  f"Write a funny, warm children's story for ages 3-10, 50–100 words, "
33
  f"in third-person narrative, that describes this scene exactly: {caption} "
34
  f"mention the exact place, location or venue within {caption}"
35
  )
36
 
37
+ # Generate raw story
38
  raw = storyer(
39
  prompt,
40
  max_new_tokens=150,
 
44
  return_full_text=False
45
  )[0]["generated_text"].strip()
46
 
47
+ # Define allowed characters to keep (removes symbols like * and ~)
48
+ allowed_chars = string.ascii_letters + string.digits + " .,!?\"'-"
49
+
50
+ # Clean the raw story by keeping only allowed characters
51
+ clean_raw = ''.join(c for c in raw if c in allowed_chars)
52
+
53
+ # Split into words and trim to 100 words
54
+ words = clean_raw.split()
55
  story = " ".join(words[:100])
56
+
57
  st.write("**📖 Your funny story: 📖**")
58
  st.write(story)
59
 
60
+ # Generate audio from cleaned story
61
  chunks = textwrap.wrap(story, width=200)
 
 
62
  audio = np.concatenate([tts(chunk)["audio"].squeeze() for chunk in chunks])
63
 
64
+ # Save audio to temporary file
65
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
66
  sf.write(temp_file.name, audio, tts.model.config.sampling_rate)
67
  temp_file_path = temp_file.name
68
 
69
  return caption, story, temp_file_path
70
 
71
+ # Streamlit UI
 
 
 
 
 
 
 
 
 
 
 
72
  st.title("✨ Magic Story Maker ✨")
73
  st.markdown("Upload a picture to make a funny story and hear it too! 📸")
74
 
75
+ uploaded_image = st.file_uploader("Choose your picture", type=["jpg", "jpeg", "png"])
 
 
 
 
76
 
77
  if uploaded_image is None:
78
+ st.image("https://example.com/placeholder_image.jpg", caption="Upload your picture here! 📷", use_column_width=True)
79
  else:
80
  st.image(uploaded_image, caption="Your Picture 🌟", use_column_width=True)
81
 
82
+ if st.button("✨ Make My Story! ✨"):
83
  if uploaded_image is not None:
84
  with st.spinner("🔮 Creating your magical story..."):
85
  caption, story, audio_path = generate_content(uploaded_image)