Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
from kokoro import KPipeline
|
3 |
+
import streamlit as st
|
4 |
+
from PIL import Image
|
5 |
+
import torch
|
6 |
+
|
7 |
+
# 1) Vision→Text: BLIP-2
# Built once at module import so Streamlit's script reruns reuse the loaded model.
_use_cuda = torch.cuda.is_available()
captioner = pipeline(
    "image-text-to-text",
    model="Salesforce/blip2-opt-2.7b",
    # GPU with bfloat16 when available, otherwise CPU with full-precision floats.
    device="cuda" if _use_cuda else "cpu",
    torch_dtype=torch.bfloat16 if _use_cuda else torch.float32,
)
|
14 |
+
|
15 |
+
# 2) TTS: Kokoro
# Single shared text-to-speech pipeline for the whole app.
# lang_code="a" — presumably selects American English per Kokoro's voice
# naming convention (voice "af_heart" below is used with it); confirm
# against the kokoro package docs.
tts = KPipeline(lang_code="a")
|
17 |
+
|
18 |
+
def generate_story(img: Image.Image, max_words: int = 100) -> str:
    """Ask the BLIP-2 captioner for a short story about *img*.

    The model is prompted for a 50-100 word story; the returned text is
    hard-truncated to at most ``max_words`` whitespace-separated words.
    """
    prompt = "Tell me a fun 50–100 word story about this image."
    result = captioner(
        img,
        text=prompt,
        max_new_tokens=200,
        num_beams=4,
        return_full_text=False,
    )
    # Pipeline output is a list of dicts; take the generated text of the
    # first (only) candidate and cap its word count.
    words = result[0]["generated_text"].split()
    return " ".join(words[:max_words])
|
28 |
+
|
29 |
+
# 3) Streamlit UI
st.set_page_config(page_title="BLIP-2 Storyteller")
st.title("📖 BLIP-2 + Kokoro Storyteller")
st.write("Upload an image and hear a 50–100 word story!")

uploaded = st.file_uploader("Choose an image…", type=["png", "jpg", "jpeg"])
if uploaded:
    img = Image.open(uploaded)
    # use_container_width is the documented replacement for the deprecated
    # use_column_width parameter (removed in recent Streamlit releases).
    st.image(img, use_container_width=True)

    if st.button("Tell My Story"):
        with st.spinner("Generating…"):
            story = generate_story(img)
        st.markdown(f"**Story ({len(story.split())} words):**\n\n{story}")

        with st.spinner("Speaking…"):
            # Kokoro yields (graphemes, phonemes, audio) tuples; only the
            # first chunk is played. NOTE(review): a long story could be
            # split into several chunks — confirm a <=100-word story fits
            # in one, or concatenate the chunks.
            gen = tts(story, voice="af_heart")
            _, _, audio = next(gen)
        # sample_rate matches Kokoro's 24 kHz output declared here.
        # NOTE(review): audio is presumably a torch tensor — verify
        # st.audio accepts it directly, else pass audio.numpy().
        st.audio(audio, format="audio/wav", sample_rate=24000)
        st.success("Enjoy! 🎉")
|