justREE committed on
Commit
272411e
·
verified ·
1 Parent(s): ad8fea9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import numpy as np
import streamlit as st
import torch
from kokoro import KPipeline
from PIL import Image
from transformers import pipeline
6
+
7
# 1) Vision→Text: BLIP-2 captioner.
#
# Streamlit re-executes this entire script on every widget interaction, so
# building the pipelines at bare module level would reload the multi-GB
# BLIP-2 checkpoint (and the Kokoro TTS model) on every rerun.
# @st.cache_resource constructs each model once per server process and
# reuses it across reruns and sessions.
@st.cache_resource
def _load_captioner():
    """Build the BLIP-2 image-text-to-text pipeline (GPU + bf16 when available)."""
    use_cuda = torch.cuda.is_available()
    return pipeline(
        "image-text-to-text",
        model="Salesforce/blip2-opt-2.7b",
        device="cuda" if use_cuda else "cpu",
        # bf16 halves memory on GPU; fall back to fp32 on CPU.
        torch_dtype=torch.bfloat16 if use_cuda else torch.float32,
    )


# 2) TTS: Kokoro. lang_code="a" — presumably the American-English voice
# pack (matches the "af_*" voice used below) — TODO confirm against the
# kokoro docs.
@st.cache_resource
def _load_tts():
    """Build the Kokoro text-to-speech pipeline."""
    return KPipeline(lang_code="a")


# Module-level names preserved for the rest of the script.
captioner = _load_captioner()
tts = _load_tts()
17
+
18
def generate_story(img: Image.Image, max_words: int = 100) -> str:
    """Ask BLIP-2 for a short story about *img*, truncated to *max_words* words.

    Args:
        img: The uploaded image to describe.
        max_words: Hard cap on the number of words returned.

    Returns:
        The generated story as a single space-joined string.
    """
    results = captioner(
        img,
        text="Tell me a fun 50–100 word story about this image.",
        max_new_tokens=200,
        num_beams=4,  # beam search for more coherent output
        return_full_text=False,
    )
    # The pipeline returns a list of candidates; take the top one and
    # clamp its length by word count.
    words = results[0]["generated_text"].split()
    return " ".join(words[:max_words])
28
+
29
# 3) Streamlit UI: upload an image, generate a short story, speak it aloud.
st.set_page_config(page_title="BLIP-2 Storyteller")
st.title("📖 BLIP-2 + Kokoro Storyteller")
st.write("Upload an image and hear a 50–100 word story!")

uploaded = st.file_uploader("Choose an image…", type=["png", "jpg", "jpeg"])
if uploaded:
    img = Image.open(uploaded)
    # use_column_width is deprecated in Streamlit; use_container_width is
    # the supported replacement with the same behavior.
    st.image(img, use_container_width=True)

    if st.button("Tell My Story"):
        with st.spinner("Generating…"):
            story = generate_story(img)
        st.markdown(f"**Story ({len(story.split())} words):**\n\n{story}")

        with st.spinner("Speaking…"):
            # Kokoro's pipeline yields one (graphemes, phonemes, audio)
            # tuple per text chunk. The original code played only the
            # first chunk via next(gen), truncating the audio for stories
            # longer than one chunk — concatenate every chunk instead.
            # NOTE(review): assumes chunks are CPU arrays/tensors that
            # np.concatenate accepts — confirm with the kokoro version in use.
            chunks = [audio for _, _, audio in tts(story, voice="af_heart")]
            waveform = np.concatenate(chunks)
        st.audio(waveform, format="audio/wav", sample_rate=24000)
        st.success("Enjoy! 🎉")