Spaces:

BillyZ1129
/

Billy_Space

Sleeping

App Files Files Community

BillyZ1129 committed on Apr 30

Commit

7d6fb10

verified ·

1 Parent(s): 901fa0d

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -4

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from gtts import gTTS
 import tempfile
 import os
 import base64
 # Set page config
 st.set_page_config(
@@ -39,10 +40,35 @@ def load_story_generator():
 # Function to generate caption from image.
 # Old side of the diff: the body lines below carry the "-" marker, i.e. they are removed by
 # this commit. This version passes the image to the processor as-is, calls model.generate
 # with max_length=30, and returns the decoded first output sequence.
 def generate_caption(image, processor, model):
-    inputs = processor(image, return_tensors="pt")
-    out = model.generate(**inputs, max_length=30)
-    caption = processor.decode(out[0], skip_special_tokens=True)
-    return caption
 # Function to generate story from caption
 def generate_story(caption, generator):
@@ -142,6 +168,8 @@ def main():
         # Generate caption and story
         with st.spinner("Looking at your picture and thinking of a story..."):
             caption = generate_caption(original_image, processor, caption_model)
             story = generate_story(caption, story_generator)
         # Display the story and audio

 import tempfile
 import os
 import base64
+import numpy as np
 # Set page config
 st.set_page_config(
 # Function to generate caption from image.
 # New side of the diff (body lines carry the "+" marker). Converts a non-RGB image to RGB,
 # resizes it to 384x384, then runs processor -> model.generate (max_length=30) ->
 # processor.decode and returns the decoded caption.
 # If that path raises, a warning is shown via st.warning and the caption is produced from a
 # numpy copy of the image through processor.image_processor instead.
 def generate_caption(image, processor, model):
+    # Ensure the image is in RGB mode
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    # Standard preprocessing: resize to the input size the author expects the BLIP model to take (TODO confirm 384x384 against the loaded checkpoint)
+    image = image.resize((384, 384))
+    try:
+        # Prepare the image with the processor
+        inputs = processor(image, return_tensors="pt", padding=True)
+        # Generate the caption
+        out = model.generate(**inputs, max_length=30)
+        caption = processor.decode(out[0], skip_special_tokens=True)
+        return caption
+    except Exception as e:
+        # On any error, fall back to an alternative method. NOTE(review): the fallback below is not guarded, so a second failure propagates to the caller
+        st.warning(f"Caption generation error: {str(e)}. Using fallback method.")
+        # Convert the (already RGB, already resized) image to a numpy array
+        img_array = np.array(image)
+        # Manually prepare the image as model input, bypassing the combined processor call
+        pixel_values = processor.image_processor(images=img_array, return_tensors="pt").pixel_values
+        # Generate the caption
+        generated_ids = model.generate(pixel_values=pixel_values, max_length=30)
+        caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        return caption
 # Function to generate story from caption
 def generate_story(caption, generator):
         # Generate caption and story
         with st.spinner("Looking at your picture and thinking of a story..."):
             caption = generate_caption(original_image, processor, caption_model)
+            # Show the image caption
+            st.info(f"Image caption: {caption}")
             story = generate_story(caption, story_generator)
         # Display the story and audio