JoannaKOKO committed
Commit 808ca72 · verified · 1 Parent(s): 07da160

Update app.py

Files changed (1)
  1. app.py +4 -4
app.py CHANGED
@@ -1,5 +1,5 @@
 import streamlit as st
-from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
+from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline
 import torch
 from PIL import Image
 import io
@@ -9,8 +9,8 @@ from kokoro import KPipeline # For text-to-speech
 
 # Load models globally to avoid reloading them repeatedly
 # Image-to-Text model
-processor = AutoProcessor.from_pretrained("Ertugrul/Qwen2-VL-7B-Captioner-Relaxed")
-caption_model = AutoModelForCausalLM.from_pretrained("Ertugrul/Qwen2-VL-7B-Captioner-Relaxed")
+processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+caption_model = AutoModelForImageTextToText.from_pretrained("Salesforce/blip-image-captioning-large")
 
 # Text-to-Story model
 story_generator = pipeline("text-generation", model="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B")
@@ -52,7 +52,7 @@ def generate_audio(story):
     concatenated_audio = np.concatenate(audio_segments)
     # Write to a BytesIO buffer instead of saving to disk
     audio_buffer = io.BytesIO()
-    #sf.write(audio_buffer, concatenated_audio, 24000, format='WAV')
+    sf.write(audio_buffer, concatenated_audio, 24000, format='WAV')
     audio_buffer.seek(0)
     return audio_buffer
 
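The substantive change in the first two hunks is the captioning backend: the 7B Qwen2-VL captioner is swapped for the much smaller Salesforce/blip-image-captioning-large, and the model class changes from AutoModelForCausalLM to AutoModelForImageTextToText to match. A minimal sketch of how that processor/model pair is typically used for captioning (the actual inference code in app.py is outside this diff, so the caption_image helper and its generation settings below are assumptions):

```python
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText

# Model pair named in the diff.
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
caption_model = AutoModelForImageTextToText.from_pretrained("Salesforce/blip-image-captioning-large")

def caption_image(image: Image.Image) -> str:
    """Hypothetical helper, not shown in the diff: image in, caption out."""
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        output_ids = caption_model.generate(**inputs, max_new_tokens=50)
    return processor.decode(output_ids[0], skip_special_tokens=True)
```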
 
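The last hunk re-enables the previously commented-out soundfile call, so generate_audio actually writes the concatenated Kokoro audio into the in-memory WAV buffer before rewinding and returning it. A minimal, self-contained sketch of that buffer pattern, assuming 24 kHz mono float32 audio (Kokoro's output rate) with dummy data standing in for the real segments:

```python
import io

import numpy as np
import soundfile as sf

# Dummy stand-ins for the Kokoro audio segments (24 kHz mono float32).
audio_segments = [np.zeros(12000, dtype=np.float32), np.zeros(12000, dtype=np.float32)]
concatenated_audio = np.concatenate(audio_segments)

# sf.write accepts any file-like object when format= is given explicitly,
# so the WAV bytes land in memory instead of on disk.
audio_buffer = io.BytesIO()
sf.write(audio_buffer, concatenated_audio, 24000, format='WAV')
audio_buffer.seek(0)  # rewind so the consumer reads from the first byte
```

In a Streamlit app the rewound buffer can be handed straight to st.audio(audio_buffer, format='audio/wav'); the diff does not show the consumer, so that usage is an assumption.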