yongyeol committed · verified
Commit 0836597 · 1 Parent(s): 82191e2

Update app.py

Files changed (1)
app.py +7 -2
app.py CHANGED
@@ -65,8 +65,10 @@ except ModuleNotFoundError:
 caption_model = VisionEncoderDecoderModel.from_pretrained(
     "nlpconnect/vit-gpt2-image-captioning",
     use_safetensors=True,
-    low_cpu_mem_usage=True
+    low_cpu_mem_usage=True          # fine to leave as is
 )
+caption_model.to("cpu")             # ★ added
+
 feature_extractor = ViTImageProcessor.from_pretrained(
     "nlpconnect/vit-gpt2-image-captioning"
 )
@@ -85,9 +87,12 @@ musicgen.set_generation_params(duration=10)
 # ─────────────────────────────────────────────────────────────
 def generate_caption(image: Image.Image) -> str:
     pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
-    ids = caption_model.generate(pixel_values, max_length=50)
+    caption_model.to(pixel_values.device)  # ★ move to the input's device for safety
+    with torch.no_grad():
+        ids = caption_model.generate(pixel_values, max_length=50)
     return tokenizer.decode(ids[0], skip_special_tokens=True)
 
+
 def generate_music(prompt: str) -> str:
     wav = musicgen.generate([prompt])      # batch size = 1
     tmpdir = tempfile.mkdtemp()
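
A minimal standalone sketch of the captioning path as patched by this commit, assuming the tokenizer is loaded from the same checkpoint (its load is outside the diff hunks); the MODEL_ID constant, the example.jpg path, and the __main__ wrapper are illustrative only, not part of app.py.

# Hypothetical sketch, not the commit itself: loading and captioning with the patched device handling.
import torch
from PIL import Image
from transformers import AutoTokenizer, ViTImageProcessor, VisionEncoderDecoderModel

MODEL_ID = "nlpconnect/vit-gpt2-image-captioning"

caption_model = VisionEncoderDecoderModel.from_pretrained(
    MODEL_ID,
    use_safetensors=True,
    low_cpu_mem_usage=True,
)
caption_model.to("cpu")  # pin to CPU right after loading, as the commit does

feature_extractor = ViTImageProcessor.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)  # assumed: app.py's tokenizer load is not shown in this diff

def generate_caption(image: Image.Image) -> str:
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
    caption_model.to(pixel_values.device)  # keep model and inputs on the same device
    with torch.no_grad():                  # inference only, no gradient buffers
        ids = caption_model.generate(pixel_values, max_length=50)
    return tokenizer.decode(ids[0], skip_special_tokens=True)

if __name__ == "__main__":
    print(generate_caption(Image.open("example.jpg").convert("RGB")))  # example.jpg is illustrative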