divyesh01 committed on
Commit
f8eb0cf
·
verified ·
1 Parent(s): 423a39a

Image Captioning and Segmentation Created

Browse files
Files changed (3) hide show
  1. app.py +59 -0
  2. image_captining.ipynb +0 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ import numpy as np
4
+ import cv2
5
+ from PIL import Image
6
+ import tempfile
7
+ from torchvision.models.detection import maskrcnn_resnet50_fpn
8
+ from torchvision.transforms import functional as F
9
+ from transformers import BlipProcessor, BlipForConditionalGeneration
10
+
11
@st.cache_resource
def load_models():
    """Load the segmentation and captioning models used by the app.

    The function is wrapped in ``st.cache_resource`` so the models are built
    once and reused instead of being reloaded on every script rerun.

    Returns:
        A ``(seg_model, caption_model, caption_processor)`` tuple: the Mask
        R-CNN segmentation model (already switched to eval mode), the BLIP
        captioning model, and the BLIP processor that prepares its inputs and
        decodes its outputs.
    """
    # ``pretrained=True`` is the deprecated torchvision spelling and emits a
    # warning; the ``weights`` parameter is the supported way to request the
    # pretrained checkpoint.
    seg_model = maskrcnn_resnet50_fpn(weights="DEFAULT")
    seg_model.eval()

    # Processor and model must come from the same checkpoint, so name it once.
    checkpoint = "Salesforce/blip-image-captioning-base"
    caption_processor = BlipProcessor.from_pretrained(checkpoint)
    caption_model = BlipForConditionalGeneration.from_pretrained(checkpoint)

    return seg_model, caption_model, caption_processor
20
+
21
# Fetch the models; load_models() is cached, so reruns reuse the same objects.
seg_model, caption_model, caption_processor = load_models()

st.title("🖼️ Image Segmentation & Captioning App")
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Normalise every upload to three-channel RGB before display and inference.
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Original Image", use_column_width=True)

    img_np = np.array(image)
    model_input = F.to_tensor(img_np)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        predictions = seg_model([model_input])
    # A single image was passed in, so take the single result.
    pred = predictions[0]
35
+
36
def apply_masks(img, pred, threshold=0.7, color=(0, 255, 0)):
    """Paint every confidently detected instance mask onto a copy of ``img``.

    Args:
        img: Image array indexed as ``img[row, col]`` holding one colour
            triple per pixel. It is copied, never modified in place.
        pred: Detection output mapping with ``"scores"`` (one confidence per
            detection) and ``"masks"`` (``masks[i, 0]`` is the soft mask of
            detection ``i``).
        threshold: Detections scoring below this value are skipped.
        color: Colour written into the masked pixels; defaults to pure green.

    Returns:
        The painted copy of ``img``.
    """
    painted = img.copy()
    # Walk scores and masks in lockstep instead of indexing by position.
    for score, soft_mask in zip(pred["scores"], pred["masks"]):
        if score.item() < threshold:
            continue
        # Scale the soft mask to 0-255 and keep the pixels above 128.
        mask = soft_mask[0].mul(255).byte().cpu().numpy()
        painted[mask > 128] = color
    return painted
45
+
46
# Everything below needs the uploaded image and its segmentation output, so it
# only runs once a file has been provided.
if uploaded_file is not None:
    masked_img = apply_masks(img_np, pred)
    st.image(masked_img, caption="Segmented Image", use_column_width=True)

    # Caption the original upload, not the painted overlay.
    inputs = caption_processor(images=image, return_tensors="pt")
    out = caption_model.generate(**inputs)
    caption = caption_processor.decode(out[0], skip_special_tokens=True)
    st.markdown(f"**📝 Caption:** _{caption}_")

    result_img = Image.fromarray(masked_img)
    # Encode through a self-deleting temporary file. The previous
    # NamedTemporaryFile(delete=False) was never closed or removed, so every
    # rerun left an open handle and a stray file on disk.
    with tempfile.TemporaryFile() as temp_file:
        result_img.save(temp_file, format="JPEG")
        temp_file.seek(0)
        output_bytes = temp_file.read()

    st.download_button(
        "📥 Download Output",
        output_bytes,
        file_name="output_result.jpg",
        mime="image/jpeg",
    )
image_captining.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ torchvision
4
+ transformers
5
+ Pillow
6
+ opencv-python-headless
7
+ timm