Upload 6 files
- .gitignore +18 -0
- app.py +123 -0
- functions.py +323 -0
- type2.py +86 -0
- type3.py +70 -0
- type4.py +133 -0
.gitignore
ADDED
@@ -0,0 +1,18 @@
# Virtual environment
venv/
.venv/
ENV/

# Python cache/compiled files
__pycache__/
*.py[cod]
*$py.class

# IDE-specific files
.vscode/
.idea/
*.swp
*.swo

# Environment variables
.env
app.py
ADDED
@@ -0,0 +1,123 @@
import streamlit as st
from transformers import pipeline
from transformers import DetrImageProcessor, DetrForObjectDetection
from transformers import CLIPProcessor, CLIPModel
from transformers import BlipProcessor, BlipForQuestionAnswering
#from transformers import YolosImageProcessor, YolosForObjectDetection
from PIL import Image
from functions import *
import io
import torch  # used below for device and dtype selection


# load models
@st.cache_resource
def load_models():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
    clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    sales_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    sales_model = BlipForQuestionAnswering.from_pretrained(
        "Salesforce/blip-vqa-base",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32
    ).to(device)

    return {
        "detector": model,
        "processor": processor,
        "clip": clip_model,
        "clip process": clip_processor,
        #"t5 token": t5_tokenizer,
        #"t5": t5_model,
        "story_teller": pipeline("text-generation", model="nickypro/tinyllama-15M"),
        "sales process": sales_processor,
        "sales model": sales_model,
        "device": device
    }


def main():
    st.header("📱 Nano AI Image Analyzer")

    uploaded_file = st.file_uploader("upload image")  #, type=['.PNG','png','jpg','jpeg'])
    models = load_models()
    st.write("models loaded")

    #im2 = detect_objects(image_path=image, models=models)
    #st.write(im2)
    #st.write("done")
    #annotated_image = draw_bounding_boxes(image, im2)
    #st.image(annotated_image, caption="Detected Objects", use_container_width=True)

    # buttons UI
    if uploaded_file is not None:
        image_bytes = uploaded_file.getvalue()
        st.write("Filename:", uploaded_file.name)
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", width=200)  # use_container_width=False

        col1, col2, col3 = st.columns([0.33, 0.33, 0.33])

        with col1:
            detect = st.button("🔍 Detect Objects", key="btn1")
        with col2:
            describe = st.button("📝 Describe Image", key="btn2")
        with col3:
            story = st.button("📖 Generate Story", key="btn3",
                              help="story is generated based on caption")

        if detect:
            with st.spinner("Detecting objects..."):
                try:
                    detections = detect_objects(image.copy(), models)
                    annotated_image = draw_bounding_boxes(image, detections)
                    st.image(annotated_image, caption="Detected Objects", use_column_width=True)
                    show_detection_table(detections)
                except Exception as e:
                    st.error(f"Something went wrong, try another image ({e})")

        elif describe:
            with st.spinner("trying to describe..."):
                description = get_image_description(image.copy(), models)
                st.write(description)

        elif story:
            #st.write('btn3 clicked')
            with st.spinner("getting a story..."):
                description = get_image_description(image.copy(), models)
                story = generate_story(description, models)
                st.write(story)

        # Chat interface
        if "messages" not in st.session_state:
            st.session_state.messages = []

        chat_container = st.container(height=400)
        with chat_container:
            for message in st.session_state.messages:
                with st.chat_message(message["role"]):
                    st.markdown(message["content"])

        if prompt := st.chat_input("Ask about the image"):
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            with st.chat_message("assistant"):
                with st.spinner("Thinking..."):
                    response = answer_question(image,
                                               prompt,
                                               models["sales process"],
                                               models["sales model"],
                                               models["device"])
                    #response = "response sample"
                    st.markdown(response)
            st.session_state.messages.append({"role": "assistant", "content": response})


if __name__ == "__main__":
    main()
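For reference, each chat turn in app.py boils down to a single BLIP-VQA round trip. A minimal standalone sketch of that path outside Streamlit (the local image path and the question are hypothetical; the checkpoint and the generate/decode calls mirror load_models() and answer_question() above):

from PIL import Image
import torch
from transformers import BlipProcessor, BlipForQuestionAnswering

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(device)

image = Image.open("sample.jpg").convert("RGB")      # hypothetical local image
question = "How many people are in the picture?"     # hypothetical question
inputs = processor(image, question, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=100)
print(processor.decode(outputs[0], skip_special_tokens=True))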
functions.py
ADDED
@@ -0,0 +1,323 @@
from PIL import Image, ImageDraw
from transformers import DetrImageProcessor, DetrForObjectDetection
import numpy as np
import torch
import pandas as pd
import streamlit as st
from pathlib import Path


def safe_image_open(uploaded_file):
    try:
        # Convert to lowercase and remove spaces
        filename = Path(uploaded_file.name).stem.lower().replace(" ", "_") + ".png"
        image = Image.open(uploaded_file).convert("RGB")
        return image
    except Exception as e:
        st.error(f"Error loading image: {str(e)}")
        return None


def QA(image, question, models):
    inputs = models['sales process'](image, question, return_tensors='pt')
    out = models['sales model'].generate(**inputs)
    return out


def answer_question(image, question, processor, model, device):
    inputs = processor(image, question, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=100)
    return processor.decode(outputs[0], skip_special_tokens=True)


def generate_story(caption, models):
    """Generate short story"""
    #caption = "a beautiful landscape"
    return models['story_teller'](
        f"Write story about: {caption}",
        max_length=500,
        do_sample=True,
        temperature=0.7
    )[0]['generated_text']


def generate_story2(prompt, models):
    input_text = f"Write a short story about {prompt}"
    input_ids = models["t5 token"].encode(input_text, return_tensors="pt", max_length=64, truncation=True)
    output_ids = models["t5"].generate(input_ids, max_length=512)
    story = models["t5 token"].decode(output_ids[0], skip_special_tokens=True)
    return story


def get_image_description(image_path, models):
    image = image_path
    text_inputs = ["a dog", "a cat", "a man", "a woman", "a child", "group of friends",
                   "a scenic view", "a cityscape", "a forest", "a beach", "a mountain", "a group of people", "a car", "a bird",
                   "a beautiful landscape", "a couple in love", "an animal", "amazing space",
                   "incredible earth", "motion", "singularity", "anime", "emotions",
                   "sorrow", "joy"]

    inputs = models["clip process"](text=text_inputs, images=image, return_tensors="pt", padding=True)
    outputs = models["clip"](**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    best = text_inputs[probs.argmax()]
    return best


def show_detection_table(detection_text):
    """
    Convert detection text into a formatted Streamlit table

    Args:
        detection_text: String in format "[x1, y1, x2, y2] label score"

    Returns:
        Displays a Streamlit table with columns: Object Type, Box Coordinates, Score
    """
    # Parse each line into a list of dictionaries
    detections = []
    for line in detection_text.strip().split('\n'):
        if not line:
            continue

        # Parse the components
        bbox_part, label, score = line.rsplit(' ', 2)
        bbox = bbox_part.strip('[]')

        detections.append({
            'Object Type': label,
            'Box Coordinates': f"[{bbox}]",
            'Score': float(score)
        })

    # Convert to DataFrame
    df = pd.DataFrame(detections)

    # Format the score column
    df['Score'] = df['Score'].map('{:.2f}'.format)

    # Display in Streamlit with some styling
    st.dataframe(
        df,
        column_config={
            "Object Type": "Object Type",
            "Box Coordinates": "Box [x1,y1,x2,y2]",
            "Score": st.column_config.NumberColumn(
                "Confidence",
                format="%.2f",
            )
        },
        hide_index=True,
        use_container_width=True
    )


def draw_bounding_boxes(image, detection_text):
    """
    Draw bounding boxes on image with different colors for people vs other objects

    Args:
        image: PIL Image object
        detection_text: String in format "[x1, y1, x2, y2] label score"

    Returns:
        PIL Image with bounding boxes drawn
    """
    # Create a drawing context
    draw = ImageDraw.Draw(image)

    # Define colors
    PERSON_COLOR = (255, 0, 0)    # Red for people
    CAR_COLOR = (255, 165, 0)     # Orange for cars
    OTHER_COLOR = (0, 255, 0)     # Green for other objects
    TEXT_COLOR = (255, 255, 255)  # White text

    # Parse each detection line
    for line in detection_text.strip().split('\n'):
        if not line:
            continue

        # Parse the detection info
        bbox_part, label, score = line.rsplit(' ', 2)
        bbox = list(map(int, bbox_part.strip('[]').split(',')))
        confidence = float(score)

        # Determine box color
        #box_color = PERSON_COLOR if label == 'person' else OTHER_COLOR
        if label == "person":
            box_color = PERSON_COLOR
        elif label == "car":
            box_color = CAR_COLOR
        else:
            box_color = OTHER_COLOR

        # Draw bounding box
        draw.rectangle(
            [(bbox[0], bbox[1]), (bbox[2], bbox[3])],
            outline=box_color,
            width=3
        )

        # Draw label with confidence
        label_text = f"{label} {confidence:.2f}"
        text_position = (bbox[0], bbox[1] - 15)

        # Draw text background
        text_bbox = draw.textbbox(text_position, label_text)
        draw.rectangle(
            [(text_bbox[0] - 2, text_bbox[1] - 2), (text_bbox[2] + 2, text_bbox[3] + 2)],
            fill=box_color
        )

        # Draw text
        draw.text(
            text_position,
            label_text,
            fill=TEXT_COLOR
        )

    return image


def detect_objects(image_path, models):
    """
    Detects objects in the provided image.

    Args:
        image_path: a PIL Image (despite the name, not a file path).

    Returns:
        str: A string with one detected object per line, each formatted as
        '[x1, y1, x2, y2] class_name confidence_score'.
    """
    image = image_path

    #processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
    #model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
    processor = models['processor']
    model = models['detector']

    inputs = processor(images=image, return_tensors="pt")
    outputs = model(**inputs)

    # convert outputs (bounding boxes and class logits) to COCO API
    # let's only keep detections with score > 0.9
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

    detections = ""
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        detections += '[{}, {}, {}, {}]'.format(int(box[0]), int(box[1]), int(box[2]), int(box[3]))
        detections += ' {}'.format(model.config.id2label[int(label)])
        detections += ' {}\n'.format(float(score))

    return detections


def detect_objects4(image, models):
    processor = models['processor']
    model = models['detector']
    inputs = processor(images=image, return_tensors="pt")
    outputs = model(**inputs)
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(i, 2) for i in box.tolist()]
        print(
            f"Detected {model.config.id2label[label.item()]} with confidence "
            f"{round(score.item(), 3)} at location {box}"
        )


def detect_objects3(image, models, threshold=0.7):
    """Object detection with bounding boxes using DETR"""
    if not isinstance(image, Image.Image):
        image = Image.open(image)

    processor = models['processor']
    model = models['detector']

    # Preprocess image
    inputs = processor(images=image, return_tensors="pt")

    # Run model
    outputs = model(**inputs)

    # Get original image size (height, width)
    target_size = torch.tensor([image.size[::-1]])

    # Post-process results
    results = processor.post_process_object_detection(outputs, target_sizes=target_size, threshold=threshold)[0]

    # Draw results
    draw = ImageDraw.Draw(image)
    formatted_results = []

    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = box.tolist()
        label_text = model.config.id2label[label.item()]
        score_val = score.item()

        # Draw box
        draw.rectangle(
            [(box[0], box[1]), (box[2], box[3])],
            outline="red",
            width=3
        )
        draw.text(
            (box[0], box[1] - 10),
            f"{label_text} ({score_val:.2f})",
            fill="red"
        )

        formatted_results.append({
            "label": label_text,
            "score": score_val,
            "box": {
                "xmin": box[0],
                "ymin": box[1],
                "xmax": box[2],
                "ymax": box[3]
            }
        })

    return image, formatted_results


def detect_objects2(image, models):
    """Function 1: Object detection with bounding boxes"""
    results = models['detector'](image)

    # Draw bounding boxes
    draw = ImageDraw.Draw(image)
    for result in results:
        box = result['box']
        draw.rectangle(
            [(box['xmin'], box['ymin']), (box['xmax'], box['ymax'])],
            outline="red",
            width=3
        )
        draw.text(
            (box['xmin'], box['ymin'] - 10),
            f"{result['label']} ({result['score']:.2f})",
            fill="red"
        )
    return image, results


"""@st.cache_resource
def load_light_models():
    # Load lighter version of models with proper DETR handling
    models = {}

    # Load DETR components separately
    with st.spinner("Loading object detection model..."):
        models['detr_processor'] = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
        models['detr_model'] = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

    # Use pipeline for captioning
    with st.spinner("Loading captioning model..."):
        models['captioner'] = pipeline(
            "image-to-text",
            model="Salesforce/blip-image-captioning-base"
        )

    return models"""

"""@st.cache_resource
def load_models():
    return {
        # Using tiny models for faster loading
        'detector': pipeline("object-detection", model="hustvl/yolos-tiny")
        #'captioner': pipeline("image-to-text", model="Salesforce/blip-image-captioning-base"),
        #'story_teller': pipeline("text-generation", model="gpt2")
    }"""
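The detection helpers above share an informal contract: detect_objects() emits one object per line in the form "[x1, y1, x2, y2] label score", and draw_bounding_boxes() / show_detection_table() parse it back with rsplit(' ', 2). A small sketch with a hand-written sample string (the coordinates and scores are made up):

# Hypothetical output in the format produced by detect_objects()
sample = "[10, 20, 110, 220] person 0.98\n[30, 40, 90, 120] car 0.91\n"

for line in sample.strip().split('\n'):
    bbox_part, label, score = line.rsplit(' ', 2)   # peel label and score off the right
    coords = [int(v) for v in bbox_part.strip('[]').split(',')]
    print(coords, label, float(score))
# [10, 20, 110, 220] person 0.98
# [30, 40, 90, 120] car 0.91

Note that rsplit(' ', 2) assumes a single-token label, so multi-word COCO classes such as "cell phone" would be split incorrectly; the regex-based parser in type4.py below handles that case.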
type2.py
ADDED
@@ -0,0 +1,86 @@
import streamlit as st
from PIL import Image, ImageDraw
from transformers import pipeline

# Tiny models only
@st.cache_resource
def load_models():
    return {
        # Tiny object classifier (5MB)
        #'detector': pipeline("image-classification", model="google/mobilenet_v1.0_224"),

        # Micro captioning model (45MB)
        #'captioner': pipeline("image-to-text", model="bipin/image-caption-generator"),

        # Nano story generator (33MB)
        'story_teller': pipeline("text-generation", model="sshleifer/tiny-gpt2")
    }


def analyze_image(image, models):
    """Combined analysis to minimize model loads"""
    # NOTE: load_models() above currently only provides 'story_teller'; the
    # 'detector' and 'captioner' entries are commented out, so this function
    # raises KeyError until they are restored.
    results = {}

    # Object classification (not detection)
    with st.spinner("Identifying contents..."):
        results['objects'] = models['detector'](image)

    # Image captioning
    with st.spinner("Generating caption..."):
        results['caption'] = models['captioner'](image)[0]['generated_text']

    return results


def generate_story(caption, models):
    """Generate short story"""
    return models['story_teller'](
        f"Write a 3-sentence story about: {caption}",
        max_length=100,
        do_sample=True,
        temperature=0.7
    )[0]['generated_text']


def main():
    st.title("📱 Nano AI Image Analyzer")

    uploaded_file = st.file_uploader("Choose image...", type=["jpg", "png"])

    if uploaded_file:
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, use_column_width=True)

        models = load_models()
        analysis = None

        col1, col2, col3 = st.columns(3)

        with col1:
            if st.button("🔍 Analyze", key="analyze"):
                analysis = analyze_image(image, models)
                st.session_state.analysis = analysis

                st.subheader("Main Objects")
                for obj in analysis['objects'][:3]:
                    st.write(f"- {obj['label']} ({obj['score']:.0%})")

        with col2:
            if st.button("📝 Describe", key="describe"):
                if 'analysis' not in st.session_state:
                    st.warning("Analyze first!")
                else:
                    st.subheader("Caption")
                    st.write(st.session_state.analysis['caption'])

        with col3:
            if st.button("📖 Mini Story", key="story"):
                if 'analysis' not in st.session_state:
                    st.warning("Analyze first!")
                else:
                    story = generate_story(
                        st.session_state.analysis['caption'],
                        models
                    )
                    st.subheader("Short Story")
                    st.write(story)


if __name__ == "__main__":
    main()
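The generate_story() helper in type2.py is a thin wrapper over a text-generation pipeline. A minimal sketch of the same call pattern (sshleifer/tiny-gpt2 is a tiny test checkpoint, so the sampled continuation is essentially placeholder text and mainly useful for checking the wiring; the caption is hypothetical):

from transformers import pipeline

story_teller = pipeline("text-generation", model="sshleifer/tiny-gpt2")
result = story_teller(
    "Write a 3-sentence story about: a scenic view",   # hypothetical caption
    max_length=100,
    do_sample=True,
    temperature=0.7,
)
print(result[0]["generated_text"])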
type3.py
ADDED
@@ -0,0 +1,70 @@
import streamlit as st
from PIL import Image
from transformers import BlipProcessor, Blip2ForConditionalGeneration, BlipForQuestionAnswering
import torch

@st.cache_resource
def load_blip_model():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    model = BlipForQuestionAnswering.from_pretrained(
        "Salesforce/blip-vqa-base",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32
    ).to(device)
    return processor, model, device


def answer_question(image, question, processor, model, device):
    inputs = processor(image, question, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=100)
    return processor.decode(outputs[0], skip_special_tokens=True)


# Streamlit App
def main():
    st.title("Image Chat Assistant")

    # Load model
    processor, model, device = load_blip_model()

    # Image upload
    uploaded_file = st.file_uploader("Upload image", type=["jpg", "png", "jpeg"])

    if uploaded_file:
        image = Image.open(uploaded_file)
        st.image(image, use_column_width=True)

        col1, col2, col3 = st.columns([0.33, 0.33, 0.33])

        with col1:
            detect = st.button("🔍 Detect Objects", key="btn1")
        with col2:
            describe = st.button("📝 Describe Image", key="btn2")
        with col3:
            story = st.button("📖 Generate Story", key="btn3")

        # Chat interface
        if "messages" not in st.session_state:
            st.session_state.messages = []

        chat_container = st.container(height=400)
        with chat_container:
            for message in st.session_state.messages:
                with st.chat_message(message["role"]):
                    st.markdown(message["content"])

        if prompt := st.chat_input("Ask about the image"):
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            with st.chat_message("assistant"):
                with st.spinner("Thinking..."):
                    response = answer_question(image, prompt, processor, model, device)
                    #response = "response sample"
                    st.markdown(response)
            st.session_state.messages.append({"role": "assistant", "content": response})


if __name__ == "__main__":
    main()
type4.py
ADDED
@@ -0,0 +1,133 @@
import streamlit as st
from transformers import DetrImageProcessor, DetrForObjectDetection
from PIL import Image, ImageDraw
import torch
import re

@st.cache_resource
def load_detection_model():
    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
    return processor, model


def parse_detection_text(detection_text):
    """Robust parsing of detection text with error handling"""
    detections = []
    pattern = r'\[([\d\s,]+)\]\s+([a-zA-Z\s]+)\s+([\d.]+)'

    for line in detection_text.split('\n'):
        if not line.strip():
            continue

        try:
            match = re.match(pattern, line)
            if match:
                coords = [int(x.strip()) for x in match.group(1).split(',')]
                label = match.group(2).strip()
                score = float(match.group(3))

                if len(coords) == 4:
                    detections.append({
                        'box': {'xmin': coords[0], 'ymin': coords[1],
                                'xmax': coords[2], 'ymax': coords[3]},
                        'label': label,
                        'score': score
                    })
        except (ValueError, AttributeError) as e:
            st.warning(f"Skipping malformed detection line: {line}")
            continue

    return detections


def detect_objects(image, processor, model):
    """Run DETR object detection with proper error handling"""
    try:
        inputs = processor(images=image, return_tensors="pt")
        outputs = model(**inputs)

        target_sizes = torch.tensor([image.size[::-1]])
        results = processor.post_process_object_detection(
            outputs,
            target_sizes=target_sizes,
            threshold=0.7
        )[0]

        detection_text = ""
        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
            detection_text += f"[{int(box[0])}, {int(box[1])}, {int(box[2])}, {int(box[3])}] " \
                              f"{model.config.id2label[label.item()]} {score.item()}\n"

        return detection_text, results

    except Exception as e:
        st.error(f"Detection failed: {str(e)}")
        return "", None


def draw_boxes(image, detections):
    """Draw bounding boxes with different colors for different classes"""
    draw = ImageDraw.Draw(image)
    color_map = {
        'person': 'red',
        'cell phone': 'blue',
        'default': 'green'
    }

    for det in detections:
        box = det['box']
        label = det['label']
        color = color_map.get(label.lower(), color_map['default'])

        draw.rectangle(
            [(box['xmin'], box['ymin']), (box['xmax'], box['ymax'])],
            outline=color,
            width=3
        )
        draw.text(
            (box['xmin'], box['ymin'] - 15),
            f"{label} ({det['score']:.2f})",
            fill=color
        )
    return image


def main():
    st.title("Object Detection with DETR")
    processor, model = load_detection_model()

    uploaded_file = st.file_uploader("Upload image", type=["jpg", "png", "jpeg"])

    if uploaded_file:
        image = Image.open(uploaded_file)
        st.image(image, caption="Original Image", use_column_width=True)

        if st.button("Detect Objects"):
            with st.spinner("Detecting objects..."):
                detection_text, results = detect_objects(image, processor, model)

            if detection_text:
                st.subheader("Detection Results")

                # Show raw detections
                with st.expander("Raw Detection Output"):
                    st.text(detection_text)

                # Show parsed results
                detections = parse_detection_text(detection_text)
                if detections:
                    annotated_image = draw_boxes(image.copy(), detections)
                    st.image(annotated_image, caption="Detected Objects", use_column_width=True)

                    # Display in table
                    st.subheader("Detected Objects")
                    st.table([
                        {
                            "Object": d["label"],
                            "Confidence": f"{d['score']:.2%}",
                            "Position": f"({d['box']['xmin']}, {d['box']['ymin']}) to ({d['box']['xmax']}, {d['box']['ymax']})"
                        }
                        for d in detections
                    ])
                else:
                    st.warning("No valid detections found")


if __name__ == "__main__":
    main()
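For reference, the pattern in parse_detection_text() captures the bracketed coordinates, a possibly multi-word label, and the confidence score from each line. A quick sanity check against a hand-written line (the numbers are made up):

import re

pattern = r'\[([\d\s,]+)\]\s+([a-zA-Z\s]+)\s+([\d.]+)'
line = "[10, 20, 110, 220] cell phone 0.874"          # hypothetical detection line
m = re.match(pattern, line)
coords = [int(x.strip()) for x in m.group(1).split(',')]
print(coords, m.group(2).strip(), float(m.group(3)))
# [10, 20, 110, 220] cell phone 0.874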