Update app.py
app.py CHANGED
@@ -1,7 +1,9 @@
 import gradio as gr
 import os
 from PIL import Image, ImageChops, ImageFilter
-from transformers import BlipProcessor, BlipForConditionalGeneration, CLIPProcessor, CLIPModel
+from ultralytics import YOLO
+from segment_anything import SamPredictor, sam_model_registry
+from transformers import BlipProcessor, BlipForConditionalGeneration, CLIPProcessor, CLIPModel, AutoProcessor, AutoModelForImageClassification
 import torch
 import matplotlib.pyplot as plt
 import numpy as np
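A note on this import hunk: generate_text_analysis further down constructs an OpenAI(...) client, yet no openai import is visible on either side of the diff. It may simply sit on one of the lines the diff does not display (new lines 10-13); if it does not, the file would also need the import below (openai>=1.0 SDK naming, stated here as an assumption).

from openai import OpenAI  # assumed import, not visible in the hunks shown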
@@ -12,57 +14,70 @@ clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-[old lines 15-55, a comment block and a helper definition, removed; their text is truncated in the page capture]
+sam_checkpoint = "sam_vit_h_4b8939.pth"  # replace with the actual checkpoint path
+sam = sam_model_registry["vit_h"](checkpoint=sam_checkpoint)
+sam_predictor = SamPredictor(sam)
+yolo_model = YOLO("yolov8x.pt")  # replace with the actual YOLO model path
+wd_processor = AutoProcessor.from_pretrained("SmilingWolf/wd-v1-4-vit-large-tagger")
+wd_model = AutoModelForImageClassification.from_pretrained("SmilingWolf/wd-v1-4-vit-large-tagger")
+
+# Automatically classify the image type
+def classify_image_type(image):
+    inputs = wd_processor(images=image, return_tensors="pt")
+    outputs = wd_model(**inputs)
+    scores = torch.softmax(outputs.logits, dim=1)[0]
+    anime_score = scores[wd_processor.label2id["anime"]].item()
+    return "anime" if anime_score > 0.5 else "real"
+
+# Segment objects in the image
+def segment_objects(image, boxes):
+    image_np = np.array(image)
+    sam_predictor.set_image(image_np)
+    masks = []
+    for box in boxes:
+        mask, _, _ = sam_predictor.predict(
+            point_coords=None, point_labels=None, box=box, multimask_output=False
+        )
+        masks.append(mask)
+    return masks
+
+# Detect objects
+def detect_objects(image, image_type):
+    if image_type == "real":
+        results = yolo_model.predict(np.array(image), conf=0.25)
+        objects = [{"label": r["class"], "box": r["bbox"], "confidence": r["confidence"]} for r in results]
+    else:
+        inputs = wd_processor(images=image, return_tensors="pt")
+        outputs = wd_model(**inputs)
+        scores = torch.softmax(outputs.logits, dim=1)[0]
+        top_k = torch.topk(scores, k=5)
+        objects = [{"label": wd_processor.decode(top_k.indices[i].item()), "confidence": top_k.values[i].item()} for i in range(5)]
+    return objects
+
+# Generate semantic descriptions
+def generate_object_descriptions(image, objects):
+    descriptions = []
+    for obj in objects:
+        box = obj.get("box", None)
+        if box:
+            cropped = image.crop(box)
+        else:
+            cropped = image
+        inputs = blip_processor(cropped, return_tensors="pt")
+        caption = blip_model.generate(**inputs, max_length=128, num_beams=5, no_repeat_ngram_size=2)
+        description = blip_processor.decode(caption[0], skip_special_tokens=True)
+        descriptions.append({"label": obj["label"], "description": description})
+    return descriptions
 
 # Visualize feature differences
-def plot_feature_differences(latent_diff, prefix):
+def plot_feature_differences(latent_diff, descriptions, prefix):
     diff_magnitude = [abs(x) for x in latent_diff[0]]
     indices = range(len(diff_magnitude))
     top_indices = np.argsort(diff_magnitude)[-10:][::-1]
 
     plt.figure(figsize=(8, 4))
     plt.bar(indices, diff_magnitude, alpha=0.7)
-    plt.xlabel("Feature Index
+    plt.xlabel("Feature Index")
     plt.ylabel("Magnitude of Difference")
     plt.title("Feature Differences (Bar Chart)")
     bar_chart_path = f"{prefix}_bar_chart.png"
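Two calls in this hunk look fragile as committed. wd_processor.label2id and wd_processor.decode(...) are not attributes of a transformers image processor; for a classification checkpoint the label maps live on wd_model.config. And yolo_model.predict(...) returns ultralytics Results objects, not dicts, so r["class"], r["bbox"], and r["confidence"] will raise. A hedged sketch of what the two helpers appear to intend, reusing the wd_processor, wd_model, and yolo_model globals defined above; the "anime" tag name is an assumption about this tagger's label set:

import numpy as np
import torch

def classify_image_type(image):
    # Label ids come from the model config, not the processor.
    # WD taggers are multi-label, so sigmoid is the usual activation.
    inputs = wd_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        logits = wd_model(**inputs).logits
    scores = torch.sigmoid(logits)[0]
    anime_id = wd_model.config.label2id.get("anime")  # assumed tag name
    return "anime" if anime_id is not None and scores[anime_id] > 0.5 else "real"

def detect_objects_real(image):
    # Each Results object holds a Boxes collection with cls/conf/xyxy
    # tensors plus a names map from class id to label string.
    results = yolo_model.predict(np.array(image), conf=0.25)
    objects = []
    for r in results:
        for b in r.boxes:
            objects.append({
                "label": r.names[int(b.cls)],
                "box": b.xyxy[0].tolist(),
                "confidence": float(b.conf),
            })
    return objects

Also worth noting: segment_objects is defined here but never called in the hunks shown, and SAM's predict expects the box prompt as a NumPy array in XYXY pixel coordinates rather than a plain list.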
@@ -72,7 +87,7 @@ def plot_feature_differences(latent_diff, prefix):
     plt.figure(figsize=(6, 6))
     plt.pie(
         [diff_magnitude[i] for i in top_indices],
-        labels=[
+        labels=[descriptions[i] for i in top_indices],
         autopct="%1.1f%%",
         startangle=140
     )
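One caveat on the new pie labels: top_indices indexes CLIP's feature dimensions (512 for this checkpoint), while the descriptions list passed in from analyze_images in the final hunk has only one entry per detected object, so descriptions[i] will usually be out of range. A guarded version of the same call, sketched as a drop-in for the body of plot_feature_differences:

# Fall back to a generic label when a top feature index has no
# matching description entry.
labels = [
    descriptions[i] if i < len(descriptions) else f"feature {i}"
    for i in top_indices
]
plt.pie(
    [diff_magnitude[i] for i in top_indices],
    labels=labels,
    autopct="%1.1f%%",
    startangle=140,
)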
@@ -83,7 +98,7 @@ def plot_feature_differences(latent_diff, prefix):
 
     return bar_chart_path, pie_chart_path
 
-#
+# Generate detailed analysis text
 def generate_text_analysis(api_key, api_type, caption_a, caption_b):
     if api_type == "DeepSeek":
         client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
@@ -94,18 +109,21 @@ def generate_text_analysis(api_key, api_type, caption_a, caption_b):
         model="gpt-4" if api_type == "GPT" else "deepseek-chat",
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": f"Image A is described as: {caption_a}
+            {"role": "user", "content": f"Image A is described as: {caption_a}.\nImage B is described as: {caption_b}.\nPlease give a detailed comparative analysis of the two images."}
         ]
     )
     return response.choices[0].message.content.strip()
 
-#
+# Analyze a single image pair
 def analyze_images(img_a, img_b, api_key, api_type, prefix):
-    [two removed lines, truncated in the page capture]
+    type_a = classify_image_type(img_a)
+    type_b = classify_image_type(img_b)
+
+    objects_a = detect_objects(img_a, type_a)
+    objects_b = detect_objects(img_b, type_b)
 
-    [two removed lines, truncated in the page capture]
+    descriptions_a = generate_object_descriptions(img_a, objects_a)
+    descriptions_b = generate_object_descriptions(img_b, objects_b)
 
     inputs = clip_processor(images=img_a, return_tensors="pt")
     features_a = clip_model.get_image_features(**inputs).detach().numpy()
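The next hunk passes descriptions_a and descriptions_b, lists of {"label", "description"} dicts, straight into generate_text_analysis, whose f-string prompt will interpolate their raw Python repr. Flattening them into sentences first reads better for the model; captions_to_text is an illustrative helper, not part of the commit:

def captions_to_text(descriptions):
    # Join the per-object BLIP captions into one readable string.
    return "; ".join(f"{d['label']}: {d['description']}" for d in descriptions)

# Inside analyze_images, the call could then read:
text_analysis = generate_text_analysis(
    api_key, api_type,
    captions_to_text(descriptions_a),
    captions_to_text(descriptions_b),
)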
@@ -115,66 +133,32 @@ def analyze_images(img_a, img_b, api_key, api_type, prefix):
 
     latent_diff = np.abs(features_a - features_b).tolist()
 
-    bar_chart, pie_chart = plot_feature_differences(latent_diff, prefix)
-    text_analysis = generate_text_analysis(api_key, api_type,
+    bar_chart, pie_chart = plot_feature_differences(latent_diff, [d['label'] for d in descriptions_a], prefix)
+    text_analysis = generate_text_analysis(api_key, api_type, descriptions_a, descriptions_b)
 
     return {
-        "saved_images": saved_images,
-        "caption_a": caption_a,
-        "caption_b": caption_b,
-        "text_analysis": text_analysis,
         "bar_chart": bar_chart,
-        "pie_chart": pie_chart
+        "pie_chart": pie_chart,
+        "text_analysis": text_analysis
     }
 
-#
-def batch_analyze(images_a, images_b, api_key, api_type):
-    num_pairs = min(len(images_a), len(images_b))
-
-    results = []
-    for i in range(num_pairs):
-        prefix = f"comparison_{i+1}"
-        result = analyze_images(images_a[i], images_b[i], api_key, api_type, prefix)
-        results.append({
-            "pair": (f"Image A-{i+1}", f"Image B-{i+1}"),
-            **result
-        })
-    return results
-
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("#
-    api_key_input = gr.Textbox(label="API Key", placeholder="Enter your API Key", type="password")
+    gr.Markdown("# Comprehensive Image Comparison and Analysis Tool")
+    api_key_input = gr.Textbox(label="API Key", placeholder="Enter API Key", type="password")
     api_type_input = gr.Radio(label="API Type", choices=["GPT", "DeepSeek"], value="GPT")
-    images_a_input = gr.File(label="Upload folder A images", file_types=[".png", ".jpg"
-    images_b_input = gr.File(label="Upload folder B images", file_types=[".png", ".jpg"
-    analyze_button = gr.Button("
-
-    result_gallery = gr.Gallery(label="Difference Images")
-    result_text_analysis = gr.Textbox(label="Detailed Analysis", interactive=False, lines=5)
+    images_a_input = gr.File(label="Upload folder A images", file_types=[".png", ".jpg"], file_count="multiple")
+    images_b_input = gr.File(label="Upload folder B images", file_types=[".png", ".jpg"], file_count="multiple")
+    analyze_button = gr.Button("Start Analysis")
+    result_gallery = gr.Gallery(label="Difference Visualization")
+    result_text = gr.Textbox(label="Analysis Results", lines=5)
 
-    def process_batch_analysis(images_a, images_b, api_key, api_type):
+    def process_batch(images_a, images_b, api_key, api_type):
         images_a = [Image.open(img).convert("RGB") for img in images_a]
         images_b = [Image.open(img).convert("RGB") for img in images_b]
-        results = batch_analyze(images_a, images_b, api_key, api_type)
-
-        all_images = []
-        all_texts = []
-
-        for result in results:
-            all_images.extend(result["saved_images"])
-            all_images.append((result["bar_chart"], "Bar Chart"))
-            all_images.append((result["pie_chart"], "Pie Chart"))
-            all_texts.append(f"{result['pair'][0]} vs {result['pair'][1]}:\n{result['text_analysis']}")
-
-        return all_images, "\n\n".join(all_texts)
-
-    analyze_button.click(
-        fn=process_batch_analysis,
-        inputs=[images_a_input, images_b_input, api_key_input, api_type_input],
-        outputs=[result_gallery, result_text_analysis]
-    )
+        results = [analyze_images(img_a, img_b, api_key, api_type, f"comparison_{i+1}") for i, (img_a, img_b) in enumerate(zip(images_a, images_b))]
+        return results
+
+    analyze_button.click(process_batch, inputs=[images_a_input, images_b_input, api_key_input, api_type_input], outputs=[result_gallery, result_text])
 
 demo.launch()
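Finally, the rewritten process_batch returns the raw list of result dicts, while analyze_button.click wires two outputs, a Gallery and a Textbox; the removed process_batch_analysis did exactly this flattening. A sketch of a return value shaped for those two components, using only keys the new analyze_images actually produces:

def process_batch(images_a, images_b, api_key, api_type):
    images_a = [Image.open(img).convert("RGB") for img in images_a]
    images_b = [Image.open(img).convert("RGB") for img in images_b]
    gallery, texts = [], []
    for i, (img_a, img_b) in enumerate(zip(images_a, images_b)):
        result = analyze_images(img_a, img_b, api_key, api_type, f"comparison_{i+1}")
        # gr.Gallery accepts (image_path, caption) pairs.
        gallery.append((result["bar_chart"], f"Pair {i+1}: bar chart"))
        gallery.append((result["pie_chart"], f"Pair {i+1}: pie chart"))
        texts.append(f"Pair {i+1}:\n{result['text_analysis']}")
    return gallery, "\n\n".join(texts)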