PSNbst committed
Commit 8d8b4cc · verified · 1 Parent(s): 740b171

Update app.py

Files changed (1)
  1. app.py +17 -18
app.py CHANGED
@@ -3,7 +3,7 @@ import torch
 from transformers import CLIPProcessor, CLIPModel, BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
 import numpy as np
-import openai  # GPT API call
+from openai import OpenAI
 
 # Initialize models
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
@@ -11,11 +11,8 @@ clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
-# GPT API configuration
-openai.api_key = "your_openai_api_key"
-
 # Define the analysis function
-def analyze_images(image_a, image_b):
+def analyze_images(image_a, image_b, api_key):
     # Generate captions with BLIP
     def generate_caption(image):
         inputs = blip_processor(image, return_tensors="pt")
@@ -44,17 +41,17 @@ def analyze_images(image_a, image_b):
     cosine_similarity = np.dot(features_a, features_b.T) / (np.linalg.norm(features_a) * np.linalg.norm(features_b))
     latent_diff = np.abs(features_a - features_b).tolist()
 
-    # Call the GPT API to generate a textual description
-    gpt_prompt = (
-        f"图片A的描述为:{caption_a}。图片B的描述为:{caption_b}。\n"
-        "请对两张图片的内容和潜在特征区别进行详细分析,并输出一个简洁但富有条理的总结。"
-    )
-    gpt_response = openai.Completion.create(
-        engine="text-davinci-003",
-        prompt=gpt_prompt,
-        max_tokens=150
+    # Call the DeepSeek API to generate the detailed analysis
+    client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
+    gpt_response = client.chat.completions.create(
+        model="deepseek-chat",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": f"图片A的描述为:{caption_a}。图片B的描述为:{caption_b}。\n请对两张图片的内容和潜在特征区别进行详细分析,并输出一个简洁但富有条理的总结。"}
+        ],
+        stream=False
     )
-    textual_analysis = gpt_response['choices'][0]['text'].strip()
+    textual_analysis = gpt_response.choices[0].message.content.strip()
 
     # Return the results
     return {
@@ -75,6 +72,8 @@ with gr.Blocks() as demo:
         with gr.Column():
             image_b = gr.Image(label="图片B", type="pil")  # use PIL type
 
+    api_key_input = gr.Textbox(label="API Key", placeholder="输入您的 DeepSeek API Key", type="password")
+
     analyze_button = gr.Button("分析图片")
     result_caption_a = gr.Textbox(label="图片A描述", interactive=False)
     result_caption_b = gr.Textbox(label="图片B描述", interactive=False)
@@ -83,13 +82,13 @@ with gr.Blocks() as demo:
     result_text_analysis = gr.Textbox(label="详细分析", interactive=False, lines=5)
 
     # Analysis logic
-    def process_analysis(img_a, img_b):
-        results = analyze_images(img_a, img_b)
+    def process_analysis(img_a, img_b, api_key):
+        results = analyze_images(img_a, img_b, api_key)
         return results["caption_a"], results["caption_b"], results["similarity"], results["latent_diff"], results["text_analysis"]
 
     analyze_button.click(
         fn=process_analysis,
-        inputs=[image_a, image_b],
+        inputs=[image_a, image_b, api_key_input],
        outputs=[result_caption_a, result_caption_b, result_similarity, result_latent_diff, result_text_analysis]
     )
 
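For reference, a minimal standalone sketch of the DeepSeek call path this commit wires into analyze_images, assuming the openai Python SDK v1+ and a valid key passed in at call time; the helper name summarize_difference and the captions/key in the usage comment are hypothetical placeholders, not part of the commit:

from openai import OpenAI

def summarize_difference(caption_a: str, caption_b: str, api_key: str) -> str:
    # Same setup as the commit: the OpenAI-compatible client pointed at DeepSeek's endpoint.
    client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"图片A的描述为:{caption_a}。图片B的描述为:{caption_b}。\n"
                                        "请对两张图片的内容和潜在特征区别进行详细分析,并输出一个简洁但富有条理的总结。"},
        ],
        stream=False,
    )
    # The chat completions response exposes the generated text at choices[0].message.content.
    return response.choices[0].message.content.strip()

# Hypothetical usage with two BLIP captions and a placeholder key:
# print(summarize_difference("a cat sitting on a sofa", "a dog lying on a sofa", "sk-..."))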