Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -67,28 +67,8 @@ def shot(image, labels_text, model_name, hypothesis_template_prefix, hypothesis_
|
|
67 |
if not domains_text == '':
|
68 |
domains = [domain.strip(" ") for domain in domains_text.strip(" ").split(",")]
|
69 |
else:
|
70 |
-
#img = Image.open(image)
|
71 |
-
#input_text = "Please describe the image from six dimensions, including weather (clear, sandstorm, foggy, rainy, snowy), angle (front, left, top), time (day, night), occlusion (no occlusion, light occlusion, partial occlusion, moderate occlusion, heavy occlusion), season (spring-summer, autumn, winter). Each dimension should be described in no more than 4 words and should match the image content. Please try to output from the options in the previous brackets. If there is no suitable result, output N/A."# Please also output a probability of your inference."# If there is no information in a certain dimension, you can directly output no information.
|
72 |
input_text = "You are an expert for domain knowledge analysis. Please describe the image from six domain shifts, including Weather (clear, sandstorm, foggy, rainy, snowy), Season (spring-summer, autumn, winter), Time (day, night), Angle (front, side, top) and Occlusion (no occlusion, light occlusion, partial occlusion, moderate occlusion, heavy occlusion). You are supposed to recognize each domain from the above domain shifts based on the image. Finally, you only need to output a list of domains like [clear, autumn, night, front, light occlusion]"
|
73 |
ans = gemini_response_vision(input_texts=input_text, image=image)
|
74 |
-
#IMAGE_PATH = './reasoning_xy.jpg'
|
75 |
-
# base64_image = encode_image('car.png')
|
76 |
-
# prompt = "Please describe the image from six dimensions, including weather (clear, sandstorm, foggy, rainy, snowy), angle (front, left, top), time (daytime, night), occlusion (no occlusion, light occlusion, partial occlusion, moderate occlusion, heavy occlusion), season (spring-summer, autumn, winter). Each dimension should be described in no more than 4 words and should match the image content. Please try to output from the options in the previous brackets. If there is no suitable result, output N/A."# Please also output a probability of your inference."# If there is no information in a certain dimension, you can directly output no information."
|
77 |
-
|
78 |
-
# response = client.chat.completions.create(
|
79 |
-
# model="gpt-4o",
|
80 |
-
# messages=[
|
81 |
-
# # {"role": "system", "content": "You are a helpful assistant that responds in Markdown. Help me with my math homework!"},
|
82 |
-
# {"role": "user", "content": [
|
83 |
-
# {"type": "text", "text": prompt},
|
84 |
-
# {"type": "image_url", "image_url": {
|
85 |
-
# "url": f"data:image/png;base64,{base64_image}"}
|
86 |
-
# }
|
87 |
-
# ]}
|
88 |
-
# ],
|
89 |
-
# temperature=0.0,
|
90 |
-
# )
|
91 |
-
# domains = response.choices[0].message.content
|
92 |
print(ans)
|
93 |
domains = [domain.strip(" ") for domain in ans.strip("[").strip("]").split(",")]
|
94 |
|
@@ -109,7 +89,7 @@ iface = gr.Interface(shot,
|
|
109 |
examples=[
|
110 |
#["festival.jpg", "lantern, firecracker, couplet", "ViT/B-16", "a photo of a {}", "in {} {} {} from {} with {}.", "clear, autumn, day, side, partial occlusion"],
|
111 |
["car.png", "car, bike, truck", "ViT/B-16", "a photo of a {}", "in {} {} {} from {} with {}.", ""]],
|
112 |
-
description="""<p>
|
113 |
Paper: <a href='https://arxiv.org/pdf/2403.02714'>https://arxiv.org/pdf/2403.02714</a> <br>
|
114 |
To begin with the demo, provide a picture (either upload manually, or select from the given examples) and add class labels one by one. Optionally, you can also add template as a prefix to the class labels. <br>""",
|
115 |
title="Cross-Domain Recognition")
|
|
|
67 |
if not domains_text == '':
|
68 |
domains = [domain.strip(" ") for domain in domains_text.strip(" ").split(",")]
|
69 |
else:
|
|
|
|
|
70 |
input_text = "You are an expert for domain knowledge analysis. Please describe the image from six domain shifts, including Weather (clear, sandstorm, foggy, rainy, snowy), Season (spring-summer, autumn, winter), Time (day, night), Angle (front, side, top) and Occlusion (no occlusion, light occlusion, partial occlusion, moderate occlusion, heavy occlusion). You are supposed to recognize each domain from the above domain shifts based on the image. Finally, you only need to output a list of domains like [clear, autumn, night, front, light occlusion]"
|
71 |
ans = gemini_response_vision(input_texts=input_text, image=image)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
print(ans)
|
73 |
domains = [domain.strip(" ") for domain in ans.strip("[").strip("]").split(",")]
|
74 |
|
|
|
89 |
examples=[
|
90 |
#["festival.jpg", "lantern, firecracker, couplet", "ViT/B-16", "a photo of a {}", "in {} {} {} from {} with {}.", "clear, autumn, day, side, partial occlusion"],
|
91 |
["car.png", "car, bike, truck", "ViT/B-16", "a photo of a {}", "in {} {} {} from {} with {}.", ""]],
|
92 |
+
description="""<p> <br><br>
|
93 |
Paper: <a href='https://arxiv.org/pdf/2403.02714'>https://arxiv.org/pdf/2403.02714</a> <br>
|
94 |
To begin with the demo, provide a picture (either upload manually, or select from the given examples) and add class labels one by one. Optionally, you can also add template as a prefix to the class labels. <br>""",
|
95 |
title="Cross-Domain Recognition")
|