Update app.py
app.py
CHANGED
@@ -58,9 +58,9 @@ def shot(image, labels_text, model_name, hypothesis_template_prefix, hypothesis_
     if not domains_text == '':
         domains = [domain.strip(" ") for domain in domains_text.strip(" ").split(",")]
     else:
-        img = Image.open(image)
+        #img = Image.open(image)
         input_text = "Please describe the image from six dimensions, including weather (clear, sandstorm, foggy, rainy, snowy), angle (front, left, top), time (daytime, night), occlusion (unoccluded, lightly-occluded, partially-occluded, moderately-occluded, heavily-occluded), season (spring-summer, autumn, winter). Each dimension should be described in no more than 4 words and should match the image content. Please try to output from the options in the previous brackets. If there is no suitable result, output N/A."# Please also output a probability of your inference."# If there is no information in a certain dimension, you can directly output no information.
-        domains = gemini_response_vision(input_texts=input_text, image=
+        domains = gemini_response_vision(input_texts=input_text, image=image)
     print(domains)

     hypothesis_template = hypothesis_template_prefix + ' ' + hypothesis_template_suffix.format(*domains)
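This hunk completes a call that was previously cut off mid-argument (`image=`) and comments out the now-unneeded `Image.open` line, passing the raw `image` input straight through. `gemini_response_vision` itself is defined elsewhere in app.py and is not part of this diff; for the fixed call to work, it has to take the prompt plus the image and return the dimension values as a sequence that `hypothesis_template_suffix.format(*domains)` can unpack. A minimal sketch of such a helper, assuming the google-generativeai SDK (the model name, key handling, and comma parsing below are assumptions, not the repo's code):

import google.generativeai as genai
from PIL import Image

genai.configure(api_key="YOUR_API_KEY")  # assumption: the real app loads its key from config/env

def gemini_response_vision(input_texts, image):
    # Sketch: ask Gemini to describe the image and return the dimension values as a list.
    model = genai.GenerativeModel("gemini-pro-vision")  # assumed model name
    img = Image.open(image) if isinstance(image, str) else image  # accept a file path or a PIL image
    response = model.generate_content([input_texts, img])
    # The prompt asks for short, comma-separable dimension values, so split on commas;
    # the caller unpacks the result via hypothesis_template_suffix.format(*domains).
    return [part.strip() for part in response.text.split(",")]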
@@ -76,7 +76,8 @@ def shot(image, labels_text, model_name, hypothesis_template_prefix, hypothesis_
 iface = gr.Interface(shot,
                      inputs,
                      "label",
-                     examples=[
+                     examples=[
+                     #["festival.jpg", "lantern, firecracker, couplet", "ViT/B-16", "a photo of a {}", "in {} {} {} from {} with {}.", "clear, autumn, day, side, light occlusion"],
                      ["car.png", "car, bike, truck", "ViT/B-16", "a photo of a {}", "in {} {} {} from {} with {}.", ""]],
                      description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
                      Paper: <a href='https://arxiv.org/pdf/2403.02714'>https://arxiv.org/pdf/2403.02714</a> <br>
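The newly added (commented-out) festival row shows what a hand-written domains_text looks like. Tracing it through the parsing branch and the template line from the first hunk (a worked illustration, not code from the repo):

hypothesis_template_prefix = "a photo of a {}"
hypothesis_template_suffix = "in {} {} {} from {} with {}."
domains_text = "clear, autumn, day, side, light occlusion"

# Same parsing as the `if` branch in the first hunk.
domains = [domain.strip(" ") for domain in domains_text.strip(" ").split(",")]

# .format(*domains) fills the suffix's five placeholders; the prefix's `{}`
# is left intact and is substituted with each class label later on.
hypothesis_template = hypothesis_template_prefix + ' ' + hypothesis_template_suffix.format(*domains)
print(hypothesis_template)
# a photo of a {} in clear autumn day from side with light occlusion.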
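For context on the examples rows: gr.Interface matches each example row to the input components by position, so every row must carry six values. The `inputs` list is defined outside this diff; a plausible shape, inferred from shot's signature in the hunk header (the component choices are guesses, not the repo's code):

import gradio as gr

# Hypothetical reconstruction: each six-element example row maps onto these in order.
inputs = [
    gr.Image(type="filepath", label="image"),        # "car.png"
    gr.Textbox(label="labels_text"),                 # "car, bike, truck"
    gr.Dropdown(["ViT/B-16"], label="model_name"),   # "ViT/B-16"
    gr.Textbox(label="hypothesis_template_prefix"),  # "a photo of a {}"
    gr.Textbox(label="hypothesis_template_suffix"),  # "in {} {} {} from {} with {}."
    gr.Textbox(label="domains_text"),                # "" leaves the field empty
]

Under this reading, the empty sixth field in the car.png row is what routes the request through the Gemini branch in the first hunk, while the commented-out festival row supplies its domains by hand.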