Spaces:

FengHou97
/

Cross-Domain-Recognition

Sleeping

FengHou97 committed on May 8

Commit

a6d4446

verified ·

1 Parent(s): 0dd7d52

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,7 +3,25 @@ import gradio as gr
 from transformers import pipeline
 import numpy as np
 from PIL import Image
 pipes = {
     "ViT/B-16": pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch16"),
@@ -35,7 +53,15 @@ images="festival.jpg"
 def shot(image, labels_text, model_name, hypothesis_template_prefix, hypothesis_template_suffix, domains_text):
     labels = [label.strip(" ") for label in labels_text.strip(" ").split(",")]
-    domains = [domain.strip(" ") for domain in domains_text.strip(" ").split(",")]
     hypothesis_template = hypothesis_template_prefix + ' ' + hypothesis_template_suffix.format(*domains)
     print(hypothesis_template)

 from transformers import pipeline
 import numpy as np
 from PIL import Image
+from dotenv import load_dotenv
+import google.generativeai as genai
+load_dotenv()  # python-dotenv: pull variables from a local .env file so GOOGLE_API can be read below
+GOOGLE_API_KEY = os.getenv("GOOGLE_API")  # None when GOOGLE_API is unset; NOTE(review): `import os` is not visible in this hunk - confirm it exists above
+genai.configure(api_key=GOOGLE_API_KEY)  # runs at import time, before any request is made
+model_vision = genai.GenerativeModel('gemini-pro-vision')  # module-level model handle used by gemini_response_vision
+def gemini_response_vision(input_texts, image):  # Send `image` (plus an optional text prompt) to `model_vision` and return the reply's `.text`.
+    try:
+        if input_texts != "":  # non-empty prompt: send prompt and image together as one list
+            response2 = model_vision.generate_content([input_texts, image])
+        else:  # empty-string prompt: send the image on its own
+            response2 = model_vision.generate_content(image)
+        return response2.text  # NOTE(review): presumably a single str; the caller in shot() unpacks it with *domains, which would split a str per character - confirm
+    except Exception as e:  # NOTE(review): catch-and-re-raise adds no handling; the caught exception is re-raised as-is
+        raise e
 pipes = {
     "ViT/B-16": pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch16"),
 def shot(image, labels_text, model_name, hypothesis_template_prefix, hypothesis_template_suffix, domains_text):
     labels = [label.strip(" ") for label in labels_text.strip(" ").split(",")]
+    if not domains_text == '':
+        domains = [domain.strip(" ") for domain in domains_text.strip(" ").split(",")]
+    else:
+        img = Image.open(image)
+        input_text = "Please describe the image"
+        domains = gemini_response_vision(input_texts=input_text, image=img)
+        print(domains)
     hypothesis_template = hypothesis_template_prefix + ' ' + hypothesis_template_suffix.format(*domains)
     print(hypothesis_template)