Spaces:

CelagenexResearch
/

Video

Sleeping

App Files Files Community

CelagenexResearch committed on Jul 27

Commit

8ca6ff4

verified ·

1 Parent(s): e841214

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -71

app.py CHANGED Viewed

@@ -1,36 +1,34 @@
 import gradio as gr
 from PIL import Image
 import torch
 import numpy as np
 import cv2
-from transformers import CLIPProcessor, CLIPModel
-# Hypot MedGemma imports (ensure you have access and HF token)
-from medgem import MedGemmaProcessor, MedGemmaForImageClassification
 # Device setup
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Load CLIP model for breed, age, and basic health aspects
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to(device)
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
-# Load MedGemma for advanced medical insights
-medgemma_processor = MedGemmaProcessor.from_pretrained("google/medgemma-v1").to(device)
-medgemma_model = MedGemmaForImageClassification.from_pretrained("google/medgemma-v1").to(device)
-# Stanford Dogs breeds list & lifespan dict
-STANFORD_BREEDS = [
-    "afghan hound", "african hunting dog", "airedale", "american staffordshire terrier",
-    # ... (full list from earlier)
-    "wire-haired fox terrier", "yorkshire terrier"
-]
 BREED_LIFESPAN = {
-    "afghan hound": 11.1, "african hunting dog": 10.5, "airedale": 11.5,
-    # ... (full dict from earlier)
-    "yorkshire terrier": 13.3
 }
-# Healthspan questionnaire definitions
 QUESTIONNAIRE = [
     {"domain": "Mobility", "questions": [
         "Does your dog have difficulty rising from lying down?",
@@ -54,17 +52,18 @@ QUESTIONNAIRE = [
     ]}
 ]
-# Unified scoring map for questionnaire (0–5 scale)
 SCALE = ["0", "1", "2", "3", "4", "5"]
 def predict_biological_age(image: Image.Image, breed: str) -> int:
     avg = BREED_LIFESPAN.get(breed.lower(), 12)
-    prompts = [f"a photo of a {age}-year-old {breed}" for age in range(1, int(avg * 2) + 1)]
     inputs = clip_processor(text=prompts, images=image, return_tensors="pt", padding=True).to(device)
     with torch.no_grad():
         logits = clip_model(**inputs).logits_per_image.softmax(dim=1)[0].cpu().numpy()
     return int(np.argmax(logits) + 1)
 def analyze_medical_image(image: Image.Image):
     inputs = medgemma_processor(images=image, return_tensors="pt").to(device)
     with torch.no_grad():
@@ -74,13 +73,11 @@ def analyze_medical_image(image: Image.Image):
     conf = float(np.max(probs))
     return label, conf
 def classify_breed_and_health(image: Image.Image, user_breed=None):
-    # Image features
     inputs = clip_processor(images=image, return_tensors="pt").to(device)
     with torch.no_grad():
         img_feats = clip_model.get_image_features(**inputs)
-    # Breed classification
     texts = [f"a photo of a {b}" for b in STANFORD_BREEDS]
     t_in = clip_processor(text=texts, return_tensors="pt", padding=True).to(device)
     with torch.no_grad():
@@ -90,11 +87,10 @@ def classify_breed_and_health(image: Image.Image, user_breed=None):
     breed = user_breed or STANFORD_BREEDS[idx]
     breed_conf = float(sims[idx])
-    # Basic health aspects via CLIP
     aspects = {
         "Coat": ("shiny healthy coat", "dull patchy fur"),
         "Eyes": ("bright clear eyes", "cloudy milky eyes"),
-        "Body": ("ideal muscle tone", "visible ribs or hip bones"),
         "Teeth": ("clean white teeth", "yellow stained teeth")
     }
     health = {}
@@ -107,87 +103,81 @@ def classify_breed_and_health(image: Image.Image, user_breed=None):
         health[name] = {"assessment": choice, "confidence": float(max(sim))}
     return breed, breed_conf, health
 def analyze_video_health(video_path: str):
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS) or 24
-    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    duration = total_frames / fps
-    # sample 10 frames evenly
-    indices = np.linspace(0, total_frames - 1, num=10, dtype=int)
-    gait_scores = []
     for i in indices:
         cap.set(cv2.CAP_PROP_POS_FRAMES, i)
         ret, frame = cap.read()
-        if not ret: break
         img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         _, conf = analyze_medical_image(img)
-        gait_scores.append(conf)
     cap.release()
-    avg_conf = float(np.mean(gait_scores)) if gait_scores else 0.0
-    return {"duration_sec": round(duration, 1), "avg_gait_confidence": avg_conf}
 def compute_questionnaire_score(answers: list):
-    # answers in order of QUESTIONNAIRE domains × questions
     scores = {}
     idx = 0
-    for section in QUESTIONNAIRE:
-        vals = list(map(int, answers[idx: idx + len(section["questions"])]))
-        idx += len(section["questions"])
-        scores[section["domain"]] = round(sum(vals) / len(vals), 2)
     return scores
-# Build Gradio interface
 with gr.Blocks(title="🐶 Dog Health & Age Analyzer") as demo:
-    gr.Markdown("## Upload an Image or Video, or Record a Short Clip")
     with gr.Tab("Image Analysis"):
         img_in = gr.Image(type="pil", label="Upload Dog Image")
         breed_in = gr.Textbox(label="(Optional) Override Breed")
         age_in = gr.Number(label="Chronological Age (years)", precision=1)
-        btn_img = gr.Button("Analyze Image")
-        out_md = gr.Markdown()
-        def run_image(img, breed_override, chrono_age):
-            breed, b_conf, health = classify_breed_and_health(img, breed_override)
             med_label, med_conf = analyze_medical_image(img)
             bio_age = predict_biological_age(img, breed)
-            pace = round(bio_age / chrono_age, 2) if chrono_age else None
-            report = f"**Breed:** {breed} ({b_conf:.1%})  \n"
-            report += f"**MedGemma Finding:** {med_label} ({med_conf:.1%})  \n\n"
-            report += f"**Biological Age:** {bio_age} yrs  \n"
-            report += f"**Chronological Age:** {chrono_age or 'N/A'} yrs  \n"
             if pace:
-                report += f"**Pace of Aging:** {pace}×  \n\n"
-            report += "### Health Aspects\n"
             for k, v in health.items():
-                report += f"- **{k}:** {v['assessment']} ({v['confidence']:.1%})\n"
-            return report
-        btn_img.click(run_image, inputs=[img_in, breed_in, age_in], outputs=out_md)
     with gr.Tab("Video Analysis"):
-        video_in = gr.Video(label="Upload or Record Video (10–30s)")
-        btn_vid = gr.Button("Analyze Video")
         vid_out = gr.JSON()
-        btn_vid.click(lambda vid: analyze_video_health(vid), inputs=video_in, outputs=vid_out)
-    with gr.Tab("Healthspan Questionnaire"):
         widgets = []
-        for section in QUESTIONNAIRE:
-            gr.Markdown(f"### {section['domain']}")
-            for q in section["questions"]:
                 w = gr.Radio(choices=SCALE, label=q)
                 widgets.append(w)
-        btn_q = gr.Button("Compute Score")
-        q_out = gr.JSON()
-        btn_q.click(
-            fn=compute_questionnaire_score,
-            inputs=widgets,
-            outputs=q_out
-        )
     with gr.Tab("About"):
         gr.Markdown("""

+# app.py
 import gradio as gr
 from PIL import Image
 import torch
 import numpy as np
 import cv2
+from transformers import CLIPProcessor, CLIPModel AutoProcessor, AutoModelForImageClassification
 # Device setup
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load CLIP for breed, age, basic health
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to(device)
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
+# Load MedGemma via Hugging Face Transformers
+# (no special pip package needed; uses AutoModel APIs)
+medgemma_processor = AutoProcessor.from_pretrained("google/medgemma-v1")
+medgemma_model = AutoModelForImageClassification.from_pretrained("google/medgemma-v1").to(device)
+# Stanford Dogs breeds & lifespans (abbreviated here; include full lists)
+STANFORD_BREEDS = ["afghan hound", "beagle", "golden retriever", "yorkshire terrier"]  # etc.
 BREED_LIFESPAN = {
+    "afghan hound": 11.1,
+    "beagle": 12.5,
+    "golden retriever": 13.2,
+    "yorkshire terrier": 13.3,
+    # etc.
 }
 QUESTIONNAIRE = [
     {"domain": "Mobility", "questions": [
         "Does your dog have difficulty rising from lying down?",
     ]}
 ]
 SCALE = ["0", "1", "2", "3", "4", "5"]
 def predict_biological_age(image: Image.Image, breed: str) -> int:
     avg = BREED_LIFESPAN.get(breed.lower(), 12)
+    prompts = [f"a {age}-year-old {breed}" for age in range(1, int(avg * 2) + 1)]
     inputs = clip_processor(text=prompts, images=image, return_tensors="pt", padding=True).to(device)
     with torch.no_grad():
         logits = clip_model(**inputs).logits_per_image.softmax(dim=1)[0].cpu().numpy()
     return int(np.argmax(logits) + 1)
 def analyze_medical_image(image: Image.Image):
     inputs = medgemma_processor(images=image, return_tensors="pt").to(device)
     with torch.no_grad():
     conf = float(np.max(probs))
     return label, conf
 def classify_breed_and_health(image: Image.Image, user_breed=None):
     inputs = clip_processor(images=image, return_tensors="pt").to(device)
     with torch.no_grad():
         img_feats = clip_model.get_image_features(**inputs)
     texts = [f"a photo of a {b}" for b in STANFORD_BREEDS]
     t_in = clip_processor(text=texts, return_tensors="pt", padding=True).to(device)
     with torch.no_grad():
     breed = user_breed or STANFORD_BREEDS[idx]
     breed_conf = float(sims[idx])
     aspects = {
         "Coat": ("shiny healthy coat", "dull patchy fur"),
         "Eyes": ("bright clear eyes", "cloudy milky eyes"),
+        "Body": ("ideal muscle tone", "visible ribs or bones"),
         "Teeth": ("clean white teeth", "yellow stained teeth")
     }
     health = {}
         health[name] = {"assessment": choice, "confidence": float(max(sim))}
     return breed, breed_conf, health
 def analyze_video_health(video_path: str):
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS) or 24
+    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    duration = total / fps
+    indices = np.linspace(0, total - 1, num=10, dtype=int)
+    scores = []
     for i in indices:
         cap.set(cv2.CAP_PROP_POS_FRAMES, i)
         ret, frame = cap.read()
+        if not ret:
+            break
         img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         _, conf = analyze_medical_image(img)
+        scores.append(conf)
     cap.release()
+    return {"duration_sec": round(duration, 1), "avg_gait_confidence": float(np.mean(scores))}
 def compute_questionnaire_score(answers: list):
     scores = {}
     idx = 0
+    for sec in QUESTIONNAIRE:
+        n = len(sec["questions"])
+        vals = list(map(int, answers[idx: idx + n]))
+        idx += n
+        scores[sec["domain"]] = round(sum(vals) / n, 2)
     return scores
 with gr.Blocks(title="🐶 Dog Health & Age Analyzer") as demo:
+    gr.Markdown("## Upload Image or Video, or Record a Short Clip")
     with gr.Tab("Image Analysis"):
         img_in = gr.Image(type="pil", label="Upload Dog Image")
         breed_in = gr.Textbox(label="(Optional) Override Breed")
         age_in = gr.Number(label="Chronological Age (years)", precision=1)
+        btn = gr.Button("Analyze")
+        out = gr.Markdown()
+        def run_image(img, override, chrono):
+            breed, b_conf, health = classify_breed_and_health(img, override)
             med_label, med_conf = analyze_medical_image(img)
             bio_age = predict_biological_age(img, breed)
+            pace = round(bio_age / chrono, 2) if chrono else None
+            rpt = f"**Breed:** {breed} ({b_conf:.1%})  \n"
+            rpt += f"**MedGemma Finding:** {med_label} ({med_conf:.1%})  \n\n"
+            rpt += f"**Biological Age:** {bio_age} yrs  \n"
+            rpt += f"**Chronological Age:** {chrono or 'N/A'} yrs  \n"
             if pace:
+                rpt += f"**Pace of Aging:** {pace}×  \n\n"
+            rpt += "### Health Aspects\n"
             for k, v in health.items():
+                rpt += f"- **{k}:** {v['assessment']} ({v['confidence']:.1%})\n"
+            return rpt
+        btn.click(run_image, inputs=[img_in, breed_in, age_in], outputs=out)
     with gr.Tab("Video Analysis"):
+        vid_in = gr.Video(label="Upload or Record Video (10–30s)")
+        btn2 = gr.Button("Analyze Video")
         vid_out = gr.JSON()
+        btn2.click(lambda v: analyze_video_health(v), inputs=vid_in, outputs=vid_out)
+    with gr.Tab("Questionnaire"):
         widgets = []
+        for sec in QUESTIONNAIRE:
+            gr.Markdown(f"### {sec['domain']}")
+            for q in sec["questions"]:
                 w = gr.Radio(choices=SCALE, label=q)
                 widgets.append(w)
+        btn3 = gr.Button("Compute Score")
+        out_q = gr.JSON()
+        btn3.click(compute_questionnaire_score, inputs=widgets, outputs=out_q)
     with gr.Tab("About"):
         gr.Markdown("""