CelagenexResearch committed (verified)
Commit 430248b · Parent: 61d0a00

Update app.py

Files changed (1)
  1. app.py +120 -121
app.py CHANGED
@@ -1,189 +1,188 @@
  # app.py

  import gradio as gr
  from PIL import Image
  import torch
  import numpy as np
  import cv2
- from transformers import CLIPProcessor, CLIPModel, AutoProcessor, AutoModelForImageClassification

- # Device setup
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- # Load CLIP for breed, age, basic health
- clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to(device)
- clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

- # Load MedGemma via Hugging Face Transformers
- # (no special pip package needed; uses AutoModel APIs)
- medgemma_processor = AutoProcessor.from_pretrained("google/medgemma-v1")
- medgemma_model = AutoModelForImageClassification.from_pretrained("google/medgemma-v1").to(device)

- # Stanford Dogs breeds & lifespans (abbreviated here; include full lists)
- STANFORD_BREEDS = ["afghan hound", "beagle", "golden retriever", "yorkshire terrier"] # etc.
  BREED_LIFESPAN = {
      "afghan hound": 11.1,
      "beagle": 12.5,
      "golden retriever": 13.2,
-     "yorkshire terrier": 13.3,
-     # etc.
  }

  QUESTIONNAIRE = [
      {"domain": "Mobility", "questions": [
-         "Does your dog have difficulty rising from lying down?",
-         "Does your dog hesitate before jumping up?"
      ]},
      {"domain": "Energy", "questions": [
-         "Does your dog tire quickly on walks?",
-         "Has your dog’s activity level decreased recently?"
      ]},
-     {"domain": "Physical Health", "questions": [
-         "Does your dog scratch or lick skin frequently?",
-         "Any noticeable changes in appetite or weight?"
      ]},
      {"domain": "Cognitive", "questions": [
-         "Does your dog get lost in familiar rooms?",
-         "Does your dog stare blankly at walls/windows?"
      ]},
      {"domain": "Social", "questions": [
-         "Has your dog’s interest in play declined?",
-         "Does your dog avoid interaction with family?"
      ]}
  ]
-
- SCALE = ["0", "1", "2", "3", "4", "5"]


- def predict_biological_age(image: Image.Image, breed: str) -> int:
      avg = BREED_LIFESPAN.get(breed.lower(), 12)
-     prompts = [f"a {age}-year-old {breed}" for age in range(1, int(avg * 2) + 1)]
-     inputs = clip_processor(text=prompts, images=image, return_tensors="pt", padding=True).to(device)
      with torch.no_grad():
-         logits = clip_model(**inputs).logits_per_image.softmax(dim=1)[0].cpu().numpy()
-     return int(np.argmax(logits) + 1)
-

- def analyze_medical_image(image: Image.Image):
-     inputs = medgemma_processor(images=image, return_tensors="pt").to(device)
      with torch.no_grad():
-         outputs = medgemma_model(**inputs)
-     probs = outputs.logits.softmax(dim=1)[0].cpu().numpy()
-     label = medgemma_model.config.id2label[np.argmax(probs)]
-     conf = float(np.max(probs))
      return label, conf

-
- def classify_breed_and_health(image: Image.Image, user_breed=None):
-     inputs = clip_processor(images=image, return_tensors="pt").to(device)
      with torch.no_grad():
-         img_feats = clip_model.get_image_features(**inputs)
-     texts = [f"a photo of a {b}" for b in STANFORD_BREEDS]
-     t_in = clip_processor(text=texts, return_tensors="pt", padding=True).to(device)
      with torch.no_grad():
-         text_feats = clip_model.get_text_features(**t_in)
-     sims = (img_feats @ text_feats.T).softmax(dim=-1)[0].cpu().numpy()
-     idx = sims.argmax()
-     breed = user_breed or STANFORD_BREEDS[idx]
      breed_conf = float(sims[idx])
-
      aspects = {
-         "Coat": ("shiny healthy coat", "dull patchy fur"),
-         "Eyes": ("bright clear eyes", "cloudy milky eyes"),
-         "Body": ("ideal muscle tone", "visible ribs or bones"),
-         "Teeth": ("clean white teeth", "yellow stained teeth")
      }
      health = {}
-     for name, (pos, neg) in aspects.items():
-         txt = clip_processor(text=[pos, neg], return_tensors="pt", padding=True).to(device)
          with torch.no_grad():
-             tf = clip_model.get_text_features(**txt)
-         sim = (img_feats @ tf.T).softmax(dim=-1)[0].cpu().numpy()
-         choice = pos if sim[0] > sim[1] else neg
-         health[name] = {"assessment": choice, "confidence": float(max(sim))}
      return breed, breed_conf, health

-
- def analyze_video_health(video_path: str):
      cap = cv2.VideoCapture(video_path)
      fps = cap.get(cv2.CAP_PROP_FPS) or 24
      total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-     duration = total / fps
-     indices = np.linspace(0, total - 1, num=10, dtype=int)
-     scores = []
      for i in indices:
-         cap.set(cv2.CAP_PROP_POS_FRAMES, i)
-         ret, frame = cap.read()
-         if not ret:
-             break
-         img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-         _, conf = analyze_medical_image(img)
          scores.append(conf)
      cap.release()
-     return {"duration_sec": round(duration, 1), "avg_gait_confidence": float(np.mean(scores))}

-
- def compute_questionnaire_score(answers: list):
-     scores = {}
-     idx = 0
      for sec in QUESTIONNAIRE:
-         n = len(sec["questions"])
-         vals = list(map(int, answers[idx: idx + n]))
-         idx += n
-         scores[sec["domain"]] = round(sum(vals) / n, 2)
-     return scores
-

  with gr.Blocks(title="🐶 Dog Health & Age Analyzer") as demo:
-     gr.Markdown("## Upload Image or Video, or Record a Short Clip")

-     with gr.Tab("Image Analysis"):
-         img_in = gr.Image(type="pil", label="Upload Dog Image")
-         breed_in = gr.Textbox(label="(Optional) Override Breed")
-         age_in = gr.Number(label="Chronological Age (years)", precision=1)
          btn = gr.Button("Analyze")
-         out = gr.Markdown()
-
-         def run_image(img, override, chrono):
-             breed, b_conf, health = classify_breed_and_health(img, override)
-             med_label, med_conf = analyze_medical_image(img)
-             bio_age = predict_biological_age(img, breed)
-             pace = round(bio_age / chrono, 2) if chrono else None
-             rpt = f"**Breed:** {breed} ({b_conf:.1%}) \n"
-             rpt += f"**MedGemma Finding:** {med_label} ({med_conf:.1%}) \n\n"
-             rpt += f"**Biological Age:** {bio_age} yrs \n"
-             rpt += f"**Chronological Age:** {chrono or 'N/A'} yrs \n"
-             if pace:
-                 rpt += f"**Pace of Aging:** {pace}× \n\n"
-             rpt += "### Health Aspects\n"
-             for k, v in health.items():
-                 rpt += f"- **{k}:** {v['assessment']} ({v['confidence']:.1%})\n"
              return rpt

-         btn.click(run_image, inputs=[img_in, breed_in, age_in], outputs=out)
-
-     with gr.Tab("Video Analysis"):
-         vid_in = gr.Video(label="Upload or Record Video (10–30s)")
-         btn2 = gr.Button("Analyze Video")
-         vid_out = gr.JSON()
-         btn2.click(lambda v: analyze_video_health(v), inputs=vid_in, outputs=vid_out)

      with gr.Tab("Questionnaire"):
-         widgets = []
          for sec in QUESTIONNAIRE:
              gr.Markdown(f"### {sec['domain']}")
              for q in sec["questions"]:
-                 w = gr.Radio(choices=SCALE, label=q)
                  widgets.append(w)
-         btn3 = gr.Button("Compute Score")
-         out_q = gr.JSON()
-         btn3.click(compute_questionnaire_score, inputs=widgets, outputs=out_q)

      with gr.Tab("About"):
-         gr.Markdown("""
-         **MedGemma v1**: Veterinary medical image analysis
-         **Video Module**: Gait & posture confidence score
-         **Questionnaire**: Healthspan domains (Mobility, Energy, Physical, Cognitive, Social)
-         """)

  demo.launch()
 
  # app.py

+ import os
  import gradio as gr
  from PIL import Image
  import torch
  import numpy as np
  import cv2
+ from transformers import (
+     CLIPProcessor, CLIPModel,
+     AutoProcessor, AutoModelForImageClassification
+ )

+ # —————————————————————————————
+ # CONFIG: set your HF token here or via env var HF_TOKEN
+ HF_TOKEN = os.getenv("HF_TOKEN", "<YOUR_HF_TOKEN_HERE>")

+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

+ # 1. CLIP for breed, age, basic health
+ clip_model = CLIPModel.from_pretrained(
+     "openai/clip-vit-base-patch16",
+     use_auth_token=HF_TOKEN
+ ).to(device)
+ clip_processor = CLIPProcessor.from_pretrained(
+     "openai/clip-vit-base-patch16",
+     use_auth_token=HF_TOKEN
+ )
+
+ # 2. MedGemma 4B multimodal (public but gated) for medical findings
+ medgemma_processor = AutoProcessor.from_pretrained(
+     "google/medgemma-4b-it",
+     use_auth_token=HF_TOKEN
+ )
+ medgemma_model = AutoModelForImageClassification.from_pretrained(
+     "google/medgemma-4b-it",
+     use_auth_token=HF_TOKEN
+ ).to(device)
+
+ # 3. Stanford Dogs & lifespans (abbreviated; expand as needed)
+ STANFORD_BREEDS = [
+     "afghan hound", "beagle", "golden retriever", "yorkshire terrier"
+ ]
  BREED_LIFESPAN = {
      "afghan hound": 11.1,
      "beagle": 12.5,
      "golden retriever": 13.2,
+     "yorkshire terrier": 13.3
  }

+ # 4. Questionnaire
  QUESTIONNAIRE = [
      {"domain": "Mobility", "questions": [
+         "Difficulty rising from lying down?",
+         "Hesitate before jumping up?"
      ]},
      {"domain": "Energy", "questions": [
+         "Tire quickly on walks?",
+         "Activity level decreased?"
      ]},
+     {"domain": "Physical", "questions": [
+         "Scratch or lick skin frequently?",
+         "Changes in appetite or weight?"
      ]},
      {"domain": "Cognitive", "questions": [
+         "Get lost in familiar rooms?",
+         "Stare blankly at walls?"
      ]},
      {"domain": "Social", "questions": [
+         "Interest in play declined?",
+         "Avoid interaction with family?"
      ]}
  ]
+ SCALE = ["0","1","2","3","4","5"]


+ def predict_biological_age(img: Image.Image, breed: str) -> int:
      avg = BREED_LIFESPAN.get(breed.lower(), 12)
+     prompts = [f"a {age}-year-old {breed}" for age in range(1, int(avg*2)+1)]
+     inputs = clip_processor(text=prompts, images=img, return_tensors="pt", padding=True).to(device)
      with torch.no_grad():
+         probs = clip_model(**inputs).logits_per_image.softmax(1)[0].cpu().numpy()
+     return int(np.argmax(probs)+1)

+ def analyze_medical_image(img: Image.Image):
+     inputs = medgemma_processor(images=img, return_tensors="pt").to(device)
      with torch.no_grad():
+         logits = medgemma_model(**inputs).logits.softmax(-1)[0].cpu().numpy()
+     label = medgemma_model.config.id2label[int(np.argmax(logits))]
+     conf = float(np.max(logits))
      return label, conf

+ def classify_breed_and_health(img: Image.Image, override=None):
+     inp = clip_processor(images=img, return_tensors="pt").to(device)
      with torch.no_grad():
+         feats = clip_model.get_image_features(**inp)
+     text_prompts = [f"a photo of a {b}" for b in STANFORD_BREEDS]
+     ti = clip_processor(text=text_prompts, return_tensors="pt", padding=True).to(device)
      with torch.no_grad():
+         tf = clip_model.get_text_features(**ti)
+     sims = (feats @ tf.T).softmax(-1)[0].cpu().numpy()
+     idx = int(np.argmax(sims))
+     breed = override or STANFORD_BREEDS[idx]
      breed_conf = float(sims[idx])
      aspects = {
+         "Coat": ("shiny healthy coat","dull patchy fur"),
+         "Eyes": ("bright clear eyes","cloudy milky eyes"),
+         "Body": ("ideal muscle tone","visible ribs or bones"),
+         "Teeth":("clean white teeth","yellow stained teeth")
      }
      health = {}
+     for name,(p,n) in aspects.items():
+         ti = clip_processor(text=[p,n], return_tensors="pt", padding=True).to(device)
          with torch.no_grad():
+             tf2 = clip_model.get_text_features(**ti)
+         sim2 = (feats @ tf2.T).softmax(-1)[0].cpu().numpy()
+         choice = p if sim2[0]>sim2[1] else n
+         health[name] = {"assessment":choice,"confidence":float(max(sim2))}
      return breed, breed_conf, health

+ def analyze_video(video_path):
      cap = cv2.VideoCapture(video_path)
      fps = cap.get(cv2.CAP_PROP_FPS) or 24
      total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+     indices = np.linspace(0,total-1,10,dtype=int)
+     scores=[]
      for i in indices:
+         cap.set(cv2.CAP_PROP_POS_FRAMES,i)
+         ret,frame=cap.read()
+         if not ret: break
+         img=Image.fromarray(cv2.cvtColor(frame,cv2.COLOR_BGR2RGB))
+         _,conf=analyze_medical_image(img)
          scores.append(conf)
      cap.release()
+     return {"duration_sec":round(total/fps,1),"avg_gait_conf":float(np.mean(scores))}

+ def compute_q_score(answers):
+     out={}
+     idx=0
      for sec in QUESTIONNAIRE:
+         n=len(sec["questions"])
+         vals=list(map(int,answers[idx:idx+n]))
+         idx+=n
+         out[sec["domain"]]=round(sum(vals)/n,2)
+     return out

  with gr.Blocks(title="🐶 Dog Health & Age Analyzer") as demo:
+     gr.Markdown("## Upload an Image or Video (10–30 s) or Record Live")

+     with gr.Tab("Image"):
+         img = gr.Image(type="pil")
+         br = gr.Textbox(label="Override Breed")
+         ca = gr.Number(label="Chronological Age")
          btn = gr.Button("Analyze")
+         md = gr.Markdown()
+         def run_i(i,b,o):
+             breed,bc,h=classify_breed_and_health(i,o)
+             ml,mc=analyze_medical_image(i)
+             ba=predict_biological_age(i,breed)
+             pace = f"{ba/o:.2f}×" if o else "N/A"
+             rpt = f"**Breed:** {breed} ({bc:.1%})\n\n"
+             rpt+=f"**MedGemma Finding:** {ml} ({mc:.1%})\n\n"
+             rpt+=f"**Bio Age:** {ba} yrs | **Chrono:** {o or 'N/A'} yrs | **Pace:** {pace}\n\n"
+             rpt+="### Health\n"+ "\n".join(f"- **{k}:** {v['assessment']} ({v['confidence']:.1%})" for k,v in h.items())
              return rpt
+         btn.click(run_i, inputs=[img,br,ca], outputs=md)

+     with gr.Tab("Video"):
+         vid=gr.Video()
+         b2=gr.Button("Analyze Video")
+         out2=gr.JSON()
+         b2.click(analyze_video, inputs=vid, outputs=out2)

      with gr.Tab("Questionnaire"):
+         widgets=[]
          for sec in QUESTIONNAIRE:
              gr.Markdown(f"### {sec['domain']}")
              for q in sec["questions"]:
+                 w = gr.Radio(SCALE, label=q)
                  widgets.append(w)
+         b3=gr.Button("Score")
+         o3=gr.JSON()
+         b3.click(compute_q_score, inputs=widgets, outputs=o3)

      with gr.Tab("About"):
+         gr.Markdown("**MedGemma-4B-IT** requires HF auth; set HF_TOKEN. Uses CLIP & SigLIP for medical insight.")

  demo.launch()
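
For reference, a minimal launch sketch (not part of the commit): the updated app reads the gated-model token from the HF_TOKEN environment variable, falling back to the placeholder in its CONFIG block, so a token with access to google/medgemma-4b-it must be in the environment before the Space or a local run starts. The helper below is hypothetical and assumes the file above is saved as app.py next to it.

# run_local.py: hypothetical launcher for the updated app.py shown above
import os
import subprocess

# Export a real token first (placeholder value shown); both huggingface_hub
# and the app's os.getenv("HF_TOKEN", ...) read it from the environment.
os.environ.setdefault("HF_TOKEN", "hf_xxxxxxxx")

# Start the Gradio app as a child process.
subprocess.run(["python", "app.py"], check=True)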