CelagenexResearch committed (verified)
Commit 430248b · Parent: 61d0a00

Update app.py

Files changed (1)
  1. app.py +120 -121
app.py CHANGED
@@ -1,189 +1,188 @@
  # app.py

  import gradio as gr
  from PIL import Image
  import torch
  import numpy as np
  import cv2
- from transformers import CLIPProcessor, CLIPModel, AutoProcessor, AutoModelForImageClassification

- # Device setup
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- # Load CLIP for breed, age, basic health
- clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to(device)
- clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

- # Load MedGemma via Hugging Face Transformers
- # (no special pip package needed; uses AutoModel APIs)
- medgemma_processor = AutoProcessor.from_pretrained("google/medgemma-v1")
- medgemma_model = AutoModelForImageClassification.from_pretrained("google/medgemma-v1").to(device)

- # Stanford Dogs breeds & lifespans (abbreviated here; include full lists)
- STANFORD_BREEDS = ["afghan hound", "beagle", "golden retriever", "yorkshire terrier"] # etc.
  BREED_LIFESPAN = {
      "afghan hound": 11.1,
      "beagle": 12.5,
      "golden retriever": 13.2,
-     "yorkshire terrier": 13.3,
-     # etc.
  }

  QUESTIONNAIRE = [
      {"domain": "Mobility", "questions": [
-         "Does your dog have difficulty rising from lying down?",
-         "Does your dog hesitate before jumping up?"
      ]},
      {"domain": "Energy", "questions": [
-         "Does your dog tire quickly on walks?",
-         "Has your dog’s activity level decreased recently?"
      ]},
-     {"domain": "Physical Health", "questions": [
-         "Does your dog scratch or lick skin frequently?",
-         "Any noticeable changes in appetite or weight?"
      ]},
      {"domain": "Cognitive", "questions": [
-         "Does your dog get lost in familiar rooms?",
-         "Does your dog stare blankly at walls/windows?"
      ]},
      {"domain": "Social", "questions": [
-         "Has your dog’s interest in play declined?",
-         "Does your dog avoid interaction with family?"
      ]}
  ]
-
- SCALE = ["0", "1", "2", "3", "4", "5"]


- def predict_biological_age(image: Image.Image, breed: str) -> int:
      avg = BREED_LIFESPAN.get(breed.lower(), 12)
-     prompts = [f"a {age}-year-old {breed}" for age in range(1, int(avg * 2) + 1)]
-     inputs = clip_processor(text=prompts, images=image, return_tensors="pt", padding=True).to(device)
      with torch.no_grad():
-         logits = clip_model(**inputs).logits_per_image.softmax(dim=1)[0].cpu().numpy()
-     return int(np.argmax(logits) + 1)
-

- def analyze_medical_image(image: Image.Image):
-     inputs = medgemma_processor(images=image, return_tensors="pt").to(device)
      with torch.no_grad():
-         outputs = medgemma_model(**inputs)
-     probs = outputs.logits.softmax(dim=1)[0].cpu().numpy()
-     label = medgemma_model.config.id2label[np.argmax(probs)]
-     conf = float(np.max(probs))
      return label, conf

-
- def classify_breed_and_health(image: Image.Image, user_breed=None):
-     inputs = clip_processor(images=image, return_tensors="pt").to(device)
      with torch.no_grad():
-         img_feats = clip_model.get_image_features(**inputs)
-     texts = [f"a photo of a {b}" for b in STANFORD_BREEDS]
-     t_in = clip_processor(text=texts, return_tensors="pt", padding=True).to(device)
      with torch.no_grad():
-         text_feats = clip_model.get_text_features(**t_in)
-     sims = (img_feats @ text_feats.T).softmax(dim=-1)[0].cpu().numpy()
-     idx = sims.argmax()
-     breed = user_breed or STANFORD_BREEDS[idx]
      breed_conf = float(sims[idx])
-
      aspects = {
-         "Coat": ("shiny healthy coat", "dull patchy fur"),
-         "Eyes": ("bright clear eyes", "cloudy milky eyes"),
-         "Body": ("ideal muscle tone", "visible ribs or bones"),
-         "Teeth": ("clean white teeth", "yellow stained teeth")
      }
      health = {}
-     for name, (pos, neg) in aspects.items():
-         txt = clip_processor(text=[pos, neg], return_tensors="pt", padding=True).to(device)
          with torch.no_grad():
-             tf = clip_model.get_text_features(**txt)
-         sim = (img_feats @ tf.T).softmax(dim=-1)[0].cpu().numpy()
-         choice = pos if sim[0] > sim[1] else neg
-         health[name] = {"assessment": choice, "confidence": float(max(sim))}
      return breed, breed_conf, health

-
- def analyze_video_health(video_path: str):
      cap = cv2.VideoCapture(video_path)
      fps = cap.get(cv2.CAP_PROP_FPS) or 24
      total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-     duration = total / fps
-     indices = np.linspace(0, total - 1, num=10, dtype=int)
-     scores = []
      for i in indices:
-         cap.set(cv2.CAP_PROP_POS_FRAMES, i)
-         ret, frame = cap.read()
-         if not ret:
-             break
-         img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-         _, conf = analyze_medical_image(img)
          scores.append(conf)
      cap.release()
-     return {"duration_sec": round(duration, 1), "avg_gait_confidence": float(np.mean(scores))}

-
- def compute_questionnaire_score(answers: list):
-     scores = {}
-     idx = 0
      for sec in QUESTIONNAIRE:
-         n = len(sec["questions"])
-         vals = list(map(int, answers[idx: idx + n]))
-         idx += n
-         scores[sec["domain"]] = round(sum(vals) / n, 2)
-     return scores
-

  with gr.Blocks(title="🐶 Dog Health & Age Analyzer") as demo:
-     gr.Markdown("## Upload Image or Video, or Record a Short Clip")

-     with gr.Tab("Image Analysis"):
-         img_in = gr.Image(type="pil", label="Upload Dog Image")
-         breed_in = gr.Textbox(label="(Optional) Override Breed")
-         age_in = gr.Number(label="Chronological Age (years)", precision=1)
          btn = gr.Button("Analyze")
-         out = gr.Markdown()
-
-         def run_image(img, override, chrono):
-             breed, b_conf, health = classify_breed_and_health(img, override)
-             med_label, med_conf = analyze_medical_image(img)
-             bio_age = predict_biological_age(img, breed)
-             pace = round(bio_age / chrono, 2) if chrono else None
-             rpt = f"**Breed:** {breed} ({b_conf:.1%}) \n"
-             rpt += f"**MedGemma Finding:** {med_label} ({med_conf:.1%}) \n\n"
-             rpt += f"**Biological Age:** {bio_age} yrs \n"
-             rpt += f"**Chronological Age:** {chrono or 'N/A'} yrs \n"
-             if pace:
-                 rpt += f"**Pace of Aging:** {pace}× \n\n"
-             rpt += "### Health Aspects\n"
-             for k, v in health.items():
-                 rpt += f"- **{k}:** {v['assessment']} ({v['confidence']:.1%})\n"
              return rpt

-         btn.click(run_image, inputs=[img_in, breed_in, age_in], outputs=out)
-
-     with gr.Tab("Video Analysis"):
-         vid_in = gr.Video(label="Upload or Record Video (10–30s)")
-         btn2 = gr.Button("Analyze Video")
-         vid_out = gr.JSON()
-         btn2.click(lambda v: analyze_video_health(v), inputs=vid_in, outputs=vid_out)

      with gr.Tab("Questionnaire"):
-         widgets = []
          for sec in QUESTIONNAIRE:
              gr.Markdown(f"### {sec['domain']}")
              for q in sec["questions"]:
-                 w = gr.Radio(choices=SCALE, label=q)
                  widgets.append(w)
-         btn3 = gr.Button("Compute Score")
-         out_q = gr.JSON()
-         btn3.click(compute_questionnaire_score, inputs=widgets, outputs=out_q)

      with gr.Tab("About"):
-         gr.Markdown("""
-         **MedGemma v1**: Veterinary medical image analysis
-         **Video Module**: Gait & posture confidence score
-         **Questionnaire**: Healthspan domains (Mobility, Energy, Physical, Cognitive, Social)
-         """)

  demo.launch()
 
  # app.py

+ import os
  import gradio as gr
  from PIL import Image
  import torch
  import numpy as np
  import cv2
+ from transformers import (
+     CLIPProcessor, CLIPModel,
+     AutoProcessor, AutoModelForImageClassification
+ )

+ # —————————————————————————————
+ # CONFIG: set your HF token here or via env var HF_TOKEN
+ HF_TOKEN = os.getenv("HF_TOKEN", "<YOUR_HF_TOKEN_HERE>")

+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

+ # 1. CLIP for breed, age, basic health
+ clip_model = CLIPModel.from_pretrained(
+     "openai/clip-vit-base-patch16",
+     use_auth_token=HF_TOKEN
+ ).to(device)
+ clip_processor = CLIPProcessor.from_pretrained(
+     "openai/clip-vit-base-patch16",
+     use_auth_token=HF_TOKEN
+ )
+
+ # 2. MedGemma 4B multimodal (public but gated) for medical findings
+ medgemma_processor = AutoProcessor.from_pretrained(
+     "google/medgemma-4b-it",
+     use_auth_token=HF_TOKEN
+ )
+ medgemma_model = AutoModelForImageClassification.from_pretrained(
+     "google/medgemma-4b-it",
+     use_auth_token=HF_TOKEN
+ ).to(device)
+
+ # 3. Stanford Dogs & lifespans (abbreviated; expand as needed)
+ STANFORD_BREEDS = [
+     "afghan hound", "beagle", "golden retriever", "yorkshire terrier"
+ ]
  BREED_LIFESPAN = {
      "afghan hound": 11.1,
      "beagle": 12.5,
      "golden retriever": 13.2,
+     "yorkshire terrier": 13.3
  }

+ # 4. Questionnaire
  QUESTIONNAIRE = [
      {"domain": "Mobility", "questions": [
+         "Difficulty rising from lying down?",
+         "Hesitate before jumping up?"
      ]},
      {"domain": "Energy", "questions": [
+         "Tire quickly on walks?",
+         "Activity level decreased?"
      ]},
+     {"domain": "Physical", "questions": [
+         "Scratch or lick skin frequently?",
+         "Changes in appetite or weight?"
      ]},
      {"domain": "Cognitive", "questions": [
+         "Get lost in familiar rooms?",
+         "Stare blankly at walls?"
      ]},
      {"domain": "Social", "questions": [
+         "Interest in play declined?",
+         "Avoid interaction with family?"
      ]}
  ]
+ SCALE = ["0","1","2","3","4","5"]


+ def predict_biological_age(img: Image.Image, breed: str) -> int:
      avg = BREED_LIFESPAN.get(breed.lower(), 12)
+     prompts = [f"a {age}-year-old {breed}" for age in range(1, int(avg*2)+1)]
+     inputs = clip_processor(text=prompts, images=img, return_tensors="pt", padding=True).to(device)
      with torch.no_grad():
+         probs = clip_model(**inputs).logits_per_image.softmax(1)[0].cpu().numpy()
+     return int(np.argmax(probs)+1)

+ def analyze_medical_image(img: Image.Image):
+     inputs = medgemma_processor(images=img, return_tensors="pt").to(device)
      with torch.no_grad():
+         logits = medgemma_model(**inputs).logits.softmax(-1)[0].cpu().numpy()
+     label = medgemma_model.config.id2label[int(np.argmax(logits))]
+     conf = float(np.max(logits))
      return label, conf

+ def classify_breed_and_health(img: Image.Image, override=None):
+     inp = clip_processor(images=img, return_tensors="pt").to(device)
      with torch.no_grad():
+         feats = clip_model.get_image_features(**inp)
+     text_prompts = [f"a photo of a {b}" for b in STANFORD_BREEDS]
+     ti = clip_processor(text=text_prompts, return_tensors="pt", padding=True).to(device)
      with torch.no_grad():
+         tf = clip_model.get_text_features(**ti)
+     sims = (feats @ tf.T).softmax(-1)[0].cpu().numpy()
+     idx = int(np.argmax(sims))
+     breed = override or STANFORD_BREEDS[idx]
      breed_conf = float(sims[idx])
      aspects = {
+         "Coat": ("shiny healthy coat","dull patchy fur"),
+         "Eyes": ("bright clear eyes","cloudy milky eyes"),
+         "Body": ("ideal muscle tone","visible ribs or bones"),
+         "Teeth":("clean white teeth","yellow stained teeth")
      }
      health = {}
+     for name,(p,n) in aspects.items():
+         ti = clip_processor(text=[p,n], return_tensors="pt", padding=True).to(device)
          with torch.no_grad():
+             tf2 = clip_model.get_text_features(**ti)
+         sim2 = (feats @ tf2.T).softmax(-1)[0].cpu().numpy()
+         choice = p if sim2[0]>sim2[1] else n
+         health[name] = {"assessment":choice,"confidence":float(max(sim2))}
      return breed, breed_conf, health

+ def analyze_video(video_path):
      cap = cv2.VideoCapture(video_path)
      fps = cap.get(cv2.CAP_PROP_FPS) or 24
      total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+     indices = np.linspace(0,total-1,10,dtype=int)
+     scores=[]
      for i in indices:
+         cap.set(cv2.CAP_PROP_POS_FRAMES,i)
+         ret,frame=cap.read()
+         if not ret: break
+         img=Image.fromarray(cv2.cvtColor(frame,cv2.COLOR_BGR2RGB))
+         _,conf=analyze_medical_image(img)
          scores.append(conf)
      cap.release()
+     return {"duration_sec":round(total/fps,1),"avg_gait_conf":float(np.mean(scores))}

+ def compute_q_score(answers):
+     out={}
+     idx=0
      for sec in QUESTIONNAIRE:
+         n=len(sec["questions"])
+         vals=list(map(int,answers[idx:idx+n]))
+         idx+=n
+         out[sec["domain"]]=round(sum(vals)/n,2)
+     return out

  with gr.Blocks(title="🐶 Dog Health & Age Analyzer") as demo:
+     gr.Markdown("## Upload an Image or Video (10–30 s) or Record Live")

+     with gr.Tab("Image"):
+         img = gr.Image(type="pil")
+         br = gr.Textbox(label="Override Breed")
+         ca = gr.Number(label="Chronological Age")
          btn = gr.Button("Analyze")
+         md = gr.Markdown()
+         def run_i(i,b,o):
+             breed,bc,h=classify_breed_and_health(i,o)
+             ml,mc=analyze_medical_image(i)
+             ba=predict_biological_age(i,breed)
+             pace = f"{ba/o:.2f}×" if o else "N/A"
+             rpt = f"**Breed:** {breed} ({bc:.1%})\n\n"
+             rpt+=f"**MedGemma Finding:** {ml} ({mc:.1%})\n\n"
+             rpt+=f"**Bio Age:** {ba} yrs | **Chrono:** {o or 'N/A'} yrs | **Pace:** {pace}\n\n"
+             rpt+="### Health\n"+ "\n".join(f"- **{k}:** {v['assessment']} ({v['confidence']:.1%})" for k,v in h.items())
              return rpt
+         btn.click(run_i, inputs=[img,br,ca], outputs=md)

+     with gr.Tab("Video"):
+         vid=gr.Video()
+         b2=gr.Button("Analyze Video")
+         out2=gr.JSON()
+         b2.click(analyze_video, inputs=vid, outputs=out2)

      with gr.Tab("Questionnaire"):
+         widgets=[]
          for sec in QUESTIONNAIRE:
              gr.Markdown(f"### {sec['domain']}")
              for q in sec["questions"]:
+                 w = gr.Radio(SCALE, label=q)
                  widgets.append(w)
+         b3=gr.Button("Score")
+         o3=gr.JSON()
+         b3.click(compute_q_score, inputs=widgets, outputs=o3)

      with gr.Tab("About"):
+         gr.Markdown("**MedGemma-4B-IT** requires HF auth; set HF_TOKEN. Uses CLIP & SigLIP for medical insight.")

  demo.launch()
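
For reference, a minimal launch sketch (not part of the commit): the updated app reads the gated-model token from the HF_TOKEN environment variable, falling back to the placeholder in its CONFIG block, so a token with access to google/medgemma-4b-it must be in the environment before the Space or a local run starts. The helper below is hypothetical and assumes the file above is saved as app.py next to it.

# run_local.py: hypothetical launcher for the updated app.py shown above
import os
import subprocess

# Export a real token first (placeholder value shown); both huggingface_hub
# and the app's os.getenv("HF_TOKEN", ...) read it from the environment.
os.environ.setdefault("HF_TOKEN", "hf_xxxxxxxx")

# Start the Gradio app as a child process.
subprocess.run(["python", "app.py"], check=True)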