hujiecpp committed on
Commit
7e7d949
·
1 Parent(s): 38a29f3

init project

Browse files
Files changed (2) hide show
  1. app.py +10 -25
  2. modules/pe3r/models.py +3 -3
app.py CHANGED
@@ -37,11 +37,9 @@ from modules.mobilesamv2.utils.transforms import ResizeLongestSide
37
  from modules.pe3r.models import Models
38
  import torchvision.transforms as tvf
39
 
40
- from transformers import AutoTokenizer, AutoModel, AutoProcessor, SamModel
41
-
42
  silent = False
43
- # device = 'cuda' if torch.cuda.is_available() else 'cpu'
44
- pe3r = Models('cpu') #
45
 
46
  def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
47
  cam_color=None, as_pointcloud=False,
@@ -246,9 +244,7 @@ def slerp_multiple(vectors, t_values):
246
  @torch.no_grad
247
  def get_mask_from_img_sam1(sam1_image, yolov8_image, original_size, input_size, transform):
248
 
249
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
250
- pe3r.yolov8.to(device)
251
- pe3r.mobilesamv2.to(device)
252
 
253
  sam_mask=[]
254
  img_area = original_size[0] * original_size[1]
@@ -304,14 +300,7 @@ def get_mask_from_img_sam1(sam1_image, yolov8_image, original_size, input_size,
304
  @torch.no_grad
305
  def get_cog_feats(images):
306
 
307
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
308
- pe3r.sam2.to(device)
309
-
310
- siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
311
- siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
312
-
313
- # pe3r.siglip_processor.to(device)
314
- # pe3r.siglip.to(device)
315
 
316
  cog_seg_maps = []
317
  rev_cog_seg_maps = []
@@ -405,10 +394,10 @@ def get_cog_feats(images):
405
  seg_imgs = np.stack(seg_img_list, axis=0) # b,H,W,3
406
  seg_imgs = torch.from_numpy(seg_imgs).permute(0,3,1,2) # / 255.0
407
 
408
- inputs = siglip_processor(images=seg_imgs, return_tensors="pt")
409
  inputs = {key: value.to(device) for key, value in inputs.items()}
410
 
411
- image_features = siglip.get_image_features(**inputs)
412
  image_features = image_features / image_features.norm(dim=-1, keepdim=True)
413
  image_features = image_features.detach().cpu()
414
 
@@ -457,8 +446,7 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
457
  then run get_3D_model_from_scene
458
  """
459
 
460
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
461
- pe3r.mast3r.to(device)
462
 
463
  if len(filelist) < 2:
464
  raise gradio.Error("Please input at least 2 images.")
@@ -523,16 +511,13 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
523
  def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
524
  mask_sky, clean_depth, transparent_cams, cam_size):
525
 
526
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
527
- # pe3r.siglip_tokenizer.to(device)
528
- siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
529
- siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
530
 
531
  texts = [text]
532
- inputs = siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
533
  inputs = {key: value.to(device) for key, value in inputs.items()}
534
  with torch.no_grad():
535
- text_feats =siglip.get_text_features(**inputs)
536
  text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
537
  scene.render_image(text_feats, threshold)
538
  scene.ori_imgs = scene.rendered_imgs
 
37
  from modules.pe3r.models import Models
38
  import torchvision.transforms as tvf
39
 
 
 
40
  silent = False
41
+ device = 'cpu' #'cuda' if torch.cuda.is_available() else 'cpu'
42
+ pe3r = Models(device) #
43
 
44
  def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
45
  cam_color=None, as_pointcloud=False,
 
244
  @torch.no_grad
245
  def get_mask_from_img_sam1(sam1_image, yolov8_image, original_size, input_size, transform):
246
 
247
+ # device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
248
 
249
  sam_mask=[]
250
  img_area = original_size[0] * original_size[1]
 
300
  @torch.no_grad
301
  def get_cog_feats(images):
302
 
303
+ # device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
 
 
 
 
 
304
 
305
  cog_seg_maps = []
306
  rev_cog_seg_maps = []
 
394
  seg_imgs = np.stack(seg_img_list, axis=0) # b,H,W,3
395
  seg_imgs = torch.from_numpy(seg_imgs).permute(0,3,1,2) # / 255.0
396
 
397
+ inputs = pe3r.siglip_processor(images=seg_imgs, return_tensors="pt")
398
  inputs = {key: value.to(device) for key, value in inputs.items()}
399
 
400
+ image_features = pe3r.siglip.get_image_features(**inputs)
401
  image_features = image_features / image_features.norm(dim=-1, keepdim=True)
402
  image_features = image_features.detach().cpu()
403
 
 
446
  then run get_3D_model_from_scene
447
  """
448
 
449
+ # device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
450
 
451
  if len(filelist) < 2:
452
  raise gradio.Error("Please input at least 2 images.")
 
511
  def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
512
  mask_sky, clean_depth, transparent_cams, cam_size):
513
 
514
+ # device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
 
515
 
516
  texts = [text]
517
+ inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
518
  inputs = {key: value.to(device) for key, value in inputs.items()}
519
  with torch.no_grad():
520
+ text_feats =pe3r.siglip.get_text_features(**inputs)
521
  text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
522
  scene.render_image(text_feats, threshold)
523
  scene.ori_imgs = scene.rendered_imgs
modules/pe3r/models.py CHANGED
@@ -47,6 +47,6 @@ class Models:
47
  self.yolov8 = ObjectAwareModel(YOLO8_CKP)
48
 
49
  # -- siglip --
50
- # self.siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
51
- # self.siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
52
- # self.siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
 
47
  self.yolov8 = ObjectAwareModel(YOLO8_CKP)
48
 
49
  # -- siglip --
50
+ self.siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
51
+ self.siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
52
+ self.siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)