File size: 1,756 Bytes
1d37acd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a336e88
1d37acd
a336e88
 
 
 
 
 
1d37acd
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from PIL import Image
import gradio as gr
import numpy as np
import torch


class MidasDepth(object):
    """Monocular depth estimation using the MiDaS models from torch.hub."""

    def __init__(self, model_type="DPT_Large", device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
        self.device = device
        # Load the network once: eval mode, gradients off — inference only.
        self.midas = torch.hub.load("intel-isl/MiDaS", model_type).to(self.device).eval().requires_grad_(False)
        # NOTE(review): dpt_transform matches the DPT_* model family; other
        # model_type values (e.g. MiDaS_small) ship a different transform —
        # confirm before varying model_type.
        self.transform = torch.hub.load("intel-isl/MiDaS", "transforms").dpt_transform

    def get_depth(self, image):
        """Return an (H, W) numpy depth map for an RGB(A) image.

        Accepts a PIL image or a numpy array; uint8 / 0..255 inputs are
        rescaled to [0, 1] before being fed to the model.
        """
        if not isinstance(image, np.ndarray):
            image = np.asarray(image)
        if (image > 1).any():
            # Out-of-place division: in-place `image /= 255.` raises TypeError
            # on integer (uint8) arrays and on the read-only arrays that
            # np.asarray(PIL.Image) can return — exactly what gradio supplies.
            image = image / 255.
        with torch.inference_mode():
            batch = self.transform(image[..., :3]).to(self.device)
            prediction = self.midas(batch)
            # Upsample the low-resolution prediction back to the input size.
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=image.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()
        return prediction.detach().cpu().numpy()


def main():
    """Build and launch the Gradio demo around a MidasDepth instance."""
    depth_model = MidasDepth()

    def run(image, *pose):
        # NOTE(review): only `image[0]` is forwarded to the depth model and the
        # six pose numbers are ignored; the second output is an empty video
        # placeholder — confirm this is the intended (work-in-progress) behavior.
        depth = depth_model.get_depth(image[0]).astype("uint8")
        return [Image.fromarray(depth), ""]

    pose_inputs = [
        gr.inputs.Number(label=name, default=0.0)
        for name in ("tx", "ty", "tz", "rx", "ry", "rz")
    ]
    interface = gr.Interface(
        fn=run,
        inputs=[gr.inputs.Image()] + pose_inputs,
        outputs=[gr.outputs.Image(), gr.outputs.Video()],
        title="DALL·E 6D",
        description="Lift DALL·E 2 (or any other model) into 3D!",
    )
    interface.launch()


# Script entry point: only launch the demo when executed directly.
if __name__ == "__main__":
    main()