wyysf committed on
Commit
0f079b2
·
0 Parent(s):
This view is limited to 50 files because it contains too many changes. See the raw diff for the rest.
Files changed (50)
  1. .gitattributes +40 -0
  2. .gitignore +1 -0
  3. README.md +11 -0
  4. README_zh.md +173 -0
  5. apps/.vscode/launch.json +15 -0
  6. apps/__pycache__/mv_models.cpython-310.pyc +0 -0
  7. apps/__pycache__/mv_models.cpython-38.pyc +0 -0
  8. apps/__pycache__/utils.cpython-310.pyc +0 -0
  9. apps/__pycache__/utils.cpython-38.pyc +0 -0
  10. apps/examples/1_cute_girl.webp +0 -0
  11. apps/examples/blue_monster.webp +0 -0
  12. apps/examples/boy.webp +0 -0
  13. apps/examples/boy2.webp +0 -0
  14. apps/examples/bulldog.webp +0 -0
  15. apps/examples/catman.webp +0 -0
  16. apps/examples/cyberpunk_man.webp +0 -0
  17. apps/examples/dinosaur_boy.webp +0 -0
  18. apps/examples/dog.webp +0 -0
  19. apps/examples/doraemon.webp +0 -0
  20. apps/examples/dragon.webp +0 -0
  21. apps/examples/elf.webp +0 -0
  22. apps/examples/ghost-eating-burger.webp +0 -0
  23. apps/examples/girl1.webp +0 -0
  24. apps/examples/gun.webp +0 -0
  25. apps/examples/kunkun.webp +0 -0
  26. apps/examples/link.webp +0 -0
  27. apps/examples/mushroom1.webp +0 -0
  28. apps/examples/mushroom2.webp +0 -0
  29. apps/examples/pikachu.webp +0 -0
  30. apps/examples/plants.webp +0 -0
  31. apps/examples/rose.webp +0 -0
  32. apps/examples/shoe.webp +0 -0
  33. apps/examples/sports_girl.webp +0 -0
  34. apps/examples/stone.webp +0 -0
  35. apps/examples/sweater.webp +0 -0
  36. apps/examples/sword.webp +0 -0
  37. apps/examples/teapot.webp +0 -0
  38. apps/examples/toy1.webp +0 -0
  39. apps/examples/toy_bear.webp +0 -0
  40. apps/examples/toy_dog.webp +0 -0
  41. apps/examples/toy_pig.webp +0 -0
  42. apps/examples/toy_rabbit.webp +0 -0
  43. apps/examples/wings.webp +0 -0
  44. apps/gradio_app.py +272 -0
  45. apps/mv_models.py +162 -0
  46. apps/third_party/CRM/.gitignore +155 -0
  47. apps/third_party/CRM/LICENSE +21 -0
  48. apps/third_party/CRM/README.md +85 -0
  49. apps/third_party/CRM/__init__.py +0 -0
  50. apps/third_party/CRM/app.py +228 -0
.gitattributes ADDED
@@ -0,0 +1,40 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ apps/ckpts/InstantMeshes filter=lfs diff=lfs merge=lfs -text
+ apps/third_party/Wonder3D/assets/fig_teaser.png filter=lfs diff=lfs merge=lfs -text
+ asset/video_cover.png filter=lfs diff=lfs merge=lfs -text
+ apps/InstantMeshes filter=lfs diff=lfs merge=lfs -text
+ apps/third_party/InstantMeshes filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+ gradio_cached_dir
README.md ADDED
@@ -0,0 +1,11 @@
+ ---
+ title: 'CraftsMan: High-fidelity Mesh Generation with 3D Native Generation and Interactive Geometry Refiner'
+ emoji: 🚀
+ colorFrom: indigo
+ colorTo: pink
+ sdk: gradio
+ sdk_version: 4.31.5
+ app_file: gradio_app.py
+ pinned: false
+ license: agpl-3.0
+ ---
README_zh.md ADDED
@@ -0,0 +1,173 @@
+ <p align="center">
+ <img src="asset/logo.png" height=220>
+ </p>
+
+ ### <div align="center">CraftsMan (匠心): High-fidelity Mesh Generation with 3D Native Diffusion and Interactive Geometry Refiner</div>
+ ##### <p align="center"> [Weiyu Li<sup>1,2</sup>](https://wyysf-98.github.io/), Jiarui Liu<sup>1,2</sup>, [Rui Chen<sup>1,2</sup>](https://aruichen.github.io/), [Yixun Liang<sup>3,2</sup>](https://yixunliang.github.io/), [Xuelin Chen<sup>4</sup>](https://xuelin-chen.github.io/), [Ping Tan<sup>1,2</sup>](https://ece.hkust.edu.hk/pingtan), [Xiaoxiao Long<sup>5</sup>](https://www.xxlong.site/)</p>
+ ##### <p align="center"> <sup>1</sup>HKUST, <sup>2</sup>LightIllusions, <sup>3</sup>HKUST (Guangzhou), <sup>4</sup>Tencent AI Lab, <sup>5</sup>HKU</p>
+ <div align="center">
+ <a href="https://github.com/Craftsman3D.github.io/"><img src="https://img.shields.io/static/v1?label=Project%20Page&message=Github&color=blue&logo=github-pages"></a> &ensp;
+ <a href="https://huggingface.co/"><img src="https://img.shields.io/static/v1?label=SAM-LLaVA&message=HF&color=yellow"></a> &ensp;
+ <a href="https://arxiv.org/abs/xxx"><img src="https://img.shields.io/static/v1?label=Paper&message=Arxiv&color=red&logo=arxiv"></a> &ensp;
+ </div>
+
+ #### TL;DR: <font color="red">**CraftsMan (aka 匠心)**</font> is a two-stage text/image-to-3D mesh generation model. Mimicking the modeling workflow of artists and craftsmen, we first use a 3D diffusion model to generate a coarse mesh with smooth geometry (about 5 seconds), and then refine it using enhanced multi-view normal maps produced by a 2D normal diffusion model (about 20 seconds); the refinement can also be carried out interactively, in a ZBrush-like manner.
+
+
+ ## ✨ Overview
+ This repository contains the source code (training/inference), pretrained weights, and gradio demo code of our 3D mesh generation project. You can find more visualizations on our [project page](https://github.com/Craftsman3D.github.io/). If you have high-quality 3D data or other ideas, we warmly welcome any form of collaboration.
+ <details><summary>Full abstract</summary>
+ We present a novel 3D modeling system, CraftsMan, which can generate high-fidelity 3D geometry with diverse shapes, regular mesh topology, and smooth surfaces, and which, notably, allows the geometry to be refined interactively, much like a manual modeling workflow. Despite significant progress in 3D generation, existing methods still struggle with lengthy optimization, irregular mesh topology, noisy surfaces, and difficulty accommodating user edits, which hinders their adoption in 3D modeling software. Our work is inspired by craftsmen, who usually rough out the overall shape of a piece first and then elaborate its surface details. Specifically, we employ a 3D native diffusion model that operates on a latent space learned from a latent-set-based 3D representation and generates coarse geometry with regular mesh topology in a few seconds. In particular, this process takes a text prompt or a reference image as input and leverages a powerful multi-view (MV) 2D diffusion model to generate multiple views of the coarse geometry, which are fed into our MV-conditioned 3D diffusion model to generate the 3D geometry, significantly improving robustness and generalization. Afterwards, a normal-based geometry refiner markedly enhances the surface details. The refinement can be performed automatically, or interactively with user-supplied edits. Extensive experiments show that our method is highly effective at producing high-quality 3D assets, outperforming existing methods.
+ </details>
+
+ <p align="center">
+ <img src="asset/teaser.jpg" >
+ </p>
+
+
+ ## Contents
+ * [Video](#Video)
+ * [Pretrained Models](##-Pretrained-models)
+ * [Gradio & Huggingface Demo](#Gradio-demo)
+ * [Inference Code](#Inference)
+ * [Training Code](#Train)
+ * [Data Preparation](#data)
+ * [Acknowledgements](#Acknowledgements)
+ * [Citation](#Bibtex)
+
+ ## Environment Setup
+
+ <details> <summary>Hardware</summary>
+ We train the model on 32 A800 GPUs with a per-GPU batch size of 32 for 7 days.
+
+ The mesh refinement stage runs on a GTX 3080 GPU.
+
+
+ </details>
+ <details> <summary>Software</summary>
+
+ :smiley: For ease of use, we provide a Docker image: [Setup using Docker](./docker/README.md).
+
+ - Python 3.10.0
+ - PyTorch 2.1.0
+ - Cuda Toolkit 11.8.0
+ - Ubuntu 22.04
+
+ Clone this repository.
+
+ ```sh
+ git clone git@github.com:wyysf-98/CraftsMan.git
+ ```
+
+ Install the required dependencies.
+
+ ```sh
+ conda create -n CraftsMan python=3.10
+ conda activate CraftsMan
+ conda install -c pytorch pytorch=2.3.0 torchvision=0.18.0 cudatoolkit=11.8 && \
+ pip install -r docker/requirements.txt
+ ```
+
+ </details>
+
+
+ # 🎥 Video
+
+ [![Watch the video](asset/video_cover.png)](https://www.youtube.com/watch?v=WhEs4tS4mGo)
+
+
+ # 3D Native Diffusion Model (Latent Set Diffusion Model)
+ We provide the training and inference code here to facilitate future research.
+ The latent set diffusion model is largely based on [Michelangelo](https://github.com/NeuralCarver/Michelangelo),
+ adopts the [perceiver](https://github.com/google-deepmind/deepmind-research/blob/master/perceiver/perceiver.py) architecture, and has only 104M parameters.
+
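+ As a rough illustration of the idea (a toy sketch, not the actual CraftsMan/Michelangelo code), a latent set encoder lets a small set of learned latent queries cross-attend to features sampled from the input; the sizes below (256 latents, width 64) only echo the `l256-e64` config naming and are otherwise arbitrary:
+
+ ```python
+ # Toy perceiver-style latent set encoder (illustrative only, not the repo's implementation).
+ import torch
+ import torch.nn as nn
+
+ class LatentSetEncoder(nn.Module):
+     def __init__(self, num_latents=256, dim=64, num_heads=8):
+         super().__init__()
+         # a fixed, learned set of latent queries
+         self.latents = nn.Parameter(torch.randn(num_latents, dim) * 0.02)
+         self.cross_attn = nn.MultiheadAttention(dim, num_heads, batch_first=True)
+         self.ff = nn.Sequential(nn.LayerNorm(dim), nn.Linear(dim, 4 * dim), nn.GELU(), nn.Linear(4 * dim, dim))
+
+     def forward(self, point_feats):  # point_feats: (B, N, dim), e.g. features of surface samples
+         q = self.latents.unsqueeze(0).expand(point_feats.shape[0], -1, -1)
+         x, _ = self.cross_attn(q, point_feats, point_feats)  # latents attend to the input set
+         return x + self.ff(x)  # (B, num_latents, dim): the latent set
+
+ print(LatentSetEncoder()(torch.randn(2, 1024, 64)).shape)  # torch.Size([2, 256, 64])
+ ```
+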
+ ## Pretrained Models
+ Currently, we provide a model that is conditioned on 4-view images and injects camera information into the CLIP feature extractor via ModLN.
+ We will consider open-sourcing further models as circumstances allow.
+
+ Our inference script downloads the models automatically. Alternatively, you can download them manually and place them under the ckpts/ directory.
+
+
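+ If you prefer to fetch the files yourself, a minimal sketch with `huggingface_hub` looks like this (the repo id and file names are the ones used by `apps/gradio_app.py`; downloading into `ckpts/` via `local_dir` is an assumption, adjust it to your layout):
+
+ ```python
+ # Manual download sketch mirroring the hf_hub_download calls in apps/gradio_app.py.
+ from huggingface_hub import hf_hub_download
+
+ ckpt_path = hf_hub_download(
+     repo_id="wyysf/CraftsMan",
+     filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/model.ckpt",
+     repo_type="model",
+     local_dir="ckpts",  # assumption: keep checkpoints under ckpts/
+ )
+ config_path = hf_hub_download(
+     repo_id="wyysf/CraftsMan",
+     filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/config.yaml",
+     repo_type="model",
+     local_dir="ckpts",
+ )
+ print(ckpt_path, config_path)
+ ```
+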
+ ## Gradio Demo
+ We provide gradio demos with different text/image-to-multi-view diffusion models, such as [CRM](https://github.com/thu-ml/CRM), [Wonder3D](https://github.com/xxlong0/Wonder3D/) and [LGM](https://github.com/3DTopia/LGM). You can switch between models to get better results. To run the gradio demo on your local machine, simply run:
+
+ ```bash
+ python apps/gradio_app.py
+ ```
+
+ ## Inference
+ To generate 3D meshes from a folder of images via the command line, simply run:
+
+ ```bash
+ python launch.py --config ./configs/image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6.yaml \
+ --validate --gpu 0
+ ```
+ By default we use [rembg](https://github.com/danielgatis/rembg) to segment the foreground object. If the input image already has an alpha mask, specify the no_rembg flag.
+
+ If you have images of the other views (left, right, back), you can provide them as input as well.
+
+
+ ## Training from Scratch
+ We provide our training code to facilitate future research. We will release a small set of data samples in the next few days.
+ For more training details and configurations, please refer to the configs folder.
+
+ ```bash
+ ### training the shape-autoencoder
+ python launch.py --config ./configs/shape-autoencoder/l256-e64-ne8-nd16.yaml \
+ --train --gpu 0
+
+ ### training the image-to-shape diffusion model
+ python launch.py --config ./configs/image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6.yaml \
+ --train --gpu 0
+
+ ```
+
+ # 2D Normal-Enhanced Diffusion Model (Coming Soon)
+
+ We are working hard to release our mesh refinement code. Thank you for your patience as we put the finishing touches on this exciting development. 🔧🚀
+
+ You can also find results of the mesh refinement stage in the video.
+
+
+ # ❓ FAQ
+ Q: How can I get better results?
+ 1. CraftsMan uses multi-view images as the condition of the 3D diffusion model. In our experiments, our method is more robust to multi-view inconsistencies than reconstruction models such as [Wonder3D](https://github.com/xxlong0/Wonder3D/) and [InstantMesh](https://github.com/TencentARC/InstantMesh/tree/main). Since we rely on an image-to-MV model, the facing direction of the input image matters a great deal, and a well-oriented input generally leads to a good reconstruction.
+ 2. If you have your own multi-view images, uploading them directly is a good option.
+ 3. As with 2D diffusion models, try different random seeds, adjust the CFG scale, or try a different scheduler.
+ 4. We will consider providing a version conditioned on text prompts later, so that you can use positive and negative prompts.
+
+
+ # 💪 Todo List
+
+ - [x] Inference code
+ - [x] Training code
+ - [x] Gradio & Hugging Face demo
+ - [x] Model zoo; more checkpoints will be released in the future
+ - [ ] Environment setup
+ - [ ] Data samples
+ - [ ] Google Colab example
+ - [ ] Mesh refinement code
+
+
+ # 🤗 Acknowledgements
+
+ - Thanks to [LightIllusions (光影幻像)](https://www.lightillusions.com/) for providing compute resources and to 潘建雄 for data preprocessing. If you have any ideas about high-quality 3D generation, feel free to contact us!
+ - Thanks to [Hugging Face](https://github.com/huggingface) for sponsoring the nice demo!
+ - Thanks to [3DShape2VecSet](https://github.com/1zb/3DShape2VecSet/tree/master) for their amazing work; the latent set representation provides an efficient way to represent 3D shapes!
+ - Thanks to [Michelangelo](https://github.com/NeuralCarver/Michelangelo) for their great work; our model structure is heavily built on this repo!
+ - Thanks to [CRM](https://github.com/thu-ml/CRM), [Wonder3D](https://github.com/xxlong0/Wonder3D/) and [LGM](https://github.com/3DTopia/LGM) for their released multi-view image generation models. If you have a more advanced version and want to contribute it to the community, we welcome updates.
+ - Thanks to [Objaverse](https://objaverse.allenai.org/) and [Objaverse-MIX](https://huggingface.co/datasets/BAAI/Objaverse-MIX/tree/main) for open-sourcing their data, which helped us run many validation experiments.
+ - Thanks to [ThreeStudio](https://github.com/threestudio-project/threestudio) for implementing a complete framework; we refer to their excellent and easy-to-use code structure.
+
+ # 📑 License
+ CraftsMan is under [AGPL-3.0](https://www.gnu.org/licenses/agpl-3.0.en.html), so any downstream solution or product (including cloud services) that includes CraftsMan code or a trained model (whether pretrained or custom trained) should be open-sourced to comply with the AGPL conditions. If you have any questions about the use of CraftsMan, please contact us first.
+
+ # 📖 BibTeX
+
+ @misc{li2024craftsman,
+     title = {CraftsMan: High-fidelity Mesh Generation with 3D Native Generation and Interactive Geometry Refiner},
+     author = {Weiyu Li and Jiarui Liu and Rui Chen and Yixun Liang and Xuelin Chen and Ping Tan and Xiaoxiao Long},
+     year = {2024},
+     archivePrefix = {arXiv},
+     primaryClass = {cs.CG}
+ }
apps/.vscode/launch.json ADDED
@@ -0,0 +1,15 @@
+ {
+     // Use IntelliSense to learn about possible attributes.
+     // Hover to view descriptions of existing attributes.
+     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+     "version": "0.2.0",
+     "configurations": [
+         {
+             "name": "Python Debugger: Current File",
+             "type": "debugpy",
+             "request": "launch",
+             "program": "${file}",
+             "console": "integratedTerminal"
+         }
+     ]
+ }
apps/__pycache__/mv_models.cpython-310.pyc ADDED
Binary file (5.38 kB).
 
apps/__pycache__/mv_models.cpython-38.pyc ADDED
Binary file (5.33 kB).
 
apps/__pycache__/utils.cpython-310.pyc ADDED
Binary file (7.54 kB).
 
apps/__pycache__/utils.cpython-38.pyc ADDED
Binary file (7.52 kB).
 
apps/examples/1_cute_girl.webp ADDED
apps/examples/blue_monster.webp ADDED
apps/examples/boy.webp ADDED
apps/examples/boy2.webp ADDED
apps/examples/bulldog.webp ADDED
apps/examples/catman.webp ADDED
apps/examples/cyberpunk_man.webp ADDED
apps/examples/dinosaur_boy.webp ADDED
apps/examples/dog.webp ADDED
apps/examples/doraemon.webp ADDED
apps/examples/dragon.webp ADDED
apps/examples/elf.webp ADDED
apps/examples/ghost-eating-burger.webp ADDED
apps/examples/girl1.webp ADDED
apps/examples/gun.webp ADDED
apps/examples/kunkun.webp ADDED
apps/examples/link.webp ADDED
apps/examples/mushroom1.webp ADDED
apps/examples/mushroom2.webp ADDED
apps/examples/pikachu.webp ADDED
apps/examples/plants.webp ADDED
apps/examples/rose.webp ADDED
apps/examples/shoe.webp ADDED
apps/examples/sports_girl.webp ADDED
apps/examples/stone.webp ADDED
apps/examples/sweater.webp ADDED
apps/examples/sword.webp ADDED
apps/examples/teapot.webp ADDED
apps/examples/toy1.webp ADDED
apps/examples/toy_bear.webp ADDED
apps/examples/toy_dog.webp ADDED
apps/examples/toy_pig.webp ADDED
apps/examples/toy_rabbit.webp ADDED
apps/examples/wings.webp ADDED
apps/gradio_app.py ADDED
@@ -0,0 +1,272 @@
1
+ import argparse
2
+ import os
3
+ import json
4
+ import torch
5
+ import sys
6
+ import time
7
+ import importlib
8
+ import numpy as np
9
+ from omegaconf import OmegaConf
10
+ from huggingface_hub import hf_hub_download
11
+
12
+ from collections import OrderedDict
13
+ import trimesh
14
+ from einops import repeat, rearrange
15
+ import pytorch_lightning as pl
16
+ from typing import Dict, Optional, Tuple, List
17
+ import gradio as gr
18
+ from utils import *
19
+
20
+ proj_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
21
+ sys.path.append(os.path.join(proj_dir))
22
+
23
+ import craftsman
24
+ from craftsman.systems.base import BaseSystem
25
+ from craftsman.utils.config import ExperimentConfig, load_config
26
+
27
+ from mv_models import GenMVImage
28
+
29
+ _TITLE = '''CraftsMan: High-fidelity Mesh Generation with 3D Native Generation and Interactive Geometry Refiner'''
30
+ _DESCRIPTION = '''
31
+ <div>
32
+ Select or upload an image, then just click 'Generate'.
33
+ <br>
34
+ By mimicking the artist/craftsman modeling workflow, we propose CraftsMan (aka 匠心), which uses a 3D Latent Set Diffusion Model to directly generate coarse meshes,
35
+ then a multi-view normal enhanced image generation model is used to refine the mesh.
36
+ We provide the coarse 3D diffusion part here.
37
+ <br>
38
+ If you find CraftsMan helpful, please help ⭐ the <a href='https://github.com/wyysf-98/CraftsMan/' target='_blank'>GitHub Repo</a>. Thanks!
39
+ <a style="display:inline-block; margin-left: .5em" href='https://github.com/wyysf-98/CraftsMan/'><img src='https://img.shields.io/github/stars/wyysf-98/CraftsMan?style=social' /></a>
40
+ <br>
41
+ *Please note that the model appears flipped in the gradio viewer; download the obj file to get the correctly oriented mesh.
42
+ <br>
43
+ *If you have your own multi-view images, you can upload them directly.
44
+ </div>
45
+ '''
46
+ _CITE_ = r"""
47
+ ---
48
+ 📝 **Citation**
49
+ If you find our work useful for your research or applications, please cite using this bibtex:
50
+ ```bibtex
51
+ @article{craftsman,
52
+ author = {Weiyu Li and Jiarui Liu and Rui Chen and Yixun Liang and Xuelin Chen and Ping Tan and Xiaoxiao Long},
53
+ title = {CraftsMan: High-fidelity Mesh Generation with 3D Native Generation and Interactive Geometry Refiner},
54
+ journal = {arxiv:xxx},
55
+ year = {2024},
56
+ }
57
+ ```
58
+ 🤗 **Acknowledgements**
59
+ We use <a href='https://github.com/wjakob/instant-meshes' target='_blank'>Instant Meshes</a> to remesh the generated mesh to a lower face count, thanks to the authors for the great work.
60
+ 📋 **License**
61
+ CraftsMan is under [AGPL-3.0](https://www.gnu.org/licenses/agpl-3.0.en.html), so any downstream solution and products (including cloud services) that include CraftsMan code or a trained model (both pretrained or custom trained) inside it should be open-sourced to comply with the AGPL conditions. If you have any questions about the usage of CraftsMan, please contact us first.
62
+ 📧 **Contact**
63
+ If you have any questions, feel free to open a discussion or contact us at <b>[email protected]</b>.
64
+ """
65
+
66
+ model = None
67
+ cached_dir = None
68
+
69
+ def image2mesh(view_front: np.ndarray,
70
+ view_right: np.ndarray,
71
+ view_back: np.ndarray,
72
+ view_left: np.ndarray,
73
+ more: bool = False,
74
+ scheduler_name: str = "DDIMScheduler",
75
+ guidance_scale: int = 7.5,
76
+ seed: int = 4,
77
+ octree_depth: int = 7):
78
+
79
+ sample_inputs = {
80
+ "mvimages": [[
81
+ Image.fromarray(view_front),
82
+ Image.fromarray(view_right),
83
+ Image.fromarray(view_back),
84
+ Image.fromarray(view_left)
85
+ ]]
86
+ }
87
+
88
+ global model
89
+ latents = model.sample(
90
+ sample_inputs,
91
+ sample_times=1,
92
+ guidance_scale=guidance_scale,
93
+ return_intermediates=False,
94
+ seed=seed
95
+
96
+ )[0]
97
+
98
+ # decode the latents to mesh
99
+ box_v = 1.1
100
+ mesh_outputs, _ = model.shape_model.extract_geometry(
101
+ latents,
102
+ bounds=[-box_v, -box_v, -box_v, box_v, box_v, box_v],
103
+ octree_depth=octree_depth
104
+ )
105
+ assert len(mesh_outputs) == 1, "Only support single mesh output for gradio demo"
106
+ mesh = trimesh.Trimesh(mesh_outputs[0][0], mesh_outputs[0][1])
107
+ filepath = f"{cached_dir}/{time.time()}.obj"
108
+ mesh.export(filepath, include_normals=True)
109
+
110
+ if 'Remesh' in more:
111
+ print("Remeshing with Instant Meshes...")
112
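+ # target roughly one tenth of the original face count for the remeshed output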
+ target_face_count = int(len(mesh.faces)/10)
113
+ command = f"{proj_dir}/apps/third_party/InstantMeshes {filepath} -f {target_face_count} -d -S 0 -r 6 -p 6 -o {filepath.replace('.obj', '_remeshed.obj')}"
114
+ os.system(command)
115
+ filepath = filepath.replace('.obj', '_remeshed.obj')
116
+
117
+ return filepath
118
+
119
+ if __name__=="__main__":
120
+ parser = argparse.ArgumentParser()
121
+ # parser.add_argument("--model_path", type=str, required=True, help="Path to the object file",)
122
+ parser.add_argument("--cached_dir", type=str, default="./gradio_cached_dir")
123
+ parser.add_argument("--device", type=int, default=0)
124
+ args = parser.parse_args()
125
+
126
+ cached_dir = args.cached_dir
127
+ os.makedirs(args.cached_dir, exist_ok=True)
128
+ device = torch.device(f"cuda:{args.device}" if torch.cuda.is_available() else "cpu")
129
+ print(f"using device: {device}")
130
+
131
+ # for multi-view images generation
132
+ background_choice = OrderedDict({
133
+ "Alpha as Mask": "Alpha as Mask",
134
+ "Auto Remove Background": "Auto Remove Background",
135
+ "Original Image": "Original Image",
136
+ })
137
+ mvimg_model_config_list = ["CRM", "ImageDream", "Wonder3D"]
138
+
139
+ # for 3D latent set diffusion
141
+ ckpt_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/model.ckpt", repo_type="model")
142
+ config_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/config.yaml", repo_type="model")
143
+ scheluder_dict = OrderedDict({
144
+ "DDIMScheduler": 'diffusers.schedulers.DDIMScheduler',
145
+ # "DPMSolverMultistepScheduler": 'diffusers.schedulers.DPMSolverMultistepScheduler', # not support yet
146
+ # "UniPCMultistepScheduler": 'diffusers.schedulers.UniPCMultistepScheduler', # not support yet
147
+ })
148
+
149
+ # main GUI
150
+ custom_theme = gr.themes.Soft(primary_hue="blue").set(
151
+ button_secondary_background_fill="*neutral_100",
152
+ button_secondary_background_fill_hover="*neutral_200")
153
+ custom_css = '''#disp_image {
154
+ text-align: center; /* Horizontally center the content */
155
+ }'''
156
+
157
+ with gr.Blocks(title=_TITLE, theme=custom_theme, css=custom_css) as demo:
158
+ with gr.Row():
159
+ with gr.Column(scale=1):
160
+ gr.Markdown('# ' + _TITLE)
161
+ gr.Markdown(_DESCRIPTION)
162
+
163
+ with gr.Row():
164
+ with gr.Column(scale=2):
165
+ with gr.Row():
166
+ image_input = gr.Image(
167
+ label="Image Input",
168
+ image_mode="RGBA",
169
+ sources="upload",
170
+ type="pil",
171
+ )
172
+ with gr.Row():
173
+ text = gr.Textbox(label="Prompt (Optional, only works for mvdream)", visible=False)
174
+ with gr.Row():
175
+ gr.Markdown('''Try a different <b>seed</b> if the result is unsatisfying. Good Luck :)''')
176
+ with gr.Row():
177
+ seed = gr.Number(42, label='Seed', show_label=True)
178
+ more = gr.CheckboxGroup(["Remesh", "Symmetry(TBD)"], label="More", show_label=False)
179
+ # remesh = gr.Checkbox(value=False, label='Remesh')
180
+ # symmetry = gr.Checkbox(value=False, label='Symmetry(TBD)', interactive=False)
181
+ run_btn = gr.Button('Generate', variant='primary', interactive=True)
182
+
183
+ with gr.Row():
184
+ gr.Examples(
185
+ examples=[os.path.join("./apps/examples", i) for i in os.listdir("./apps/examples")],
186
+ inputs=[image_input],
187
+ examples_per_page=8
188
+ )
189
+
190
+ with gr.Column(scale=4):
191
+ with gr.Row():
192
+ output_model_obj = gr.Model3D(
193
+ label="Output Model (OBJ Format)",
194
+ camera_position=(90.0, 90.0, 3.5),
195
+ interactive=False,
196
+ )
197
+
198
+ with gr.Row():
199
+ view_front = gr.Image(label="Front", interactive=True, show_label=True)
200
+ view_right = gr.Image(label="Right", interactive=True, show_label=True)
201
+ view_back = gr.Image(label="Back", interactive=True, show_label=True)
202
+ view_left = gr.Image(label="Left", interactive=True, show_label=True)
203
+
204
+ with gr.Accordion('Advanced options', open=False):
205
+ with gr.Row(equal_height=True):
206
+ run_mv_btn = gr.Button('Only Generate 2D', interactive=True)
207
+ run_3d_btn = gr.Button('Only Generate 3D', interactive=True)
208
+
209
+ with gr.Accordion('Advanced options (2D)', open=False):
210
+ with gr.Row():
211
+ crop_size = gr.Number(224, label='Crop size')
212
+ mvimg_model = gr.Dropdown(value="CRM", label="MV Image Model", choices=mvimg_model_config_list)
213
+
214
+ with gr.Row():
215
+ foreground_ratio = gr.Slider(
216
+ label="Foreground Ratio",
217
+ minimum=0.5,
218
+ maximum=1.0,
219
+ value=1.0,
220
+ step=0.05,
221
+ )
222
+
223
+ with gr.Row():
224
+ background_choice = gr.Dropdown(label="Background Choice", value="Auto Remove Background", choices=list(background_choice.keys()))
225
+ rmbg_type = gr.Dropdown(label="Background Removal Type", value="rembg", choices=['sam', "rembg"])
226
+ backgroud_color = gr.ColorPicker(label="Background Color", value="#FFFFFF", interactive=True)
227
+
228
+ with gr.Row():
229
+ mvimg_guidance_scale = gr.Number(value=3.5, minimum=3, maximum=10, label="2D Guidance Scale")
230
+ mvimg_steps = gr.Number(value=50, minimum=20, maximum=100, label="2D Sample Steps", precision=0)
231
+
232
+ with gr.Accordion('Advanced options (3D)', open=False):
233
+ with gr.Row():
234
+ guidance_scale = gr.Number(label="3D Guidance Scale", value=7.5, minimum=3.0, maximum=10.0)
235
+ steps = gr.Number(value=50, minimum=20, maximum=100, label="3D Sample Steps", precision=0)
236
+
237
+ with gr.Row():
238
+ scheduler = gr.Dropdown(label="Scheduler", value="DDIMScheduler", choices=list(scheluder_dict.keys()))
239
+ octree_depth = gr.Slider(label="Octree Depth", value=7, minimum=4, maximum=8, step=1)
240
+
241
+ gr.Markdown(_CITE_)
242
+
243
+ outputs = [output_model_obj]
244
+ rmbg = RMBG(device)
245
+
246
+ gen_mvimg = GenMVImage(device)
247
+ model = load_model(ckpt_path, config_path, device)
248
+
249
+ run_btn.click(fn=check_input_image, inputs=[image_input]
250
+ ).success(
251
+ fn=rmbg.run,
252
+ inputs=[rmbg_type, image_input, crop_size, foreground_ratio, background_choice, backgroud_color],
253
+ outputs=[image_input]
254
+ ).success(
255
+ fn=gen_mvimg.run,
256
+ inputs=[mvimg_model, text, image_input, crop_size, seed, mvimg_guidance_scale, mvimg_steps],
257
+ outputs=[view_front, view_right, view_back, view_left]
258
+ ).success(
259
+ fn=image2mesh,
260
+ inputs=[view_front, view_right, view_back, view_left, more, scheduler, guidance_scale, seed, octree_depth],
261
+ outputs=outputs,
262
+ api_name="generate_img2obj")
263
+ run_mv_btn.click(fn=gen_mvimg.run,
264
+ inputs=[mvimg_model, text, image_input, crop_size, seed, mvimg_guidance_scale, mvimg_steps],
265
+ outputs=[view_front, view_right, view_back, view_left]
266
+ )
267
+ run_3d_btn.click(fn=image2mesh,
268
+ inputs=[view_front, view_right, view_back, view_left, more, scheduler, guidance_scale, seed, octree_depth],
269
+ outputs=outputs,
270
+ api_name="generate_img2obj")
271
+
272
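+ # allowed_paths lets Gradio serve the generated .obj files from the cache directory; share=True also creates a public link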
+ demo.queue().launch(share=True, allowed_paths=[args.cached_dir])
apps/mv_models.py ADDED
@@ -0,0 +1,162 @@
1
+ import gradio as gr
2
+ import numpy as np
3
+ import torch
4
+ import PIL
5
+ from PIL import Image
6
+ import os
7
+ import sys
8
+ import rembg
9
+ import time
10
+ import json
11
+ import cv2
12
+ from datetime import datetime
13
+ from einops import repeat, rearrange
14
+ from omegaconf import OmegaConf
15
+ from typing import Dict, Optional, Tuple, List
16
+ from dataclasses import dataclass
17
+ from .utils import *
18
+ from huggingface_hub import hf_hub_download
19
+
20
+ parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
21
+
22
+ class GenMVImage(object):
23
+ def __init__(self, device):
24
+ self.seed = 1024
25
+ self.guidance_scale = 7.5
26
+ self.step = 50
27
+ self.pipelines = {}
28
+ self.device = device
29
+
30
+ def gen_image_from_crm(self, image):
31
+
32
+ from .third_party.CRM.pipelines import TwoStagePipeline
33
+ specs = json.load(open(f"{parent_dir}/apps/third_party/CRM/configs/specs_objaverse_total.json"))
34
+ stage1_config = OmegaConf.load(f"{parent_dir}/apps/third_party/CRM/configs/nf7_v3_SNR_rd_size_stroke.yaml").config
35
+ stage1_sampler_config = stage1_config.sampler
36
+ stage1_model_config = stage1_config.models
37
+ stage1_model_config.resume = hf_hub_download(repo_id="Zhengyi/CRM", filename="pixel-diffusion.pth", repo_type="model")
38
+ stage1_model_config.config = f"{parent_dir}/apps/third_party/CRM/" + stage1_model_config.config
39
+ if "crm" in self.pipelines.keys():
40
+ pipeline = self.pipelines['crm']
41
+ else:
42
+ self.pipelines['crm'] = TwoStagePipeline(
43
+ stage1_model_config,
44
+ stage1_sampler_config,
45
+ device=self.device,
46
+ dtype=torch.float16
47
+ )
48
+ pipeline = self.pipelines['crm']
49
+ pipeline.set_seed(self.seed)
50
+ rt_dict = pipeline(image, scale=self.guidance_scale, step=self.step)
51
+ mv_imgs = rt_dict["stage1_images"]
52
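+ # the CRM pipeline returns six views; indices 5, 3, 2, 0 are the (front, right, back, left) views consumed by the caller in apps/gradio_app.py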
+ return mv_imgs[5], mv_imgs[3], mv_imgs[2], mv_imgs[0]
53
+
54
+ def gen_image_from_mvdream(self, image, text):
55
+ from .third_party.mvdream_diffusers.pipeline_mvdream import MVDreamPipeline
56
+ if image is None:
57
+ if "mvdream" in self.pipelines.keys():
58
+ pipe_MVDream = self.pipelines['mvdream']
59
+ else:
60
+ self.pipelines['mvdream'] = MVDreamPipeline.from_pretrained(
61
+ "ashawkey/mvdream-sd2.1-diffusers", # remote weights
62
+ torch_dtype=torch.float16,
63
+ trust_remote_code=True,
64
+ )
65
+ self.pipelines['mvdream'] = self.pipelines['mvdream'].to(self.device)
66
+ pipe_MVDream = self.pipelines['mvdream']
67
+ mv_imgs = pipe_MVDream(
68
+ text,
69
+ negative_prompt="ugly, deformed, disfigured, poor details, bad anatomy",
70
+ num_inference_steps=self.step,
71
+ guidance_scale=self.guidance_scale,
72
+ generator = torch.Generator(self.device).manual_seed(self.seed)
73
+ )
74
+ else:
75
+ image = np.array(image)
76
+ image = image.astype(np.float32) / 255.0
77
+ image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
78
+ if "imagedream" in self.pipelines.keys():
79
+ pipe_imagedream = self.pipelines['imagedream']
80
+ else:
81
+ self.pipelines['imagedream'] = MVDreamPipeline.from_pretrained(
82
+ "ashawkey/imagedream-ipmv-diffusers", # remote weights
83
+ torch_dtype=torch.float16,
84
+ trust_remote_code=True,
85
+ )
86
+ self.pipelines['imagedream'] = self.pipelines['imagedream'].to(self.device)
87
+ pipe_imagedream = self.pipelines['imagedream']
88
+ mv_imgs = pipe_imagedream(
89
+ text,
90
+ image,
91
+ negative_prompt="ugly, deformed, disfigured, poor details, bad anatomy",
92
+ num_inference_steps=self.step,
93
+ guidance_scale=self.guidance_scale,
94
+ generator = torch.Generator(self.device).manual_seed(self.seed)
95
+ )
96
+ return mv_imgs[1], mv_imgs[2], mv_imgs[3], mv_imgs[0]
97
+
98
+ def gen_image_from_wonder3d(self, image, crop_size):
99
+ sys.path.append(f"{parent_dir}/apps/third_party/Wonder3D")
100
+ from .third_party.Wonder3D.mvdiffusion.pipelines.pipeline_mvdiffusion_image import MVDiffusionImagePipeline
101
+ weight_dtype = torch.float16
102
+ batch = prepare_data(image, crop_size)
103
+
104
+ if "wonder3d" in self.pipelines.keys():
105
+ pipeline = self.pipelines['wonder3d']
106
+ else:
107
+ self.pipelines['wonder3d'] = MVDiffusionImagePipeline.from_pretrained(
108
+ 'flamehaze1115/wonder3d-v1.0',
109
+ custom_pipeline=f'{parent_dir}/apps/third_party/Wonder3D/mvdiffusion/pipelines/pipeline_mvdiffusion_image.py',
110
+ torch_dtype=weight_dtype
111
+ )
112
+ self.pipelines['wonder3d'].unet.enable_xformers_memory_efficient_attention()
113
+ self.pipelines['wonder3d'].to(self.device)
114
+ self.pipelines['wonder3d'].set_progress_bar_config(disable=True)
115
+ pipeline = self.pipelines['wonder3d']
116
+
117
+ generator = torch.Generator(device=pipeline.unet.device).manual_seed(self.seed)
118
+ # repeat (2B, Nv, 3, H, W)
119
+ imgs_in = torch.cat([batch['imgs_in']] * 2, dim=0).to(weight_dtype)
120
+
121
+ # (2B, Nv, Nce)
122
+ camera_embeddings = torch.cat([batch['camera_embeddings']] * 2, dim=0).to(weight_dtype)
123
+
124
+ task_embeddings = torch.cat([batch['normal_task_embeddings'], batch['color_task_embeddings']], dim=0).to(weight_dtype)
125
+
126
+ camera_embeddings = torch.cat([camera_embeddings, task_embeddings], dim=-1).to(weight_dtype)
127
+
128
+ # (B*Nv, 3, H, W)
129
+ imgs_in = rearrange(imgs_in, "Nv C H W -> (Nv) C H W")
130
+ # (B*Nv, Nce)
131
+
132
+ out = pipeline(
133
+ imgs_in,
134
+ # camera_embeddings,
135
+ generator=generator,
136
+ guidance_scale=self.guidance_scale,
137
+ num_inference_steps=self.step,
138
+ output_type='pt',
139
+ num_images_per_prompt=1,
140
+ **{'eta': 1.0},
141
+ ).images
142
+
143
+ bsz = out.shape[0] // 2
144
+ normals_pred = out[:bsz]
145
+ images_pred = out[bsz:]
146
+
147
+ normals_pred = [save_image(normals_pred[i]) for i in range(bsz)]
148
+ images_pred = [save_image(images_pred[i]) for i in range(bsz)]
149
+
150
+ mv_imgs = images_pred
151
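+ # pick four of the six predicted color views to fill the (front, right, back, left) slots expected by the caller in apps/gradio_app.py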
+ return mv_imgs[0], mv_imgs[2], mv_imgs[4], mv_imgs[5]
152
+
153
+ def run(self, mvimg_model, text, image, crop_size, seed, guidance_scale, step):
154
+ self.seed = seed
155
+ self.guidance_scale = guidance_scale
156
+ self.step = step
157
+ if mvimg_model.upper() == "CRM":
158
+ return self.gen_image_from_crm(image)
159
+ elif mvimg_model.upper() == "IMAGEDREAM":
160
+ return self.gen_image_from_mvdream(image, text)
161
+ elif mvimg_model.upper() == "WONDER3D":
162
+ return self.gen_image_from_wonder3d(image, crop_size)
apps/third_party/CRM/.gitignore ADDED
@@ -0,0 +1,155 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ out/
apps/third_party/CRM/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 TSAIL group
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
apps/third_party/CRM/README.md ADDED
@@ -0,0 +1,85 @@
1
+ # Convolutional Reconstruction Model
2
+
3
+ Official implementation for *CRM: Single Image to 3D Textured Mesh with Convolutional Reconstruction Model*.
4
+
5
+ **CRM is a feed-forward model which can generate a 3D textured mesh in 10 seconds.**
6
+
7
+ ## [Project Page](https://ml.cs.tsinghua.edu.cn/~zhengyi/CRM/) | [Arxiv](https://arxiv.org/abs/2403.05034) | [HF-Demo](https://huggingface.co/spaces/Zhengyi/CRM) | [Weights](https://huggingface.co/Zhengyi/CRM)
8
+
9
+ https://github.com/thu-ml/CRM/assets/40787266/8b325bc0-aa74-4c26-92e8-a8f0c1079382
10
+
11
+ ## Try CRM 🍻
12
+ * Try CRM at [Huggingface Demo](https://huggingface.co/spaces/Zhengyi/CRM).
13
+ * Try CRM at [Replicate Demo](https://replicate.com/camenduru/crm). Thanks [@camenduru](https://github.com/camenduru)!
14
+
15
+ ## Install
16
+
17
+ ### Step 1 - Base
18
+
19
+ Install the packages one by one; we use **python 3.9**.
20
+
21
+ ```bash
22
+ pip install torch==1.13.0+cu117 torchvision==0.14.0+cu117 torchaudio==0.13.0 --extra-index-url https://download.pytorch.org/whl/cu117
23
+ pip install torch-scatter==2.1.1 -f https://data.pyg.org/whl/torch-1.13.1+cu117.html
24
+ pip install kaolin==0.14.0 -f https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-1.13.1_cu117.html
25
+ pip install -r requirements.txt
26
+ ```
27
+
28
+ Besides, you need to install xformers manually according to the official [doc](https://github.com/facebookresearch/xformers?tab=readme-ov-file#installing-xformers) (**not needed when installing via conda**), e.g.
29
+
30
+ ```bash
31
+ pip install ninja
32
+ pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
33
+ ```
34
+
35
+ ### Step 2 - Nvdiffrast
36
+
37
+ Install nvdiffrast according to the official [doc](https://nvlabs.github.io/nvdiffrast/#installation), e.g.
38
+
39
+ ```bash
40
+ pip install git+https://github.com/NVlabs/nvdiffrast
41
+ ```
42
+
43
+
44
+
45
+ ## Inference
46
+
47
+ We suggest using gradio for visualized inference.
48
+
49
+ ```
50
+ gradio app.py
51
+ ```
52
+
53
+ ![image](https://github.com/thu-ml/CRM/assets/40787266/4354d22a-a641-4531-8408-c761ead8b1a2)
54
+
55
+ For inference in command lines, simply run
56
+ ```bash
57
+ CUDA_VISIBLE_DEVICES="0" python run.py --inputdir "examples/kunkun.webp"
58
+ ```
59
+ It will output the preprocessed image, the generated 6-view images and CCMs, and a 3D model in obj format.
60
+
61
+ **Tips:** (1) If the result is unsatisfactory, please check whether the input image is correctly pre-processed onto a grey background. Otherwise the results will be unpredictable.
62
+ (2) Different from the [Huggingface Demo](https://huggingface.co/spaces/Zhengyi/CRM), this official implementation uses UV textures instead of vertex colors. It produces better textures than the online demo but takes longer owing to the UV texturing.
63
+
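+ For reference, a minimal (unofficial) pre-processing sketch that removes the background with rembg and composites the object onto the grey background mentioned in tip (1); the (127, 127, 127) value mirrors the demo's default `#7F7F7F` background color, and the input path is just the example image used above:
+
+ ```python
+ # Illustrative only; the official pre-processing lives in app.py / run.py.
+ import rembg
+ from PIL import Image
+
+ img = rembg.remove(Image.open("examples/kunkun.webp").convert("RGBA"))  # transparent background
+ grey = Image.new("RGBA", img.size, (127, 127, 127, 255))
+ Image.alpha_composite(grey, img).convert("RGB").save("kunkun_grey.png")
+ ```
+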
64
+ ## Todo List
65
+ - [x] Release inference code.
66
+ - [x] Release pretrained models.
67
+ - [ ] Optimize inference code to fit on low-memory GPUs.
68
+ - [ ] Upload training code.
69
+
70
+ ## Acknowledgement
71
+ - [ImageDream](https://github.com/bytedance/ImageDream)
72
+ - [nvdiffrast](https://github.com/NVlabs/nvdiffrast)
73
+ - [kiuikit](https://github.com/ashawkey/kiuikit)
74
+ - [GET3D](https://github.com/nv-tlabs/GET3D)
75
+
76
+ ## Citation
77
+
78
+ ```
79
+ @article{wang2024crm,
80
+ title={CRM: Single Image to 3D Textured Mesh with Convolutional Reconstruction Model},
81
+ author={Zhengyi Wang and Yikai Wang and Yifei Chen and Chendong Xiang and Shuo Chen and Dajiang Yu and Chongxuan Li and Hang Su and Jun Zhu},
82
+ journal={arXiv preprint arXiv:2403.05034},
83
+ year={2024}
84
+ }
85
+ ```
apps/third_party/CRM/__init__.py ADDED
File without changes
apps/third_party/CRM/app.py ADDED
@@ -0,0 +1,228 @@
1
+ # Not ready to use yet
2
+ import argparse
3
+ import numpy as np
4
+ import gradio as gr
5
+ from omegaconf import OmegaConf
6
+ import torch
7
+ from PIL import Image
8
+ import PIL
9
+ from pipelines import TwoStagePipeline
10
+ from huggingface_hub import hf_hub_download
11
+ import os
12
+ import rembg
13
+ from typing import Any
14
+ import json
15
+ import os
16
+ import json
17
+ import argparse
18
+
19
+ from model import CRM
20
+ from inference import generate3d
21
+
22
+ pipeline = None
23
+ rembg_session = rembg.new_session()
24
+
25
+
26
+ def expand_to_square(image, bg_color=(0, 0, 0, 0)):
27
+ # expand image to 1:1
28
+ width, height = image.size
29
+ if width == height:
30
+ return image
31
+ new_size = (max(width, height), max(width, height))
32
+ new_image = Image.new("RGBA", new_size, bg_color)
33
+ paste_position = ((new_size[0] - width) // 2, (new_size[1] - height) // 2)
34
+ new_image.paste(image, paste_position)
35
+ return new_image
36
+
37
+ def check_input_image(input_image):
38
+ if input_image is None:
39
+ raise gr.Error("No image uploaded!")
40
+
41
+
42
+ def remove_background(
43
+ image: PIL.Image.Image,
44
+ rembg_session = None,
45
+ force: bool = False,
46
+ **rembg_kwargs,
47
+ ) -> PIL.Image.Image:
48
+ do_remove = True
49
+ if image.mode == "RGBA" and image.getextrema()[3][0] < 255:
50
+ # if the image already carries a usable alpha channel, skip background removal
51
+ print("alhpa channl not enpty, skip remove background, using alpha channel as mask")
52
+ background = Image.new("RGBA", image.size, (0, 0, 0, 0))
53
+ image = Image.alpha_composite(background, image)
54
+ do_remove = False
55
+ do_remove = do_remove or force
56
+ if do_remove:
57
+ image = rembg.remove(image, session=rembg_session, **rembg_kwargs)
58
+ return image
59
+
60
+ def do_resize_content(original_image: Image, scale_rate):
61
+ # resize image content while retaining the original image size
62
+ if scale_rate != 1:
63
+ # Calculate the new size after rescaling
64
+ new_size = tuple(int(dim * scale_rate) for dim in original_image.size)
65
+ # Resize the image while maintaining the aspect ratio
66
+ resized_image = original_image.resize(new_size)
67
+ # Create a new image with the original size and a transparent background
68
+ padded_image = Image.new("RGBA", original_image.size, (0, 0, 0, 0))
69
+ paste_position = ((original_image.width - resized_image.width) // 2, (original_image.height - resized_image.height) // 2)
70
+ padded_image.paste(resized_image, paste_position)
71
+ return padded_image
72
+ else:
73
+ return original_image
74
+
75
+ def add_background(image, bg_color=(255, 255, 255)):
76
+ # given an RGBA image, alpha channel is used as mask to add background color
77
+ background = Image.new("RGBA", image.size, bg_color)
78
+ return Image.alpha_composite(background, image)
79
+
80
+
81
+ def preprocess_image(image, background_choice, foreground_ratio, backgroud_color):
82
+ """
83
+ input image is a pil image in RGBA, return RGB image
84
+ """
85
+ print(background_choice)
86
+ if background_choice == "Alpha as mask":
87
+ background = Image.new("RGBA", image.size, (0, 0, 0, 0))
88
+ image = Image.alpha_composite(background, image)
89
+ else:
90
+ image = remove_background(image, rembg_session, force=True)
91
+ image = do_resize_content(image, foreground_ratio)
92
+ image = expand_to_square(image)
93
+ image = add_background(image, backgroud_color)
94
+ return image.convert("RGB")
95
+
96
+
97
+ def gen_image(input_image, seed, scale, step):
98
+ global pipeline, model, args
99
+ pipeline.set_seed(seed)
100
+ rt_dict = pipeline(input_image, scale=scale, step=step)
101
+ stage1_images = rt_dict["stage1_images"]
102
+ stage2_images = rt_dict["stage2_images"]
103
+ np_imgs = np.concatenate(stage1_images, 1)
104
+ np_xyzs = np.concatenate(stage2_images, 1)
105
+
106
+ glb_path, obj_path = generate3d(model, np_imgs, np_xyzs, args.device)
107
+ return Image.fromarray(np_imgs), Image.fromarray(np_xyzs), glb_path, obj_path
108
+
109
+
110
+ parser = argparse.ArgumentParser()
111
+ parser.add_argument(
112
+ "--stage1_config",
113
+ type=str,
114
+ default="configs/nf7_v3_SNR_rd_size_stroke.yaml",
115
+ help="config for stage1",
116
+ )
117
+ parser.add_argument(
118
+ "--stage2_config",
119
+ type=str,
120
+ default="configs/stage2-v2-snr.yaml",
121
+ help="config for stage2",
122
+ )
123
+
124
+ parser.add_argument("--device", type=str, default="cuda")
125
+ args = parser.parse_args()
126
+
127
+ crm_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="CRM.pth")
128
+ specs = json.load(open("configs/specs_objaverse_total.json"))
129
+ model = CRM(specs).to(args.device)
130
+ model.load_state_dict(torch.load(crm_path, map_location = args.device), strict=False)
131
+
132
+ stage1_config = OmegaConf.load(args.stage1_config).config
133
+ stage2_config = OmegaConf.load(args.stage2_config).config
134
+ stage2_sampler_config = stage2_config.sampler
135
+ stage1_sampler_config = stage1_config.sampler
136
+
137
+ stage1_model_config = stage1_config.models
138
+ stage2_model_config = stage2_config.models
139
+
140
+ xyz_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="ccm-diffusion.pth")
141
+ pixel_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="pixel-diffusion.pth")
142
+ stage1_model_config.resume = pixel_path
143
+ stage2_model_config.resume = xyz_path
144
+
145
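+ # stage 1 (pixel-diffusion.pth) generates the six RGB views; stage 2 (ccm-diffusion.pth) generates the corresponding coordinate maps (CCMs)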
+ pipeline = TwoStagePipeline(
146
+ stage1_model_config,
147
+ stage2_model_config,
148
+ stage1_sampler_config,
149
+ stage2_sampler_config,
150
+ device=args.device,
151
+ dtype=torch.float16
152
+ )
153
+
154
+ with gr.Blocks() as demo:
155
+ gr.Markdown("# CRM: Single Image to 3D Textured Mesh with Convolutional Reconstruction Model")
156
+ with gr.Row():
157
+ with gr.Column():
158
+ with gr.Row():
159
+ image_input = gr.Image(
160
+ label="Image input",
161
+ image_mode="RGBA",
162
+ sources="upload",
163
+ type="pil",
164
+ )
165
+ processed_image = gr.Image(label="Processed Image", interactive=False, type="pil", image_mode="RGB")
166
+ with gr.Row():
167
+ with gr.Column():
168
+ with gr.Row():
169
+ background_choice = gr.Radio([
170
+ "Alpha as mask",
171
+ "Auto Remove background"
172
+ ], value="Auto Remove background",
173
+ label="backgroud choice")
174
+ # do_remove_background = gr.Checkbox(label=, value=True)
175
+ # force_remove = gr.Checkbox(label=, value=False)
176
+ back_groud_color = gr.ColorPicker(label="Background Color", value="#7F7F7F", interactive=False)
177
+ foreground_ratio = gr.Slider(
178
+ label="Foreground Ratio",
179
+ minimum=0.5,
180
+ maximum=1.0,
181
+ value=1.0,
182
+ step=0.05,
183
+ )
184
+
185
+ with gr.Column():
186
+ seed = gr.Number(value=1234, label="seed", precision=0)
187
+ guidance_scale = gr.Number(value=5.5, minimum=3, maximum=10, label="guidance_scale")
188
+ step = gr.Number(value=50, minimum=30, maximum=100, label="sample steps", precision=0)
189
+ text_button = gr.Button("Generate 3D shape")
190
+ gr.Examples(
191
+ examples=[os.path.join("examples", i) for i in os.listdir("examples")],
192
+ inputs=[image_input],
193
+ )
194
+ with gr.Column():
195
+ image_output = gr.Image(interactive=False, label="Output RGB image")
196
+ xyz_ouput = gr.Image(interactive=False, label="Output CCM image")
197
+
198
+ output_model = gr.Model3D(
199
+ label="Output GLB",
200
+ interactive=False,
201
+ )
202
+ gr.Markdown("Note: The GLB model shown here has a darker lighting and enlarged UV seams. Download for correct results.")
203
+ output_obj = gr.File(interactive=False, label="Output OBJ")
204
+
205
+ inputs = [
206
+ processed_image,
207
+ seed,
208
+ guidance_scale,
209
+ step,
210
+ ]
211
+ outputs = [
212
+ image_output,
213
+ xyz_ouput,
214
+ output_model,
215
+ output_obj,
216
+ ]
217
+
218
+
219
+ text_button.click(fn=check_input_image, inputs=[image_input]).success(
220
+ fn=preprocess_image,
221
+ inputs=[image_input, background_choice, foreground_ratio, back_groud_color],
222
+ outputs=[processed_image],
223
+ ).success(
224
+ fn=gen_image,
225
+ inputs=inputs,
226
+ outputs=outputs,
227
+ )
228
+ demo.queue().launch()