Update README.md
README.md (changed)
@@ -5,8 +5,12 @@
 
 
 
+<div align=center>
+
+----
+
 # 1. Introduction 📚
-**TL;DR: ChatRex is
+**TL;DR: ChatRex is an MLLM skilled in perception that can respond to questions while simultaneously grounding its answers to the referenced objects.**
 
 ChatRex is a Multimodal Large Language Model (MLLM) designed to seamlessly integrate fine-grained object perception and robust language understanding. By adopting a decoupled architecture with a retrieval-based approach for object detection and leveraging high-resolution visual inputs, ChatRex addresses key challenges in perception tasks. It is powered by the Rexverse-2M dataset with diverse image-region-text annotations. ChatRex can be applied to various scenarios requiring fine-grained perception, such as object detection, grounded conversation, grounded image captioning and region
 understanding.
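As background for the "retrieval-based approach" mentioned above: rather than regressing box coordinates, the language model refers to entries in a fixed pool of proposal boxes produced by UPN. The snippet below is only a toy illustration of that indexing idea; the variable names, the `<objN>` tag format, and the data structures are assumptions for illustration, not ChatRex's actual interfaces.

```python
# Toy sketch of retrieval-based grounding (illustrative only, not ChatRex's real API):
# the model answers with proposal indices, and boxes are looked up, not regressed.
import re

proposals = [             # candidate boxes from a proposal network, (x1, y1, x2, y2)
    (12, 34, 120, 200),   # index 0
    (300, 40, 420, 260),  # index 1
    (50, 220, 180, 330),  # index 2
]
answer = "The dog <obj1> is chasing the ball <obj2>."

# Map every referenced index back to its box to "ground" the answer.
grounded = [(int(i), proposals[int(i)]) for i in re.findall(r"<obj(\d+)>", answer)]
print(grounded)  # [(1, (300, 40, 420, 260)), (2, (50, 220, 180, 330))]
```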
@@ -29,7 +33,7 @@ pip install -v -e .
 
 ## 2.1 Download Pre-trained Models
 We provide model checkpoints for both the ***Universal Proposal Network (UPN)*** and the ***ChatRex model***. You can download the pre-trained models from the following links:
-- [UPN Checkpoint](https://
+- [UPN Checkpoint](https://github.com/IDEA-Research/ChatRex/releases/download/upn-large/upn_large.pth)
 - [ChatRex-7B Checkpoint](https://huggingface.co/IDEA-Research/ChatRex-7B)
 
 Or you can use the following commands to download the pre-trained models:
@@ -37,7 +41,7 @@ Or you can use the following commands to download the pre-trained models:
 mkdir checkpoints
 mkdir checkpoints/upn
 # download UPN checkpoint
-wget -O checkpoints/upn/upn_large.pth https://
+wget -O checkpoints/upn/upn_large.pth https://github.com/IDEA-Research/ChatRex/releases/download/upn-large/upn_large.pth
 # download the ChatRex checkpoint from Hugging Face (IDEA-Research/ChatRex-7B)
 git lfs install
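As an alternative to `git lfs` for the ChatRex-7B weights, the Hugging Face Hub client can download the same repository. This is a minimal sketch, not part of the README's own instructions; the `checkpoints/ChatRex-7B` target directory is an assumed layout.

```python
# Optional alternative to the git-lfs clone above: fetch ChatRex-7B with huggingface_hub.
# The local_dir "checkpoints/ChatRex-7B" is an assumed convention, not required by the repo.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="IDEA-Research/ChatRex-7B",
    local_dir="checkpoints/ChatRex-7B",
)
```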
@@ -174,7 +178,7 @@ from chatrex.upn import UPNWrapper
 if __name__ == "__main__":
     # load the processor
     processor = AutoProcessor.from_pretrained(
-        "
+        "IDEA-Research/ChatRex-7B",
         trust_remote_code=True,
         device_map="cuda",
     )
@@ -182,7 +186,7 @@ if __name__ == "__main__":
     print(f"loading chatrex model...")
     # load chatrex model
     model = AutoModelForCausalLM.from_pretrained(
-        "
+        "IDEA-Research/ChatRex-7B",
         trust_remote_code=True,
         use_safetensors=True,
     ).to("cuda")
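The snippets in this README load ChatRex-7B in full precision. As a hedged variation, assuming the model's remote code tolerates half precision, the standard `torch_dtype` argument to `from_pretrained` can roughly halve GPU memory:

```python
# Sketch of a lower-memory load; whether the ChatRex remote code fully supports
# fp16 inference is an assumption, so fall back to the default dtype if outputs degrade.
import torch
from transformers import AutoModelForCausalLM, AutoProcessor

processor = AutoProcessor.from_pretrained(
    "IDEA-Research/ChatRex-7B",
    trust_remote_code=True,
    device_map="cuda",
)
model = AutoModelForCausalLM.from_pretrained(
    "IDEA-Research/ChatRex-7B",
    trust_remote_code=True,
    use_safetensors=True,
    torch_dtype=torch.float16,  # assumption: half precision works for this checkpoint
).to("cuda")
```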
@@ -292,7 +296,7 @@ from chatrex.upn import UPNWrapper
 if __name__ == "__main__":
     # load the processor
     processor = AutoProcessor.from_pretrained(
-        "
+        "IDEA-Research/ChatRex-7B",
         trust_remote_code=True,
         device_map="cuda",
     )
@@ -300,7 +304,7 @@ if __name__ == "__main__":
     print(f"loading chatrex model...")
     # load chatrex model
     model = AutoModelForCausalLM.from_pretrained(
-        "
+        "IDEA-Research/ChatRex-7B",
         trust_remote_code=True,
         use_safetensors=True,
     ).to("cuda")
@@ -386,7 +390,7 @@ from chatrex.upn import UPNWrapper
 if __name__ == "__main__":
     # load the processor
     processor = AutoProcessor.from_pretrained(
-        "
+        "IDEA-Research/ChatRex-7B",
         trust_remote_code=True,
         device_map="cuda",
     )
@@ -394,7 +398,7 @@ if __name__ == "__main__":
     print(f"loading chatrex model...")
     # load chatrex model
     model = AutoModelForCausalLM.from_pretrained(
-        "
+        "IDEA-Research/ChatRex-7B",
         trust_remote_code=True,
         use_safetensors=True,
     ).to("cuda")
@@ -490,7 +494,7 @@ from chatrex.upn import UPNWrapper
 if __name__ == "__main__":
     # load the processor
     processor = AutoProcessor.from_pretrained(
-        "
+        "IDEA-Research/ChatRex-7B",
         trust_remote_code=True,
         device_map="cuda",
     )
@@ -498,7 +502,7 @@ if __name__ == "__main__":
     print(f"loading chatrex model...")
     # load chatrex model
     model = AutoModelForCausalLM.from_pretrained(
-        "
+        "IDEA-Research/ChatRex-7B",
         trust_remote_code=True,
         use_safetensors=True,
     ).to("cuda")
@@ -572,35 +576,6 @@ The visualization of the output is like:
 
 ----
 
-# 4. Gradio Demos 🎨
-## 4.1 Gradio Demo for UPN
-We provide a gradio demo for UPN to visualize the object proposals generated by UPN. You can run the following command to start the gradio demo:
-```bash
-python gradio_demos/upn_demo.py
-# if there is permission error, please run the following command
-mkdir tmp
-TMPDIR='/tmp' python gradio_demos/upn_demo.py
-```
-
-<div align=center>
-<img src="assets/upn_gradio.jpg" width=600 >
-</div>
-
-
-## 4.2 Gradio Demo for ChatRex
-We also provide a gradio demo for ChatRex.
-```bash
-python gradio_demos/chatrex_demo.py
-# if there is permission error, please run the following command
-mkdir tmp
-TMPDIR='/tmp' python gradio_demos/upn_demo.py
-```
-
-<div align=center>
-<img src="assets/chatrex_gradio.jpg" width=600 >
-</div>
-
-
 
 # 5. LICENSE
 