qqc1989 commited on
Commit
03a2d97
·
verified ·
1 Parent(s): 451c293

Upload 21 files

Browse files
.gitattributes CHANGED
@@ -35,3 +35,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  *.axmodel filter=lfs diff=lfs merge=lfs -text
37
 
 
 
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  *.axmodel filter=lfs diff=lfs merge=lfs -text
37
 
38
+ gradio_01.png filter=lfs diff=lfs merge=lfs -text
39
+ install/lib/aarch64/libclip.so filter=lfs diff=lfs merge=lfs -text
40
+ pyclip/gradio_example.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,152 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - en
5
+ - zh
6
+ base_model:
7
+ - OFA-Sys/chinese-clip-vit-large-patch14-336px
8
+ - AXERA-TECH/cnclip
9
+ tags:
10
+ - CLIP
11
+ - CN_CLIP
12
+ ---
13
+
14
+ # LibCLIP
15
+
16
+ This SDK enables efficient text-to-image retrieval using CLIP (Contrastive Language–Image Pretraining). It is optimized for Axera's NPU-based SoC platforms, including AX650, AX650C, AX8850, and AX650A, as well as for Axera's dedicated AI accelerator.
17
+
18
+ With this SDK, you can:
19
+
20
+ - Perform semantic image search by providing natural language queries.
21
+ - Utilize CLIP to embed text queries and compare them against a pre-computed set of image embeddings.
22
+ - Run all inference processes directly on Axera NPUs for low-latency, high-throughput performance at the edge.
23
+
24
+ This solution is well-suited for smart cameras, content filtering, AI-powered user interfaces, and other edge AI scenarios where natural language-based image retrieval is required.
25
+
26
+ ## Reference links
27
+
28
+ If you are interested in model conversion, you can export the axmodel yourself with the help of the following resources:
29
+
30
+ - [The open-source libclip GitHub repository](https://github.com/AXERA-TECH/libclip.axera)
31
+
32
+ - [Pulsar2 Link, How to Convert ONNX to axmodel](https://pulsar2-docs.readthedocs.io/en/latest/pulsar2/introduction.html)
33
+
34
+ - https://huggingface.co/AXERA-TECH/cnclip
35
+
36
+
37
+ ## Supported Platforms
38
+
39
+ - AX650
40
+ - [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
41
+ - [M.2 Accelerator card](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html)
42
+
43
+ ## Performance
44
+
45
+ | Model | Input Shape | Latency (ms) | CMM Usage (MB) |
46
+ | ----------------------------------------- | ----------------- | ------------ | -------------- |
47
+ | cnclip_vit_l14_336px_vision_u16u8.axmodel | 1 x 3 x 336 x 336 | 88.475 ms | 304 MB |
48
+ | cnclip_vit_l14_336px_text_u16.axmodel | 1 x 52 | 4.576 ms | 122 MB |
49
+
50
+ ## How to use
51
+
52
+ Download all files from this repository to the device. After downloading, the directory layout looks like this:
53
+
54
+ ```
55
+ (base) axera@raspberrypi:~/samples/AXERA-TECH/libclip.axera $ tree -L 2
56
+ .
57
+ ├── cnclip
58
+ │   ├── cnclip_vit_l14_336px_text_u16.axmodel
59
+ │   ├── cnclip_vit_l14_336px_vision_u16u8.axmodel
60
+ │   └── cn_vocab.txt
61
+ ├── coco_1000.tar
62
+ ├── config.json
63
+ ├── gradio_01.png
64
+ ├── install
65
+ │   ├── examples
66
+ │   ├── include
67
+ │   └── lib
68
+ ├── pyclip
69
+ │   ├── example.py
70
+ │   ├── gradio_example.png
71
+ │   ├── gradio_example.py
72
+ │   ├── libclip.so
73
+ │   ├── __pycache__
74
+ │   ├── pyclip.py
75
+ │   └── requirements.txt
76
+ └── README.md
77
+
78
+ 8 directories, 13 files
79
+ ```
80
+
81
+ ### Python environment requirements
82
+
83
+ #### pyaxengine
84
+
85
+ https://github.com/AXERA-TECH/pyaxengine
86
+
87
+ ```
88
+ wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc1/axengine-0.1.3-py3-none-any.whl
89
+ pip install axengine-0.1.3-py3-none-any.whl
90
+ ```
91
+
92
+ #### Other dependencies
93
+
94
+ ```
95
+ pip install -r pyclip/requirements.txt
96
+ ```
97
+
98
+ #### Inference with AX650 Host, such as M4N-Dock(爱芯派Pro)
99
+
100
+ TODO
101
+
102
+ #### Inference with M.2 Accelerator card
103
+ [What is an M.2 Accelerator card?](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html) This demo runs on a Raspberry Pi 5.
104
+
105
+ ```
106
+ (py312) axera@raspberrypi:~/samples/AXERA-TECH/libclip.axera $ export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libstdc++.so.6
107
+ (py312) axera@raspberrypi:~/samples/AXERA-TECH/libclip.axera $ cp install/lib/aarch64/libclip.so pyclip/
108
+ (py312) axera@raspberrypi:~/samples/AXERA-TECH/libclip.axera $ tar xf coco_1000.tar
109
+ (py312) axera@raspberrypi:~/samples/AXERA-TECH/libclip.axera $ python pyclip/gradio_example.py --ienc cnclip/cnclip_vit_l14_336px_vision_u16u8.axmodel --tenc cnclip/cnclip_vit_l14_336px_text_u16.axmodel --vocab cnclip/cn_vocab.txt --isCN 1 --db_path clip_feat_db_coco --image_folder coco_1000/
110
+ Trying to load: /home/axera/samples/AXERA-TECH/libclip.axera/pyclip/aarch64/libclip.so
111
+
112
+ ❌ Failed to load: /home/axera/samples/AXERA-TECH/libclip.axera/pyclip/aarch64/libclip.so
113
+ /home/axera/samples/AXERA-TECH/libclip.axera/pyclip/aarch64/libclip.so: cannot open shared object file: No such file or directory
114
+ 🔍 File not found. Please verify that libclip.so exists and the path is correct.
115
+
116
+ Trying to load: /home/axera/samples/AXERA-TECH/libclip.axera/pyclip/libclip.so
117
+ open libax_sys.so failed
118
+ open libax_engine.so failed
119
+ ✅ Successfully loaded: /home/axera/samples/AXERA-TECH/libclip.axera/pyclip/libclip.so
120
+ 可用设备: {'host': {'available': True, 'version': '', 'mem_info': {'remain': 0, 'total': 0}}, 'devices': {'host_version': 'V3.6.2_20250603154858', 'dev_version': 'V3.6.2_20250603154858', 'count': 1, 'devices_info': [{'temp': 37, 'cpu_usage': 1, 'npu_usage': 0, 'mem_info': {'remain': 7022, 'total': 7040}}]}}
121
+ [I][ run][ 31]: AXCLWorker start with devid 0
122
+
123
+ input size: 1
124
+ name: image [unknown] [unknown]
125
+ 1 x 3 x 336 x 336
126
+
127
+
128
+ output size: 1
129
+ name: unnorm_image_features
130
+ 1 x 768
131
+
132
+ [I][ load_image_encoder][ 50]: nchw 336 336
133
+ [I][ load_image_encoder][ 60]: image feature len 768
134
+
135
+ input size: 1
136
+ name: text [unknown] [unknown]
137
+ 1 x 52
138
+
139
+
140
+ output size: 1
141
+ name: unnorm_text_features
142
+ 1 x 768
143
+
144
+ [I][ load_text_encoder][ 44]: text feature len 768
145
+ [I][ load_tokenizer][ 60]: text token len 52
146
+ 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [01:40<00:00, 9.93it/s]
147
+ * Running on local URL: http://0.0.0.0:7860
148
+ ```
149
+
150
+ If the IP address of your Raspberry Pi 5 is 192.168.1.100, open `http://192.168.1.100:7860` in a web browser to use the web app.
151
+
152
+ ![](gradio_01.png)
cnclip/cn_vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
cnclip/cnclip_vit_l14_336px_text_u16.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f33786ab988ca22dbc883c9fa6ebfa842a425445e4dac945c30069a6d9d5cf8
3
+ size 127341129
cnclip/cnclip_vit_l14_336px_vision_u16u8.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6278e99001c198082b59219b163f7136d3049bca223be9176678ac6031348cde
3
+ size 334708454
coco_1000.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3e18a198658270e19ced079de7a404e3478e69c2ef94fb47c87ddf056e6a541
3
+ size 163112960
config.json ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "architectures": [
4
+ "InternVLChatModel"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
8
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
9
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
10
+ },
11
+ "downsample_ratio": 0.5,
12
+ "dynamic_image_size": true,
13
+ "force_image_size": 448,
14
+ "llm_config": {
15
+ "_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
16
+ "add_cross_attention": false,
17
+ "architectures": [
18
+ "Qwen2ForCausalLM"
19
+ ],
20
+ "_attn_implementation": "flash_attention_2",
21
+ "attention_dropout": 0.0,
22
+ "bad_words_ids": null,
23
+ "begin_suppress_tokens": null,
24
+ "bos_token_id": 151643,
25
+ "chunk_size_feed_forward": 0,
26
+ "cross_attention_hidden_size": null,
27
+ "decoder_start_token_id": null,
28
+ "diversity_penalty": 0.0,
29
+ "do_sample": false,
30
+ "early_stopping": false,
31
+ "encoder_no_repeat_ngram_size": 0,
32
+ "eos_token_id": 151645,
33
+ "exponential_decay_length_penalty": null,
34
+ "finetuning_task": null,
35
+ "forced_bos_token_id": null,
36
+ "forced_eos_token_id": null,
37
+ "hidden_act": "silu",
38
+ "hidden_size": 896,
39
+ "id2label": {
40
+ "0": "LABEL_0",
41
+ "1": "LABEL_1"
42
+ },
43
+ "initializer_range": 0.02,
44
+ "intermediate_size": 4864,
45
+ "is_decoder": false,
46
+ "is_encoder_decoder": false,
47
+ "label2id": {
48
+ "LABEL_0": 0,
49
+ "LABEL_1": 1
50
+ },
51
+ "length_penalty": 1.0,
52
+ "max_length": 20,
53
+ "max_position_embeddings": 32768,
54
+ "max_window_layers": 21,
55
+ "min_length": 0,
56
+ "model_type": "qwen2",
57
+ "no_repeat_ngram_size": 0,
58
+ "num_attention_heads": 14,
59
+ "num_beam_groups": 1,
60
+ "num_beams": 1,
61
+ "num_hidden_layers": 24,
62
+ "num_key_value_heads": 2,
63
+ "num_return_sequences": 1,
64
+ "output_attentions": false,
65
+ "output_hidden_states": false,
66
+ "output_scores": false,
67
+ "pad_token_id": null,
68
+ "prefix": null,
69
+ "problem_type": null,
70
+ "pruned_heads": {},
71
+ "remove_invalid_values": false,
72
+ "repetition_penalty": 1.0,
73
+ "return_dict": true,
74
+ "return_dict_in_generate": false,
75
+ "rms_norm_eps": 1e-06,
76
+ "rope_theta": 1000000.0,
77
+ "sep_token_id": null,
78
+ "sliding_window": 32768,
79
+ "suppress_tokens": null,
80
+ "task_specific_params": null,
81
+ "temperature": 1.0,
82
+ "tf_legacy_loss": false,
83
+ "tie_encoder_decoder": false,
84
+ "tie_word_embeddings": false,
85
+ "tokenizer_class": null,
86
+ "top_k": 50,
87
+ "top_p": 1.0,
88
+ "torch_dtype": "bfloat16",
89
+ "torchscript": false,
90
+ "transformers_version": "4.37.2",
91
+ "typical_p": 1.0,
92
+ "use_bfloat16": true,
93
+ "use_cache": true,
94
+ "use_sliding_window": false,
95
+ "vocab_size": 151674
96
+ },
97
+ "max_dynamic_patch": 12,
98
+ "min_dynamic_patch": 1,
99
+ "model_type": "internvl_chat",
100
+ "ps_version": "v2",
101
+ "select_layer": -1,
102
+ "template": "internvl2_5",
103
+ "torch_dtype": "bfloat16",
104
+ "use_backbone_lora": 0,
105
+ "use_llm_lora": 0,
106
+ "use_thumbnail": true,
107
+ "vision_config": {
108
+ "architectures": [
109
+ "InternVisionModel"
110
+ ],
111
+ "attention_dropout": 0.0,
112
+ "drop_path_rate": 0.0,
113
+ "dropout": 0.0,
114
+ "hidden_act": "gelu",
115
+ "hidden_size": 1024,
116
+ "image_size": 448,
117
+ "initializer_factor": 1.0,
118
+ "initializer_range": 0.02,
119
+ "intermediate_size": 4096,
120
+ "layer_norm_eps": 1e-06,
121
+ "model_type": "intern_vit_6b",
122
+ "norm_type": "layer_norm",
123
+ "num_attention_heads": 16,
124
+ "num_channels": 3,
125
+ "num_hidden_layers": 24,
126
+ "output_attentions": false,
127
+ "output_hidden_states": false,
128
+ "patch_size": 14,
129
+ "qk_normalization": false,
130
+ "qkv_bias": true,
131
+ "return_dict": true,
132
+ "torch_dtype": "bfloat16",
133
+ "transformers_version": "4.37.2",
134
+ "use_bfloat16": true,
135
+ "use_flash_attn": true
136
+ }
137
+ }
gradio_01.png ADDED

Git LFS Details

  • SHA256: 12c161b2eebd187c1d0ddb12a487ad66a376cb460533a1ffd268434af3617c54
  • Pointer size: 131 Bytes
  • Size of remote file: 797 kB
install/examples/cmdline.hpp ADDED
@@ -0,0 +1,732 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ Copyright (c) 2009, Hideyuki Tanaka
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+ * Redistributions of source code must retain the above copyright
8
+ notice, this list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright
10
+ notice, this list of conditions and the following disclaimer in the
11
+ documentation and/or other materials provided with the distribution.
12
+ * Neither the name of the <organization> nor the
13
+ names of its contributors may be used to endorse or promote products
14
+ derived from this software without specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY <copyright holder> ''AS IS'' AND ANY
17
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
20
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ */
27
+
28
+ #pragma once
29
+
30
+ #include <cxxabi.h>
31
+
32
+ #include <algorithm>
33
+ #include <cstdlib>
34
+ #include <cstring>
35
+ #include <iostream>
36
+ #include <map>
37
+ #include <sstream>
38
+ #include <stdexcept>
39
+ #include <string>
40
+ #include <typeinfo>
41
+ #include <vector>
42
+
43
+ namespace cmdline {
44
+
45
+ namespace detail {
46
+
47
// Generic conversion between two stream-compatible types.
//
// The source value is written to a string stream and read back as Target.
// The conversion succeeds only if both stream operations succeed AND the
// whole textual representation was consumed; otherwise std::bad_cast is
// thrown.
template <typename Target, typename Source, bool Same>
class lexical_cast_t {
 public:
  static Target cast(const Source &arg) {
    Target converted;
    std::stringstream buffer;

    // Serialize the source value.
    if (!(buffer << arg)) {
      throw std::bad_cast();
    }
    // Parse it back as the target type.
    if (!(buffer >> converted)) {
      throw std::bad_cast();
    }
    // Trailing, unconsumed characters mean the text was not a pure Target.
    if (!buffer.eof()) {
      throw std::bad_cast();
    }

    return converted;
  }
};
58
+
59
+ template <typename Target, typename Source>
60
+ class lexical_cast_t<Target, Source, true> {
61
+ public:
62
+ static Target cast(const Source &arg) { return arg; }
63
+ };
64
+
65
+ template <typename Source>
66
+ class lexical_cast_t<std::string, Source, false> {
67
+ public:
68
+ static std::string cast(const Source &arg) {
69
+ std::ostringstream ss;
70
+ ss << arg;
71
+ return ss.str();
72
+ }
73
+ };
74
+
75
+ template <typename Target>
76
+ class lexical_cast_t<Target, std::string, false> {
77
+ public:
78
+ static Target cast(const std::string &arg) {
79
+ Target ret;
80
+ std::istringstream ss(arg);
81
+ if (!(ss >> ret && ss.eof())) throw std::bad_cast();
82
+ return ret;
83
+ }
84
+ };
85
+
86
+ template <typename T1, typename T2>
87
+ struct is_same {
88
+ static const bool value = false;
89
+ };
90
+
91
+ template <typename T>
92
+ struct is_same<T, T> {
93
+ static const bool value = true;
94
+ };
95
+
96
+ template <typename Target, typename Source>
97
+ Target lexical_cast(const Source &arg) {
98
+ return lexical_cast_t<Target, Source,
99
+ detail::is_same<Target, Source>::value>::cast(arg);
100
+ }
101
+
102
// Returns the human-readable form of a mangled C++ type name.
//
// abi::__cxa_demangle reports failure through a non-zero status and a NULL
// result (e.g. when `name` is not a valid mangled name). Constructing a
// std::string from NULL is undefined behaviour, so on failure the input is
// returned unchanged instead.
static inline std::string demangle(const std::string &name) {
  int status = 0;
  char *p = abi::__cxa_demangle(name.c_str(), 0, 0, &status);
  if (status != 0 || p == NULL) {
    free(p);  // free(NULL) is a no-op; covers any non-NULL failure result
    return name;
  }
  std::string ret(p);
  free(p);  // the buffer returned by __cxa_demangle is malloc()-allocated
  return ret;
}
109
+
110
+ template <class T>
111
+ std::string readable_typename() {
112
+ return demangle(typeid(T).name());
113
+ }
114
+
115
+ template <class T>
116
+ std::string default_value(T def) {
117
+ return detail::lexical_cast<std::string>(def);
118
+ }
119
+
120
+ template <>
121
+ inline std::string readable_typename<std::string>() {
122
+ return "string";
123
+ }
124
+
125
+ } // namespace detail
126
+
127
+ //-----
128
+
129
// Exception type used by this header to report command-line errors.
//
// Stores a copy of the message passed to the constructor and exposes it
// through the standard std::exception::what() interface.
class cmdline_error : public std::exception {
 public:
  cmdline_error(const std::string &msg) : text_(msg) {}

  ~cmdline_error() throw() {}

  // Returns the stored message; the pointer stays valid for the lifetime of
  // this exception object.
  const char *what() const throw() { return text_.c_str(); }

 private:
  std::string text_;
};
138
+
139
+ template <class T>
140
+ struct default_reader {
141
+ T operator()(const std::string &str) { return detail::lexical_cast<T>(str); }
142
+ };
143
+
144
+ template <class T>
145
+ struct range_reader {
146
+ range_reader(const T &low, const T &high) : low(low), high(high) {}
147
+ T operator()(const std::string &s) const {
148
+ T ret = default_reader<T>()(s);
149
+ if (!(ret >= low && ret <= high))
150
+ throw cmdline::cmdline_error("range_error");
151
+ return ret;
152
+ }
153
+
154
+ private:
155
+ T low, high;
156
+ };
157
+
158
+ template <class T>
159
+ range_reader<T> range(const T &low, const T &high) {
160
+ return range_reader<T>(low, high);
161
+ }
162
+
163
+ template <class T>
164
+ struct oneof_reader {
165
+ T operator()(const std::string &s) {
166
+ T ret = default_reader<T>()(s);
167
+ if (std::find(alt.begin(), alt.end(), ret) == alt.end())
168
+ throw cmdline_error("");
169
+ return ret;
170
+ }
171
+ void add(const T &v) { alt.push_back(v); }
172
+
173
+ private:
174
+ std::vector<T> alt;
175
+ };
176
+
177
+ template <class T>
178
+ oneof_reader<T> oneof(T a1) {
179
+ oneof_reader<T> ret;
180
+ ret.add(a1);
181
+ return ret;
182
+ }
183
+
184
+ template <class T>
185
+ oneof_reader<T> oneof(T a1, T a2) {
186
+ oneof_reader<T> ret;
187
+ ret.add(a1);
188
+ ret.add(a2);
189
+ return ret;
190
+ }
191
+
192
+ template <class T>
193
+ oneof_reader<T> oneof(T a1, T a2, T a3) {
194
+ oneof_reader<T> ret;
195
+ ret.add(a1);
196
+ ret.add(a2);
197
+ ret.add(a3);
198
+ return ret;
199
+ }
200
+
201
+ template <class T>
202
+ oneof_reader<T> oneof(T a1, T a2, T a3, T a4) {
203
+ oneof_reader<T> ret;
204
+ ret.add(a1);
205
+ ret.add(a2);
206
+ ret.add(a3);
207
+ ret.add(a4);
208
+ return ret;
209
+ }
210
+
211
+ template <class T>
212
+ oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5) {
213
+ oneof_reader<T> ret;
214
+ ret.add(a1);
215
+ ret.add(a2);
216
+ ret.add(a3);
217
+ ret.add(a4);
218
+ ret.add(a5);
219
+ return ret;
220
+ }
221
+
222
+ template <class T>
223
+ oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6) {
224
+ oneof_reader<T> ret;
225
+ ret.add(a1);
226
+ ret.add(a2);
227
+ ret.add(a3);
228
+ ret.add(a4);
229
+ ret.add(a5);
230
+ ret.add(a6);
231
+ return ret;
232
+ }
233
+
234
+ template <class T>
235
+ oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7) {
236
+ oneof_reader<T> ret;
237
+ ret.add(a1);
238
+ ret.add(a2);
239
+ ret.add(a3);
240
+ ret.add(a4);
241
+ ret.add(a5);
242
+ ret.add(a6);
243
+ ret.add(a7);
244
+ return ret;
245
+ }
246
+
247
+ template <class T>
248
+ oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8) {
249
+ oneof_reader<T> ret;
250
+ ret.add(a1);
251
+ ret.add(a2);
252
+ ret.add(a3);
253
+ ret.add(a4);
254
+ ret.add(a5);
255
+ ret.add(a6);
256
+ ret.add(a7);
257
+ ret.add(a8);
258
+ return ret;
259
+ }
260
+
261
+ template <class T>
262
+ oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8, T a9) {
263
+ oneof_reader<T> ret;
264
+ ret.add(a1);
265
+ ret.add(a2);
266
+ ret.add(a3);
267
+ ret.add(a4);
268
+ ret.add(a5);
269
+ ret.add(a6);
270
+ ret.add(a7);
271
+ ret.add(a8);
272
+ ret.add(a9);
273
+ return ret;
274
+ }
275
+
276
+ template <class T>
277
+ oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8, T a9,
278
+ T a10) {
279
+ oneof_reader<T> ret;
280
+ ret.add(a1);
281
+ ret.add(a2);
282
+ ret.add(a3);
283
+ ret.add(a4);
284
+ ret.add(a5);
285
+ ret.add(a6);
286
+ ret.add(a7);
287
+ ret.add(a8);
288
+ ret.add(a9);
289
+ ret.add(a10);
290
+ return ret;
291
+ }
292
+
293
+ //-----
294
+
295
+ class parser {
296
+ public:
297
+ parser() {}
298
+ ~parser() {
299
+ for (std::map<std::string, option_base *>::iterator p = options.begin();
300
+ p != options.end(); p++)
301
+ delete p->second;
302
+ }
303
+
304
+ void add(const std::string &name, char short_name = 0,
305
+ const std::string &desc = "") {
306
+ if (options.count(name))
307
+ throw cmdline_error("multiple definition: " + name);
308
+ options[name] = new option_without_value(name, short_name, desc);
309
+ ordered.push_back(options[name]);
310
+ }
311
+
312
+ template <class T>
313
+ void add(const std::string &name, char short_name = 0,
314
+ const std::string &desc = "", bool need = true, const T def = T()) {
315
+ add(name, short_name, desc, need, def, default_reader<T>());
316
+ }
317
+
318
+ template <class T, class F>
319
+ void add(const std::string &name, char short_name = 0,
320
+ const std::string &desc = "", bool need = true, const T def = T(),
321
+ F reader = F()) {
322
+ if (options.count(name))
323
+ throw cmdline_error("multiple definition: " + name);
324
+ options[name] = new option_with_value_with_reader<T, F>(
325
+ name, short_name, need, def, desc, reader);
326
+ ordered.push_back(options[name]);
327
+ }
328
+
329
+ void footer(const std::string &f) { ftr = f; }
330
+
331
+ void set_program_name(const std::string &name) { prog_name = name; }
332
+
333
+ bool exist(const std::string &name) const {
334
+ if (options.count(name) == 0)
335
+ throw cmdline_error("there is no flag: --" + name);
336
+ return options.find(name)->second->has_set();
337
+ }
338
+
339
+ template <class T>
340
+ const T &get(const std::string &name) const {
341
+ if (options.count(name) == 0)
342
+ throw cmdline_error("there is no flag: --" + name);
343
+ const option_with_value<T> *p =
344
+ dynamic_cast<const option_with_value<T> *>(options.find(name)->second);
345
+ if (p == NULL) throw cmdline_error("type mismatch flag '" + name + "'");
346
+ return p->get();
347
+ }
348
+
349
+ const std::vector<std::string> &rest() const { return others; }
350
+
351
+ bool parse(const std::string &arg) {
352
+ std::vector<std::string> args;
353
+
354
+ std::string buf;
355
+ bool in_quote = false;
356
+ for (std::string::size_type i = 0; i < arg.length(); i++) {
357
+ if (arg[i] == '\"') {
358
+ in_quote = !in_quote;
359
+ continue;
360
+ }
361
+
362
+ if (arg[i] == ' ' && !in_quote) {
363
+ args.push_back(buf);
364
+ buf = "";
365
+ continue;
366
+ }
367
+
368
+ if (arg[i] == '\\') {
369
+ i++;
370
+ if (i >= arg.length()) {
371
+ errors.push_back("unexpected occurrence of '\\' at end of string");
372
+ return false;
373
+ }
374
+ }
375
+
376
+ buf += arg[i];
377
+ }
378
+
379
+ if (in_quote) {
380
+ errors.push_back("quote is not closed");
381
+ return false;
382
+ }
383
+
384
+ if (buf.length() > 0) args.push_back(buf);
385
+
386
+ for (size_t i = 0; i < args.size(); i++)
387
+ std::cout << "\"" << args[i] << "\"" << std::endl;
388
+
389
+ return parse(args);
390
+ }
391
+
392
+ bool parse(const std::vector<std::string> &args) {
393
+ int argc = static_cast<int>(args.size());
394
+ std::vector<const char *> argv(argc);
395
+
396
+ for (int i = 0; i < argc; i++) argv[i] = args[i].c_str();
397
+
398
+ return parse(argc, &argv[0]);
399
+ }
400
+
401
+ bool parse(int argc, const char *const argv[]) {
402
+ errors.clear();
403
+ others.clear();
404
+
405
+ if (argc < 1) {
406
+ errors.push_back("argument number must be longer than 0");
407
+ return false;
408
+ }
409
+ if (prog_name == "") prog_name = argv[0];
410
+
411
+ std::map<char, std::string> lookup;
412
+ for (std::map<std::string, option_base *>::iterator p = options.begin();
413
+ p != options.end(); p++) {
414
+ if (p->first.length() == 0) continue;
415
+ char initial = p->second->short_name();
416
+ if (initial) {
417
+ if (lookup.count(initial) > 0) {
418
+ lookup[initial] = "";
419
+ errors.push_back(std::string("short option '") + initial +
420
+ "' is ambiguous");
421
+ return false;
422
+ } else
423
+ lookup[initial] = p->first;
424
+ }
425
+ }
426
+
427
+ for (int i = 1; i < argc; i++) {
428
+ if (strncmp(argv[i], "--", 2) == 0) {
429
+ const char *p = strchr(argv[i] + 2, '=');
430
+ if (p) {
431
+ std::string name(argv[i] + 2, p);
432
+ std::string val(p + 1);
433
+ set_option(name, val);
434
+ } else {
435
+ std::string name(argv[i] + 2);
436
+ if (options.count(name) == 0) {
437
+ errors.push_back("undefined option: --" + name);
438
+ continue;
439
+ }
440
+ if (options[name]->has_value()) {
441
+ if (i + 1 >= argc) {
442
+ errors.push_back("option needs value: --" + name);
443
+ continue;
444
+ } else {
445
+ i++;
446
+ set_option(name, argv[i]);
447
+ }
448
+ } else {
449
+ set_option(name);
450
+ }
451
+ }
452
+ } else if (strncmp(argv[i], "-", 1) == 0) {
453
+ if (!argv[i][1]) continue;
454
+ char last = argv[i][1];
455
+ for (int j = 2; argv[i][j]; j++) {
456
+ last = argv[i][j];
457
+ if (lookup.count(argv[i][j - 1]) == 0) {
458
+ errors.push_back(std::string("undefined short option: -") +
459
+ argv[i][j - 1]);
460
+ continue;
461
+ }
462
+ if (lookup[argv[i][j - 1]] == "") {
463
+ errors.push_back(std::string("ambiguous short option: -") +
464
+ argv[i][j - 1]);
465
+ continue;
466
+ }
467
+ set_option(lookup[argv[i][j - 1]]);
468
+ }
469
+
470
+ if (lookup.count(last) == 0) {
471
+ errors.push_back(std::string("undefined short option: -") + last);
472
+ continue;
473
+ }
474
+ if (lookup[last] == "") {
475
+ errors.push_back(std::string("ambiguous short option: -") + last);
476
+ continue;
477
+ }
478
+
479
+ if (i + 1 < argc && options[lookup[last]]->has_value()) {
480
+ set_option(lookup[last], argv[i + 1]);
481
+ i++;
482
+ } else {
483
+ set_option(lookup[last]);
484
+ }
485
+ } else {
486
+ others.push_back(argv[i]);
487
+ }
488
+ }
489
+
490
+ for (std::map<std::string, option_base *>::iterator p = options.begin();
491
+ p != options.end(); p++)
492
+ if (!p->second->valid())
493
+ errors.push_back("need option: --" + std::string(p->first));
494
+
495
+ return errors.size() == 0;
496
+ }
497
+
498
+ void parse_check(const std::string &arg) {
499
+ if (!options.count("help")) add("help", '?', "print this message");
500
+ check(0, parse(arg));
501
+ }
502
+
503
+ void parse_check(const std::vector<std::string> &args) {
504
+ if (!options.count("help")) add("help", '?', "print this message");
505
+ check(args.size(), parse(args));
506
+ }
507
+
508
+ void parse_check(int argc, char *argv[]) {
509
+ if (!options.count("help")) add("help", '?', "print this message");
510
+ check(argc, parse(argc, argv));
511
+ }
512
+
513
+ std::string error() const { return errors.size() > 0 ? errors[0] : ""; }
514
+
515
+ std::string error_full() const {
516
+ std::ostringstream oss;
517
+ for (size_t i = 0; i < errors.size(); i++) oss << errors[i] << std::endl;
518
+ return oss.str();
519
+ }
520
+
521
+ std::string usage() const {
522
+ std::ostringstream oss;
523
+ oss << "usage: " << prog_name << " ";
524
+ for (size_t i = 0; i < ordered.size(); i++) {
525
+ if (ordered[i]->must()) oss << ordered[i]->short_description() << " ";
526
+ }
527
+
528
+ oss << "[options] ... " << ftr << std::endl;
529
+ oss << "options:" << std::endl;
530
+
531
+ size_t max_width = 0;
532
+ for (size_t i = 0; i < ordered.size(); i++) {
533
+ max_width = std::max(max_width, ordered[i]->name().length());
534
+ }
535
+ for (size_t i = 0; i < ordered.size(); i++) {
536
+ if (ordered[i]->short_name()) {
537
+ oss << " -" << ordered[i]->short_name() << ", ";
538
+ } else {
539
+ oss << " ";
540
+ }
541
+
542
+ oss << "--" << ordered[i]->name();
543
+ for (size_t j = ordered[i]->name().length(); j < max_width + 4; j++)
544
+ oss << ' ';
545
+ oss << ordered[i]->description() << std::endl;
546
+ }
547
+ return oss.str();
548
+ }
549
+
550
+ private:
551
+ void check(int argc, bool ok) {
552
+ if ((argc == 1 && !ok) || exist("help")) {
553
+ std::cerr << usage();
554
+ exit(0);
555
+ }
556
+
557
+ if (!ok) {
558
+ std::cerr << error() << std::endl << usage();
559
+ exit(1);
560
+ }
561
+ }
562
+
563
+ void set_option(const std::string &name) {
564
+ if (options.count(name) == 0) {
565
+ errors.push_back("undefined option: --" + name);
566
+ return;
567
+ }
568
+ if (!options[name]->set()) {
569
+ errors.push_back("option needs value: --" + name);
570
+ return;
571
+ }
572
+ }
573
+
574
+ void set_option(const std::string &name, const std::string &value) {
575
+ if (options.count(name) == 0) {
576
+ errors.push_back("undefined option: --" + name);
577
+ return;
578
+ }
579
+ if (!options[name]->set(value)) {
580
+ errors.push_back("option value is invalid: --" + name + "=" + value);
581
+ return;
582
+ }
583
+ }
584
+
585
+ class option_base {
586
+ public:
587
+ virtual ~option_base() {}
588
+
589
+ virtual bool has_value() const = 0;
590
+ virtual bool set() = 0;
591
+ virtual bool set(const std::string &value) = 0;
592
+ virtual bool has_set() const = 0;
593
+ virtual bool valid() const = 0;
594
+ virtual bool must() const = 0;
595
+
596
+ virtual const std::string &name() const = 0;
597
+ virtual char short_name() const = 0;
598
+ virtual const std::string &description() const = 0;
599
+ virtual std::string short_description() const = 0;
600
+ };
601
+
602
+ class option_without_value : public option_base {
603
+ public:
604
+ option_without_value(const std::string &name, char short_name,
605
+ const std::string &desc)
606
+ : nam(name), snam(short_name), desc(desc), has(false) {}
607
+ ~option_without_value() {}
608
+
609
+ bool has_value() const { return false; }
610
+
611
+ bool set() {
612
+ has = true;
613
+ return true;
614
+ }
615
+
616
+ bool set(const std::string &) { return false; }
617
+
618
+ bool has_set() const { return has; }
619
+
620
+ bool valid() const { return true; }
621
+
622
+ bool must() const { return false; }
623
+
624
+ const std::string &name() const { return nam; }
625
+
626
+ char short_name() const { return snam; }
627
+
628
+ const std::string &description() const { return desc; }
629
+
630
+ std::string short_description() const { return "--" + nam; }
631
+
632
+ private:
633
+ std::string nam;
634
+ char snam;
635
+ std::string desc;
636
+ bool has;
637
+ };
638
+
639
+ template <class T>
640
+ class option_with_value : public option_base {
641
+ public:
642
+ option_with_value(const std::string &name, char short_name, bool need,
643
+ const T &def, const std::string &desc)
644
+ : nam(name),
645
+ snam(short_name),
646
+ need(need),
647
+ has(false),
648
+ def(def),
649
+ actual(def) {
650
+ this->desc = full_description(desc);
651
+ }
652
+ ~option_with_value() {}
653
+
654
+ const T &get() const { return actual; }
655
+
656
+ bool has_value() const { return true; }
657
+
658
+ bool set() { return false; }
659
+
660
+ bool set(const std::string &value) {
661
+ try {
662
+ actual = read(value);
663
+ has = true;
664
+ } catch (const std::exception &e) {
665
+ return false;
666
+ }
667
+ return true;
668
+ }
669
+
670
+ bool has_set() const { return has; }
671
+
672
+ bool valid() const {
673
+ if (need && !has) return false;
674
+ return true;
675
+ }
676
+
677
+ bool must() const { return need; }
678
+
679
+ const std::string &name() const { return nam; }
680
+
681
+ char short_name() const { return snam; }
682
+
683
+ const std::string &description() const { return desc; }
684
+
685
+ std::string short_description() const {
686
+ return "--" + nam + "=" + detail::readable_typename<T>();
687
+ }
688
+
689
+ protected:
690
+ std::string full_description(const std::string &desc) {
691
+ return desc + " (" + detail::readable_typename<T>() +
692
+ (need ? "" : " [=" + detail::default_value<T>(def) + "]") + ")";
693
+ }
694
+
695
+ virtual T read(const std::string &s) = 0;
696
+
697
+ std::string nam;
698
+ char snam;
699
+ bool need;
700
+ std::string desc;
701
+
702
+ bool has;
703
+ T def;
704
+ T actual;
705
+ };
706
+
707
+ template <class T, class F>
708
+ class option_with_value_with_reader : public option_with_value<T> {
709
+ public:
710
+ option_with_value_with_reader(const std::string &name, char short_name,
711
+ bool need, const T def,
712
+ const std::string &desc, F reader)
713
+ : option_with_value<T>(name, short_name, need, def, desc),
714
+ reader(reader) {}
715
+
716
+ private:
717
+ T read(const std::string &s) { return reader(s); }
718
+
719
+ F reader;
720
+ };
721
+
722
+ std::map<std::string, option_base *> options;
723
+ std::vector<option_base *> ordered;
724
+ std::string ftr;
725
+
726
+ std::string prog_name;
727
+ std::vector<std::string> others;
728
+
729
+ std::vector<std::string> errors;
730
+ };
731
+
732
+ } // namespace cmdline
install/examples/test_ax_api.cpp ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "runner/ax650/ax_api_loader.h"
2
+ #include "runner/ax650/ax_model_runner_ax650.hpp"
3
+
4
+ #include <fstream>
5
+ #include <vector>
6
+ #include <cstring>
7
+
8
+ AxSysApiLoader &get_ax_sys_loader();
9
+
10
+ AxEngineApiLoader &get_ax_engine_loader();
11
+
12
+ int main()
13
+ {
14
+ AxSysApiLoader &ax_sys_loader = get_ax_sys_loader();
15
+
16
+ AxEngineApiLoader &ax_engine_loader = get_ax_engine_loader();
17
+
18
+ ax_sys_loader.AX_SYS_Init();
19
+ AX_ENGINE_NPU_ATTR_T npu_attr;
20
+ memset(&npu_attr, 0, sizeof(AX_ENGINE_NPU_ATTR_T));
21
+ npu_attr.eHardMode = AX_ENGINE_VIRTUAL_NPU_DISABLE;
22
+ ax_engine_loader.AX_ENGINE_Init(&npu_attr);
23
+
24
+ ax_runner_ax650 runner;
25
+ std::ifstream file("cnclip/cnclip_vit_l14_336px_text_u16.axmodel", std::ios::binary);
26
+ if (!file.is_open())
27
+ {
28
+ printf("open file failed\n");
29
+ return -1;
30
+ }
31
+ std::vector<uint8_t> model_data((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
32
+ runner.init(model_data.data(), model_data.size(), 0);
33
+
34
+ ax_engine_loader.AX_ENGINE_Deinit();
35
+ ax_sys_loader.AX_SYS_Deinit();
36
+ return 0;
37
+ }
install/examples/test_axcl_api.cpp ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "runner/axcl/axcl_manager.h"
2
+ #include "runner/axcl/ax_model_runner_axcl.hpp"
3
+
4
+ #include <fstream>
5
+
6
+ int main()
7
+ {
8
+ auto ret = axclInit();
9
+ if (ret != 0)
10
+ {
11
+ printf("axclInit failed\n");
12
+ return -1;
13
+ }
14
+
15
+ axcl_Dev_Init(0);
16
+
17
+ ax_runner_axcl runner;
18
+ std::ifstream file("cnclip/cnclip_vit_l14_336px_text_u16.axmodel", std::ios::binary);
19
+ if (!file.is_open())
20
+ {
21
+ printf("open file failed\n");
22
+ return -1;
23
+ }
24
+ std::vector<uint8_t> model_data((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
25
+ runner.init(model_data.data(), model_data.size(), 0);
26
+
27
+ runner.deinit();
28
+
29
+ axcl_Dev_Exit(0);
30
+ axclFinalize();
31
+ return 0;
32
+ }
install/examples/test_enum_devices.cpp ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "clip.h"
2
+ #include <iostream>
3
+ #include <cstring>
4
+
5
+ int main()
6
+ {
7
+ clip_devices_t clip_devices;
8
+ memset(&clip_devices, 0, sizeof(clip_devices_t));
9
+ if (clip_enum_devices(&clip_devices) != 0)
10
+ {
11
+ printf("enum devices failed\n");
12
+ return -1;
13
+ }
14
+
15
+ std::cout << "host npu avaiable:" << static_cast<int>(clip_devices.host.available) << " version:" << clip_devices.host.version << std::endl;
16
+ std::cout << "host mem total:" << clip_devices.host.mem_info.total << " MiB remain:" << clip_devices.host.mem_info.remain << " MiB" << std::endl;
17
+
18
+ std::cout << "Host Version: " << clip_devices.devices.host_version << std::endl;
19
+ std::cout << "Dev Version: " << clip_devices.devices.dev_version << std::endl;
20
+ std::cout << "Detected Devices Count: " << static_cast<int>(clip_devices.devices.count) << std::endl;
21
+
22
+ for (unsigned char i = 0; i < clip_devices.devices.count; ++i)
23
+ {
24
+ std::cout << " Device " << static_cast<int>(i) << ":" << std::endl;
25
+ std::cout << " Temperature: " << clip_devices.devices.devices_info[i].temp << "C" << std::endl;
26
+ std::cout << " CPU Usage: " << clip_devices.devices.devices_info[i].cpu_usage << "%" << std::endl;
27
+ std::cout << " NPU Usage: " << clip_devices.devices.devices_info[i].npu_usage << "%" << std::endl;
28
+ std::cout << " Memory Remaining: " << clip_devices.devices.devices_info[i].mem_info.remain << " MiB" << std::endl;
29
+ std::cout << " Memory Total: " << clip_devices.devices.devices_info[i].mem_info.total << " MiB" << std::endl;
30
+ }
31
+
32
+ if (clip_devices.host.available)
33
+ {
34
+ clip_sys_init(host_device, -1);
35
+ }
36
+
37
+ if (clip_devices.devices.count > 0)
38
+ {
39
+ for (unsigned char i = 0; i < clip_devices.devices.count; ++i)
40
+ {
41
+ clip_sys_init(axcl_device, i);
42
+ }
43
+ }
44
+
45
+ if (clip_devices.host.available)
46
+ {
47
+ clip_sys_deinit(host_device, -1);
48
+ }
49
+
50
+ if (clip_devices.devices.count > 0)
51
+ {
52
+ for (unsigned char i = 0; i < clip_devices.devices.count; ++i)
53
+ {
54
+ clip_sys_deinit(axcl_device, i);
55
+ }
56
+ }
57
+
58
+ return 0;
59
+ }
install/examples/test_load_model.cpp ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "clip.h"
2
+ #include "cmdline.hpp"
3
+ #include <fstream>
4
+ #include <cstring>
5
+
6
+ int main(int argc, char *argv[])
7
+ {
8
+ clip_devices_t clip_devices;
9
+ memset(&clip_devices, 0, sizeof(clip_devices_t));
10
+ if (clip_enum_devices(&clip_devices) != 0)
11
+ {
12
+ printf("enum devices failed\n");
13
+ return -1;
14
+ }
15
+
16
+ if (clip_devices.host.available)
17
+ {
18
+ clip_sys_init(host_device, -1);
19
+ }
20
+ else if (clip_devices.devices.count > 0)
21
+ {
22
+ clip_sys_init(axcl_device, 0);
23
+ }
24
+ else
25
+ {
26
+ printf("no device available\n");
27
+ return -1;
28
+ }
29
+
30
+ clip_init_t init_info;
31
+ memset(&init_info, 0, sizeof(init_info));
32
+
33
+ cmdline::parser parser;
34
+ parser.add<std::string>("ienc", 0, "encoder model(onnx model or axmodel)", true, "cnclip/cnclip_vit_l14_336px_vision_u16u8.axmodel");
35
+ parser.add<std::string>("tenc", 0, "text encoder model(onnx model or axmodel)", true, "cnclip/cnclip_vit_l14_336px_text_u16.axmodel");
36
+ parser.add<std::string>("vocab", 'v', "vocab path", true, "cnclip/cn_vocab.txt");
37
+ parser.add<int>("language", 'l', "language choose, 0:english 1:chinese", false, 1);
38
+ parser.add<std::string>("db_path", 'd', "db path", false, "");
39
+ parser.parse_check(argc, argv);
40
+
41
+ sprintf(init_info.image_encoder_path, "%s", parser.get<std::string>("ienc").c_str());
42
+ sprintf(init_info.text_encoder_path, "%s", parser.get<std::string>("tenc").c_str());
43
+ sprintf(init_info.tokenizer_path, "%s", parser.get<std::string>("vocab").c_str());
44
+ init_info.isCN = parser.get<int>("language");
45
+ sprintf(init_info.db_path, "%s", parser.get<std::string>("db_path").c_str());
46
+
47
+ printf("image_encoder_path: %s\n", init_info.image_encoder_path);
48
+ printf("text_encoder_path: %s\n", init_info.text_encoder_path);
49
+ printf("tokenizer_path: %s\n", init_info.tokenizer_path);
50
+ printf("isCN: %d\n", init_info.isCN);
51
+ printf("db_path: %s\n", init_info.db_path);
52
+
53
+ if (clip_devices.host.available)
54
+ {
55
+ init_info.dev_type = host_device;
56
+ }
57
+ else if (clip_devices.devices.count > 0)
58
+ {
59
+ init_info.dev_type = axcl_device;
60
+ init_info.devid = 0;
61
+ }
62
+
63
+ clip_handle_t handle;
64
+ int ret = clip_create(&init_info, &handle);
65
+ if (ret != clip_errcode_success)
66
+ {
67
+ printf("clip_create failed\n");
68
+ return -1;
69
+ }
70
+
71
+ clip_destroy(handle);
72
+
73
+ if (clip_devices.host.available)
74
+ {
75
+ clip_sys_deinit(host_device, -1);
76
+ }
77
+ else if (clip_devices.devices.count > 0)
78
+ {
79
+
80
+ clip_sys_deinit(axcl_device, 0);
81
+ }
82
+
83
+ return 0;
84
+ }
install/examples/test_match_by_text.cpp ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "clip.h"
2
+ #include "cmdline.hpp"
3
+ #include "timer.hpp"
4
+ #include <fstream>
5
+ #include <cstring>
6
+ #include <opencv2/opencv.hpp>
7
+
8
+ int main(int argc, char *argv[])
9
+ {
10
+ clip_devices_t clip_devices;
11
+ memset(&clip_devices, 0, sizeof(clip_devices_t));
12
+ if (clip_enum_devices(&clip_devices) != 0)
13
+ {
14
+ printf("enum devices failed\n");
15
+ return -1;
16
+ }
17
+
18
+ if (clip_devices.host.available)
19
+ {
20
+ clip_sys_init(host_device, -1);
21
+ }
22
+ else if (clip_devices.devices.count > 0)
23
+ {
24
+ clip_sys_init(axcl_device, 0);
25
+ }
26
+ else
27
+ {
28
+ printf("no device available\n");
29
+ return -1;
30
+ }
31
+
32
+ clip_init_t init_info;
33
+ memset(&init_info, 0, sizeof(init_info));
34
+
35
+ cmdline::parser parser;
36
+ parser.add<std::string>("ienc", 0, "encoder model(onnx model or axmodel)", true, "cnclip/cnclip_vit_l14_336px_vision_u16u8.axmodel");
37
+ parser.add<std::string>("tenc", 0, "text encoder model(onnx model or axmodel)", true, "cnclip/cnclip_vit_l14_336px_text_u16.axmodel");
38
+ parser.add<std::string>("vocab", 'v', "vocab path", true, "cnclip/cn_vocab.txt");
39
+ parser.add<int>("language", 'l', "language choose, 0:english 1:chinese", false, 1);
40
+ parser.add<std::string>("db_path", 'd', "db path", false, "clip_feat_db");
41
+
42
+ parser.add<std::string>("image", 'i', "image folder(jpg png etc....)", true);
43
+ parser.add<std::string>("text", 't', "text or txt file", true);
44
+ parser.parse_check(argc, argv);
45
+
46
+ sprintf(init_info.image_encoder_path, "%s", parser.get<std::string>("ienc").c_str());
47
+ sprintf(init_info.text_encoder_path, "%s", parser.get<std::string>("tenc").c_str());
48
+ sprintf(init_info.tokenizer_path, "%s", parser.get<std::string>("vocab").c_str());
49
+ init_info.isCN = parser.get<int>("language");
50
+ sprintf(init_info.db_path, "%s", parser.get<std::string>("db_path").c_str());
51
+
52
+ printf("image_encoder_path: %s\n", init_info.image_encoder_path);
53
+ printf("text_encoder_path: %s\n", init_info.text_encoder_path);
54
+ printf("tokenizer_path: %s\n", init_info.tokenizer_path);
55
+ printf("isCN: %d\n", init_info.isCN);
56
+ printf("db_path: %s\n", init_info.db_path);
57
+
58
+ if (clip_devices.host.available)
59
+ {
60
+ init_info.dev_type = host_device;
61
+ }
62
+ else if (clip_devices.devices.count > 0)
63
+ {
64
+ init_info.dev_type = axcl_device;
65
+ init_info.devid = 0;
66
+ }
67
+
68
+ clip_handle_t handle;
69
+ int ret = clip_create(&init_info, &handle);
70
+ if (ret != clip_errcode_success)
71
+ {
72
+ printf("clip_create failed\n");
73
+ return -1;
74
+ }
75
+
76
+ std::string image_src = parser.get<std::string>("image");
77
+ std::string text = parser.get<std::string>("text");
78
+
79
+ std::vector<std::string> image_paths;
80
+ cv::glob(image_src + "/*.*", image_paths);
81
+
82
+ for (size_t i = 0; i < image_paths.size(); i++)
83
+ {
84
+ std::string image_path = image_paths[i];
85
+ std::string image_name = image_path.substr(image_path.find_last_of("/") + 1);
86
+ char key[CLIP_KEY_MAX_LEN];
87
+ sprintf(key, "%s", image_name.c_str());
88
+ if (clip_contain(handle, key))
89
+ {
90
+ // printf("%s is exist %04ld/%04ld\n", key, i, image_paths.size());
91
+ continue;
92
+ }
93
+
94
+ cv::Mat src = cv::imread(image_path);
95
+ cv::cvtColor(src, src, cv::COLOR_BGR2RGB);
96
+ clip_image_t image;
97
+ image.data = src.data;
98
+ image.width = src.cols;
99
+ image.height = src.rows;
100
+ image.channels = src.channels();
101
+ image.stride = src.step;
102
+
103
+ timer t;
104
+ clip_add(handle, key, &image, 0);
105
+ // printf("add image %s %04ld/%04ld %6.2fms\n", image_name.c_str(), i, image_paths.size(), t.cost());
106
+ }
107
+ int topk = 10;
108
+ std::vector<clip_result_item_t> results(topk);
109
+ timer t;
110
+ clip_match_text(handle, text.c_str(), results.data(), topk);
111
+ printf("match text \"%s\" %6.2fms\n", text.c_str(), t.cost());
112
+ printf("|%32s | %6s|\n", "key", "score");
113
+ for (size_t i = 0; i < results.size(); i++)
114
+ {
115
+ printf("|%32s | %6.2f|\n", results[i].key, results[i].score);
116
+ }
117
+
118
+ clip_destroy(handle);
119
+
120
+ if (clip_devices.host.available)
121
+ {
122
+ clip_sys_deinit(host_device, -1);
123
+ }
124
+ else if (clip_devices.devices.count > 0)
125
+ {
126
+
127
+ clip_sys_deinit(axcl_device, 0);
128
+ }
129
+
130
+ return 0;
131
+ }
install/examples/timer.hpp ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * AXERA is pleased to support the open source community by making ax-samples available.
3
+ *
4
+ * Copyright (c) 2022, AXERA Semiconductor (Shanghai) Co., Ltd. All rights reserved.
5
+ *
6
+ * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
7
+ * in compliance with the License. You may obtain a copy of the License at
8
+ *
9
+ * https://opensource.org/licenses/BSD-3-Clause
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software distributed
12
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
13
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
14
+ * specific language governing permissions and limitations under the License.
15
+ */
16
+
17
+ /*
18
+ * Author: ls.wang
19
+ */
20
+
21
+ #pragma once
22
+
23
+ #include <chrono>
24
+
25
// Minimal stopwatch. Construction (or start()) records the start instant;
// cost() returns the elapsed time in milliseconds.
class timer
{
private:
    // One monotonic clock is used for both the stored time points and now().
    // The previous code stored system_clock time points but assigned
    // high_resolution_clock::now() to them, which only compiles on standard
    // libraries where the two clocks are the same type.
    typedef std::chrono::steady_clock clock_type;
    clock_type::time_point start_time, end_time;

public:
    timer()
    {
        start();
    }

    // Restart the measurement: start and end are set to the same instant.
    void start()
    {
        stop();
        this->start_time = this->end_time;
    }

    // Record the current instant as the end of the measured interval.
    void stop()
    {
        this->end_time = clock_type::now();
    }

    // Elapsed milliseconds between start and end. When stop() has not been
    // called since the last start(), the interval is closed at the current
    // instant first.
    float cost()
    {
        if (this->end_time <= this->start_time)
        {
            this->stop();
        }

        auto us = std::chrono::duration_cast<std::chrono::microseconds>(this->end_time - this->start_time).count();
        return static_cast<float>(us) / 1000.f;
    }
};
install/include/clip.h ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef __CLIP_H__
2
+ #define __CLIP_H__
3
+
4
+ #if defined(__cplusplus)
5
+ extern "C"
6
+ {
7
+ #endif
8
+ #define CLIP_DEVICES_COUNT 16
9
+ #define CLIP_VERSION_LEN 32
10
+ #define CLIP_KEY_MAX_LEN 64
11
+ #define CLIP_PATH_LEN 128
12
+
13
+ typedef enum
14
+ {
15
+ clip_errcode_failed = -1,
16
+ clip_errcode_success = 0,
17
+
18
+ clip_errcode_invalid_ptr,
19
+ clip_errcode_sysinit_failed,
20
+ clip_errcode_sysdeinit_failed,
21
+ clip_errcode_axcl_sysinit_failed,
22
+ clip_errcode_axcl_sysdeinit_failed,
23
+
24
+ clip_errcode_create_failed = 0x10000,
25
+ clip_errcode_create_failed_sys,
26
+ clip_errcode_create_failed_ienc,
27
+ clip_errcode_create_failed_tenc,
28
+ clip_errcode_create_failed_vocab,
29
+ clip_errcode_create_failed_db,
30
+
31
+ clip_errcode_destroy_failed = 0x20000,
32
+
33
+ clip_errcode_add_failed = 0x30000,
34
+ clip_errcode_add_failed_key_exist,
35
+ clip_errcode_add_failed_encode_image,
36
+ clip_errcode_add_failed_push_db,
37
+
38
+ clip_errcode_remove_failed = 0x40000,
39
+ clip_errcode_remove_failed_key_not_exist,
40
+ clip_errcode_remove_failed_del_db,
41
+
42
+ clip_errcode_match_failed = 0x50000,
43
+ clip_errcode_match_failed_encode_text,
44
+ clip_errcode_match_failed_encode_image,
45
+ } clip_errcode_e;
46
+
47
+ typedef enum
48
+ {
49
+ unknown_device = 0,
50
+ host_device = 1,
51
+ axcl_device = 2
52
+ } clip_devive_e;
53
+
54
+ typedef void *clip_handle_t;
55
+
56
+ typedef struct
57
+ {
58
+ struct
59
+ {
60
+ char available;
61
+ char version[CLIP_VERSION_LEN];
62
+ struct
63
+ {
64
+ int remain;
65
+ int total;
66
+ } mem_info;
67
+ } host;
68
+
69
+ struct
70
+ {
71
+ char host_version[CLIP_VERSION_LEN];
72
+ char dev_version[CLIP_VERSION_LEN];
73
+ unsigned char count;
74
+ struct
75
+ {
76
+ int temp;
77
+ int cpu_usage;
78
+ int npu_usage;
79
+ struct
80
+ {
81
+ int remain;
82
+ int total;
83
+ } mem_info;
84
+ } devices_info[CLIP_DEVICES_COUNT];
85
+
86
+ } devices;
87
+ } clip_devices_t;
88
+
89
+ typedef struct
90
+ {
91
+ clip_devive_e dev_type; // Device type
92
+ char devid; // axcl device ID
93
+ char text_encoder_path[CLIP_PATH_LEN]; // Text encoder model path
94
+ char image_encoder_path[CLIP_PATH_LEN]; // Image encoder model path
95
+ char tokenizer_path[CLIP_PATH_LEN]; // Tokenizer model path
96
+ char isCN; // Whether it's a Chinese model (0: English, 1: Chinese)
97
+ char db_path[CLIP_PATH_LEN]; // Database path (if empty path is specified, a folder will be created)
98
+ } clip_init_t;
99
+
100
+ typedef struct
101
+ {
102
+ unsigned char *data;
103
+ int width;
104
+ int height;
105
+ int channels;
106
+ int stride;
107
+ } clip_image_t;
108
+
109
+ typedef struct
110
+ {
111
+ char key[CLIP_KEY_MAX_LEN];
112
+ float score;
113
+ } clip_result_item_t;
114
+
115
+ /**
116
+ * @brief Enumerate available devices in the current system
117
+ * @param devices Pointer to device information structure
118
+ * @return int Returns 0 on success, -1 on failure
119
+ */
120
+ int clip_enum_devices(clip_devices_t *devices);
121
+
122
+ /**
123
+ * @brief Initialize CLIP system resources
124
+ * @param dev_type Device type
125
+ * @param devid Device ID
126
+ * @return clip_errcode_e Returns 0 on success, error codes see clip_errcode_e
127
+ */
128
+ int clip_sys_init(clip_devive_e dev_type, char devid);
129
+
130
+ /**
131
+ * @brief Deinitialize CLIP system resources
132
+ * @param dev_type Device type
133
+ * @param devid Device ID
134
+ * @return clip_errcode_e Returns 0 on success, error codes see clip_errcode_e
135
+ */
136
+ int clip_sys_deinit(clip_devive_e dev_type, char devid);
137
+
138
+ /**
139
+ * @brief Create CLIP handle
140
+ * @param init_info Pointer to initialization information structure
141
+ * @param handle Handle pointer
142
+ * @return clip_errcode_e Returns 0 on success, error codes see clip_errcode_e
143
+ */
144
+ int clip_create(clip_init_t *init_info, clip_handle_t *handle);
145
+
146
+ /**
147
+ * @brief Destroy CLIP handle
148
+ * @param handle Handle
149
+ * @return clip_errcode_e Returns 0 on success, error codes see clip_errcode_e
150
+ */
151
+ int clip_destroy(clip_handle_t handle);
152
+
153
+ /**
154
+ * @brief Add image to CLIP database
155
+ * @param handle Handle
156
+ * @param key Image key
157
+ * @param image Pointer to image structure
158
+ * @param overwrite Whether to overwrite
159
+ * @return clip_errcode_e Returns 0 on success, error codes see clip_errcode_e
160
+ */
161
+ int clip_add(clip_handle_t handle, char key[CLIP_KEY_MAX_LEN], clip_image_t *image, char overwrite);
162
+
163
+ /**
164
+ * @brief Remove image from CLIP database
165
+ * @param handle Handle
166
+ * @param key Image key
167
+ * @return clip_errcode_e Returns 0 on success, error codes see clip_errcode_e
168
+ */
169
+ int clip_remove(clip_handle_t handle, char key[CLIP_KEY_MAX_LEN]);
170
+
171
+ /**
172
+ * @brief Check if image exists in CLIP database
173
+ * @param handle Handle
174
+ * @param key Image key
175
+ * @return int Non-zero when the key exists in the database, 0 when it does not (the bundled C++ example tests the result as a boolean and the Python binding compares it with 1)
176
+ */
177
+ int clip_contain(clip_handle_t handle, char key[CLIP_KEY_MAX_LEN]);
178
+
179
+ /**
180
+ * @brief Text match CLIP database images (softmax)
181
+ * @param handle Handle
182
+ * @param text Text
183
+ * @param results Pointer to result structure
184
+ * @param top_k Top k results
185
+ * @return clip_errcode_e Returns 0 on success, error codes see clip_errcode_e
186
+ */
187
+ int clip_match_text(clip_handle_t handle, const char *text, clip_result_item_t *results, int top_k);
188
+
189
+ /**
190
+ * @brief Image match CLIP database images (cosine similarity)
191
+ * @param handle Handle
192
+ * @param image Pointer to image structure
193
+ * @param results Pointer to result structure
194
+ * @param top_k Top k results
195
+ * @return clip_errcode_e Returns 0 on success, error codes see clip_errcode_e
196
+ */
197
+ int clip_match_image(clip_handle_t handle, clip_image_t *image, clip_result_item_t *results, int top_k);
198
+
199
+ #if defined(__cplusplus)
200
+ }
201
+ #endif
202
+
203
+ #endif // __CLIP_H__
install/lib/aarch64/libclip.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d26c8940d14c0d749ece83b4be6ed13eb86487c31acb0c7591fffb6e21fe3ad
3
+ size 4309856
pyclip/example.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pyclip import Clip, enum_devices, sys_init, sys_deinit, ClipDeviceType
3
+ import cv2
4
+ import glob
5
+ import argparse
6
+ import tqdm
7
+
8
if __name__ == '__main__':
    # Command-line demo: index every *.jpg in a folder and run one text query.
    parser = argparse.ArgumentParser()
    parser.add_argument('--ienc', type=str, default='cnclip/cnclip_vit_l14_336px_vision_u16u8.axmodel')
    parser.add_argument('--tenc', type=str, default='cnclip/cnclip_vit_l14_336px_text_u16.axmodel')
    parser.add_argument('--vocab', type=str, default='cnclip/cn_vocab.txt')
    parser.add_argument('--isCN', type=int, default=1)
    parser.add_argument('--db_path', type=str, default='clip_feat_db_coco')
    parser.add_argument('--image_folder', type=str, default='coco_1000')
    args = parser.parse_args()

    image_folder = args.image_folder

    # Enumerate devices
    print("可用设备:", enum_devices())

    # Initialise the system
    sys_init(ClipDeviceType.axcl_device, 0)

    clip = None
    try:
        # Create the CLIP instance
        clip = Clip({
            'text_encoder_path': args.tenc,
            'image_encoder_path': args.ienc,
            'tokenizer_path': args.vocab,
            'db_path': args.db_path,
            'isCN': args.isCN,
        })

        # Add images
        image_files = glob.glob(os.path.join(image_folder, '*.jpg'))
        for image_file in tqdm.tqdm(image_files):
            img = cv2.imread(image_file)
            if img is None:
                # cv2.imread returns None for unreadable or corrupt files.
                print(f"skip unreadable image: {image_file}")
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            filename = os.path.basename(image_file)
            clip.add_image(filename, img)

        # Text matching
        results = clip.match_text('dog', top_k=10)
        print("匹配结果:", results)

    finally:
        # Drop the last reference first so the CLIP handle is destroyed
        # before the system is deinitialised (same order as the C++ examples).
        clip = None
        # Deinitialise the system
        sys_deinit(ClipDeviceType.axcl_device, 0)
pyclip/gradio_example.png ADDED

Git LFS Details

  • SHA256: 5462a8506d2c6c335bcc778486b82ab8665958d711545182429addc3565f7fe1
  • Pointer size: 132 Bytes
  • Size of remote file: 1.53 MB
pyclip/gradio_example.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from pyclip import Clip, enum_devices, sys_init, sys_deinit, ClipDeviceType
4
+ import cv2
5
+ import glob
6
+ from PIL import Image
7
+ import tqdm
8
+ import argparse
9
+
10
if __name__ == '__main__':
    # Gradio demo: index an image folder once, then serve text-to-image search.
    parser = argparse.ArgumentParser()
    parser.add_argument('--ienc', type=str, default='cnclip/cnclip_vit_l14_336px_vision_u16u8.axmodel')
    parser.add_argument('--tenc', type=str, default='cnclip/cnclip_vit_l14_336px_text_u16.axmodel')
    parser.add_argument('--vocab', type=str, default='cnclip/cn_vocab.txt')
    parser.add_argument('--isCN', type=int, default=1)
    parser.add_argument('--db_path', type=str, default='clip_feat_db_coco')
    parser.add_argument('--image_folder', type=str, default='coco_1000')
    args = parser.parse_args()

    image_folder = args.image_folder

    # Initialisation
    print("可用设备:", enum_devices())
    sys_init(ClipDeviceType.axcl_device, 0)

    # Register the shutdown hook immediately after sys_init. The original
    # registered it only after the blocking launch() call had returned, so a
    # failure during start-up left the device initialised.
    import atexit

    clip = None

    def _shutdown():
        """Destroy the CLIP handle (if any) and then deinitialise the system."""
        global clip
        clip = None
        sys_deinit(ClipDeviceType.axcl_device, 0)

    atexit.register(_shutdown)

    clip = Clip({
        'text_encoder_path': args.tenc,
        'image_encoder_path': args.ienc,
        'tokenizer_path': args.vocab,
        'db_path': args.db_path,
        'isCN': args.isCN,
    })

    # Load the image database (done once at start-up)
    image_files = glob.glob(os.path.join(image_folder, '*.jpg'))
    for image_file in tqdm.tqdm(image_files):
        filename = os.path.basename(image_file)
        if clip.contains_image(filename) == 1:
            continue
        img = cv2.imread(image_file)
        if img is None:
            # cv2.imread returns None for unreadable or corrupt files.
            print(f"skip unreadable image: {image_file}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        clip.add_image(filename, img)

    # Helper: load an image file as an RGB PIL image
    def img_to_pil(img_path):
        return Image.open(img_path).convert("RGB")

    # Main search callback
    def search_images(query, top_k):
        """Return (image, caption) pairs for the best matches of *query*."""
        top_k = int(top_k or 0)
        if top_k < 1:
            return []
        results = clip.match_text(query, top_k=top_k)
        images = []
        for filename, score in results:
            if not filename:
                # A result slot with an empty key is skipped: joined with the
                # folder it would resolve to the folder itself.
                continue
            img_path = os.path.join(image_folder, filename)
            if os.path.isfile(img_path):
                img = img_to_pil(img_path)
                images.append((img, f"{filename} Score: {score:.4f}"))
        return images

    # Gradio UI
    with gr.Blocks() as demo:
        gr.Markdown("# 🔍 文搜图 Demo")

        with gr.Row():
            query_input = gr.Textbox(label="请输入文本查询")
            topk_input = gr.Number(value=25, precision=0, label="Top-K")
            search_btn = gr.Button("搜图")

        gallery = gr.Gallery(label="匹配结果", show_label=True, columns=4)

        search_btn.click(fn=search_images, inputs=[query_input, topk_input], outputs=gallery)

    # Launch (blocks until the server is stopped; clean-up runs via atexit)
    ip = "0.0.0.0"
    demo.launch(server_name=ip, server_port=7860)
pyclip/pyclip.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ctypes
2
+ import os
3
+ from typing import List, Tuple, Optional
4
+ import numpy as np
5
+ import platform
6
+
7
# Locate and load libclip.so next to this module.
#
# The module directory is resolved to an absolute path: with a bare relative
# __file__, dirname() returns '' and the fallback candidate would degenerate
# to the bare name 'libclip.so', which the dynamic loader resolves through
# its search path instead of this directory.
base_dir = os.path.dirname(os.path.abspath(__file__))
arch = platform.machine()

if arch == 'x86_64':
    arch_dir = 'x86_64'
elif arch in ('aarch64', 'arm64'):
    arch_dir = 'aarch64'
else:
    raise RuntimeError(f"Unsupported architecture: {arch}")

# Candidates, in order: the architecture sub-directory, then the module directory.
lib_paths = [
    os.path.join(base_dir, arch_dir, 'libclip.so'),
    os.path.join(base_dir, 'libclip.so')
]

last_error = None
diagnostic_shown = set()  # each kind of hint is printed at most once

for lib_path in lib_paths:
    try:
        print(f"Trying to load: {lib_path}")
        _lib = ctypes.CDLL(lib_path)
        print(f"✅ Successfully loaded: {lib_path}")
        break
    except OSError as e:
        last_error = e
        err_str = str(e)
        print(f"\n❌ Failed to load: {lib_path}")
        print(f" {err_str}")

        # Only show GLIBCXX tip once
        if "GLIBCXX" in err_str and "not found" in err_str:
            if "missing_glibcxx" not in diagnostic_shown:
                diagnostic_shown.add("missing_glibcxx")
                print("🔍 Detected missing GLIBCXX version in libstdc++.so.6")
                print("💡 This usually happens when your environment (like Conda) uses an older libstdc++")
                print(f"👉 Try running with system libstdc++ preloaded:")
                print(f" export LD_PRELOAD=/usr/lib/{arch_dir}-linux-gnu/libstdc++.so.6\n")
        elif "No such file" in err_str:
            if "file_not_found" not in diagnostic_shown:
                diagnostic_shown.add("file_not_found")
                print("🔍 File not found. Please verify that libclip.so exists and the path is correct.\n")
        elif "wrong ELF class" in err_str:
            if "elf_mismatch" not in diagnostic_shown:
                diagnostic_shown.add("elf_mismatch")
                print("🔍 ELF class mismatch — likely due to architecture conflict (e.g., loading x86_64 .so on aarch64).")
                print(f"👉 Run `file {lib_path}` to verify the binary architecture.\n")
        else:
            if "generic_error" not in diagnostic_shown:
                diagnostic_shown.add("generic_error")
                print("📎 Tip: Use `ldd` to inspect missing dependencies:")
                print(f" ldd {lib_path}\n")
else:
    # The loop finished without `break`: no candidate could be loaded.
    raise RuntimeError(f"\n❗ Failed to load libclip.so.\nLast error:\n{last_error}")
61
+
62
+
63
+ # 定义枚举类型
64
+ class ClipDeviceType(ctypes.c_int):
65
+ unknown_device = 0
66
+ host_device = 1
67
+ axcl_device = 2
68
+
69
+ # 定义结构体
70
+ class ClipMemInfo(ctypes.Structure):
71
+ _fields_ = [
72
+ ('remain', ctypes.c_int),
73
+ ('total', ctypes.c_int)
74
+ ]
75
+
76
+ class ClipHostInfo(ctypes.Structure):
77
+ _fields_ = [
78
+ ('available', ctypes.c_char),
79
+ ('version', ctypes.c_char * 32),
80
+ ('mem_info', ClipMemInfo)
81
+ ]
82
+
83
+ class ClipDeviceInfo(ctypes.Structure):
84
+ _fields_ = [
85
+ ('temp', ctypes.c_int),
86
+ ('cpu_usage', ctypes.c_int),
87
+ ('npu_usage', ctypes.c_int),
88
+ ('mem_info', ClipMemInfo)
89
+ ]
90
+
91
+ class ClipDevices(ctypes.Structure):
92
+ _fields_ = [
93
+ ('host', ClipHostInfo),
94
+ ('host_version', ctypes.c_char * 32),
95
+ ('dev_version', ctypes.c_char * 32),
96
+ ('count', ctypes.c_ubyte),
97
+ ('devices_info', ClipDeviceInfo * 16)
98
+ ]
99
+
100
+ class ClipInit(ctypes.Structure):
101
+ _fields_ = [
102
+ ('dev_type', ClipDeviceType),
103
+ ('devid', ctypes.c_char),
104
+ ('text_encoder_path', ctypes.c_char * 128),
105
+ ('image_encoder_path', ctypes.c_char * 128),
106
+ ('tokenizer_path', ctypes.c_char * 128),
107
+ ('isCN', ctypes.c_char),
108
+ ('db_path', ctypes.c_char * 128)
109
+ ]
110
+
111
+ class ClipImage(ctypes.Structure):
112
+ _fields_ = [
113
+ ('data', ctypes.POINTER(ctypes.c_ubyte)),
114
+ ('width', ctypes.c_int),
115
+ ('height', ctypes.c_int),
116
+ ('channels', ctypes.c_int),
117
+ ('stride', ctypes.c_int)
118
+ ]
119
+
120
+ class ClipResultItem(ctypes.Structure):
121
+ _fields_ = [
122
+ ('key', ctypes.c_char * 64),
123
+ ('score', ctypes.c_float)
124
+ ]
125
+
126
+ # 设置函数参数和返回类型
127
+ _lib.clip_enum_devices.argtypes = [ctypes.POINTER(ClipDevices)]
128
+ _lib.clip_enum_devices.restype = ctypes.c_int
129
+
130
+ _lib.clip_sys_init.argtypes = [ClipDeviceType, ctypes.c_char]
131
+ _lib.clip_sys_init.restype = ctypes.c_int
132
+
133
+ _lib.clip_sys_deinit.argtypes = [ClipDeviceType, ctypes.c_char]
134
+ _lib.clip_sys_deinit.restype = ctypes.c_int
135
+
136
+ _lib.clip_create.argtypes = [ctypes.POINTER(ClipInit), ctypes.POINTER(ctypes.c_void_p)]
137
+ _lib.clip_create.restype = ctypes.c_int
138
+
139
+ _lib.clip_destroy.argtypes = [ctypes.c_void_p]
140
+ _lib.clip_destroy.restype = ctypes.c_int
141
+
142
+ _lib.clip_add.argtypes = [ctypes.c_void_p, ctypes.c_char_p, ctypes.POINTER(ClipImage), ctypes.c_char]
143
+ _lib.clip_add.restype = ctypes.c_int
144
+
145
+ _lib.clip_remove.argtypes = [ctypes.c_void_p, ctypes.c_char_p]
146
+ _lib.clip_remove.restype = ctypes.c_int
147
+
148
+ _lib.clip_contain.argtypes = [ctypes.c_void_p, ctypes.c_char_p]
149
+ _lib.clip_contain.restype = ctypes.c_int
150
+
151
+ _lib.clip_match_text.argtypes = [ctypes.c_void_p, ctypes.c_char_p, ctypes.POINTER(ClipResultItem), ctypes.c_int]
152
+ _lib.clip_match_text.restype = ctypes.c_int
153
+
154
+ _lib.clip_match_image.argtypes = [ctypes.c_void_p, ctypes.POINTER(ClipImage), ctypes.POINTER(ClipResultItem), ctypes.c_int]
155
+ _lib.clip_match_image.restype = ctypes.c_int
156
+
157
class ClipError(Exception):
    """Raised when a libclip C API call returns a non-zero status code."""


def check_error(code: int) -> None:
    """Raise ClipError carrying *code* unless it is the success value 0."""
    if code == 0:
        return
    raise ClipError(f"CLIP API错误: {code}")
163
+
164
class Clip:
    """Python wrapper around one libclip handle (image database + encoders)."""

    def __init__(self, init_info: dict):
        """Create a CLIP handle.

        Args:
            init_info: Configuration dictionary. Recognised keys are
                ``dev_type``, ``devid``, ``isCN`` and the path entries
                ``text_encoder_path``, ``image_encoder_path``,
                ``tokenizer_path`` and ``db_path``.

        Raises:
            ValueError: If a path does not fit its fixed-size C buffer.
            ClipError: If ``clip_create`` returns a non-zero status.
        """
        # Set first so that __del__ is safe even if construction fails below.
        self.handle = None
        self.init_info = ClipInit()

        # Scalar settings
        self.init_info.dev_type = init_info.get('dev_type', ClipDeviceType.axcl_device)
        devid = init_info.get('devid', 0)
        if isinstance(devid, int):
            # c_char only accepts 0..255; map negative ids (the C examples
            # use -1 for the host device) to the equivalent byte.
            devid &= 0xFF
        self.init_info.devid = devid
        self.init_info.isCN = init_info.get('isCN', 1)

        # Paths
        for path_name in ('text_encoder_path', 'image_encoder_path', 'tokenizer_path', 'db_path'):
            if path_name not in init_info:
                continue
            encoded = init_info[path_name].encode('utf-8')
            capacity = getattr(ClipInit, path_name).size
            if len(encoded) >= capacity:
                # A value that fills the whole buffer would be stored without
                # a terminating NUL.
                raise ValueError(
                    f"{path_name} is too long: {len(encoded)} bytes, at most {capacity - 1} allowed")
            setattr(self.init_info, path_name, encoded)

        # Create the CLIP instance
        handle = ctypes.c_void_p()
        check_error(_lib.clip_create(ctypes.byref(self.init_info), ctypes.byref(handle)))
        self.handle = handle

    def __del__(self):
        """Destroy the native handle, if one was created."""
        handle = getattr(self, 'handle', None)
        if handle:
            _lib.clip_destroy(handle)
            self.handle = None

    def add_image(self, key: str, image_data: np.ndarray) -> None:
        """Encode *image_data* and store it under *key*; no-op if the key exists.

        Args:
            key: Database key for the image.
            image_data: ``uint8`` array of shape (height, width, channels).

        Raises:
            TypeError: If *image_data* is not a NumPy array.
            ValueError: If *image_data* is not a 3-D ``uint8`` array.
            ClipError: If ``clip_add`` returns a non-zero status.
        """
        if self.contains_image(key):
            return
        if not isinstance(image_data, np.ndarray):
            raise TypeError(f"image_data must be a numpy.ndarray, got {type(image_data).__name__}")
        if image_data.ndim != 3 or image_data.dtype != np.uint8:
            raise ValueError("image_data must be a uint8 array of shape (height, width, channels)")

        # The C side receives a raw pointer plus a row stride, so the pixels
        # must be one contiguous block (slices/views may not be).
        pixels = np.ascontiguousarray(image_data)

        image = ClipImage()
        image.data = ctypes.cast(pixels.ctypes.data, ctypes.POINTER(ctypes.c_ubyte))
        image.width = pixels.shape[1]
        image.height = pixels.shape[0]
        image.channels = pixels.shape[2]
        image.stride = pixels.shape[1] * pixels.shape[2]

        # `pixels` stays referenced until after the call returns.
        check_error(_lib.clip_add(self.handle, key.encode('utf-8'), ctypes.byref(image), 0))

    def remove_image(self, key: str) -> None:
        """Remove the image stored under *key*; raises ClipError on failure."""
        check_error(_lib.clip_remove(self.handle, key.encode('utf-8')))

    def contains_image(self, key: str) -> bool:
        """Return True when ``clip_contain`` reports 1 for *key*."""
        return _lib.clip_contain(self.handle, key.encode('utf-8')) == 1

    def match_text(self, text: str, top_k: int = 10) -> List[Tuple[str, float]]:
        """Match *text* against the database.

        Returns:
            A list of exactly *top_k* ``(key, score)`` tuples, one per slot
            of the result array handed to the library.

        Raises:
            ClipError: If ``clip_match_text`` returns a non-zero status.
        """
        top_k = int(top_k)
        results = (ClipResultItem * top_k)()
        check_error(_lib.clip_match_text(self.handle, text.encode('utf-8'), results, top_k))

        return [(item.key.decode('utf-8'), item.score) for item in results]

    def match_image(self, image_data: bytes, width: int, height: int, channels: int = 3, top_k: int = 10) -> List[Tuple[str, float]]:
        """Match a raw image buffer against the database.

        Args:
            image_data: Packed pixel bytes, ``width * channels`` bytes per row.
            width: Image width in pixels.
            height: Image height in pixels.
            channels: Number of channels per pixel.
            top_k: Number of result slots to request.

        Returns:
            A list of exactly *top_k* ``(key, score)`` tuples.

        Raises:
            ValueError: If *image_data* is shorter than the stated dimensions.
            ClipError: If ``clip_match_image`` returns a non-zero status.
        """
        expected = width * height * channels
        if len(image_data) < expected:
            raise ValueError(
                f"image_data has {len(image_data)} bytes, expected at least {expected}")

        # Keep the buffer in a local so it outlives the native call.
        buffer = ctypes.create_string_buffer(image_data)
        image = ClipImage()
        image.data = ctypes.cast(buffer, ctypes.POINTER(ctypes.c_ubyte))
        image.width = width
        image.height = height
        image.channels = channels
        image.stride = width * channels

        top_k = int(top_k)
        results = (ClipResultItem * top_k)()
        # Pass the array itself: ctypes converts an array to a pointer to its
        # element type, whereas byref(array) is rejected for a
        # POINTER(ClipResultItem) argtype.
        check_error(_lib.clip_match_image(self.handle, ctypes.byref(image), results, top_k))

        return [(item.key.decode('utf-8'), item.score) for item in results]
224
+
225
def enum_devices() -> dict:
    """Enumerate the host NPU and AXCL devices.

    Returns:
        A dictionary with a ``host`` entry (``available``, ``version``,
        ``mem_info``) and a ``devices`` entry (``host_version``,
        ``dev_version``, ``count`` and one ``devices_info`` item per
        detected device).

    Raises:
        ClipError: If ``clip_enum_devices`` returns a non-zero status.
    """
    devices = ClipDevices()
    check_error(_lib.clip_enum_devices(ctypes.byref(devices)))

    host = devices.host
    devices_info = [
        {
            'temp': dev.temp,
            'cpu_usage': dev.cpu_usage,
            'npu_usage': dev.npu_usage,
            'mem_info': {
                'remain': dev.mem_info.remain,
                'total': dev.mem_info.total
            }
        }
        for dev in devices.devices_info[:devices.count]
    ]

    return {
        'host': {
            # A c_char field reads back as a one-byte bytes object, which is
            # always truthy; compare against NUL to get the real flag.
            'available': host.available != b'\x00',
            'version': host.version.decode('utf-8'),
            'mem_info': {
                'remain': host.mem_info.remain,
                'total': host.mem_info.total
            }
        },
        'devices': {
            'host_version': devices.host_version.decode('utf-8'),
            'dev_version': devices.dev_version.decode('utf-8'),
            'count': devices.count,
            'devices_info': devices_info
        }
    }
253
+
254
+
255
def sys_init(dev_type: ClipDeviceType = ClipDeviceType.axcl_device, devid: int = 0) -> None:
    """Initialise the CLIP system resources for one device.

    Args:
        dev_type: One of the ``ClipDeviceType`` values.
        devid: Device id. Negative ids such as the -1 the C examples use for
            the host device are accepted.

    Raises:
        ClipError: If ``clip_sys_init`` returns a non-zero status.
    """
    # The c_char argtype only accepts 0..255; map negative ids to the
    # equivalent byte value.
    check_error(_lib.clip_sys_init(dev_type, devid & 0xFF))


def sys_deinit(dev_type: ClipDeviceType = ClipDeviceType.axcl_device, devid: int = 0) -> None:
    """Release the CLIP system resources for one device.

    Args:
        dev_type: One of the ``ClipDeviceType`` values.
        devid: Device id. Negative ids such as the -1 the C examples use for
            the host device are accepted.

    Raises:
        ClipError: If ``clip_sys_deinit`` returns a non-zero status.
    """
    # The c_char argtype only accepts 0..255; map negative ids to the
    # equivalent byte value.
    check_error(_lib.clip_sys_deinit(dev_type, devid & 0xFF))
pyclip/requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ opencv-python
3
+ tqdm
4
+ Pillow