phi-3.5-onnx-qnn / genai_config_removed_sliding_window.json
doberst's picture
Upload 20 files
7f81323 verified
{
"model": {
"bos_token_id": 1,
"context_length": 4096,
"decoder": {
"session_options": {
"log_id": "onnxruntime-genai",
"provider_options": [],
"log_severity_level": 0
},
"filename": "model.onnx",
"head_size": 96,
"hidden_size": 3072,
"inputs": {
"input_ids": "input_ids",
"attention_mask": "attention_mask_before_processor",
"position_ids": "position_ids",
"past_key_names": "past_key_%d_in",
"past_value_names": "past_value_%d_in"
},
"outputs": {
"logits": "logits_dequantized",
"present_key_names": "past_key_%d_out",
"present_value_names": "past_value_%d_out"
},
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pipeline": [
{
"position_processor": {
"filename": "position-processor.onnx",
"inputs": [
"attention_mask_before_processor",
"position_ids"
],
"outputs": [
"attention_mask_before_quantizer",
"position_ids_cos_before_quantizer",
"position_ids_sin_before_quantizer"
],
"session_options": {
"log_id": "onnxruntime-genai.position_processor",
"provider_options": [
{}
]
}
},
"quantizer": {
"filename": "quantizer.onnx",
"inputs": [
"attention_mask_before_quantizer",
"position_ids_cos_before_quantizer",
"position_ids_sin_before_quantizer"
],
"outputs": [
"attention_mask",
"position_ids_cos",
"position_ids_sin"
],
"session_options": {
"log_id": "onnxruntime-genai.quantizer",
"provider_options": [
{}
]
}
},
"prompt-processor-1": {
"filename": "ar128_cl4096_1_of_4_qnn_ctx.onnx",
"inputs": [
"input_ids",
"past_key_0_in",
"past_key_5_in",
"past_value_5_in",
"past_value_0_in",
"past_key_6_in",
"past_value_6_in",
"past_key_7_in",
"past_value_7_in",
"past_key_1_in",
"past_value_1_in",
"past_key_2_in",
"past_value_2_in",
"past_key_3_in",
"past_value_3_in",
"past_key_4_in",
"past_value_4_in",
"position_ids_cos",
"position_ids_sin",
"attention_mask"
],
"outputs": [
"past_value_0_out",
"past_key_0_out",
"past_value_1_out",
"past_key_1_out",
"past_value_2_out",
"past_key_2_out",
"past_value_3_out",
"past_key_3_out",
"past_value_4_out",
"past_key_4_out",
"past_value_5_out",
"past_key_5_out",
"past_value_6_out",
"past_key_6_out",
"past_value_7_out",
"past_key_7_out",
"_model_layers_7_Add_1_Add_output_0"
],
"session_options": {
"log_id": "onnxruntime-genai.pp1",
"provider_options": [
{
"qnn": {
"backend_path": "QnnHtp.dll",
"htp_performance_mode": "burst",
"enable_htp_shared_memory_allocator": "1",
"qnn_context_priority": "high"
}
}
]
},
"run_on_token_gen": false
},
"prompt-processor-2": {
"filename": "ar128_cl4096_2_of_4_qnn_ctx.onnx",
"inputs": [
"_model_layers_7_Add_1_Add_output_0",
"past_key_8_in",
"past_key_13_in",
"past_value_13_in",
"past_value_8_in",
"past_key_14_in",
"past_value_14_in",
"past_key_15_in",
"past_value_15_in",
"past_key_9_in",
"past_value_9_in",
"past_key_10_in",
"past_value_10_in",
"past_key_11_in",
"past_value_11_in",
"past_key_12_in",
"past_value_12_in",
"position_ids_cos",
"position_ids_sin",
"attention_mask"
],
"outputs": [
"past_value_8_out",
"past_key_8_out",
"past_value_9_out",
"past_key_9_out",
"past_value_10_out",
"past_key_10_out",
"past_value_11_out",
"past_key_11_out",
"past_value_12_out",
"past_key_12_out",
"past_value_13_out",
"past_key_13_out",
"past_value_14_out",
"past_key_14_out",
"past_value_15_out",
"past_key_15_out",
"_model_layers_15_Add_1_Add_output_0"
],
"session_options": {
"log_id": "onnxruntime-genai.pp2",
"provider_options": [
{
"qnn": {
"backend_path": "QnnHtp.dll",
"htp_performance_mode": "burst",
"enable_htp_shared_memory_allocator": "1",
"qnn_context_priority": "high"
}
}
]
},
"run_on_token_gen": false
},
"prompt-processor-3": {
"filename": "ar128_cl4096_3_of_4_qnn_ctx.onnx",
"inputs": [
"_model_layers_15_Add_1_Add_output_0",
"past_key_16_in",
"past_key_21_in",
"past_value_21_in",
"past_value_16_in",
"past_key_22_in",
"past_value_22_in",
"past_key_23_in",
"past_value_23_in",
"past_key_17_in",
"past_value_17_in",
"past_key_18_in",
"past_value_18_in",
"past_key_19_in",
"past_value_19_in",
"past_key_20_in",
"past_value_20_in",
"position_ids_cos",
"position_ids_sin",
"attention_mask"
],
"outputs": [
"past_value_16_out",
"past_key_16_out",
"past_value_17_out",
"past_key_17_out",
"past_value_18_out",
"past_key_18_out",
"past_value_19_out",
"past_key_19_out",
"past_value_20_out",
"past_key_20_out",
"past_value_21_out",
"past_key_21_out",
"past_value_22_out",
"past_key_22_out",
"past_value_23_out",
"past_key_23_out",
"_model_layers_23_Add_1_Add_output_0"
],
"session_options": {
"log_id": "onnxruntime-genai.pp3",
"provider_options": [
{
"qnn": {
"backend_path": "QnnHtp.dll",
"htp_performance_mode": "burst",
"enable_htp_shared_memory_allocator": "1",
"qnn_context_priority": "high"
}
}
]
},
"run_on_token_gen": false
},
"prompt-processor-4": {
"filename": "ar128_cl4096_4_of_4_qnn_ctx.onnx",
"inputs": [
"_model_layers_23_Add_1_Add_output_0",
"past_key_24_in",
"past_key_29_in",
"past_value_29_in",
"past_value_24_in",
"past_key_30_in",
"past_value_30_in",
"past_key_31_in",
"past_value_31_in",
"past_key_25_in",
"past_value_25_in",
"past_key_26_in",
"past_value_26_in",
"past_key_27_in",
"past_value_27_in",
"past_key_28_in",
"past_value_28_in",
"position_ids_cos",
"position_ids_sin",
"attention_mask"
],
"outputs": [
"past_value_24_out",
"past_key_24_out",
"past_value_25_out",
"past_key_25_out",
"past_value_26_out",
"past_key_26_out",
"past_value_27_out",
"past_key_27_out",
"past_value_28_out",
"past_key_28_out",
"past_value_29_out",
"past_key_29_out",
"past_value_30_out",
"past_key_30_out",
"past_value_31_out",
"past_key_31_out",
"logits"
],
"session_options": {
"log_id": "onnxruntime-genai.pp4",
"provider_options": [
{
"qnn": {
"backend_path": "QnnHtp.dll",
"htp_performance_mode": "burst",
"enable_htp_shared_memory_allocator": "1",
"qnn_context_priority": "high"
}
}
]
},
"run_on_token_gen": false
},
"token-generator-1": {
"filename": "ar1_cl4096_1_of_4_qnn_ctx.onnx",
"inputs": [
"input_ids",
"past_key_0_in",
"past_key_5_in",
"past_value_5_in",
"past_value_0_in",
"past_key_6_in",
"past_value_6_in",
"past_key_7_in",
"past_value_7_in",
"past_key_1_in",
"past_value_1_in",
"past_key_2_in",
"past_value_2_in",
"past_key_3_in",
"past_value_3_in",
"past_key_4_in",
"past_value_4_in",
"position_ids_cos",
"position_ids_sin",
"attention_mask"
],
"outputs": [
"past_value_0_out",
"past_key_0_out",
"past_value_1_out",
"past_key_1_out",
"past_value_2_out",
"past_key_2_out",
"past_value_3_out",
"past_key_3_out",
"past_value_4_out",
"past_key_4_out",
"past_value_5_out",
"past_key_5_out",
"past_value_6_out",
"past_key_6_out",
"past_value_7_out",
"past_key_7_out",
"_model_layers_7_Add_1_Add_output_0"
],
"session_options": {
"log_id": "onnxruntime-genai.tg1",
"provider_options": [
{
"qnn": {
"backend_path": "QnnHtp.dll",
"htp_performance_mode": "burst",
"enable_htp_shared_memory_allocator": "1",
"qnn_context_priority": "high"
}
}
]
},
"run_on_prompt": false
},
"token-generator-2": {
"filename": "ar1_cl4096_2_of_4_qnn_ctx.onnx",
"inputs": [
"_model_layers_7_Add_1_Add_output_0",
"past_key_8_in",
"past_key_13_in",
"past_value_13_in",
"past_value_8_in",
"past_key_14_in",
"past_value_14_in",
"past_key_15_in",
"past_value_15_in",
"past_key_9_in",
"past_value_9_in",
"past_key_10_in",
"past_value_10_in",
"past_key_11_in",
"past_value_11_in",
"past_key_12_in",
"past_value_12_in",
"position_ids_cos",
"position_ids_sin",
"attention_mask"
],
"outputs": [
"past_value_8_out",
"past_key_8_out",
"past_value_9_out",
"past_key_9_out",
"past_value_10_out",
"past_key_10_out",
"past_value_11_out",
"past_key_11_out",
"past_value_12_out",
"past_key_12_out",
"past_value_13_out",
"past_key_13_out",
"past_value_14_out",
"past_key_14_out",
"past_value_15_out",
"past_key_15_out",
"_model_layers_15_Add_1_Add_output_0"
],
"session_options": {
"log_id": "onnxruntime-genai.tg2",
"provider_options": [
{
"qnn": {
"backend_path": "QnnHtp.dll",
"htp_performance_mode": "burst",
"enable_htp_shared_memory_allocator": "1",
"qnn_context_priority": "high"
}
}
]
},
"run_on_prompt": false
},
"token-generator-3": {
"filename": "ar1_cl4096_3_of_4_qnn_ctx.onnx",
"inputs": [
"_model_layers_15_Add_1_Add_output_0",
"past_key_16_in",
"past_key_21_in",
"past_value_21_in",
"past_value_16_in",
"past_key_22_in",
"past_value_22_in",
"past_key_23_in",
"past_value_23_in",
"past_key_17_in",
"past_value_17_in",
"past_key_18_in",
"past_value_18_in",
"past_key_19_in",
"past_value_19_in",
"past_key_20_in",
"past_value_20_in",
"position_ids_cos",
"position_ids_sin",
"attention_mask"
],
"outputs": [
"past_value_16_out",
"past_key_16_out",
"past_value_17_out",
"past_key_17_out",
"past_value_18_out",
"past_key_18_out",
"past_value_19_out",
"past_key_19_out",
"past_value_20_out",
"past_key_20_out",
"past_value_21_out",
"past_key_21_out",
"past_value_22_out",
"past_key_22_out",
"past_value_23_out",
"past_key_23_out",
"_model_layers_23_Add_1_Add_output_0"
],
"session_options": {
"log_id": "onnxruntime-genai.tg3",
"provider_options": [
{
"qnn": {
"backend_path": "QnnHtp.dll",
"htp_performance_mode": "burst",
"enable_htp_shared_memory_allocator": "1",
"qnn_context_priority": "high"
}
}
]
},
"run_on_prompt": false
},
"token-generator-4": {
"filename": "ar1_cl4096_4_of_4_qnn_ctx.onnx",
"inputs": [
"_model_layers_23_Add_1_Add_output_0",
"past_key_24_in",
"past_key_29_in",
"past_value_29_in",
"past_value_24_in",
"past_key_30_in",
"past_value_30_in",
"past_key_31_in",
"past_value_31_in",
"past_key_25_in",
"past_value_25_in",
"past_key_26_in",
"past_value_26_in",
"past_key_27_in",
"past_value_27_in",
"past_key_28_in",
"past_value_28_in",
"position_ids_cos",
"position_ids_sin",
"attention_mask"
],
"outputs": [
"past_value_24_out",
"past_key_24_out",
"past_value_25_out",
"past_key_25_out",
"past_value_26_out",
"past_key_26_out",
"past_value_27_out",
"past_key_27_out",
"past_value_28_out",
"past_key_28_out",
"past_value_29_out",
"past_key_29_out",
"past_value_30_out",
"past_key_30_out",
"past_value_31_out",
"past_key_31_out",
"logits"
],
"session_options": {
"log_id": "onnxruntime-genai.tg4",
"provider_options": [
{
"qnn": {
"backend_path": "QnnHtp.dll",
"htp_performance_mode": "burst",
"enable_htp_shared_memory_allocator": "1",
"qnn_context_priority": "high"
}
}
]
},
"run_on_prompt": false
},
"dequantizer": {
"filename": "dequantizer.onnx",
"inputs": [
"logits"
],
"outputs": [
"logits_dequantized"
],
"session_options": {
"log_id": "onnxruntime-genai.dequantizer",
"provider_options": [
{}
]
}
}
}
]
},
"eos_token_id": [
32007,
32001,
32000,
2
],
"pad_token_id": 32000,
"type": "decoder-pipeline",
"vocab_size": 32064
},
"search": {
"diversity_penalty": 0.0,
"do_sample": true,
"early_stopping": true,
"length_penalty": 1.0,
"max_length": 2048,
"min_length": 0,
"no_repeat_ngram_size": 0,
"num_beams": 1,
"num_return_sequences": 1,
"past_present_share_buffer": true,
"repetition_penalty": 1.0,
"temperature": 0.6,
"top_k": 1,
"top_p": 1.0
}
}