# Model configuration (protobuf text format) for the "fastertransformer" backend, model type GPT-NeoX.
# Model identity: the name the model is served under and the backend that executes it.
name: "fastertransformer"
backend: "fastertransformer"
# File name the backend looks for inside the model version directory.
# NOTE(review): presumably also selects the GPT-NeoX code path in the backend - confirm.
default_model_filename: "gptneox"
# A max_batch_size greater than 0 enables batching: Triton treats the first
# dimension of every input/output as an implicit batch dimension, so the
# `dims` entries in this file do not list it.
max_batch_size: 1024

# Decoupled mode is disabled: per Triton's ModelTransactionPolicy the model
# then produces exactly one response for each request.
model_transaction_policy {
  decoupled: False
}

# Request tensors.
#
# * Entries with `optional: true` may be omitted by the client. `input_ids`,
#   `input_lengths` and `request_output_len` carry no such flag and are
#   therefore required.
# * Entries declared as `dims: [ 1 ]` with `reshape: { shape: [ ] }` are
#   per-request scalars: the client sends a single-element tensor and the
#   size-1 dimension is reshaped away before the backend sees it.
# * The meaning of each tensor is defined by the backend; the hints below are
#   derived from the tensor names only and should be confirmed against the
#   backend documentation.
input [
  {
    # Required. Variable-length UINT32 vector (presumably the prompt token ids).
    name: "input_ids"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    # Optional scalar.
    name: "start_id"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar.
    name: "end_id"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Required scalar (presumably the valid length of `input_ids`).
    name: "input_lengths"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
  },
  {
    # Required. Declared as a variable-length vector, not as a reshaped scalar.
    name: "request_output_len"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    # Optional scalar.
    name: "runtime_top_k"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar.
    name: "runtime_top_p"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar.
    name: "beam_search_diversity_rate"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar.
    name: "temperature"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar.
    name: "len_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar.
    name: "repetition_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar; the only 64-bit input in this list.
    name: "random_seed"
    data_type: TYPE_UINT64
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional boolean scalar.
    name: "is_return_log_probs"
    data_type: TYPE_BOOL
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar.
    name: "beam_width"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional. Signed INT32 tensor of shape [2, N]; no reshape is applied.
    name: "bad_words_list"
    data_type: TYPE_INT32
    dims: [ 2, -1 ]
    optional: true
  },
  {
    # Optional. Signed INT32 tensor of shape [2, N]; no reshape is applied.
    name: "stop_words_list"
    data_type: TYPE_INT32
    dims: [ 2, -1 ]
    optional: true
  },
  {
    # Optional scalar.
    name: "prompt_learning_task_name_ids"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar.
    name: "top_p_decay"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar.
    name: "top_p_min"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # Optional scalar.
    name: "top_p_reset_ids"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  }
]
# Response tensors. All dimensions are variable (-1).
# NOTE(review): the axis meanings are not stated in this file; the two-dimensional
# outputs are presumably [beam, token] - confirm against the backend documentation.
output [
  {
    # UINT32, two variable dimensions.
    name: "output_ids"
    data_type: TYPE_UINT32
    dims: [ -1, -1 ]
  },
  {
    # UINT32, one variable dimension.
    name: "sequence_length"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    # FP32, one variable dimension.
    name: "cum_log_probs"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    # FP32, two variable dimensions.
    name: "output_log_probs"
    data_type: TYPE_FP32
    dims: [ -1, -1 ]
  }
]
# A single execution instance of the model.
# NOTE(review): KIND_CPU is kept as written. The FasterTransformer backend's
# sample configurations also use KIND_CPU, presumably because the backend
# manages GPU placement itself - confirm before changing to KIND_GPU.
instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]
# Backend parameter, passed as a string.
# NOTE(review): presumably the tensor-parallel degree, which would need to agree
# with the "1-gpu" suffix of model_checkpoint_path - confirm.
parameters {
  key: "tensor_para_size"
  value: {
    string_value: "1"
  }
}
# Backend parameter, passed as a string.
# NOTE(review): presumably the pipeline-parallel degree - confirm.
parameters {
  key: "pipeline_para_size"
  value: {
    string_value: "1"
  }
}
# Backend parameter, passed as a string.
# NOTE(review): presumably the numeric precision used for inference; it would
# need to be compatible with the converted checkpoint - confirm.
parameters {
  key: "data_type"
  value: {
    string_value: "fp32"
  }
}
# Backend parameter naming the model architecture the backend should run.
parameters {
  key: "model_type"
  value: {
    string_value: "GPT-NeoX"
  }
}
# Absolute path of the checkpoint directory the backend loads.
# NOTE(review): this is a path inside the serving environment; it must exist
# there, and its "1-gpu" suffix presumably reflects tensor_para_size - confirm.
parameters {
  key: "model_checkpoint_path"
  value: {
    string_value: "/model/fastertransformer/1/1-gpu"
  }
}
# Backend parameter, passed as a string; "0" presumably disables the backend's
# custom all-reduce implementation - confirm against the backend documentation.
parameters {
  key: "enable_custom_all_reduce"
  value: {
    string_value: "0"
  }
}