; Model hyperparameters for the "gptneox" section.
; One key = value pair per line: INI parsers are line-oriented and do not
; split several pairs that share a line.
; NOTE(review): the meaning of each key below is inferred from its name;
; confirm against the loader that consumes this file.
[gptneox]
model_name = gptneox

; Attention geometry. head_num * size_per_head = 12 * 64 = 768,
; presumably the model's hidden size -- TODO confirm.
head_num = 12
size_per_head = 64

; 3072 = 4 * 768; presumably the feed-forward inner width -- TODO confirm.
inter_size = 3072
num_layer = 12

; Same value as size_per_head; presumably the number of per-head dimensions
; that receive rotary position embedding -- TODO confirm.
rotary_embedding = 64

vocab_size = 50304

; Start and end token ids are both set to 0 here; verify that sharing one
; id is intended for this tokenizer.
start_id = 0
end_id = 0

; Presumably a 0/1 flag (0 = disabled) -- TODO confirm.
use_gptj_residual = 0

; Presumably the precision of the stored weights -- TODO confirm.
weight_data_type = fp32