[gpt]
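; Conversion settings: output directory, source checkpoint, GPU/process counts,
; and weight precision for the converted model.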
saved_dir = out
in_file = hf-internal-testing/tiny-random-gptj
trained_gpu_num = 1
infer_gpu_num = 1
processes = 4
weight_data_type = fp32
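; GPT-J architecture hyperparameters (tiny values: hf-internal-testing/tiny-random-gptj
; is a test checkpoint, not a real model).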
vocab_size = 1000
n_positions = 512
n_embd = 32
n_layer = 5
n_head = 4
n_inner = None
rotary_dim = 4
activation_function = gelu_new
resid_pdrop = 0.0
embd_pdrop = 0.0
attn_pdrop = 0.0
layer_norm_epsilon = 1e-05
initializer_range = 0.02
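; Generic Hugging Face PretrainedConfig attributes serialized alongside the
; architecture fields.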
use_cache = True
bos_token_id = 98
eos_token_id = 98
return_dict = True
output_hidden_states = False
output_attentions = False
torchscript = False
torch_dtype = None
use_bfloat16 = False
tf_legacy_loss = False
pruned_heads = {}
tie_word_embeddings = False
is_encoder_decoder = False
is_decoder = False
cross_attention_hidden_size = None
add_cross_attention = False
tie_encoder_decoder = False
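; Default text-generation parameters (greedy decoding: do_sample = False,
; num_beams = 1).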
max_length = 20
min_length = 0
do_sample = False
early_stopping = False
num_beams = 1
num_beam_groups = 1
diversity_penalty = 0.0
temperature = 1.0
top_k = 50
top_p = 1.0
typical_p = 1.0
repetition_penalty = 1.0
length_penalty = 1.0
no_repeat_ngram_size = 0
encoder_no_repeat_ngram_size = 0
bad_words_ids = None
num_return_sequences = 1
chunk_size_feed_forward = 0
output_scores = False
return_dict_in_generate = False
forced_bos_token_id = None
forced_eos_token_id = None
remove_invalid_values = False
exponential_decay_length_penalty = None
suppress_tokens = None
begin_suppress_tokens = None
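; Task labels, tokenizer settings, and provenance of the source checkpoint.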
architectures = None
finetuning_task = None
id2label = {0: 'LABEL_0', 1: 'LABEL_1'}
label2id = {'LABEL_0': 0, 'LABEL_1': 1}
tokenizer_class = None
prefix = None
pad_token_id = 98
sep_token_id = None
decoder_start_token_id = None
task_specific_params = None
problem_type = None
_name_or_path = hf-internal-testing/tiny-random-gptj
_commit_hash = b96595a4bcdeb272096214589efa0314259853a0
transformers_version = 4.11.0.dev0
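; Remaining keys (BERT-style names such as hidden_act and intermediate_size)
; are carried over from the test checkpoint's config and are likely unused
; for GPT-J, which reads n_inner and activation_function instead.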
attention_probs_dropout_prob = 0.0
gradient_checkpointing = False
hidden_act = gelu
hidden_dropout_prob = 0.0
intermediate_size = 37
model_type = gptj
n_ctx = 512
scale_attn_weights = True
type_vocab_size = 16
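; Note: configparser-style INI stores every value as a string, so Python
; literals above (None, True, {}, {0: 'LABEL_0', ...}) must be decoded by the
; reader. A minimal sketch, assuming Python's stdlib configparser and a
; hypothetical filename "config.ini":
;   import ast, configparser
;   cfg = configparser.ConfigParser()
;   cfg.read("config.ini")
;   n_embd = cfg.getint("gpt", "n_embd")                  # -> 32
;   id2label = ast.literal_eval(cfg["gpt"]["id2label"])   # -> {0: 'LABEL_0', 1: 'LABEL_1'}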