minpeter committed on
Commit
8b0e3cd
·
verified ·
1 Parent(s): bc2dcf6

Training in progress, step 1000

Browse files
Files changed (3) hide show
  1. config.json +3 -2
  2. model.safetensors +1 -1
  3. training_args.bin +2 -2
config.json CHANGED
@@ -12,12 +12,13 @@
12
  "initializer_range": 0.041666666666666664,
13
  "intermediate_size": 1536,
14
  "is_llama_config": true,
15
- "max_position_embeddings": 512,
16
  "mlp_bias": false,
17
  "model_type": "llama",
18
  "num_attention_heads": 9,
19
  "num_hidden_layers": 30,
20
  "num_key_value_heads": 3,
 
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-05,
23
  "rope_interleaved": false,
@@ -25,7 +26,7 @@
25
  "rope_theta": 100000,
26
  "tie_word_embeddings": true,
27
  "torch_dtype": "float32",
28
- "transformers_version": "4.51.3",
29
  "use_cache": true,
30
  "vocab_size": 128256
31
  }
 
12
  "initializer_range": 0.041666666666666664,
13
  "intermediate_size": 1536,
14
  "is_llama_config": true,
15
+ "max_position_embeddings": 2048,
16
  "mlp_bias": false,
17
  "model_type": "llama",
18
  "num_attention_heads": 9,
19
  "num_hidden_layers": 30,
20
  "num_key_value_heads": 3,
21
+ "pad_token_id": 128001,
22
  "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
24
  "rope_interleaved": false,
 
26
  "rope_theta": 100000,
27
  "tie_word_embeddings": true,
28
  "torch_dtype": "float32",
29
+ "transformers_version": "4.52.4",
30
  "use_cache": true,
31
  "vocab_size": 128256
32
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2401cd24257d203d8b2dec1890bae07845e5551a744a8f01bdef5ee00b1326bb
3
  size 720346024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f17d5619a7378f35482ad7d5053f050d5b14eae6f958e2319116bf9ecfc347c9
3
  size 720346024
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25fb4691a1a754fc20a14c94605e92c90427fa489b3144013ed9532a2d2f0141
3
- size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8877c8f6054fb7e4e14bb35368b7bb0ff88a9ebca9cd9310fb591d80d6a0cd9
3
+ size 5713