mikasenghaas committed on
Commit
61a94fb
·
verified ·
1 Parent(s): 7062733

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. config.json +74 -0
  2. generation_config.json +9 -0
  3. model-00001-of-00066.safetensors +3 -0
  4. model-00002-of-00066.safetensors +3 -0
  5. model-00003-of-00066.safetensors +3 -0
  6. model-00004-of-00066.safetensors +3 -0
  7. model-00005-of-00066.safetensors +3 -0
  8. model-00006-of-00066.safetensors +3 -0
  9. model-00007-of-00066.safetensors +3 -0
  10. model-00008-of-00066.safetensors +3 -0
  11. model-00009-of-00066.safetensors +3 -0
  12. model-00010-of-00066.safetensors +3 -0
  13. model-00011-of-00066.safetensors +3 -0
  14. model-00012-of-00066.safetensors +3 -0
  15. model-00013-of-00066.safetensors +3 -0
  16. model-00014-of-00066.safetensors +3 -0
  17. model-00015-of-00066.safetensors +3 -0
  18. model-00016-of-00066.safetensors +3 -0
  19. model-00017-of-00066.safetensors +3 -0
  20. model-00018-of-00066.safetensors +3 -0
  21. model-00019-of-00066.safetensors +3 -0
  22. model-00020-of-00066.safetensors +3 -0
  23. model-00021-of-00066.safetensors +3 -0
  24. model-00022-of-00066.safetensors +3 -0
  25. model-00023-of-00066.safetensors +3 -0
  26. model-00024-of-00066.safetensors +3 -0
  27. model-00025-of-00066.safetensors +3 -0
  28. model-00026-of-00066.safetensors +3 -0
  29. model-00027-of-00066.safetensors +3 -0
  30. model-00028-of-00066.safetensors +3 -0
  31. model-00029-of-00066.safetensors +3 -0
  32. model-00030-of-00066.safetensors +3 -0
  33. model-00031-of-00066.safetensors +3 -0
  34. model-00032-of-00066.safetensors +3 -0
  35. model-00033-of-00066.safetensors +3 -0
  36. model-00034-of-00066.safetensors +3 -0
  37. model-00035-of-00066.safetensors +3 -0
  38. model-00036-of-00066.safetensors +3 -0
  39. model-00037-of-00066.safetensors +3 -0
  40. model-00038-of-00066.safetensors +3 -0
  41. model-00039-of-00066.safetensors +3 -0
  42. model-00040-of-00066.safetensors +3 -0
  43. model-00041-of-00066.safetensors +3 -0
  44. model-00042-of-00066.safetensors +3 -0
  45. model-00043-of-00066.safetensors +3 -0
  46. model-00044-of-00066.safetensors +3 -0
  47. model-00045-of-00066.safetensors +3 -0
  48. model-00046-of-00066.safetensors +3 -0
  49. model-00047-of-00066.safetensors +3 -0
  50. model-00048-of-00066.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "deepseek-ai/DeepSeek-R1-0528--configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "deepseek-ai/DeepSeek-R1-0528--modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "deepseek-ai/DeepSeek-R1-0528--modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "bos_token_id": 0,
13
+ "eos_token_id": 1,
14
+ "ep_size": 1,
15
+ "first_k_dense_replace": 3,
16
+ "head_dim": 64,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 7168,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 18432,
21
+ "kv_lora_rank": 512,
22
+ "max_position_embeddings": 163840,
23
+ "model_type": "deepseek_v3",
24
+ "moe_intermediate_size": 2048,
25
+ "moe_layer_freq": 1,
26
+ "n_group": 8,
27
+ "n_routed_experts": 256,
28
+ "n_shared_experts": 1,
29
+ "norm_topk_prob": true,
30
+ "num_attention_heads": 128,
31
+ "num_experts_per_tok": 8,
32
+ "num_hidden_layers": 31,
33
+ "num_key_value_heads": 128,
34
+ "num_nextn_predict_layers": 1,
35
+ "num_shards": 2,
36
+ "pretraining_tp": 1,
37
+ "q_lora_rank": 1536,
38
+ "qk_head_dim": 192,
39
+ "qk_nope_head_dim": 128,
40
+ "qk_rope_head_dim": 64,
41
+ "quantization_config": {
42
+ "activation_scheme": "dynamic",
43
+ "modules_to_not_convert": null,
44
+ "quant_method": "fp8",
45
+ "weight_block_size": [
46
+ 128,
47
+ 128
48
+ ]
49
+ },
50
+ "rms_norm_eps": 1e-06,
51
+ "rope_interleave": true,
52
+ "rope_scaling": {
53
+ "beta_fast": 32,
54
+ "beta_slow": 1,
55
+ "factor": 40,
56
+ "mscale": 1.0,
57
+ "mscale_all_dim": 1.0,
58
+ "original_max_position_embeddings": 4096,
59
+ "rope_type": "yarn",
60
+ "type": "yarn"
61
+ },
62
+ "rope_theta": 10000,
63
+ "routed_scaling_factor": 2.5,
64
+ "scoring_func": "sigmoid",
65
+ "shard_idx": 0,
66
+ "tie_word_embeddings": false,
67
+ "topk_group": 4,
68
+ "topk_method": "noaux_tc",
69
+ "torch_dtype": "bfloat16",
70
+ "transformers_version": "4.51.3",
71
+ "use_cache": true,
72
+ "v_head_dim": 128,
73
+ "vocab_size": 129280
74
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "do_sample": true,
5
+ "eos_token_id": 1,
6
+ "temperature": 0.6,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.51.3"
9
+ }
model-00001-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd5c8fb76dbc975007c1bc2fe6d1cb991f4f75f13ae8d3410ed8666ff3deb9a2
3
+ size 4995145728
model-00002-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c46b4166247575ae33e6db7eef0be57767e3133f3a869f6e8dc63eaa478562a
3
+ size 4991915936
model-00003-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65172dc3cff45b4ecd6db919beb0697ed76416c39274370c1a19cedee91e784d
3
+ size 4991916376
model-00004-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d688728ee3a5536246f48098f6dc49908ab8c1e53033d8113586cbfaf42d2d8
3
+ size 4991882432
model-00005-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc6d8d05e0efcae9cbd851d17f68a234b5286611f6248bd52f4bc02b188276b
3
+ size 4991916376
model-00006-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efd3cb53e576be716e907d797497efdc119ddea9b1c3de22b99ac2f6594cd929
3
+ size 4991882320
model-00007-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b9bda1294d5704880aad70a40a22124efbb039bf2500eff8bd50da0b0659cf0
3
+ size 4991916200
model-00008-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18d24e68a016ef3735cfb165fac9a47641b5c64adc21dc4df2c23514ddf3a570
3
+ size 4991882320
model-00009-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b60bd6e070be0fc786d9348ce63aecb871065fca3432d8db2f9d18fd0a56623d
3
+ size 4991915992
model-00010-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5df8329422a699731dcfccb924a33c290b65f16f4eb8d55ae556494139967dd4
3
+ size 4991882368
model-00011-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9a0db0dbfdc6e82234a66dbb08b008c04d68579ebeaae746cfa4701dce3fef
3
+ size 4991915736
model-00012-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:510e71bae50d15af4d6163740711a4b5b6548f999ecc6b8f6e289ba9ca987569
3
+ size 4991916368
model-00013-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdce1e9f0a701354d1d401048314bc3235dcf2c30d89fe7532ce1f0ccc10e5e0
3
+ size 4991882320
model-00014-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:669ef41414513a4b749cbabc734650a17d02b2af138f02a3905cf558d9fa0ccc
3
+ size 4991916256
model-00015-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd4a40600fc4e8d2ec6a979b5982d8cdf05682b7fda931355dbede859f65f305
3
+ size 4991882320
model-00016-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3147db2013c51eb6b36f866f1208e7f6c18d3ef76674e0f1c58cbcdbbed19b9
3
+ size 4991916048
model-00017-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f702b17c794027ca32ae8f208f73c5800aeb7ff79b5b8cddb80ea9b631d495ec
3
+ size 4991882336
model-00018-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:373aff916f99e57e6f6a99fa6d39f62d9ac6788446565ca6a475fe548737db23
3
+ size 4991916520
model-00019-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d901331505460c4ab9bf6ed3ca38e440af8667d036e3565f2fabf445d355c9b6
3
+ size 4991917056
model-00020-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65154ed113ef7ccef01df0183692913a8c5477589e51f7d1d722015d4083966a
3
+ size 4991882984
model-00021-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71598042c8fc8bdf433921fbd71b00092a4f9e54914bee5ffa99fb6ce4cdefac
3
+ size 4991916992
model-00022-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b03fd10b3ab5001c2d2f69e03caa138fa806529dcae9d7ef82b4ac14647d88f1
3
+ size 4991882984
model-00023-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f4e222cd17340721f08a3617897fb675acf86ae0f5057fb064363a83164b3f6
3
+ size 4991916784
model-00024-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea76297da8034b0df76a70e511cfbc60bd15a5def4fc19c17d60ad397b2c1aae
3
+ size 4991882984
model-00025-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:253102bacbb8e6fd73e150f44268a536876e187ded8307eb8883c05d92f77285
3
+ size 4991916576
model-00026-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c1d8531b4e574caea23d16067923ca2d141afaadd943ad099465ceeffa6a1a4
3
+ size 4991917056
model-00027-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11cd519c826839e2ed1c10e4ffc49ec47f53bfea20323ed1fd7f666724035ab7
3
+ size 4991883024
model-00028-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d87b80ceefafbb6730eaaf0d6a7b1cb2f206672a7e8ab47370a3c12c8a4d5922
3
+ size 4991917048
model-00029-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a5b4d416de9b57f89b87a0be59f803a5f602faf04ca0862a0dbcdb22574b3dd
3
+ size 4991882984
model-00030-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4f6f6e708c6a3ce26d59f926306d2383618f8aaf1ad98b61b6071c9fbef61a7
3
+ size 4991916840
model-00031-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f81081a0ec8aaa30a8c919126b98787043a656cc42e695574e1a2f09947a62f3
3
+ size 4991882984
model-00032-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9453c0992a317ecb8126e46208ccac910395ff6864479e1ac9687bfa32b687ba
3
+ size 4991916632
model-00033-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:312d5aaf84ffd8376beaca2298a8ff57d53774c7c5ccaf4a1dec0f8e5296507e
3
+ size 4995587816
model-00034-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2bdb5fcf0c7dd0b833cf8488b4c4a743d2a4637c811c4e281b3cfe7ffecff1b
3
+ size 4988211624
model-00035-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:724dfb69d2825187bffdfd58072f45916be3adc6ef3f41472b435e326f458fcd
3
+ size 4991917056
model-00036-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9919497894717857974790a71d1a744314dbc7b9c9bead19252b6050b9714418
3
+ size 4991882984
model-00037-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc05d3eff7cf06eabcdfd6bf141478ddff0576353446e81ef72ebfc7000f7c26
3
+ size 4991916896
model-00038-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9643884faf640e0d0d5e687aa22cd34cd270060c99b74d353f1a03baaec87c2b
3
+ size 4991882984
model-00039-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ebf24b6c6cfdee945c460b67db85c40771cc6b65f41d269d3f87629780848d8
3
+ size 4991916688
model-00040-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7cc0bfe7aa1ee7973eb8c2b3b998dfe6717ad07377b336f20e6950bd8fedc19
3
+ size 4991883016
model-00041-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0927b5df4c57506ccb00c4b49a523cc27d606f2910b6a6b056997e74ef868a71
3
+ size 4991916448
model-00042-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:690b10d5d3f09461ecd8bab1b21c0bf2978da7644db53d40827fcda1dd6e593a
3
+ size 4991917056
model-00043-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:007e5a5a2e5d287200b95404c00a4433f7dbce655dd37ebdc436d23f5b40d6cd
3
+ size 4991882984
model-00044-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb596cf305edef3d13545b4fc69cdc05f7ce0a9f41bff1cfa1e857d2a595609
3
+ size 4991916952
model-00045-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0b4c126d4b8604fb81b8952ff2e9bd35fb338ceeae88c8d7e3d217f3340d8cb
3
+ size 4991882984
model-00046-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c91c5111f7bdf6290f90e4a7b3abecf290cb793306578dec709e9560be13cd
3
+ size 4991916744
model-00047-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52344f1a344dfa63518963816bf5779f021a0886bd1db912f770de3c3a2e185c
3
+ size 4991882984
model-00048-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:022611ccf44b11c1d79f57094fba7349a93cd6e86fb6a4391236e203f40aa06e
3
+ size 4991916536