{ "metadata": { "ParamSize": 325, "ParamBytes": 2388848640.0, "BitsPerParam": 5.001410952042906 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 49250304, "records": [ { "name": "lm_head.q_weight", "shape": [ 32064, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 49250304, "byteOffset": 0 } ], "md5sum": "3aeeb3d7e9d8e033e2294aaa3617c930" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.21.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4c6eb27388de688ccd2fa3a8ee670b51" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 23470080, "records": [ { "name": "lm_head.q_scale", "shape": [ 32064, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6156288, "byteOffset": 0 }, { "name": "transformer.h.21.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 6156288 }, { "name": "transformer.h.21.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 6162432 }, { "name": "transformer.h.21.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 18745344 }, { "name": "transformer.h.21.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 20318208 }, { "name": "transformer.h.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 23463936 } ], "md5sum": "b79b4d227556e018376ea5d6fd622968" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.22.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e2b23df08fdde2a47ae85ad21aa558a0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.21.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.21.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.22.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.22.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.22.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.22.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "725020e4e78739a8f6343cb415878cd0" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.22.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.22.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.22.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.22.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.23.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "e4ae28ecd247c29bbc4928f992d1bced" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.23.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "275662165301882e56bced0cfecdbab7" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.23.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.23.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.23.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.23.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.23.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "f45f08bcf9df989fddf13aa89358f016" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.24.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ce8fcd47094a2b30452c24ce679cbe7c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.23.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.23.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.24.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.24.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.24.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.24.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "d09017dad54cf3f00e5e172b62477a0a" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.24.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.24.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.24.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.24.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.25.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "4c9676bece9b5e7b221a69336026a268" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.25.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0e72d98ffd8355dd7be0b1a4fcffa048" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.25.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.25.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.25.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.25.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.25.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "d09eeff14583901b180dc6eccb4ace12" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.26.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "99b504cdeea00e6028b34a65c94a9535" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.25.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.25.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.26.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.26.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.26.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.26.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "c67c1651ca7d9ca3cdcff92dbf9d50d5" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.26.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.26.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.26.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.26.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.27.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "894a738901cf878c2c8494742c86099d" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.27.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b81cca171ee50bf0c5765e60cd249633" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.27.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.27.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.27.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.27.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.27.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "cfa61f772775738e0a0353ec164421c6" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.28.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cc4e16d406507daff55b0fcf11c2be6d" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.27.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.27.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.28.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.28.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.28.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.28.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.28.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "99e55559e6cf21d7141bf69d453a6298" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.28.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.28.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.28.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.28.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.29.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "cf72032d2f29bab3fe90b58a0c58cb36" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.29.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "057b59306b4ad62ffd608fa7431df6c4" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.29.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.29.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.29.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.29.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.29.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.29.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "be5495faf43ff8ef7b41e2e1591b2f63" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.30.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "90dc59780c48c89ca310944617918b30" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.29.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.29.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.30.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.30.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.30.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.30.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.30.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "357d7ff9e326fb7afb1c19ebabb9437e" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.30.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.30.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.30.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.30.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.31.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "d650c22c656bd85695c90079e696df7d" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.31.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "20370d2b717a8e485de07646ba0e31d7" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.31.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.31.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.31.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.31.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.31.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.31.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "ca3d3d45623ec5e503780e7e08b06241" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 49250304, "records": [ { "name": "transformer.embd.q_weight", "shape": [ 32064, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 49250304, "byteOffset": 0 } ], "md5sum": "6151d64f2428a5c1d62203744ae7522f" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 22093824, "records": [ { "name": "transformer.h.31.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.31.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.norm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.embd.q_scale", "shape": [ 32064, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6156288, "byteOffset": 15931392 }, { "name": "transformer.h.0.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 22087680 } ], "md5sum": "3f943d23a92f58ba96157b96389476b5" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.0.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4343c908c761bf45cd71ea2342de2e9d" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.0.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.0.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.0.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.0.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.0.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "c2bd6ef56d875c4f0adf154cfe47c933" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.1.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cd7f902c8eeaa61096ce9f4b28851caf" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.0.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.0.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.1.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.1.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.1.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.1.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "281620d48c1b4b25171eac94add6e6a0" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.1.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.1.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.1.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.1.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.10.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "71c800a9c8f2688b5a09fab340b952ab" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.10.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "eb56232c87f198c03c89d18e5ef36b67" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.10.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.10.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.10.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.10.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.10.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "f1697a0679ed6fa119d0f06c2d6afd9a" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.11.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1c21ba64c0480980df4feb90e466ba7c" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.10.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.10.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.11.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.11.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.11.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.11.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "357ddc41322e4e58c1500316ba5eff87" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.11.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.11.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.11.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.11.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.12.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "ca57db2f19189c4ee50f1b259502345c" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.12.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a0f302db7e317d5affa27c8f6c1ba903" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.12.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.12.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.12.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.12.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.12.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "7ccbda76aa15483e6a3f15eba7fc6a70" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.13.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "efa96e2bc14423470f06edd30dd8a1ee" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.12.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.12.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.13.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.13.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.13.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.13.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "4a5dc9c0b947885e1cf127639fbfe632" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.13.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.13.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.13.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.13.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.14.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "67903c92547a920e3a8160a591c51044" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.14.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "482cbcfc8565a260833ac2ef6f854dcf" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.14.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.14.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.14.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.14.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.14.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "6e5cdaaa46681ebee024b02fcc59d597" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.15.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "20f1425c7d6d4ebe6d38086e8014f8b5" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.14.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.14.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.15.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.15.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.15.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.15.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "ad9cff41d1e77691fe35d1e2d1cc288f" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.15.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.15.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.15.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.15.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.16.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "53c4c98a271b4528f433486363134787" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.16.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cb442181137be51100cc66942af5d750" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.16.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.16.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.16.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.16.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.16.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "f1f65ff2578babdee1f79b9746f53777" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.17.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "43deb776cdcf11197e52daa5eaf65bb4" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.16.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.16.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.17.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.17.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.17.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.17.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "30e4627221d5cd27e21f27c419916cdc" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.17.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.17.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.17.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.17.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.18.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "958f964c7c041d88ee24eaea103f9f5b" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.18.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0916edaf606a71efca575bdab3eb7fed" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.18.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.18.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.18.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.18.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.18.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "f384838842289a74a8033bbb4b0c6752" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.19.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "87402cfadbf5f167aff039e25e5bd8df" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.18.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.18.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.19.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.19.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.19.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.19.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "26e8a37ec5bffe7133a9bc27c1a78cb3" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.19.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.19.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.19.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.19.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.2.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "9298980b2969ff9185155c63f27c4833" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.2.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fe08c7935fa15b57efc3c4091e779770" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.2.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.2.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.2.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.2.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.2.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "b5de4b357981555a6ffb6b1508b80c3c" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.20.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "884ecdd4f22892d8bdcc791aa3cd6452" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.2.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.2.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.20.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.20.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.20.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.20.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "8b478873b579011c9503b8e1c18da89b" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 26548224, "records": [ { "name": "transformer.h.20.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.20.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.20.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.20.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.21.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21233664 }, { "name": "transformer.h.21.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 25952256 }, { "name": "transformer.h.3.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 26542080 } ], "md5sum": "d868060e32452d6ad8de1939afa175a0" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.3.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c863795534140ccc964b6a6bb5477722" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.3.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.3.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.3.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.3.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.3.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "5ee6f1c5c478e7defad5adc986d203bd" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.4.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fd4abc21b279b19fa3e7021fb1bdd6d8" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.3.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.3.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.4.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.4.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.4.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.4.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "92cf09176b4af272ff88b4d770bebb49" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.4.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.4.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.4.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.4.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.5.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "f2576337c4f7e49b7f289cb52ca9691c" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.5.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4aad834b4bc23460843517ff71d6a03c" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.5.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.5.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.5.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.5.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.5.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "76b77a44a3819f7b935f5503820080c9" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.6.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "28d4502b269e7fdaef6d85e023fb8181" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.5.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.5.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.6.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.6.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.6.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.6.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "122540bf729688fc534acaaf84d3a3f8" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.6.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.6.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.6.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.6.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.7.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "0a15e33fd4087a189cc2284bfdfe05b2" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.7.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "552a946908bb2a8e68df3ad4124df411" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.7.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.7.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.7.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.7.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.7.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "39c5ce0bbd57ffc7293c1f21a4340678" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.8.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d394cec2b8266f047c085ca6b27a23de" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.7.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.7.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.8.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.8.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.8.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.8.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "c702901239fe22e5e139aa2b315eb4ab" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.8.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.8.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.8.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.8.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.9.ln.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "cea066ba9a628075f151bf691f5c0792" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.9.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "548f267eb43d829b7094b0544bc8e46e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.9.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.9.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.9.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.9.mixer.out_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.9.mixer.out_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "91104658482ada6ec8851f5670890038" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 15925248, "records": [ { "name": "transformer.h.9.mixer.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.9.mixer.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1769472, "byteOffset": 14155776 } ], "md5sum": "acfd1a32ed7e0091ec650cbfc933b085" } ] }