Skip to content

Commit 2818c44

Browse files
committed
Fix quant for v7 & Add q4_k/q5_k quants support
Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
1 parent 49c4754 commit 2818c44

File tree

4 files changed

+49
-8
lines changed

4 files changed

+49
-8
lines changed

extras/quantize.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@ bool QueryPerformanceCounter(uint64_t* lpPerformanceCount);
2525
static enum ggml_type type_from_string(const char * string) {
2626
if (strcmp(string, "Q4_0") == 0) return GGML_TYPE_Q4_0;
2727
if (strcmp(string, "Q4_1") == 0) return GGML_TYPE_Q4_1;
28+
if (strcmp(string, "Q4_K") == 0) return GGML_TYPE_Q4_K;
2829
if (strcmp(string, "Q5_0") == 0) return GGML_TYPE_Q5_0;
2930
if (strcmp(string, "Q5_1") == 0) return GGML_TYPE_Q5_1;
31+
if (strcmp(string, "Q5_K") == 0) return GGML_TYPE_Q5_K;
3032
if (strcmp(string, "Q8_0") == 0) return GGML_TYPE_Q8_0;
3133
return GGML_TYPE_COUNT;
3234
}

python/rwkv_cpp/rwkv_cpp_shared_library.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
# Names of the quantized formats exposed by this module.
# The annotation is a variable-length tuple so it stays correct when formats
# are added; the previous fixed five-element annotation no longer matched the
# seven entries below.
QUANTIZED_FORMAT_NAMES: Tuple[str, ...] = (
    'Q4_0',
    'Q4_1',
    'Q4_K',
    'Q5_0',
    'Q5_1',
    'Q5_K',
    'Q8_0'
)
1517

rwkv_file_format.inc

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ enum rwkv_type {
1313
TYPE_Q5_0,
1414
TYPE_Q5_1,
1515
TYPE_Q8_0,
16+
TYPE_Q8_1,
17+
TYPE_Q2_K,
18+
TYPE_Q3_K,
19+
TYPE_Q4_K,
20+
TYPE_Q5_K,
21+
TYPE_Q6_K,
22+
TYPE_Q8_K,
1623
TYPE_COUNT
1724
};
1825

@@ -29,6 +36,13 @@ static const enum ggml_type rwkv_type_to_ggml[TYPE_COUNT + 1] = {
2936
GGML_TYPE_Q5_0, /* Q5_0 */
3037
GGML_TYPE_Q5_1, /* Q5_1 */
3138
GGML_TYPE_Q8_0, /* Q8_0 */
39+
GGML_TYPE_Q8_1, /* Q8_1 */
40+
GGML_TYPE_Q2_K, /* Q2_K */
41+
GGML_TYPE_Q3_K, /* Q3_K */
42+
GGML_TYPE_Q4_K, /* Q4_K */
43+
GGML_TYPE_Q5_K, /* Q5_K */
44+
GGML_TYPE_Q6_K, /* Q6_K */
45+
GGML_TYPE_Q8_K, /* Q8_K */
3246
GGML_TYPE_COUNT /* COUNT */
3347
};
3448

@@ -42,10 +56,13 @@ static const enum rwkv_type rwkv_type_from_ggml[GGML_TYPE_COUNT + 1] = {
4256
TYPE_Q5_0, /* Q5_0 */
4357
TYPE_Q5_1, /* Q5_1 */
4458
TYPE_Q8_0, /* Q8_0 */
45-
TYPE_COUNT, /* Q8_1 */
46-
TYPE_COUNT, /* I8 */
47-
TYPE_COUNT, /* I16 */
48-
TYPE_COUNT, /* I32 */
59+
TYPE_Q8_1, /* Q8_1 */
60+
TYPE_Q2_K, /* Q2_K */
61+
TYPE_Q3_K, /* Q3_K */
62+
TYPE_Q4_K, /* Q4_K */
63+
TYPE_Q5_K, /* Q5_K */
64+
TYPE_Q6_K, /* Q6_K */
65+
TYPE_Q8_K, /* Q8_K */
4966
TYPE_COUNT, /* COUNT */
5067
};
5168

@@ -60,6 +77,13 @@ static const char * rwkv_type_to_string[TYPE_COUNT + 1] = {
6077
"Q5_0",
6178
"Q5_1",
6279
"Q8_0",
80+
"Q8_1",
81+
"Q2_K",
82+
"Q3_K",
83+
"Q4_K",
84+
"Q5_K",
85+
"Q6_K",
86+
"Q8_K",
6387
"unknown"
6488
};
6589

rwkv_quantize.inc

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
// Decides by tensor name whether a tensor should be quantized.
//
// Returns false when the name is exactly "emb.weight" or "head.weight", or
// when it contains any of the substrings listed in excluded_fragments below;
// returns true for every other name (including the empty string).
//
// The name is taken by const reference so that calling this once per tensor
// does not copy the string.
// NOTE(review): the excluded "att.*" fragments presumably name the RWKV v7
// tensors that must stay unquantized -- confirm against the model definition.
static bool rwkv_tensor_needs_quant(const std::string & name) {
    // Exact-match exclusions.
    if (name == "emb.weight" || name == "head.weight") {
        return false;
    }

    // Substring exclusions: a match anywhere in the name disables quantization.
    static const char * const excluded_fragments[] = {
        "att.v1", "att.v2",
        "att.g1", "att.g2",
        "att.a1", "att.a2",
        "att.w1", "att.w2",
        "att.r_k",
    };

    for (const char * fragment : excluded_fragments) {
        if (name.find(fragment) != std::string::npos) {
            return false;
        }
    }

    return true;
}
14+
115
// API function.
216
bool rwkv_quantize_model_file(const char * in_path, const char * out_path, const char * type_name) {
317
global_last_error = RWKV_ERROR_NONE;
@@ -122,10 +136,9 @@ bool rwkv_quantize_model_file(const char * in_path, const char * out_path, const
122136
// In RWKV v5, time_decay and time_first/time_faaaa are 3D tensors, so they are not quantized.
123137
if ((header.data_type == TYPE_FP32 || header.data_type == TYPE_FP16) &&
124138
header.dim_count == 2 &&
125-
name != "emb.weight" &&
126-
name != "head.weight"
139+
rwkv_tensor_needs_quant(name)
127140
) {
128-
RWKV_MSG("quantizing... ");
141+
RWKV_MSG("-> %6s ", rwkv_type_to_string[rwkv_type_from_ggml[out_type]]);
129142

130143
size_t nelements = (size_t) header.size0 * (size_t) header.size1 * (size_t) header.size2;
131144

@@ -137,7 +150,7 @@ bool rwkv_quantize_model_file(const char * in_path, const char * out_path, const
137150
header.data_type = rwkv_type_from_ggml[out_type];
138151
data = out_buf;
139152

140-
RWKV_MSG("size = %8.2f MB -> %8.2f MB | hist: ", orig_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
153+
RWKV_MSG("size = %8.2f MB -> %8.2f MB", orig_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
141154

142155
RWKV_MSG("\n");
143156
} else {

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy