From 76e82a815bf2b85c8db596566fa58807799f1d42 Mon Sep 17 00:00:00 2001 From: Timmy Knight Date: Thu, 23 Mar 2023 01:19:36 -1000 Subject: [PATCH 1/2] Fix GPTQ converter --- convert-gptq-to-ggml.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/convert-gptq-to-ggml.py b/convert-gptq-to-ggml.py index 7fccb4d569d8f..7ff584e9f7ce2 100644 --- a/convert-gptq-to-ggml.py +++ b/convert-gptq-to-ggml.py @@ -36,7 +36,8 @@ fout = open(fname_out, "wb") -fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex +fout.write(struct.pack("i", 0x67676d66)) # magic: ggml in hex +fout.write(struct.pack("i", 1)) # file version fout.write(struct.pack("i", n_vocab)) fout.write(struct.pack("i", n_embd)) fout.write(struct.pack("i", n_mult)) @@ -49,27 +50,21 @@ # This loop unchanged from convert-pth-to-ggml.py: for i in range(tokenizer.vocab_size()): if tokenizer.is_unknown(i): - # "" token (translated as ??) text = " \u2047 ".encode("utf-8") - fout.write(struct.pack("i", len(text))) - fout.write(text) elif tokenizer.is_control(i): - # ""/"" tokens - fout.write(struct.pack("i", 0)) + text = b"" elif tokenizer.is_byte(i): - # "" tokens (which may be invalid UTF-8) piece = tokenizer.id_to_piece(i) if len(piece) != 6: - print("Invalid token: " + piece) + print(f"Invalid token: {piece}") sys.exit(1) byte_value = int(piece[3:-1], 16) - fout.write(struct.pack("i", 1)) - fout.write(struct.pack("B", byte_value)) + text = struct.pack("B", byte_value) else: - # normal token. Uses U+2581 (LOWER ONE EIGHTH BLOCK) to represent spaces. text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8") - fout.write(struct.pack("i", len(text))) - fout.write(text) + fout.write(struct.pack("i", len(text))) + fout.write(text) + fout.write(struct.pack("f", tokenizer.get_score(i))) def write_header(shape, dst_name, ftype_cur): sname = dst_name.encode('utf-8') From 347592b36520ff3e57b85058b66cfdd29244aa01 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 23 Mar 2023 22:13:54 +0200 Subject: [PATCH 2/2] Fix comment --- convert-gptq-to-ggml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert-gptq-to-ggml.py b/convert-gptq-to-ggml.py index 7ff584e9f7ce2..6c77808fcd186 100644 --- a/convert-gptq-to-ggml.py +++ b/convert-gptq-to-ggml.py @@ -36,7 +36,7 @@ fout = open(fname_out, "wb") -fout.write(struct.pack("i", 0x67676d66)) # magic: ggml in hex +fout.write(struct.pack("i", 0x67676d66)) # magic: ggmf in hex fout.write(struct.pack("i", 1)) # file version fout.write(struct.pack("i", n_vocab)) fout.write(struct.pack("i", n_embd))