From ebcccb6cdf45872cdeedc34cbe72256caeb084bc Mon Sep 17 00:00:00 2001 From: jwxiong Date: Fri, 16 Oct 2020 18:46:36 +0800 Subject: [PATCH 01/18] add utf8mb4_general_zh_ci interface --- ddl/serial_test.go | 1 + executor/seqtest/seq_executor_test.go | 1 + expression/distsql_builtin_test.go | 1 + expression/expr_to_pb.go | 19 ++++++------- expression/expr_to_pb_test.go | 2 ++ util/collate/collate.go | 41 ++++++++++++++++++++------- util/collate/collate_test.go | 4 +++ util/collate/general_ci.go | 9 ------ util/collate/general_zh_ci.go | 22 ++++++++++++++ 9 files changed, 71 insertions(+), 29 deletions(-) create mode 100644 util/collate/general_zh_ci.go diff --git a/ddl/serial_test.go b/ddl/serial_test.go index c54db5cf44dfa..1f146ef64a9cf 100644 --- a/ddl/serial_test.go +++ b/ddl/serial_test.go @@ -1272,6 +1272,7 @@ func (s *testSerialSuite) TestModifyingColumn4NewCollations(c *C) { tk.MustExec("alter table t convert to charset utf8 collate utf8_general_ci") tk.MustExec("alter table t modify b varchar(10) collate utf8_unicode_ci") tk.MustExec("alter table t modify b varchar(10) collate utf8_bin") + tk.MustExec("alter table t modify b varchar(10) collate utf8mb4_general_zh_ci") tk.MustExec("alter table t add index b_idx(b)") tk.MustExec("alter table t add index c_idx(c)") diff --git a/executor/seqtest/seq_executor_test.go b/executor/seqtest/seq_executor_test.go index d792ac8829706..8a7fcc3b6ddc8 100644 --- a/executor/seqtest/seq_executor_test.go +++ b/executor/seqtest/seq_executor_test.go @@ -1236,6 +1236,7 @@ func (s *seqTestSuite) TestShowForNewCollations(c *C) { "utf8mb4_bin utf8mb4 46 Yes Yes 1", "utf8mb4_general_ci utf8mb4 45 Yes 1", "utf8mb4_unicode_ci utf8mb4 224 Yes 1", + "utf8mb4_general_zh_ci utf8mb4 2048 Yes 1", ) tk.MustQuery("show collation").Check(expectRows) tk.MustQuery("select * from information_schema.COLLATIONS").Check(expectRows) diff --git a/expression/distsql_builtin_test.go b/expression/distsql_builtin_test.go index a438ea80a65d7..2140ab9602e1d 100644 --- a/expression/distsql_builtin_test.go +++ b/expression/distsql_builtin_test.go @@ -56,6 +56,7 @@ func (s *testEvalSerialSuite) TestPBToExprWithNewCollation(c *C) { {"some_error_collation", "utf8mb4_bin", 46, 46}, {"utf8_unicode_ci", "utf8_unicode_ci", 192, 192}, {"utf8mb4_unicode_ci", "utf8mb4_unicode_ci", 224, 224}, + {"utf8mb4_general_zh_ci", "utf8mb4_general_zh_ci", 2048, 2048}, } for _, cs := range cases { diff --git a/expression/expr_to_pb.go b/expression/expr_to_pb.go index a68673f1b4a19..74e9779d0091b 100644 --- a/expression/expr_to_pb.go +++ b/expression/expr_to_pb.go @@ -174,17 +174,16 @@ func FieldTypeFromPB(ft *tipb.FieldType) *types.FieldType { } func collationToProto(c string) int32 { - if v, ok := mysql.CollationNames[c]; ok { - return collate.RewriteNewCollationIDIfNeeded(int32(v)) + v := int32(collate.CollationName2ID(c)) + if v == mysql.DefaultCollationID && c != mysql.DefaultCollationName { + logutil.BgLogger().Warn( + "Unable to get collation ID by name, use ID of the default collation instead", + zap.String("name", c), + zap.Int32("default collation ID", v), + zap.String("default collation", mysql.DefaultCollationName), + ) } - v := collate.RewriteNewCollationIDIfNeeded(int32(mysql.DefaultCollationID)) - logutil.BgLogger().Warn( - "Unable to get collation ID by name, use ID of the default collation instead", - zap.String("name", c), - zap.Int32("default collation ID", v), - zap.String("default collation", mysql.DefaultCollationName), - ) - return v + return collate.RewriteNewCollationIDIfNeeded(v) } func protoToCollation(c int32) string { diff --git a/expression/expr_to_pb_test.go b/expression/expr_to_pb_test.go index 4daeb88e0939a..76070b6494df5 100644 --- a/expression/expr_to_pb_test.go +++ b/expression/expr_to_pb_test.go @@ -789,6 +789,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) { colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeString, 4), "utf8mb4_0900_ai_ci")) colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 5), "utf8_bin")) colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 6), "utf8_unicode_ci")) + colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 7), "utf8mb4_general_zh_ci")) pushed, _ := PushDownExprs(sc, colExprs, client, kv.UnSpecified) c.Assert(len(pushed), Equals, len(colExprs)) pbExprs, err := ExpressionsToPBList(sc, colExprs, client) @@ -800,6 +801,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) { "{\"tp\":201,\"val\":\"gAAAAAAAAAQ=\",\"sig\":0,\"field_type\":{\"tp\":254,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}}", "{\"tp\":201,\"val\":\"gAAAAAAAAAU=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-83,\"charset\":\"\"}}", "{\"tp\":201,\"val\":\"gAAAAAAAAAY=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-192,\"charset\":\"\"}}", + "{\"tp\":201,\"val\":\"gAAAAAAAAAc=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-2048,\"charset\":\"\"}}", } for i, pbExpr := range pbExprs { c.Assert(pbExprs, NotNil) diff --git a/util/collate/collate.go b/util/collate/collate.go index dcb60e0ead7f5..2d91c3c723f9e 100644 --- a/util/collate/collate.go +++ b/util/collate/collate.go @@ -175,6 +175,15 @@ func CollationID2Name(id int32) string { return name } +// CollationName2ID return the collation id by the given name. +// If the name is not found in the map, the default collation id is returned +func CollationName2ID(name string) int { + if coll, err := charset.GetCollationByName(name); err == nil { + return coll.ID + } + return mysql.DefaultCollationID +} + // GetCollationByName wraps charset.GetCollationByName, it checks the collation. func GetCollationByName(name string) (coll *charset.Collation, err error) { if coll, err = charset.GetCollationByName(name); err != nil { @@ -220,10 +229,20 @@ func truncateTailingSpace(str string) string { return str } +func sign(i int) int { + if i < 0 { + return -1 + } else if i > 0 { + return 1 + } + return 0 +} + // IsCICollation returns if the collation is case-sensitive func IsCICollation(collate string) bool { return collate == "utf8_general_ci" || collate == "utf8mb4_general_ci" || - collate == "utf8_unicode_ci" || collate == "utf8mb4_unicode_ci" + collate == "utf8_unicode_ci" || collate == "utf8mb4_unicode_ci" || + collate == "utf8mb4_general_zh_ci" } func init() { @@ -231,21 +250,23 @@ func init() { newCollatorIDMap = make(map[int]Collator) newCollatorMap["binary"] = &binCollator{} - newCollatorIDMap[int(mysql.CollationNames["binary"])] = &binCollator{} + newCollatorIDMap[CollationName2ID("binary")] = &binCollator{} newCollatorMap["ascii_bin"] = &binPaddingCollator{} - newCollatorIDMap[int(mysql.CollationNames["ascii_bin"])] = &binPaddingCollator{} + newCollatorIDMap[CollationName2ID("ascii_bin")] = &binPaddingCollator{} newCollatorMap["latin1_bin"] = &binPaddingCollator{} - newCollatorIDMap[int(mysql.CollationNames["latin1_bin"])] = &binPaddingCollator{} + newCollatorIDMap[CollationName2ID("latin1_bin")] = &binPaddingCollator{} newCollatorMap["utf8mb4_bin"] = &binPaddingCollator{} - newCollatorIDMap[int(mysql.CollationNames["utf8mb4_bin"])] = &binPaddingCollator{} + newCollatorIDMap[CollationName2ID("utf8mb4_bin")] = &binPaddingCollator{} newCollatorMap["utf8_bin"] = &binPaddingCollator{} - newCollatorIDMap[int(mysql.CollationNames["utf8_bin"])] = &binPaddingCollator{} + newCollatorIDMap[CollationName2ID("utf8_bin")] = &binPaddingCollator{} newCollatorMap["utf8mb4_general_ci"] = &generalCICollator{} - newCollatorIDMap[int(mysql.CollationNames["utf8mb4_general_ci"])] = &generalCICollator{} + newCollatorIDMap[CollationName2ID("utf8mb4_general_ci")] = &generalCICollator{} newCollatorMap["utf8_general_ci"] = &generalCICollator{} - newCollatorIDMap[int(mysql.CollationNames["utf8_general_ci"])] = &generalCICollator{} + newCollatorIDMap[CollationName2ID("utf8_general_ci")] = &generalCICollator{} newCollatorMap["utf8mb4_unicode_ci"] = &unicodeCICollator{} - newCollatorIDMap[int(mysql.CollationNames["utf8mb4_unicode_ci"])] = &unicodeCICollator{} + newCollatorIDMap[CollationName2ID("utf8mb4_unicode_ci")] = &unicodeCICollator{} newCollatorMap["utf8_unicode_ci"] = &unicodeCICollator{} - newCollatorIDMap[int(mysql.CollationNames["utf8_unicode_ci"])] = &unicodeCICollator{} + newCollatorIDMap[CollationName2ID("utf8_unicode_ci")] = &unicodeCICollator{} + newCollatorMap["utf8mb4_general_zh_ci"] = &generalZhCICollator{} + newCollatorIDMap[CollationName2ID("utf8mb4_general_zh_ci")] = &generalZhCICollator{} } diff --git a/util/collate/collate_test.go b/util/collate/collate_test.go index f727cf000a87a..493dcc81ed0aa 100644 --- a/util/collate/collate_test.go +++ b/util/collate/collate_test.go @@ -199,6 +199,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &generalCICollator{}) c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &unicodeCICollator{}) c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &unicodeCICollator{}) + c.Assert(GetCollator("utf8mb4_general_zh_ci"), FitsTypeOf, &generalZhCICollator{}) c.Assert(GetCollator("default_test"), FitsTypeOf, &binPaddingCollator{}) c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(46), FitsTypeOf, &binPaddingCollator{}) @@ -207,6 +208,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollatorByID(33), FitsTypeOf, &generalCICollator{}) c.Assert(GetCollatorByID(224), FitsTypeOf, &unicodeCICollator{}) c.Assert(GetCollatorByID(192), FitsTypeOf, &unicodeCICollator{}) + c.Assert(GetCollatorByID(2048), FitsTypeOf, &generalZhCICollator{}) c.Assert(GetCollatorByID(9999), FitsTypeOf, &binPaddingCollator{}) SetNewCollationEnabledForTest(false) @@ -217,6 +219,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &binCollator{}) c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &binCollator{}) c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &binCollator{}) + c.Assert(GetCollator("utf8mb4_general_zh_ci"), FitsTypeOf, &binCollator{}) c.Assert(GetCollator("default_test"), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(46), FitsTypeOf, &binCollator{}) @@ -225,5 +228,6 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollatorByID(33), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(224), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(192), FitsTypeOf, &binCollator{}) + c.Assert(GetCollatorByID(2048), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(9999), FitsTypeOf, &binCollator{}) } diff --git a/util/collate/general_ci.go b/util/collate/general_ci.go index 2adbb12e5f75d..7b572f1b57c77 100644 --- a/util/collate/general_ci.go +++ b/util/collate/general_ci.go @@ -22,15 +22,6 @@ import ( type generalCICollator struct { } -func sign(i int) int { - if i < 0 { - return -1 - } else if i > 0 { - return 1 - } - return 0 -} - // compilePatternGeneralCI handles escapes and wild cards, generate pattern weights and types. // This function is modified from stringutil.CompilePattern. func compilePatternGeneralCI(pattern string, escape byte) (patWeights []uint16, patTypes []byte) { diff --git a/util/collate/general_zh_ci.go b/util/collate/general_zh_ci.go new file mode 100644 index 0000000000000..0118fbc19a51f --- /dev/null +++ b/util/collate/general_zh_ci.go @@ -0,0 +1,22 @@ +package collate + +type generalZhCICollator struct { +} + +// Collator interface, no implements now. +func (g generalZhCICollator) Compare(a, b string) int { + panic("implement me") +} + +// Collator interface, no implements now. +func (g generalZhCICollator) Key(str string) []byte { + panic("implement me") +} + +// Collator interface, no implements now. +func (g generalZhCICollator) Pattern() WildcardPattern { + panic("implement me") +} + + + From b60a3fd53f1ff08ca47b90220295d4eae087c6ee Mon Sep 17 00:00:00 2001 From: jwxiong Date: Mon, 19 Oct 2020 10:47:23 +0800 Subject: [PATCH 02/18] fmt --- util/collate/general_zh_ci.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/util/collate/general_zh_ci.go b/util/collate/general_zh_ci.go index 0118fbc19a51f..08f407b2ce37e 100644 --- a/util/collate/general_zh_ci.go +++ b/util/collate/general_zh_ci.go @@ -17,6 +17,3 @@ func (g generalZhCICollator) Key(str string) []byte { func (g generalZhCICollator) Pattern() WildcardPattern { panic("implement me") } - - - From d50b27b4f90ae840afbb3fe94dcf82e0d7540d00 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Mon, 19 Oct 2020 11:22:17 +0800 Subject: [PATCH 03/18] use parser method instead access collation directly --- util/collate/collate.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/util/collate/collate.go b/util/collate/collate.go index 2d91c3c723f9e..c0e1d3c2eca5a 100644 --- a/util/collate/collate.go +++ b/util/collate/collate.go @@ -163,8 +163,8 @@ func GetCollatorByID(id int) Collator { // CollationID2Name return the collation name by the given id. // If the id is not found in the map, the default collation is returned. func CollationID2Name(id int32) string { - name, ok := mysql.Collations[uint8(id)] - if !ok { + collation, err := charset.GetCollationByID(int(id)) + if err != nil { // TODO(bb7133): fix repeating logs when the following code is uncommented. //logutil.BgLogger().Warn( // "Unable to get collation name from ID, use default collation instead.", @@ -172,7 +172,7 @@ func CollationID2Name(id int32) string { // zap.Stack("stack")) return mysql.DefaultCollationName } - return name + return collation.Name } // CollationName2ID return the collation id by the given name. From 64059f8b5e74c6698312589bd7c5b978f605d043 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Mon, 19 Oct 2020 12:08:30 +0800 Subject: [PATCH 04/18] update parser --- go.mod | 2 ++ go.sum | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 8cad21b4f45b2..541f77e3a1064 100644 --- a/go.mod +++ b/go.mod @@ -83,3 +83,5 @@ require ( ) go 1.13 + +replace github.com/pingcap/parser => github.com/xiongjiwei/parser v0.0.0-20201019031748-6052b03b52f1 diff --git a/go.sum b/go.sum index 744fa5476091a..a2e7ff9007b30 100644 --- a/go.sum +++ b/go.sum @@ -451,8 +451,6 @@ github.com/pingcap/log v0.0.0-20200511115504-543df19646ad h1:SveG82rmu/GFxYanffx github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200828042413-fce0951f1463 h1:Jboj+s4jSCp5E1WDgmRUv5rIFKFHaaSWuSZ4wMwXIcc= github.com/pingcap/log v0.0.0-20200828042413-fce0951f1463/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= -github.com/pingcap/parser v0.0.0-20201014065945-fb6bde872a79 h1:Dcxi/lDJ6C3M5ocRbhR66MBDMmqFkPVt/Y79DVb5QR8= -github.com/pingcap/parser v0.0.0-20201014065945-fb6bde872a79/go.mod h1:RlLfMRJwFBSiXd2lUaWdV5pSXtrpyvZM8k5bbZWsheU= github.com/pingcap/sysutil v0.0.0-20200206130906-2bfa6dc40bcd/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI= github.com/pingcap/sysutil v0.0.0-20200715082929-4c47bcac246a h1:i2RElJ2aykSqZKeY+3SK18NHhajil8cQdG77wHe+P1Y= github.com/pingcap/sysutil v0.0.0-20200715082929-4c47bcac246a/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI= @@ -584,6 +582,8 @@ github.com/vmihailenco/msgpack/v5 v5.0.0-beta.1/go.mod h1:xlngVLeyQ/Qi05oQxhQ+oT github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= +github.com/xiongjiwei/parser v0.0.0-20201019031748-6052b03b52f1 h1:Khgu4oWd4D+MpWKQ74fjOa7UdjROnLyS3jo3YP1zR+A= +github.com/xiongjiwei/parser v0.0.0-20201019031748-6052b03b52f1/go.mod h1:RlLfMRJwFBSiXd2lUaWdV5pSXtrpyvZM8k5bbZWsheU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= github.com/yookoala/realpath v1.0.0/go.mod h1:gJJMA9wuX7AcqLy1+ffPatSCySA1FQ2S8Ya9AIoYBpE= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= From fbe58f6f4bfe4c329d7f6435b2f6838cc8a43b9a Mon Sep 17 00:00:00 2001 From: jwxiong Date: Mon, 19 Oct 2020 17:19:27 +0800 Subject: [PATCH 05/18] update parser --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 541f77e3a1064..6346f4cc367aa 100644 --- a/go.mod +++ b/go.mod @@ -45,7 +45,7 @@ require ( github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 github.com/pingcap/kvproto v0.0.0-20200927054727-1290113160f0 github.com/pingcap/log v0.0.0-20200828042413-fce0951f1463 - github.com/pingcap/parser v0.0.0-20201014065945-fb6bde872a79 + github.com/pingcap/parser v0.0.0-20201016052439-96f4986f1ed3 github.com/pingcap/sysutil v0.0.0-20200715082929-4c47bcac246a github.com/pingcap/tidb-tools v4.0.5-0.20200820092506-34ea90c93237+incompatible github.com/pingcap/tipb v0.0.0-20200618092958-4fad48b4c8c3 From 61b834b441f5f34a193cba0749fee2a4ab8b42f0 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Mon, 19 Oct 2020 19:11:41 +0800 Subject: [PATCH 06/18] use collate to get collation id --- expression/expr_to_pb.go | 22 +++++++++---------- store/mockstore/mocktikv/cop_handler_dag.go | 2 +- .../unistore/cophandler/cop_handler.go | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/expression/expr_to_pb.go b/expression/expr_to_pb.go index 74e9779d0091b..6bc0c90eb5d66 100644 --- a/expression/expr_to_pb.go +++ b/expression/expr_to_pb.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tipb/go-tipb" "go.uber.org/zap" + "strings" ) // ExpressionsToPBList converts expressions to tipb.Expr list for new plan. @@ -175,7 +176,7 @@ func FieldTypeFromPB(ft *tipb.FieldType) *types.FieldType { func collationToProto(c string) int32 { v := int32(collate.CollationName2ID(c)) - if v == mysql.DefaultCollationID && c != mysql.DefaultCollationName { + if v == mysql.DefaultCollationID && strings.ToLower(c) != mysql.DefaultCollationName { logutil.BgLogger().Warn( "Unable to get collation ID by name, use ID of the default collation instead", zap.String("name", c), @@ -187,17 +188,16 @@ func collationToProto(c string) int32 { } func protoToCollation(c int32) string { - v, ok := mysql.Collations[uint8(collate.RestoreCollationIDIfNeeded(c))] - if ok { - return v + v := collate.CollationID2Name(collate.RestoreCollationIDIfNeeded(c)) + if v == mysql.DefaultCollationName && c != mysql.DefaultCollationID { + logutil.BgLogger().Warn( + "Unable to get collation name from ID, use name of the default collation instead", + zap.Int32("id", c), + zap.Int("default collation ID", mysql.DefaultCollationID), + zap.String("default collation", mysql.DefaultCollationName), + ) } - logutil.BgLogger().Warn( - "Unable to get collation name from ID, use name of the default collation instead", - zap.Int32("id", c), - zap.Int("default collation ID", mysql.DefaultCollationID), - zap.String("default collation", mysql.DefaultCollationName), - ) - return mysql.DefaultCollationName + return v } func (pc PbConverter) columnToPBExpr(column *Column) *tipb.Expr { diff --git a/store/mockstore/mocktikv/cop_handler_dag.go b/store/mockstore/mocktikv/cop_handler_dag.go index 8c8874fab2094..d020d058467ee 100644 --- a/store/mockstore/mocktikv/cop_handler_dag.go +++ b/store/mockstore/mocktikv/cop_handler_dag.go @@ -925,6 +925,6 @@ func fieldTypeFromPBColumn(col *tipb.ColumnInfo) *types.FieldType { Flen: int(col.GetColumnLen()), Decimal: int(col.GetDecimal()), Elems: col.Elems, - Collate: mysql.Collations[uint8(collate.RestoreCollationIDIfNeeded(col.GetCollation()))], + Collate: collate.CollationID2Name(collate.RestoreCollationIDIfNeeded(col.GetCollation())), } } diff --git a/store/mockstore/unistore/cophandler/cop_handler.go b/store/mockstore/unistore/cophandler/cop_handler.go index 89f169d6008ce..f9fc3045d3c00 100644 --- a/store/mockstore/unistore/cophandler/cop_handler.go +++ b/store/mockstore/unistore/cophandler/cop_handler.go @@ -419,7 +419,7 @@ func fieldTypeFromPBColumn(col *tipb.ColumnInfo) *types.FieldType { Flen: int(col.GetColumnLen()), Decimal: int(col.GetDecimal()), Elems: col.Elems, - Collate: mysql.Collations[uint8(collate.RestoreCollationIDIfNeeded(col.GetCollation()))], + Collate: collate.CollationID2Name(collate.RestoreCollationIDIfNeeded(col.GetCollation())), } } From cecafd01053450c09220fd900b53dc8fcde73acf Mon Sep 17 00:00:00 2001 From: jwxiong Date: Mon, 19 Oct 2020 21:15:48 +0800 Subject: [PATCH 07/18] change test --- ddl/serial_test.go | 2 +- expression/expr_to_pb.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ddl/serial_test.go b/ddl/serial_test.go index 1f146ef64a9cf..50b6efcec9548 100644 --- a/ddl/serial_test.go +++ b/ddl/serial_test.go @@ -1271,8 +1271,8 @@ func (s *testSerialSuite) TestModifyingColumn4NewCollations(c *C) { tk.MustExec("alter table t convert to charset utf8 collate utf8_unicode_ci") tk.MustExec("alter table t convert to charset utf8 collate utf8_general_ci") tk.MustExec("alter table t modify b varchar(10) collate utf8_unicode_ci") - tk.MustExec("alter table t modify b varchar(10) collate utf8_bin") tk.MustExec("alter table t modify b varchar(10) collate utf8mb4_general_zh_ci") + tk.MustExec("alter table t modify b varchar(10) collate utf8_bin") tk.MustExec("alter table t add index b_idx(b)") tk.MustExec("alter table t add index c_idx(c)") diff --git a/expression/expr_to_pb.go b/expression/expr_to_pb.go index 6bc0c90eb5d66..b1adccc07b1e1 100644 --- a/expression/expr_to_pb.go +++ b/expression/expr_to_pb.go @@ -14,6 +14,8 @@ package expression import ( + "strings" + "github.com/gogo/protobuf/proto" "github.com/pingcap/errors" "github.com/pingcap/failpoint" @@ -28,7 +30,6 @@ import ( "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tipb/go-tipb" "go.uber.org/zap" - "strings" ) // ExpressionsToPBList converts expressions to tipb.Expr list for new plan. From bd4a76d5036bc3098d3ad390ebae3a164bbdb855 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Mon, 19 Oct 2020 22:32:12 +0800 Subject: [PATCH 08/18] update test --- ddl/serial_test.go | 2 +- executor/seqtest/seq_executor_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ddl/serial_test.go b/ddl/serial_test.go index 50b6efcec9548..94f2d413d8910 100644 --- a/ddl/serial_test.go +++ b/ddl/serial_test.go @@ -1271,7 +1271,6 @@ func (s *testSerialSuite) TestModifyingColumn4NewCollations(c *C) { tk.MustExec("alter table t convert to charset utf8 collate utf8_unicode_ci") tk.MustExec("alter table t convert to charset utf8 collate utf8_general_ci") tk.MustExec("alter table t modify b varchar(10) collate utf8_unicode_ci") - tk.MustExec("alter table t modify b varchar(10) collate utf8mb4_general_zh_ci") tk.MustExec("alter table t modify b varchar(10) collate utf8_bin") tk.MustExec("alter table t add index b_idx(b)") @@ -1286,6 +1285,7 @@ func (s *testSerialSuite) TestModifyingColumn4NewCollations(c *C) { tk.MustExec("alter table t collate utf8mb4_general_ci") tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_bin") tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_unicode_ci") + tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_general_zh_ci") // Change the default collation of database is allowed. tk.MustExec("alter database dct charset utf8mb4 collate utf8mb4_general_ci") } diff --git a/executor/seqtest/seq_executor_test.go b/executor/seqtest/seq_executor_test.go index 8a7fcc3b6ddc8..a61862ca163e8 100644 --- a/executor/seqtest/seq_executor_test.go +++ b/executor/seqtest/seq_executor_test.go @@ -1235,8 +1235,8 @@ func (s *seqTestSuite) TestShowForNewCollations(c *C) { "utf8_unicode_ci utf8 192 Yes 1", "utf8mb4_bin utf8mb4 46 Yes Yes 1", "utf8mb4_general_ci utf8mb4 45 Yes 1", - "utf8mb4_unicode_ci utf8mb4 224 Yes 1", "utf8mb4_general_zh_ci utf8mb4 2048 Yes 1", + "utf8mb4_unicode_ci utf8mb4 224 Yes 1", ) tk.MustQuery("show collation").Check(expectRows) tk.MustQuery("select * from information_schema.COLLATIONS").Check(expectRows) From ac5b989bfcdf627617d429deff54bc76548c3ac7 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Thu, 22 Oct 2020 19:03:29 +0800 Subject: [PATCH 09/18] update go.mod --- go.mod | 6 ++---- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 6346f4cc367aa..8d209709e80c5 100644 --- a/go.mod +++ b/go.mod @@ -39,13 +39,13 @@ require ( github.com/pingcap/badger v1.5.1-0.20200908111422-2e78ee155d19 github.com/pingcap/br v4.0.0-beta.2.0.20201014031603-5676c8fdad1a+incompatible github.com/pingcap/check v0.0.0-20200212061837-5e12011dc712 - github.com/pingcap/errors v0.11.5-0.20200917111840-a15ef68f753d + github.com/pingcap/errors v0.11.5-0.20201021055732-210aacd3fd99 github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce github.com/pingcap/fn v0.0.0-20191016082858-07623b84a47d github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 github.com/pingcap/kvproto v0.0.0-20200927054727-1290113160f0 github.com/pingcap/log v0.0.0-20200828042413-fce0951f1463 - github.com/pingcap/parser v0.0.0-20201016052439-96f4986f1ed3 + github.com/pingcap/parser v0.0.0-20201022104947-db4c0c5ed13c github.com/pingcap/sysutil v0.0.0-20200715082929-4c47bcac246a github.com/pingcap/tidb-tools v4.0.5-0.20200820092506-34ea90c93237+incompatible github.com/pingcap/tipb v0.0.0-20200618092958-4fad48b4c8c3 @@ -83,5 +83,3 @@ require ( ) go 1.13 - -replace github.com/pingcap/parser => github.com/xiongjiwei/parser v0.0.0-20201019031748-6052b03b52f1 diff --git a/go.sum b/go.sum index a2e7ff9007b30..c32a89b68c98b 100644 --- a/go.sum +++ b/go.sum @@ -430,8 +430,8 @@ github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTw github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pingcap/errors v0.11.5-0.20190809092503-95897b64e011/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pingcap/errors v0.11.5-0.20200902104258-eba4f1d8f6de/go.mod h1:g4vx//d6VakjJ0mk7iLBlKA8LFavV/sAVINT/1PFxeQ= -github.com/pingcap/errors v0.11.5-0.20200917111840-a15ef68f753d h1:TH18wFO5Nq/zUQuWu9ms2urgZnLP69XJYiI2JZAkUGc= -github.com/pingcap/errors v0.11.5-0.20200917111840-a15ef68f753d/go.mod h1:g4vx//d6VakjJ0mk7iLBlKA8LFavV/sAVINT/1PFxeQ= +github.com/pingcap/errors v0.11.5-0.20201021055732-210aacd3fd99 h1:PVuEvTi/LlviMG7X3av44NRwcdPf0tiqL/YdVOIKCpA= +github.com/pingcap/errors v0.11.5-0.20201021055732-210aacd3fd99/go.mod h1:G7x87le1poQzLB/TqvTJI2ILrSgobnq4Ut7luOwvfvI= github.com/pingcap/failpoint v0.0.0-20191029060244-12f4ac2fd11d/go.mod h1:DNS3Qg7bEDhU6EXNHF+XSv/PGznQaMJ5FWvctpm6pQI= github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce h1:Y1kCxlCtlPTMtVcOkjUcuQKh+YrluSo7+7YMCQSzy30= github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= @@ -451,6 +451,8 @@ github.com/pingcap/log v0.0.0-20200511115504-543df19646ad h1:SveG82rmu/GFxYanffx github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200828042413-fce0951f1463 h1:Jboj+s4jSCp5E1WDgmRUv5rIFKFHaaSWuSZ4wMwXIcc= github.com/pingcap/log v0.0.0-20200828042413-fce0951f1463/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= +github.com/pingcap/parser v0.0.0-20201022104947-db4c0c5ed13c h1:G2YhOMDBtbo6xdxwAlz4ocyt3zZ7cLiCeT0kkcFTBuM= +github.com/pingcap/parser v0.0.0-20201022104947-db4c0c5ed13c/go.mod h1:74+OEdwM4B/jMpBRl92ch6CSmSYkQtv2TNxIjFdT/GE= github.com/pingcap/sysutil v0.0.0-20200206130906-2bfa6dc40bcd/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI= github.com/pingcap/sysutil v0.0.0-20200715082929-4c47bcac246a h1:i2RElJ2aykSqZKeY+3SK18NHhajil8cQdG77wHe+P1Y= github.com/pingcap/sysutil v0.0.0-20200715082929-4c47bcac246a/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI= @@ -582,8 +584,6 @@ github.com/vmihailenco/msgpack/v5 v5.0.0-beta.1/go.mod h1:xlngVLeyQ/Qi05oQxhQ+oT github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -github.com/xiongjiwei/parser v0.0.0-20201019031748-6052b03b52f1 h1:Khgu4oWd4D+MpWKQ74fjOa7UdjROnLyS3jo3YP1zR+A= -github.com/xiongjiwei/parser v0.0.0-20201019031748-6052b03b52f1/go.mod h1:RlLfMRJwFBSiXd2lUaWdV5pSXtrpyvZM8k5bbZWsheU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= github.com/yookoala/realpath v1.0.0/go.mod h1:gJJMA9wuX7AcqLy1+ffPatSCySA1FQ2S8Ya9AIoYBpE= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= From 6e5972fe504aead70c0a63e1388788fad6279b61 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Thu, 22 Oct 2020 19:52:26 +0800 Subject: [PATCH 10/18] Merge remote-tracking branch 'upstream/master' into pinyin-order # Conflicts: # go.mod # go.sum --- ddl/serial_test.go | 2 +- executor/seqtest/seq_executor_test.go | 2 +- expression/distsql_builtin_test.go | 2 +- expression/expr_to_pb.go | 22 +++++++++++----------- expression/expr_to_pb_test.go | 2 +- util/collate/collate.go | 7 +++---- util/collate/collate_test.go | 6 +++--- util/collate/general_zh_ci.go | 8 ++++---- 8 files changed, 25 insertions(+), 26 deletions(-) diff --git a/ddl/serial_test.go b/ddl/serial_test.go index 94f2d413d8910..698dbdff179d6 100644 --- a/ddl/serial_test.go +++ b/ddl/serial_test.go @@ -1285,7 +1285,7 @@ func (s *testSerialSuite) TestModifyingColumn4NewCollations(c *C) { tk.MustExec("alter table t collate utf8mb4_general_ci") tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_bin") tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_unicode_ci") - tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_general_zh_ci") + tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_zh_pinyin_tidb_as_cs") // Change the default collation of database is allowed. tk.MustExec("alter database dct charset utf8mb4 collate utf8mb4_general_ci") } diff --git a/executor/seqtest/seq_executor_test.go b/executor/seqtest/seq_executor_test.go index a61862ca163e8..cc2894093c7b8 100644 --- a/executor/seqtest/seq_executor_test.go +++ b/executor/seqtest/seq_executor_test.go @@ -1235,8 +1235,8 @@ func (s *seqTestSuite) TestShowForNewCollations(c *C) { "utf8_unicode_ci utf8 192 Yes 1", "utf8mb4_bin utf8mb4 46 Yes Yes 1", "utf8mb4_general_ci utf8mb4 45 Yes 1", - "utf8mb4_general_zh_ci utf8mb4 2048 Yes 1", "utf8mb4_unicode_ci utf8mb4 224 Yes 1", + "utf8mb4_zh_pinyin_tidb_as_cs utf8mb4 2048 Yes 1", ) tk.MustQuery("show collation").Check(expectRows) tk.MustQuery("select * from information_schema.COLLATIONS").Check(expectRows) diff --git a/expression/distsql_builtin_test.go b/expression/distsql_builtin_test.go index 2140ab9602e1d..20ff67e5792f5 100644 --- a/expression/distsql_builtin_test.go +++ b/expression/distsql_builtin_test.go @@ -56,7 +56,7 @@ func (s *testEvalSerialSuite) TestPBToExprWithNewCollation(c *C) { {"some_error_collation", "utf8mb4_bin", 46, 46}, {"utf8_unicode_ci", "utf8_unicode_ci", 192, 192}, {"utf8mb4_unicode_ci", "utf8mb4_unicode_ci", 224, 224}, - {"utf8mb4_general_zh_ci", "utf8mb4_general_zh_ci", 2048, 2048}, + {"utf8mb4_zh_pinyin_tidb_as_cs", "utf8mb4_zh_pinyin_tidb_as_cs", 2048, 2048}, } for _, cs := range cases { diff --git a/expression/expr_to_pb.go b/expression/expr_to_pb.go index 788b4da27f1b1..6cd787baf8c0e 100644 --- a/expression/expr_to_pb.go +++ b/expression/expr_to_pb.go @@ -14,11 +14,10 @@ package expression import ( - "strings" - "github.com/gogo/protobuf/proto" "github.com/pingcap/errors" "github.com/pingcap/failpoint" + "github.com/pingcap/parser/charset" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/sessionctx/stmtctx" @@ -176,16 +175,17 @@ func FieldTypeFromPB(ft *tipb.FieldType) *types.FieldType { } func collationToProto(c string) int32 { - v := int32(collate.CollationName2ID(c)) - if v == mysql.DefaultCollationID && strings.ToLower(c) != mysql.DefaultCollationName { - logutil.BgLogger().Warn( - "Unable to get collation ID by name, use ID of the default collation instead", - zap.String("name", c), - zap.Int32("default collation ID", v), - zap.String("default collation", mysql.DefaultCollationName), - ) + if coll, err := charset.GetCollationByName(c); err == nil { + return collate.RewriteNewCollationIDIfNeeded(int32(coll.ID)) } - return collate.RewriteNewCollationIDIfNeeded(v) + v := collate.RewriteNewCollationIDIfNeeded(int32(mysql.DefaultCollationID)) + logutil.BgLogger().Warn( + "Unable to get collation ID by name, use ID of the default collation instead", + zap.String("name", c), + zap.Int32("default collation ID", v), + zap.String("default collation", mysql.DefaultCollationName), + ) + return v } func protoToCollation(c int32) string { diff --git a/expression/expr_to_pb_test.go b/expression/expr_to_pb_test.go index 76070b6494df5..bbfe7ee6853d6 100644 --- a/expression/expr_to_pb_test.go +++ b/expression/expr_to_pb_test.go @@ -789,7 +789,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) { colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeString, 4), "utf8mb4_0900_ai_ci")) colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 5), "utf8_bin")) colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 6), "utf8_unicode_ci")) - colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 7), "utf8mb4_general_zh_ci")) + colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 7), "utf8mb4_zh_pinyin_tidb_as_cs")) pushed, _ := PushDownExprs(sc, colExprs, client, kv.UnSpecified) c.Assert(len(pushed), Equals, len(colExprs)) pbExprs, err := ExpressionsToPBList(sc, colExprs, client) diff --git a/util/collate/collate.go b/util/collate/collate.go index 131096cd6c90a..b510f7f7df48a 100644 --- a/util/collate/collate.go +++ b/util/collate/collate.go @@ -242,8 +242,7 @@ func sign(i int) int { // IsCICollation returns if the collation is case-sensitive func IsCICollation(collate string) bool { return collate == "utf8_general_ci" || collate == "utf8mb4_general_ci" || - collate == "utf8_unicode_ci" || collate == "utf8mb4_unicode_ci" || - collate == "utf8mb4_general_zh_ci" + collate == "utf8_unicode_ci" || collate == "utf8mb4_unicode_ci" } func init() { @@ -268,6 +267,6 @@ func init() { newCollatorIDMap[CollationName2ID("utf8mb4_unicode_ci")] = &unicodeCICollator{} newCollatorMap["utf8_unicode_ci"] = &unicodeCICollator{} newCollatorIDMap[CollationName2ID("utf8_unicode_ci")] = &unicodeCICollator{} - newCollatorMap["utf8mb4_general_zh_ci"] = &generalZhCICollator{} - newCollatorIDMap[CollationName2ID("utf8mb4_general_zh_ci")] = &generalZhCICollator{} + newCollatorMap["utf8mb4_zh_pinyin_tidb_as_cs"] = &zhPinyinTiDBASCS{} + newCollatorIDMap[CollationName2ID("utf8mb4_zh_pinyin_tidb_as_cs")] = &zhPinyinTiDBASCS{} } diff --git a/util/collate/collate_test.go b/util/collate/collate_test.go index 493dcc81ed0aa..3aa8799d0e05b 100644 --- a/util/collate/collate_test.go +++ b/util/collate/collate_test.go @@ -199,7 +199,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &generalCICollator{}) c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &unicodeCICollator{}) c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &unicodeCICollator{}) - c.Assert(GetCollator("utf8mb4_general_zh_ci"), FitsTypeOf, &generalZhCICollator{}) + c.Assert(GetCollator("utf8mb4_zh_pinyin_tidb_as_cs"), FitsTypeOf, &zhPinyinTiDBASCS{}) c.Assert(GetCollator("default_test"), FitsTypeOf, &binPaddingCollator{}) c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(46), FitsTypeOf, &binPaddingCollator{}) @@ -208,7 +208,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollatorByID(33), FitsTypeOf, &generalCICollator{}) c.Assert(GetCollatorByID(224), FitsTypeOf, &unicodeCICollator{}) c.Assert(GetCollatorByID(192), FitsTypeOf, &unicodeCICollator{}) - c.Assert(GetCollatorByID(2048), FitsTypeOf, &generalZhCICollator{}) + c.Assert(GetCollatorByID(2048), FitsTypeOf, &zhPinyinTiDBASCS{}) c.Assert(GetCollatorByID(9999), FitsTypeOf, &binPaddingCollator{}) SetNewCollationEnabledForTest(false) @@ -219,7 +219,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &binCollator{}) c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &binCollator{}) c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &binCollator{}) - c.Assert(GetCollator("utf8mb4_general_zh_ci"), FitsTypeOf, &binCollator{}) + c.Assert(GetCollator("utf8mb4_zh_pinyin_tidb_as_cs"), FitsTypeOf, &binCollator{}) c.Assert(GetCollator("default_test"), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(46), FitsTypeOf, &binCollator{}) diff --git a/util/collate/general_zh_ci.go b/util/collate/general_zh_ci.go index 08f407b2ce37e..064f4b8dcc052 100644 --- a/util/collate/general_zh_ci.go +++ b/util/collate/general_zh_ci.go @@ -1,19 +1,19 @@ package collate -type generalZhCICollator struct { +type zhPinyinTiDBASCS struct { } // Collator interface, no implements now. -func (g generalZhCICollator) Compare(a, b string) int { +func (g zhPinyinTiDBASCS) Compare(a, b string) int { panic("implement me") } // Collator interface, no implements now. -func (g generalZhCICollator) Key(str string) []byte { +func (g zhPinyinTiDBASCS) Key(str string) []byte { panic("implement me") } // Collator interface, no implements now. -func (g generalZhCICollator) Pattern() WildcardPattern { +func (g zhPinyinTiDBASCS) Pattern() WildcardPattern { panic("implement me") } From 961a626941f96242fce1d6baee80fc44cfa8f435 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Thu, 22 Oct 2020 20:04:13 +0800 Subject: [PATCH 11/18] change code style --- expression/expr_to_pb.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/expression/expr_to_pb.go b/expression/expr_to_pb.go index 6cd787baf8c0e..775d285839373 100644 --- a/expression/expr_to_pb.go +++ b/expression/expr_to_pb.go @@ -189,16 +189,17 @@ func collationToProto(c string) int32 { } func protoToCollation(c int32) string { - v := collate.CollationID2Name(collate.RestoreCollationIDIfNeeded(c)) - if v == mysql.DefaultCollationName && c != mysql.DefaultCollationID { - logutil.BgLogger().Warn( - "Unable to get collation name from ID, use name of the default collation instead", - zap.Int32("id", c), - zap.Int("default collation ID", mysql.DefaultCollationID), - zap.String("default collation", mysql.DefaultCollationName), - ) + if coll, err := charset.GetCollationByID(int(collate.RestoreCollationIDIfNeeded(c))); err == nil { + return coll.Name } - return v + logutil.BgLogger().Warn( + "Unable to get collation name from ID, use name of the default collation instead", + zap.Int32("id", c), + zap.Int("default collation ID", mysql.DefaultCollationID), + zap.String("default collation", mysql.DefaultCollationName), + ) + + return mysql.DefaultCollationName } func (pc PbConverter) columnToPBExpr(column *Column) *tipb.Expr { From 09fe5af33f9836ccee6798de87653ed824be712f Mon Sep 17 00:00:00 2001 From: jwxiong Date: Fri, 23 Oct 2020 10:52:30 +0800 Subject: [PATCH 12/18] rename file name --- util/collate/collate.go | 25 +++++++++++++++++++ .../{general_zh_ci.go => pinyin_tidb_cs.go} | 0 util/collate/unicode_ci.go | 25 ------------------- 3 files changed, 25 insertions(+), 25 deletions(-) rename util/collate/{general_zh_ci.go => pinyin_tidb_cs.go} (100%) diff --git a/util/collate/collate.go b/util/collate/collate.go index b510f7f7df48a..c345f270f1790 100644 --- a/util/collate/collate.go +++ b/util/collate/collate.go @@ -239,6 +239,31 @@ func sign(i int) int { return 0 } +// decode rune by hand +func decodeRune(s string, si int) (r rune, newIndex int) { + switch b := s[si]; { + case b < 0x80: + r = rune(b) + newIndex = si + 1 + case b < 0xE0: + r = rune(b&b2Mask)<<6 | + rune(s[1+si]&mbMask) + newIndex = si + 2 + case b < 0xF0: + r = rune(b&b3Mask)<<12 | + rune(s[si+1]&mbMask)<<6 | + rune(s[si+2]&mbMask) + newIndex = si + 3 + default: + r = rune(b&b4Mask)<<18 | + rune(s[si+1]&mbMask)<<12 | + rune(s[si+2]&mbMask)<<6 | + rune(s[si+3]&mbMask) + newIndex = si + 4 + } + return +} + // IsCICollation returns if the collation is case-sensitive func IsCICollation(collate string) bool { return collate == "utf8_general_ci" || collate == "utf8mb4_general_ci" || diff --git a/util/collate/general_zh_ci.go b/util/collate/pinyin_tidb_cs.go similarity index 100% rename from util/collate/general_zh_ci.go rename to util/collate/pinyin_tidb_cs.go diff --git a/util/collate/unicode_ci.go b/util/collate/unicode_ci.go index fd3d57799d9ee..6fbdb2b635faf 100644 --- a/util/collate/unicode_ci.go +++ b/util/collate/unicode_ci.go @@ -33,31 +33,6 @@ const ( mbMask = 0x3F // 0011 1111 ) -// decode rune by hand -func decodeRune(s string, si int) (r rune, newIndex int) { - switch b := s[si]; { - case b < 0x80: - r = rune(b) - newIndex = si + 1 - case b < 0xE0: - r = rune(b&b2Mask)<<6 | - rune(s[1+si]&mbMask) - newIndex = si + 2 - case b < 0xF0: - r = rune(b&b3Mask)<<12 | - rune(s[si+1]&mbMask)<<6 | - rune(s[si+2]&mbMask) - newIndex = si + 3 - default: - r = rune(b&b4Mask)<<18 | - rune(s[si+1]&mbMask)<<12 | - rune(s[si+2]&mbMask)<<6 | - rune(s[si+3]&mbMask) - newIndex = si + 4 - } - return -} - // unicodeCICollator implements UCA. see http://unicode.org/reports/tr10/ type unicodeCICollator struct { } From 4f634eaa8752414d03d43bffdb0b30c0b20ddd94 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Fri, 23 Oct 2020 11:07:54 +0800 Subject: [PATCH 13/18] move const number --- util/collate/collate.go | 10 +++++++++- util/collate/unicode_ci.go | 11 ----------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/util/collate/collate.go b/util/collate/collate.go index c345f270f1790..268232a8fd56f 100644 --- a/util/collate/collate.go +++ b/util/collate/collate.go @@ -44,9 +44,17 @@ var ( ErrIllegalMix3Collation = dbterror.ClassExpression.NewStd(mysql.ErrCantAggregate3collations) ) -// DefaultLen is set for datum if the string datum don't know its length. const ( + // DefaultLen is set for datum if the string datum don't know its length. DefaultLen = 0 + // first byte of a 2-byte encoding starts 110 and carries 5 bits of data + b2Mask = 0x1F // 0001 1111 + // first byte of a 3-byte encoding starts 1110 and carries 4 bits of data + b3Mask = 0x0F // 0000 1111 + // first byte of a 4-byte encoding starts 11110 and carries 3 bits of data + b4Mask = 0x07 // 0000 0111 + // non-first bytes start 10 and carry 6 bits of data + mbMask = 0x3F // 0011 1111 ) // Collator provides functionality for comparing strings for a given diff --git a/util/collate/unicode_ci.go b/util/collate/unicode_ci.go index 6fbdb2b635faf..d03b169fee9e9 100644 --- a/util/collate/unicode_ci.go +++ b/util/collate/unicode_ci.go @@ -20,17 +20,6 @@ import ( const ( // magic number indicate weight has 2 uint64, should get from `longRuneMap` longRune uint64 = 0xFFFD - // first byte of a 2-byte encoding starts 110 and carries 5 bits of data - b2Mask = 0x1F // 0001 1111 - - // first byte of a 3-byte encoding starts 1110 and carries 4 bits of data - b3Mask = 0x0F // 0000 1111 - - // first byte of a 4-byte encoding starts 11110 and carries 3 bits of data - b4Mask = 0x07 // 0000 0111 - - // non-first bytes start 10 and carry 6 bits of data - mbMask = 0x3F // 0011 1111 ) // unicodeCICollator implements UCA. see http://unicode.org/reports/tr10/ From 91db9f5bc34dcdd59abe9aff887fe52e7dfb393e Mon Sep 17 00:00:00 2001 From: jwxiong Date: Fri, 23 Oct 2020 17:20:38 +0800 Subject: [PATCH 14/18] update license --- util/collate/pinyin_tidb_cs.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/util/collate/pinyin_tidb_cs.go b/util/collate/pinyin_tidb_cs.go index 064f4b8dcc052..ad3b114e845c4 100644 --- a/util/collate/pinyin_tidb_cs.go +++ b/util/collate/pinyin_tidb_cs.go @@ -1,3 +1,16 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + package collate type zhPinyinTiDBASCS struct { From a7057023d9f83cb2533991920595ab9741910a41 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Wed, 28 Oct 2020 11:58:58 +0800 Subject: [PATCH 15/18] address comments --- expression/expr_to_pb.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/expression/expr_to_pb.go b/expression/expr_to_pb.go index 775d285839373..8c35e5f5f3a41 100644 --- a/expression/expr_to_pb.go +++ b/expression/expr_to_pb.go @@ -189,16 +189,17 @@ func collationToProto(c string) int32 { } func protoToCollation(c int32) string { - if coll, err := charset.GetCollationByID(int(collate.RestoreCollationIDIfNeeded(c))); err == nil { + coll, err := charset.GetCollationByID(int(collate.RestoreCollationIDIfNeeded(c))) + if err == nil { return coll.Name } + logutil.BgLogger().Error(err.Error()) logutil.BgLogger().Warn( "Unable to get collation name from ID, use name of the default collation instead", zap.Int32("id", c), zap.Int("default collation ID", mysql.DefaultCollationID), zap.String("default collation", mysql.DefaultCollationName), ) - return mysql.DefaultCollationName } From 7cdd8eeffcfd249e2162c5543249b1171858c51a Mon Sep 17 00:00:00 2001 From: jwxiong Date: Wed, 28 Oct 2020 16:01:16 +0800 Subject: [PATCH 16/18] rename file --- util/collate/{pinyin_tidb_cs.go => pinyin_tidb_as_cs.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename util/collate/{pinyin_tidb_cs.go => pinyin_tidb_as_cs.go} (100%) diff --git a/util/collate/pinyin_tidb_cs.go b/util/collate/pinyin_tidb_as_cs.go similarity index 100% rename from util/collate/pinyin_tidb_cs.go rename to util/collate/pinyin_tidb_as_cs.go From 7cb682c1213268a3dd31c2b0b0a98818aede08c5 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Wed, 28 Oct 2020 16:24:02 +0800 Subject: [PATCH 17/18] remove dup log --- expression/expr_to_pb.go | 1 - 1 file changed, 1 deletion(-) diff --git a/expression/expr_to_pb.go b/expression/expr_to_pb.go index 8c35e5f5f3a41..223f71df311e8 100644 --- a/expression/expr_to_pb.go +++ b/expression/expr_to_pb.go @@ -193,7 +193,6 @@ func protoToCollation(c int32) string { if err == nil { return coll.Name } - logutil.BgLogger().Error(err.Error()) logutil.BgLogger().Warn( "Unable to get collation name from ID, use name of the default collation instead", zap.Int32("id", c), From d32f607f9d79a3db042ed5fa8ceafd2adb3c5b77 Mon Sep 17 00:00:00 2001 From: jwxiong Date: Thu, 29 Oct 2020 11:13:22 +0800 Subject: [PATCH 18/18] rename collator --- util/collate/collate.go | 4 ++-- util/collate/collate_test.go | 4 ++-- util/collate/pinyin_tidb_as_cs.go | 9 +++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/util/collate/collate.go b/util/collate/collate.go index 268232a8fd56f..1cdf88529e147 100644 --- a/util/collate/collate.go +++ b/util/collate/collate.go @@ -300,6 +300,6 @@ func init() { newCollatorIDMap[CollationName2ID("utf8mb4_unicode_ci")] = &unicodeCICollator{} newCollatorMap["utf8_unicode_ci"] = &unicodeCICollator{} newCollatorIDMap[CollationName2ID("utf8_unicode_ci")] = &unicodeCICollator{} - newCollatorMap["utf8mb4_zh_pinyin_tidb_as_cs"] = &zhPinyinTiDBASCS{} - newCollatorIDMap[CollationName2ID("utf8mb4_zh_pinyin_tidb_as_cs")] = &zhPinyinTiDBASCS{} + newCollatorMap["utf8mb4_zh_pinyin_tidb_as_cs"] = &zhPinyinTiDBASCSCollator{} + newCollatorIDMap[CollationName2ID("utf8mb4_zh_pinyin_tidb_as_cs")] = &zhPinyinTiDBASCSCollator{} } diff --git a/util/collate/collate_test.go b/util/collate/collate_test.go index 502c9299bf8c7..1da50af005ce8 100644 --- a/util/collate/collate_test.go +++ b/util/collate/collate_test.go @@ -198,7 +198,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &generalCICollator{}) c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &unicodeCICollator{}) c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &unicodeCICollator{}) - c.Assert(GetCollator("utf8mb4_zh_pinyin_tidb_as_cs"), FitsTypeOf, &zhPinyinTiDBASCS{}) + c.Assert(GetCollator("utf8mb4_zh_pinyin_tidb_as_cs"), FitsTypeOf, &zhPinyinTiDBASCSCollator{}) c.Assert(GetCollator("default_test"), FitsTypeOf, &binPaddingCollator{}) c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(46), FitsTypeOf, &binPaddingCollator{}) @@ -207,7 +207,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollatorByID(33), FitsTypeOf, &generalCICollator{}) c.Assert(GetCollatorByID(224), FitsTypeOf, &unicodeCICollator{}) c.Assert(GetCollatorByID(192), FitsTypeOf, &unicodeCICollator{}) - c.Assert(GetCollatorByID(2048), FitsTypeOf, &zhPinyinTiDBASCS{}) + c.Assert(GetCollatorByID(2048), FitsTypeOf, &zhPinyinTiDBASCSCollator{}) c.Assert(GetCollatorByID(9999), FitsTypeOf, &binPaddingCollator{}) SetNewCollationEnabledForTest(false) diff --git a/util/collate/pinyin_tidb_as_cs.go b/util/collate/pinyin_tidb_as_cs.go index ad3b114e845c4..565680e2cff56 100644 --- a/util/collate/pinyin_tidb_as_cs.go +++ b/util/collate/pinyin_tidb_as_cs.go @@ -13,20 +13,21 @@ package collate -type zhPinyinTiDBASCS struct { +// Collation of utf8mb4_zh_pinyin_tidb_as_cs +type zhPinyinTiDBASCSCollator struct { } // Collator interface, no implements now. -func (g zhPinyinTiDBASCS) Compare(a, b string) int { +func (py *zhPinyinTiDBASCSCollator) Compare(a, b string) int { panic("implement me") } // Collator interface, no implements now. -func (g zhPinyinTiDBASCS) Key(str string) []byte { +func (py *zhPinyinTiDBASCSCollator) Key(str string) []byte { panic("implement me") } // Collator interface, no implements now. -func (g zhPinyinTiDBASCS) Pattern() WildcardPattern { +func (py *zhPinyinTiDBASCSCollator) Pattern() WildcardPattern { panic("implement me") }