From e60910aad3b038b7751e3798a7fb16b5f8b93faa Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Tue, 10 Sep 2019 19:41:26 +0800 Subject: [PATCH 1/3] executor, expression: 1. rename tryToMatch to tryToMatchInners 2. tiny-refine tryToMatchOuters --- executor/index_lookup_join.go | 2 +- executor/join.go | 4 +-- executor/joiner.go | 54 ++++++++++++++--------------- executor/joiner_test.go | 2 +- executor/merge_join.go | 2 +- expression/chunk_executor.go | 64 +++++++++++++---------------------- 6 files changed, 56 insertions(+), 72 deletions(-) diff --git a/executor/index_lookup_join.go b/executor/index_lookup_join.go index 77cc841bfb535..b0366da60c83e 100644 --- a/executor/index_lookup_join.go +++ b/executor/index_lookup_join.go @@ -250,7 +250,7 @@ func (e *IndexLookUpJoin) Next(ctx context.Context, req *chunk.Chunk) error { outerRow := task.outerResult.GetRow(task.cursor) if e.innerIter.Current() != e.innerIter.End() { - matched, isNull, err := e.joiner.tryToMatch(outerRow, e.innerIter, req) + matched, isNull, err := e.joiner.tryToMatchInners(outerRow, e.innerIter, req) if err != nil { return err } diff --git a/executor/join.go b/executor/join.go index 053bf04a78e1f..9f35f208fd8e4 100644 --- a/executor/join.go +++ b/executor/join.go @@ -378,7 +378,7 @@ func (e *HashJoinExec) joinMatchedOuterRow2Chunk(workerID uint, outerRow chunk.R iter := chunk.NewIterator4Slice(innerRows) hasMatch, hasNull := false, false for iter.Begin(); iter.Current() != iter.End(); { - matched, isNull, err := e.joiners[workerID].tryToMatch(outerRow, iter, joinResult.chk) + matched, isNull, err := e.joiners[workerID].tryToMatchInners(outerRow, iter, joinResult.chk) if err != nil { joinResult.err = err return false, joinResult @@ -673,7 +673,7 @@ func (e *NestedLoopApplyExec) Next(ctx context.Context, req *chunk.Chunk) (err e e.innerIter.Begin() } - matched, isNull, err := e.joiner.tryToMatch(*e.outerRow, e.innerIter, req) + matched, isNull, err :=
e.joiner.tryToMatchInners(*e.outerRow, e.innerIter, req) e.hasMatch = e.hasMatch || matched e.hasNull = e.hasNull || isNull diff --git a/executor/joiner.go b/executor/joiner.go index 3f677a4527eba..5bf6dafc1a31e 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -36,7 +36,7 @@ var ( // // hasMatch, hasNull := false, false // for innerIter.Current() != innerIter.End() { -// matched, isNull, err := j.tryToMatch(outer, innerIter, chk) +// matched, isNull, err := j.tryToMatchInners(outer, innerIter, chk) // // handle err // hasMatch = hasMatch || matched // hasNull = hasNull || isNull @@ -47,7 +47,7 @@ var ( // // NOTE: This interface is **not** thread-safe. type joiner interface { - // tryToMatch tries to join an outer row with a batch of inner rows. When + // tryToMatchInners tries to join an outer row with a batch of inner rows. When // 'inners.Len != 0' but all the joined rows are filtered, the outer row is // considered unmatched. Otherwise, the outer row is matched and some joined // rows are appended to `chk`. The size of `chk` is limited to MaxChunkSize. @@ -59,7 +59,7 @@ type joiner interface { // NOTE: Callers need to call this function multiple times to consume all // the inner rows for an outer row, and decide whether the outer row can be // matched with at lease one inner row. - tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, isNull bool, err error) + tryToMatchInners(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, isNull bool, err error) // tryToMatchOuters tries to join a batch of outer rows with one inner row. // It's used when the join is an outer join and the hash table is built @@ -78,7 +78,7 @@ type joiner interface { // 2. 'AntiSemiJoin': appends the unmatched outer row to the result buffer. // 3. 'LeftOuterSemiJoin': concats the unmatched outer row with 0 and // appends it to the result buffer. - // 4. 'AntiLeftOuterSemiJoin': concats the unmatched outer row with 0 and + // 4. 
'AntiLeftOuterSemiJoin': concats the unmatched outer row with 1 and // appends it to the result buffer. // 5. 'LeftOuterJoin': concats the unmatched outer row with a row of NULLs // and appends it to the result buffer. @@ -183,7 +183,7 @@ func (j *baseJoiner) makeShallowJoinRow(isRightJoin bool, inner, outer chunk.Row j.shallowRow.ShallowCopyPartialRow(inner.Len(), outer) } -// filter is used to filter the result constructed by tryToMatch, the result is +// filter is used to filter the result constructed by tryToMatchInners, the result is // built by one outer row and multiple inner rows. The returned bool value // indicates whether the outer row matches any inner rows. func (j *baseJoiner) filter(input, output *chunk.Chunk, outerColsLen int) (bool, error) { @@ -232,7 +232,7 @@ type semiJoiner struct { baseJoiner } -func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { +func (j *semiJoiner) tryToMatchInners(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { if inners.Len() == 0 { return false, false, nil } @@ -294,8 +294,8 @@ type antiSemiJoiner struct { baseJoiner } -// tryToMatch implements joiner interface. -func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { +// tryToMatchInners implements joiner interface. +func (j *antiSemiJoiner) tryToMatchInners(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { if inners.Len() == 0 { return false, false, nil } @@ -357,8 +357,8 @@ type leftOuterSemiJoiner struct { baseJoiner } -// tryToMatch implements joiner interface. -func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { +// tryToMatchInners implements joiner interface. 
+func (j *leftOuterSemiJoiner) tryToMatchInners(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { if inners.Len() == 0 { return false, false, nil } @@ -433,8 +433,8 @@ type antiLeftOuterSemiJoiner struct { baseJoiner } -// tryToMatch implements joiner interface. -func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { +// tryToMatchInners implements joiner interface. +func (j *antiLeftOuterSemiJoiner) tryToMatchInners(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { if inners.Len() == 0 { return false, false, nil } @@ -509,8 +509,8 @@ type leftOuterJoiner struct { baseJoiner } -// tryToMatch implements joiner interface. -func (j *leftOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { +// tryToMatchInners implements joiner interface. 
+func (j *leftOuterJoiner) tryToMatchInners(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { if inners.Len() == 0 { return false, false, nil } @@ -544,12 +544,12 @@ func (j *leftOuterJoiner) tryToMatchOuters(outers chunk.Iterator, inner chunk.Ro chkForJoin = chk } - outer, numToAppend, outerBatchSize := outers.Current(), chk.RequiredRows()-chk.NumRows(), 0 - for ; outer != outers.End() && numToAppend > 0; outer, numToAppend, outerBatchSize = outers.Next(), numToAppend-1, outerBatchSize+1 { + outer, numToAppend, cursor := outers.Current(), chk.RequiredRows()-chk.NumRows(), 0 + for ; outer != outers.End() && cursor < numToAppend; outer, cursor = outers.Next(), cursor+1 { j.makeJoinRowToChunk(chkForJoin, outer, inner) } outerRowStatus = outerRowStatus[:0] - for i := 0; i < outerBatchSize; i++ { + for i := 0; i < cursor; i++ { outerRowStatus = append(outerRowStatus, outerRowMatched) } if len(j.conditions) == 0 { @@ -568,8 +568,8 @@ type rightOuterJoiner struct { baseJoiner } -// tryToMatch implements joiner interface. -func (j *rightOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { +// tryToMatchInners implements joiner interface. 
+func (j *rightOuterJoiner) tryToMatchInners(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { if inners.Len() == 0 { return false, false, nil } @@ -603,12 +603,12 @@ func (j *rightOuterJoiner) tryToMatchOuters(outers chunk.Iterator, inner chunk.R chkForJoin = chk } - outer, numToAppend, outerBatchSize := outers.Current(), chk.RequiredRows()-chk.NumRows(), 0 - for ; outer != outers.End() && numToAppend > 0; outer, numToAppend, outerBatchSize = outers.Next(), numToAppend-1, outerBatchSize+1 { + outer, numToAppend, cursor := outers.Current(), chk.RequiredRows()-chk.NumRows(), 0 + for ; outer != outers.End() && cursor < numToAppend; outer, cursor = outers.Next(), cursor+1 { j.makeJoinRowToChunk(chkForJoin, inner, outer) } outerRowStatus = outerRowStatus[:0] - for i := 0; i < outerBatchSize; i++ { + for i := 0; i < cursor; i++ { outerRowStatus = append(outerRowStatus, outerRowMatched) } if len(j.conditions) == 0 { @@ -627,8 +627,8 @@ type innerJoiner struct { baseJoiner } -// tryToMatch implements joiner interface. -func (j *innerJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { +// tryToMatchInners implements joiner interface. 
+func (j *innerJoiner) tryToMatchInners(outer chunk.Row, inners chunk.Iterator, chk *chunk.Chunk) (matched bool, hasNull bool, err error) { if inners.Len() == 0 { return false, false, nil } @@ -663,8 +663,8 @@ func (j *innerJoiner) tryToMatchOuters(outers chunk.Iterator, inner chunk.Row, c if len(j.conditions) == 0 { chkForJoin = chk } - outer, numToAppend, outerBatchSize := outers.Current(), chk.RequiredRows()-chk.NumRows(), 0 - for ; outer != outers.End() && numToAppend > 0; outer, numToAppend, outerBatchSize = outers.Next(), numToAppend-1, outerBatchSize+1 { + outer, numToAppend, cursor := outers.Current(), chk.RequiredRows()-chk.NumRows(), 0 + for ; outer != outers.End() && cursor < numToAppend; outer, numToAppend = outers.Next(), cursor+1 { if j.outerIsRight { j.makeJoinRowToChunk(chkForJoin, inner, outer) } else { @@ -672,7 +672,7 @@ func (j *innerJoiner) tryToMatchOuters(outers chunk.Iterator, inner chunk.Row, c } } outerRowStatus = outerRowStatus[:0] - for i := 0; i < outerBatchSize; i++ { + for i := 0; i < cursor; i++ { outerRowStatus = append(outerRowStatus, outerRowMatched) } if len(j.conditions) == 0 { diff --git a/executor/joiner_test.go b/executor/joiner_test.go index ef93629160e18..818a171e3e7d8 100644 --- a/executor/joiner_test.go +++ b/executor/joiner_test.go @@ -72,7 +72,7 @@ func (s *testSuiteJoiner) TestRequiredRows(c *C) { result.Reset() it := chunk.NewIterator4Chunk(innerChk) it.Begin() - _, _, err := joiner.tryToMatch(outerRow, it, result) + _, _, err := joiner.tryToMatchInners(outerRow, it, result) c.Assert(err, IsNil) c.Assert(result.NumRows(), Equals, required) } diff --git a/executor/merge_join.go b/executor/merge_join.go index f7415d6bf1444..68a7b95228a6a 100644 --- a/executor/merge_join.go +++ b/executor/merge_join.go @@ -322,7 +322,7 @@ func (e *MergeJoinExec) joinToChunk(ctx context.Context, chk *chunk.Chunk) (hasM continue } - matched, isNull, err := e.joiner.tryToMatch(e.outerTable.row, e.innerIter4Row, chk) + matched, isNull, err 
:= e.joiner.tryToMatchInners(e.outerTable.row, e.innerIter4Row, chk) if err != nil { return false, err } diff --git a/expression/chunk_executor.go b/expression/chunk_executor.go index 9876575b67ca4..0e1d1812b717b 100644 --- a/expression/chunk_executor.go +++ b/expression/chunk_executor.go @@ -336,47 +336,25 @@ func executeToString(ctx sessionctx.Context, expr Expression, fieldType *types.F // VectorizedFilter applies a list of filters to a Chunk and // returns a bool slice, which indicates whether a row is passed the filters. // Filters is executed vectorized. -func VectorizedFilter(ctx sessionctx.Context, filters []Expression, iterator *chunk.Iterator4Chunk, selected []bool) ([]bool, error) { - selected = selected[:0] - for i, numRows := 0, iterator.Len(); i < numRows; i++ { - selected = append(selected, true) - } - for _, filter := range filters { - isIntType := true - if filter.GetType().EvalType() != types.ETInt { - isIntType = false - } - for row := iterator.Begin(); row != iterator.End(); row = iterator.Next() { - if !selected[row.Idx()] { - continue - } - if isIntType { - filterResult, isNull, err := filter.EvalInt(ctx, row) - if err != nil { - return nil, err - } - selected[row.Idx()] = selected[row.Idx()] && !isNull && (filterResult != 0) - } else { - // TODO: should rewrite the filter to `cast(expr as SIGNED) != 0` and always use `EvalInt`. 
- bVal, _, err := EvalBool(ctx, []Expression{filter}, row) - if err != nil { - return nil, err - } - selected[row.Idx()] = selected[row.Idx()] && bVal - } - } - } - return selected, nil +func VectorizedFilter(ctx sessionctx.Context, filters []Expression, iterator *chunk.Iterator4Chunk, selected []bool) (_ []bool, err error) { + selected, _, err = VectorizedFilterConsiderNull(ctx, filters, iterator, selected, nil) + return selected, err } // VectorizedFilterConsiderNull applies a list of filters to a Chunk and // returns two bool slices, `selected` indicates whether a row passed the // filters, `isNull` indicates whether the result of the filter is null. // Filters is executed vectorized. -func VectorizedFilterConsiderNull(ctx sessionctx.Context, filters []Expression, iterator *chunk.Iterator4Chunk, selected []bool, isNil []bool) ([]bool, []bool, error) { - selected, isNil = selected[:0], isNil[:0] +func VectorizedFilterConsiderNull(ctx sessionctx.Context, filters []Expression, iterator *chunk.Iterator4Chunk, selected []bool, isNull []bool) ([]bool, []bool, error) { + selected = selected[:0] for i, numRows := 0, iterator.Len(); i < numRows; i++ { - selected, isNil = append(selected, true), append(isNil, false) + selected = append(selected, true) + } + if isNull != nil { + isNull = isNull[:0] + for i, numRows := 0, iterator.Len(); i < numRows; i++ { + isNull = append(isNull, false) + } } for _, filter := range filters { isIntType := true @@ -387,23 +365,29 @@ func VectorizedFilterConsiderNull(ctx sessionctx.Context, filters []Expression, if !selected[row.Idx()] { continue } + var ( + filterResult int64 + bVal, isNullResult bool + err error + ) if isIntType { - filterResult, isNull, err := filter.EvalInt(ctx, row) + filterResult, isNullResult, err = filter.EvalInt(ctx, row) if err != nil { return nil, nil, err } - selected[row.Idx()] = selected[row.Idx()] && !isNull && (filterResult != 0) - isNil[row.Idx()] = isNil[row.Idx()] || isNull + selected[row.Idx()] = 
selected[row.Idx()] && !isNullResult && (filterResult != 0) } else { // TODO: should rewrite the filter to `cast(expr as SIGNED) != 0` and always use `EvalInt`. - bVal, isNull, err := EvalBool(ctx, []Expression{filter}, row) + bVal, isNullResult, err = EvalBool(ctx, []Expression{filter}, row) if err != nil { return nil, nil, err } selected[row.Idx()] = selected[row.Idx()] && bVal - isNil[row.Idx()] = isNil[row.Idx()] || isNull + } + if isNull != nil { + isNull[row.Idx()] = isNull[row.Idx()] || isNullResult } } } - return selected, isNil, nil + return selected, isNull, nil } From b1d27ed599b99bf867a0d9c5e55a09393af21be2 Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Tue, 10 Sep 2019 19:44:26 +0800 Subject: [PATCH 2/3] tiny refine --- executor/joiner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/executor/joiner.go b/executor/joiner.go index 5bf6dafc1a31e..abb8881b9ff2b 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -664,7 +664,7 @@ func (j *innerJoiner) tryToMatchOuters(outers chunk.Iterator, inner chunk.Row, c chkForJoin = chk } outer, numToAppend, cursor := outers.Current(), chk.RequiredRows()-chk.NumRows(), 0 - for ; outer != outers.End() && cursor < numToAppend; outer, numToAppend = outers.Next(), cursor+1 { + for ; outer != outers.End() && cursor < numToAppend; outer, cursor = outers.Next(), cursor+1 { if j.outerIsRight { j.makeJoinRowToChunk(chkForJoin, inner, outer) } else { From d6b3b2eed947c672dde7965e83b8ed6c22a9091f Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Wed, 11 Sep 2019 10:18:49 +0800 Subject: [PATCH 3/3] address comment --- expression/chunk_executor.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/expression/chunk_executor.go b/expression/chunk_executor.go index 0e1d1812b717b..89293ff9e117b 100644 --- a/expression/chunk_executor.go +++ b/expression/chunk_executor.go @@ -356,6 +356,11 @@ func VectorizedFilterConsiderNull(ctx sessionctx.Context, 
filters []Expression, isNull = append(isNull, false) } } + var ( + filterResult int64 + bVal, isNullResult bool + err error + ) for _, filter := range filters { isIntType := true if filter.GetType().EvalType() != types.ETInt { @@ -365,11 +370,6 @@ func VectorizedFilterConsiderNull(ctx sessionctx.Context, filters []Expression, if !selected[row.Idx()] { continue } - var ( - filterResult int64 - bVal, isNullResult bool - err error - ) if isIntType { filterResult, isNullResult, err = filter.EvalInt(ctx, row) if err != nil {