-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
plan: maintain HistColl
in Selection
and Projection
's statsInfo
.
#7595
Changes from 2 commits
317de51
a582bf2
239f4ff
e68813a
76f96e4
ca5b5a7
7db31b5
6b6c285
9a4cfe5
bc371d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ type statsInfo struct { | |
count float64 | ||
cardinality []float64 | ||
|
||
histColl statistics.HistColl | ||
histColl *statistics.HistColl | ||
// usePseudoStats indicates whether the statsInfo is calculated using the | ||
// pseudo statistics on a table. | ||
usePseudoStats bool | ||
|
@@ -47,7 +47,6 @@ func (s *statsInfo) scale(factor float64) *statsInfo { | |
profile := &statsInfo{ | ||
count: s.count * factor, | ||
cardinality: make([]float64, len(s.cardinality)), | ||
histColl: s.histColl, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But since it's the problem of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Emm.. i need some clearer explain? |
||
usePseudoStats: s.usePseudoStats, | ||
} | ||
for i := range profile.cardinality { | ||
|
@@ -110,11 +109,10 @@ func (p *baseLogicalPlan) deriveStats() (*statsInfo, error) { | |
return profile, nil | ||
} | ||
|
||
func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *statsInfo { | ||
profile := &statsInfo{ | ||
func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) (profile *statsInfo, histColl *statistics.HistColl) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that all the code changes related to the functions of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, we can extract the same part with |
||
profile = &statsInfo{ | ||
count: float64(ds.statisticTable.Count), | ||
cardinality: make([]float64, len(ds.Columns)), | ||
histColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.Columns, ds.schema.Columns), | ||
usePseudoStats: ds.statisticTable.Pseudo, | ||
} | ||
for i, col := range ds.Columns { | ||
|
@@ -126,21 +124,22 @@ func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *statsInfo { | |
profile.cardinality[i] = profile.count * distinctFactor | ||
} | ||
} | ||
ds.stats = profile | ||
selectivity, err := profile.histColl.Selectivity(ds.ctx, conds) | ||
histColl = ds.statisticTable.GenerateHistCollFromColumnInfo(ds.Columns, ds.schema.Columns) | ||
selectivity, err := histColl.Selectivity(ds.ctx, conds) | ||
if err != nil { | ||
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error()) | ||
selectivity = selectionFactor | ||
} | ||
return profile.scale(selectivity) | ||
return profile.scale(selectivity), histColl | ||
} | ||
|
||
func (ds *DataSource) deriveStats() (*statsInfo, error) { | ||
// PushDownNot here can convert query 'not (a != 1)' to 'a = 1'. | ||
for i, expr := range ds.pushedDownConds { | ||
ds.pushedDownConds[i] = expression.PushDownNot(nil, expr, false) | ||
} | ||
ds.stats = ds.getStatsByFilter(ds.pushedDownConds) | ||
statsInfo, histColl := ds.getStatsByFilter(ds.pushedDownConds) | ||
ds.stats = statsInfo | ||
for _, path := range ds.possibleAccessPaths { | ||
if path.isTablePath { | ||
noIntervalRanges, err := ds.deriveTablePathStats(path) | ||
|
@@ -155,7 +154,7 @@ func (ds *DataSource) deriveStats() (*statsInfo, error) { | |
} | ||
continue | ||
} | ||
noIntervalRanges, err := ds.deriveIndexPathStats(path) | ||
noIntervalRanges, err := ds.deriveIndexPathStats(path, histColl) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
|
@@ -166,6 +165,8 @@ func (ds *DataSource) deriveStats() (*statsInfo, error) { | |
break | ||
} | ||
} | ||
histColl.Count = int64(ds.stats.count) | ||
ds.stats.histColl = histColl | ||
return ds.stats, nil | ||
} | ||
|
||
|
@@ -174,7 +175,23 @@ func (p *LogicalSelection) deriveStats() (*statsInfo, error) { | |
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
p.stats = childProfile.scale(selectionFactor) | ||
factor := selectionFactor | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about s/factor/selecity/? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Em, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah. |
||
if p.ctx.GetSessionVars().OptimizerSelectivityLevel == 1 && childProfile.histColl != nil { | ||
winoros marked this conversation as resolved.
Show resolved
Hide resolved
|
||
factor, err = childProfile.histColl.Selectivity(p.ctx, p.Conditions) | ||
if err != nil { | ||
return nil, errors.Trace(err) | ||
} | ||
} | ||
p.stats = childProfile.scale(factor) | ||
if childProfile.histColl != nil { | ||
p.stats.histColl = &statistics.HistColl{ | ||
Count: int64(p.stats.count), | ||
Columns: childProfile.histColl.Columns, | ||
Indices: childProfile.histColl.Indices, | ||
Idx2ColumnIDs: childProfile.histColl.Idx2ColumnIDs, | ||
ColID2IdxID: childProfile.histColl.ColID2IdxID, | ||
} | ||
} | ||
return p.stats, nil | ||
} | ||
|
||
|
@@ -254,9 +271,58 @@ func (p *LogicalProjection) deriveStats() (*statsInfo, error) { | |
cols := expression.ExtractColumns(expr) | ||
p.stats.cardinality[i] = getCardinality(cols, p.children[0].Schema(), childProfile) | ||
} | ||
// If we enables the enhance selectivity we'll try to maintain the histogram. | ||
if p.ctx.GetSessionVars().OptimizerSelectivityLevel >= 1 && !childProfile.histColl.Pseudo { | ||
winoros marked this conversation as resolved.
Show resolved
Hide resolved
|
||
p.deriveHistStats(childProfile) | ||
} | ||
return p.stats, nil | ||
} | ||
|
||
func (p *LogicalProjection) deriveHistStats(childProfile *statsInfo) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add comments in the function. |
||
colHistMap := make(map[int64]*statistics.Column) | ||
colIDMap := make(map[int64]int64) | ||
childHist := childProfile.histColl | ||
for i, expr := range p.Exprs { | ||
col, ok := expr.(*expression.Column) | ||
if !ok { | ||
continue | ||
} | ||
colHist, ok := childHist.Columns[col.UniqueID] | ||
if !ok { | ||
continue | ||
} | ||
colHistMap[p.schema.Columns[i].UniqueID] = colHist | ||
colIDMap[col.UniqueID] = p.schema.Columns[i].UniqueID | ||
} | ||
|
||
colID2IdxID := make(map[int64]int64) | ||
idx2ColumnIDs := make(map[int64][]int64) | ||
idxHistMap := make(map[int64]*statistics.Index) | ||
for id, colIDs := range childHist.Idx2ColumnIDs { | ||
newIDList := make([]int64, 0, len(colIDs)) | ||
for _, id := range colIDs { | ||
if newID, ok := colIDMap[id]; ok { | ||
newIDList = append(newIDList, newID) | ||
continue | ||
} | ||
break | ||
} | ||
if len(newIDList) == 0 { | ||
continue | ||
} | ||
colID2IdxID[newIDList[0]] = id | ||
idx2ColumnIDs[id] = newIDList | ||
idxHistMap[id] = childHist.Indices[id] | ||
} | ||
p.stats.histColl = &statistics.HistColl{ | ||
Columns: colHistMap, | ||
Indices: idxHistMap, | ||
Idx2ColumnIDs: idx2ColumnIDs, | ||
ColID2IdxID: colID2IdxID, | ||
Count: childHist.Count, | ||
} | ||
} | ||
|
||
func (la *LogicalAggregation) deriveStats() (*statsInfo, error) { | ||
childProfile, err := la.children[0].deriveStats() | ||
if err != nil { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -293,7 +293,7 @@ func ValidateSetSystemVar(vars *SessionVars, name string, value string) (string, | |
TiDBDistSQLScanConcurrency, | ||
TiDBIndexSerialScanConcurrency, TiDBDDLReorgWorkerCount, | ||
TiDBBackoffLockFast, TiDBMaxChunkSize, | ||
TiDBDMLBatchSize, TiDBOptimizerSelectivityLevel: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why remove it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This validation is not right. The default value of it is zero. But here just checks what it should not be zero. |
||
TiDBDMLBatchSize: | ||
v, err := strconv.Atoi(value) | ||
if err != nil { | ||
return value, ErrWrongTypeForVar.GenByArgs(name) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add comments in the test case.