Skip to content

Commit

Permalink
# This is a combination of 4 commits.
Browse files Browse the repository at this point in the history
# This is the 1st commit message:

Add MaxTotalQueryLength and fill it from MaxQueryLength if 0

Add the ability to define a higher total query length limit on the
query-frontend, compared to the current limit shared between the querier
and the query-frontend.

Ref: grafana/mimir-squad#889

Signed-off-by: György Krajcsovits <[email protected]>

# Conflicts:
#	CHANGELOG.md

# This is the commit message #2:

Update runbook

# This is the commit message #3:

Update pkg/util/validation/limits.go

Co-authored-by: Nick Pillitteri <[email protected]>
# This is the commit message #4:

Rename store.max-total-query-length to query-frontend...

Signed-off-by: György Krajcsovits <[email protected]>
  • Loading branch information
krajorama committed Sep 29, 2022
1 parent 27b9be9 commit 494f956
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 18 deletions.
12 changes: 11 additions & 1 deletion cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -3076,7 +3076,7 @@
"kind": "field",
"name": "max_query_length",
"required": false,
"desc": "Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and ruler. 0 to disable.",
"desc": "Limit the query time range (end - start time). This limit is enforced in the querier (on the query possibly split by the query-frontend) and ruler. 0 to disable.",
"fieldValue": null,
"fieldDefaultValue": 0,
"fieldFlag": "store.max-query-length",
Expand Down Expand Up @@ -3154,6 +3154,16 @@
"fieldType": "duration",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "max_total_query_length",
"required": false,
"desc": "Limit the total query time range (end - start time). This limit is enforced in the query-frontend on the received query. Defaults to the value of -store.max-query-length if set to 0.",
"fieldValue": null,
"fieldDefaultValue": 0,
"fieldFlag": "store.max-total-query-length",
"fieldType": "duration"
},
{
"kind": "field",
"name": "cardinality_analysis_enabled",
Expand Down
4 changes: 3 additions & 1 deletion cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -1954,7 +1954,9 @@ Usage of ./cmd/mimir/mimir:
-store.max-labels-query-length duration
Limit the time range (end - start time) of series, label names and values queries. This limit is enforced in the querier. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.
-store.max-query-length duration
Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and ruler. 0 to disable.
Limit the query time range (end - start time). This limit is enforced in the querier (on the query possibly split by the query-frontend) and ruler. 0 to disable.
-store.max-total-query-length duration
Limit the total query time range (end - start time). This limit is enforced in the query-frontend on the received query. Defaults to the value of -store.max-query-length if set to 0.
-target comma-separated-list-of-strings
Comma-separated list of components to include in the instantiated process. The default value 'all' includes all components that are required to form a functional Grafana Mimir instance in single-binary mode. Use the '-modules' command line flag to get a list of available components, and to see which components are included with 'all'. (default all)
-tenant-federation.enabled
Expand Down
4 changes: 3 additions & 1 deletion cmd/mimir/help.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,9 @@ Usage of ./cmd/mimir/mimir:
-store.max-labels-query-length duration
Limit the time range (end - start time) of series, label names and values queries. This limit is enforced in the querier. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.
-store.max-query-length duration
Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and ruler. 0 to disable.
Limit the query time range (end - start time). This limit is enforced in the querier (on the query possibly split by the query-frontend) and ruler. 0 to disable.
-store.max-total-query-length duration
Limit the total query time range (end - start time). This limit is enforced in the query-frontend on the received query. Defaults to the value of -store.max-query-length if set to 0.
-target comma-separated-list-of-strings
Comma-separated list of components to include in the instantiated process. The default value 'all' includes all components that are required to form a functional Grafana Mimir instance in single-binary mode. Use the '-modules' command line flag to get a list of available components, and to see which components are included with 'all'. (default all)
-tenant-federation.enabled
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2478,8 +2478,8 @@ The `limits` block configures default and per-tenant limits imposed by component
[max_query_lookback: <duration> | default = 0s]

# Limit the query time range (end - start time). This limit is enforced in the
# query-frontend (on the received query), in the querier (on the query possibly
# split by the query-frontend) and ruler. 0 to disable.
# querier (on the query possibly split by the query-frontend) and ruler. 0 to
# disable.
# CLI flag: -store.max-query-length
[max_query_length: <duration> | default = 0s]

Expand Down Expand Up @@ -2530,6 +2530,12 @@ The `limits` block configures default and per-tenant limits imposed by component
# CLI flag: -query-frontend.split-instant-queries-by-interval
[split_instant_queries_by_interval: <duration> | default = 0s]

# Limit the total query time range (end - start time). This limit is enforced in
# the query-frontend on the received query. Defaults to the value of
# -store.max-query-length if set to 0.
# CLI flag: -store.max-total-query-length
[max_total_query_length: <duration> | default = 0s]

# Enables endpoints used for cardinality analysis.
# CLI flag: -querier.cardinality-analysis-enabled
[cardinality_analysis_enabled: <boolean> | default = false]
Expand Down
6 changes: 3 additions & 3 deletions pkg/frontend/querymiddleware/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type Limits interface {
MaxQueryLookback(userID string) time.Duration

// MaxQueryLength returns the limit of the length (in time) of a query.
MaxQueryLength(userID string) time.Duration
MaxTotalQueryLength(userID string) time.Duration

// MaxQueryParallelism returns the limit to the number of split queries the
// frontend will process in parallel.
Expand Down Expand Up @@ -112,10 +112,10 @@ func (l limitsMiddleware) Do(ctx context.Context, r Request) (Response, error) {
}

// Enforce the max query length.
if maxQueryLength := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, l.MaxQueryLength); maxQueryLength > 0 {
if maxQueryLength := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, l.MaxTotalQueryLength); maxQueryLength > 0 {
queryLen := timestamp.Time(r.GetEnd()).Sub(timestamp.Time(r.GetStart()))
if queryLen > maxQueryLength {
return nil, apierror.New(apierror.TypeBadData, validation.NewMaxQueryLengthError(queryLen, maxQueryLength).Error())
return nil, apierror.New(apierror.TypeBadData, validation.NewMaxTotalQueryLengthError(queryLen, maxQueryLength).Error())
}
}

Expand Down
29 changes: 22 additions & 7 deletions pkg/frontend/querymiddleware/limits_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,11 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
now := time.Now()

tests := map[string]struct {
maxQueryLength time.Duration
reqStartTime time.Time
reqEndTime time.Time
expectedErr string
maxQueryLength time.Duration
maxTotalQueryLength time.Duration
reqStartTime time.Time
reqEndTime time.Time
expectedErr string
}{
"should skip validation if max length is disabled": {
maxQueryLength: 0,
Expand All @@ -148,13 +149,19 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
maxQueryLength: thirtyDays,
reqStartTime: now.Add(-thirtyDays).Add(-100 * time.Hour),
reqEndTime: now,
expectedErr: "the query time range exceeds the limit",
expectedErr: "the total unsharded query time range exceeds the limit",
},
"should fail on a query on large time range over the limit, ending in the past": {
maxQueryLength: thirtyDays,
reqStartTime: now.Add(-4 * thirtyDays),
reqEndTime: now.Add(-2 * thirtyDays),
expectedErr: "the query time range exceeds the limit",
expectedErr: "the total unsharded query time range exceeds the limit",
},
"should succeed if total query length is higher than query length limit": {
maxQueryLength: thirtyDays,
maxTotalQueryLength: 8 * thirtyDays,
reqStartTime: now.Add(-4 * thirtyDays),
reqEndTime: now.Add(-2 * thirtyDays),
},
}

Expand All @@ -165,7 +172,7 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
End: util.TimeToMillis(testData.reqEndTime),
}

limits := mockLimits{maxQueryLength: testData.maxQueryLength}
limits := mockLimits{maxQueryLength: testData.maxQueryLength, maxTotalQueryLength: testData.maxTotalQueryLength}
middleware := newLimitsMiddleware(limits, log.NewNopLogger())

innerRes := newEmptyPrometheusResponse()
Expand Down Expand Up @@ -198,6 +205,7 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
type mockLimits struct {
maxQueryLookback time.Duration
maxQueryLength time.Duration
maxTotalQueryLength time.Duration
maxCacheFreshness time.Duration
maxQueryParallelism int
maxShardedQueries int
Expand All @@ -214,6 +222,13 @@ func (m mockLimits) MaxQueryLength(string) time.Duration {
return m.maxQueryLength
}

// MaxTotalQueryLength returns the mocked total query length limit. When no
// total limit has been set on the mock (zero value), it returns the mocked
// maxQueryLength instead.
func (m mockLimits) MaxTotalQueryLength(string) time.Duration {
	if m.maxTotalQueryLength != 0 {
		return m.maxTotalQueryLength
	}
	return m.maxQueryLength
}

func (m mockLimits) MaxQueryParallelism(string) int {
if m.maxQueryParallelism == 0 {
return 14 // Flag default.
Expand Down
1 change: 1 addition & 0 deletions pkg/util/globalerror/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ const (
MetricMetadataUnitTooLong ID = "unit-too-long"

MaxQueryLength ID = "max-query-length"
MaxTotalQueryLength ID = "max-total-query-length"
RequestRateLimited ID = "tenant-max-request-rate"
IngestionRateLimited ID = "tenant-max-ingestion-rate"
TooManyHAClusters ID = "tenant-too-many-ha-clusters"
Expand Down
8 changes: 7 additions & 1 deletion pkg/util/validation/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,16 @@ func newMetadataUnitTooLongError(metadata *mimirpb.MetricMetadata) ValidationErr

func NewMaxQueryLengthError(actualQueryLen, maxQueryLength time.Duration) LimitError {
return LimitError(globalerror.MaxQueryLength.MessageWithPerTenantLimitConfig(
fmt.Sprintf("the query time range exceeds the limit (query length: %s, limit: %s)", actualQueryLen, maxQueryLength),
fmt.Sprintf("the (sharded) query time range exceeds the limit (query length: %s, limit: %s)", actualQueryLen, maxQueryLength),
maxQueryLengthFlag))
}

// NewMaxTotalQueryLengthError builds the LimitError for a query whose time
// range (actualQueryLen) is longer than maxTotalQueryLength. The message is
// tagged with the MaxTotalQueryLength error ID and names the flag held in
// maxTotalQueryLengthFlag as the per-tenant limit to adjust.
func NewMaxTotalQueryLengthError(actualQueryLen, maxTotalQueryLength time.Duration) LimitError {
	msg := fmt.Sprintf("the total unsharded query time range exceeds the limit (query length: %s, limit: %s)", actualQueryLen, maxTotalQueryLength)
	return LimitError(globalerror.MaxTotalQueryLength.MessageWithPerTenantLimitConfig(msg, maxTotalQueryLengthFlag))
}

func NewRequestRateLimitedError(limit float64, burst int) LimitError {
return LimitError(globalerror.RequestRateLimited.MessageWithPerTenantLimitConfig(
fmt.Sprintf("the request has been rejected because the tenant exceeded the request rate limit, set to %v requests/s across all distributors with a maximum allowed burst of %d", limit, burst),
Expand Down
7 changes: 6 additions & 1 deletion pkg/util/validation/errors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@ func TestNewMetadataUnitTooLongError(t *testing.T) {

func TestNewMaxQueryLengthError(t *testing.T) {
err := NewMaxQueryLengthError(time.Hour, time.Minute)
assert.Equal(t, "the query time range exceeds the limit (query length: 1h0m0s, limit: 1m0s) (err-mimir-max-query-length). To adjust the related per-tenant limit, configure -store.max-query-length, or contact your service administrator.", err.Error())
assert.Equal(t, "the (sharded) query time range exceeds the limit (query length: 1h0m0s, limit: 1m0s) (err-mimir-max-query-length). To adjust the related per-tenant limit, configure -store.max-query-length, or contact your service administrator.", err.Error())
}

// TestNewTotalMaxQueryLengthError pins the exact, user-facing text produced by
// NewMaxTotalQueryLengthError for a 1h query checked against a 1m limit.
func TestNewTotalMaxQueryLengthError(t *testing.T) {
	const expected = "the total unsharded query time range exceeds the limit (query length: 1h0m0s, limit: 1m0s) (err-mimir-max-total-query-length). To adjust the related per-tenant limit, configure -store.max-total-query-length, or contact your service administrator."

	actual := NewMaxTotalQueryLengthError(time.Hour, time.Minute).Error()
	assert.Equal(t, expected, actual)
}

func TestNewRequestRateLimitedError(t *testing.T) {
Expand Down
19 changes: 18 additions & 1 deletion pkg/util/validation/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const (
maxMetadataLengthFlag = "validation.max-metadata-length"
creationGracePeriodFlag = "validation.create-grace-period"
maxQueryLengthFlag = "store.max-query-length"
maxTotalQueryLengthFlag = "store.max-total-query-length"
requestRateFlag = "distributor.request-rate-limit"
requestBurstSizeFlag = "distributor.request-burst-size"
ingestionRateFlag = "distributor.ingestion-rate-limit"
Expand Down Expand Up @@ -118,6 +119,10 @@ type Limits struct {
QueryShardingTotalShards int `yaml:"query_sharding_total_shards" json:"query_sharding_total_shards"`
QueryShardingMaxShardedQueries int `yaml:"query_sharding_max_sharded_queries" json:"query_sharding_max_sharded_queries"`
SplitInstantQueriesByInterval model.Duration `yaml:"split_instant_queries_by_interval" json:"split_instant_queries_by_interval" category:"experimental"`

// Query-frontend limits.
MaxTotalQueryLength model.Duration `yaml:"max_total_query_length,omitempty" json:"max_total_query_length,omitempty"`

// Cardinality
CardinalityAnalysisEnabled bool `yaml:"cardinality_analysis_enabled" json:"cardinality_analysis_enabled"`
LabelNamesAndValuesResultsMaxSizeBytes int `yaml:"label_names_and_values_results_max_size_bytes" json:"label_names_and_values_results_max_size_bytes"`
Expand Down Expand Up @@ -196,7 +201,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&l.MaxChunksPerQuery, MaxChunksPerQueryFlag, 2e6, "Maximum number of chunks that can be fetched in a single query from ingesters and long-term storage. This limit is enforced in the querier, ruler and store-gateway. 0 to disable.")
f.IntVar(&l.MaxFetchedSeriesPerQuery, MaxSeriesPerQueryFlag, 0, "The maximum number of unique series for which a query can fetch samples from each ingesters and storage. This limit is enforced in the querier and ruler. 0 to disable")
f.IntVar(&l.MaxFetchedChunkBytesPerQuery, MaxChunkBytesPerQueryFlag, 0, "The maximum size of all chunks in bytes that a query can fetch from each ingester and storage. This limit is enforced in the querier and ruler. 0 to disable.")
f.Var(&l.MaxQueryLength, maxQueryLengthFlag, "Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and ruler. 0 to disable.")
f.Var(&l.MaxQueryLength, maxQueryLengthFlag, "Limit the query time range (end - start time). This limit is enforced in the querier (on the query possibly split by the query-frontend) and ruler. 0 to disable.")
f.Var(&l.MaxQueryLookback, "querier.max-query-lookback", "Limit how long back data (series and metadata) can be queried, up until <lookback> duration ago. This limit is enforced in the query-frontend, querier and ruler. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.")
f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 14, "Maximum number of split (by time) or partial (by shard) queries that will be scheduled in parallel by the query-frontend for a single input query. This limit is introduced to have a fairer query scheduling and avoid a single query over a large time range saturating all available queriers.")
f.Var(&l.MaxLabelsQueryLength, "store.max-labels-query-length", "Limit the time range (end - start time) of series, label names and values queries. This limit is enforced in the querier. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.")
Expand All @@ -222,6 +227,9 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.Var(&l.CompactorPartialBlockDeletionDelay, "compactor.partial-block-deletion-delay", fmt.Sprintf("If a partial block (unfinished block without %s file) hasn't been modified for this time, it will be marked for deletion. The minimum accepted value is %s: a lower value will be ignored and the feature disabled. 0 to disable.", block.MetaFilename, MinCompactorPartialBlockDeletionDelay.String()))
f.BoolVar(&l.CompactorBlockUploadEnabled, "compactor.block-upload-enabled", false, "Enable block upload API for the tenant.")

// Query-frontend.
f.Var(&l.MaxTotalQueryLength, maxTotalQueryLengthFlag, fmt.Sprintf("Limit the total query time range (end - start time). This limit is enforced in the query-frontend on the received query. Defaults to the value of -%s if set to 0.", maxQueryLengthFlag))

// Store-gateway.
f.IntVar(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The tenant's shard size, used when store-gateway sharding is enabled. Value of 0 disables shuffle sharding for the tenant, that is all tenant blocks are sharded across all store-gateway replicas.")

Expand Down Expand Up @@ -458,6 +466,15 @@ func (o *Overrides) MaxQueryLength(userID string) time.Duration {
return time.Duration(o.getOverridesForUser(userID).MaxQueryLength)
}

// MaxTotalQueryLength returns the limit of the total length (in time) of a query.
// If the tenant's total limit is zero, the tenant's MaxQueryLength is returned
// instead.
func (o *Overrides) MaxTotalQueryLength(userID string) time.Duration {
	if total := time.Duration(o.getOverridesForUser(userID).MaxTotalQueryLength); total != 0 {
		return total
	}
	return o.MaxQueryLength(userID)
}

// MaxLabelsQueryLength returns the limit of the length (in time) of a label names or values request.
func (o *Overrides) MaxLabelsQueryLength(userID string) time.Duration {
return time.Duration(o.getOverridesForUser(userID).MaxLabelsQueryLength)
Expand Down
61 changes: 61 additions & 0 deletions pkg/util/validation/limits_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,67 @@ func TestSmallestPositiveNonZeroDurationPerTenant(t *testing.T) {
}
}

// TestMaxTotalQueryLengthWithoutDefault exercises Overrides.MaxTotalQueryLength
// when the default limits set only MaxQueryLength, so a tenant without its own
// total limit is expected to resolve to a MaxQueryLength value.
func TestMaxTotalQueryLengthWithoutDefault(t *testing.T) {
	type testCase struct {
		tenantIDs []string
		expLimit  time.Duration
	}

	// Defaults carry no MaxTotalQueryLength.
	defaults := Limits{
		MaxQueryLength: model.Duration(2 * time.Hour),
	}
	perTenant := map[string]*Limits{
		"tenant-a": {
			MaxQueryLength: model.Duration(time.Hour),
		},
		"tenant-b": {
			MaxQueryLength:      model.Duration(time.Hour),
			MaxTotalQueryLength: model.Duration(4 * time.Hour),
		},
	}

	ov, err := NewOverrides(defaults, newMockTenantLimits(perTenant))
	require.NoError(t, err)

	cases := []testCase{
		// No tenants: nothing to pick a limit from.
		{tenantIDs: []string{}, expLimit: 0},
		// Only MaxQueryLength is set for the tenant.
		{tenantIDs: []string{"tenant-a"}, expLimit: time.Hour},
		// Explicit per-tenant total limit.
		{tenantIDs: []string{"tenant-b"}, expLimit: 4 * time.Hour},
		// Tenant without overrides: the default MaxQueryLength is expected.
		{tenantIDs: []string{"tenant-c"}, expLimit: 2 * time.Hour},
	}
	for _, tc := range cases {
		actual := SmallestPositiveNonZeroDurationPerTenant(tc.tenantIDs, ov.MaxTotalQueryLength)
		assert.Equal(t, tc.expLimit, actual)
	}
}

// TestMaxTotalQueryLengthWithDefault exercises Overrides.MaxTotalQueryLength
// when the default limits set both MaxQueryLength and MaxTotalQueryLength, so a
// tenant without overrides is expected to resolve to the default total limit.
func TestMaxTotalQueryLengthWithDefault(t *testing.T) {
	type testCase struct {
		tenantIDs []string
		expLimit  time.Duration
	}

	// Defaults carry an explicit MaxTotalQueryLength.
	defaults := Limits{
		MaxQueryLength:      model.Duration(2 * time.Hour),
		MaxTotalQueryLength: model.Duration(3 * time.Hour),
	}
	perTenant := map[string]*Limits{
		"tenant-a": {
			MaxQueryLength: model.Duration(time.Hour),
		},
		"tenant-b": {
			MaxQueryLength:      model.Duration(time.Hour),
			MaxTotalQueryLength: model.Duration(4 * time.Hour),
		},
	}

	ov, err := NewOverrides(defaults, newMockTenantLimits(perTenant))
	require.NoError(t, err)

	cases := []testCase{
		// No tenants: nothing to pick a limit from.
		{tenantIDs: []string{}, expLimit: 0},
		// Only MaxQueryLength is set for the tenant.
		{tenantIDs: []string{"tenant-a"}, expLimit: time.Hour},
		// Explicit per-tenant total limit.
		{tenantIDs: []string{"tenant-b"}, expLimit: 4 * time.Hour},
		// Tenant without overrides: the default total limit is expected.
		{tenantIDs: []string{"tenant-c"}, expLimit: 3 * time.Hour},
	}
	for _, tc := range cases {
		actual := SmallestPositiveNonZeroDurationPerTenant(tc.tenantIDs, ov.MaxTotalQueryLength)
		assert.Equal(t, tc.expLimit, actual)
	}
}

func TestAlertmanagerNotificationLimits(t *testing.T) {
for name, tc := range map[string]struct {
inputYAML string
Expand Down

0 comments on commit 494f956

Please sign in to comment.