Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

prog: avoid verifier loop of death #1693

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 80 additions & 42 deletions prog.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ const minVerifierLogSize = 64 * 1024
// future, but avoid the unnecessary EINVAL for now.
const maxVerifierLogSize = math.MaxUint32 >> 2

// maxVerifierAttempts is the maximum number of times the verifier will retry
// loading a program with a growing log buffer before giving up. Since we double
// the log size on every attempt, this is the absolute maximum number of
// attempts before the buffer reaches [maxVerifierLogSize].
const maxVerifierAttempts = 30

// ProgramOptions control loading a program into the kernel.
type ProgramOptions struct {
// Bitmap controlling the detail emitted by the kernel's eBPF verifier log.
Expand Down Expand Up @@ -409,59 +415,45 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er
}
}

// The caller requested a specific verifier log level. Set up the log buffer
// so that there is a chance of loading the program in a single shot.
logSize := internal.Between(opts.LogSizeStart, minVerifierLogSize, maxVerifierLogSize)
var logBuf []byte
if !opts.LogDisabled && opts.LogLevel != 0 {
logBuf = make([]byte, logSize)
attr.LogLevel = opts.LogLevel
attr.LogSize = uint32(len(logBuf))
attr.LogBuf = sys.NewSlicePointer(logBuf)
}
var fd *sys.FD

for {
var fd *sys.FD
if opts.LogDisabled {
// Loading with logging disabled should never retry.
fd, err = sys.ProgLoad(attr)
if err == nil {
return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil
}

if opts.LogDisabled {
break
return &Program{"", fd, spec.Name, "", spec.Type}, nil
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can return here, all of the other code only deals with the log being present. Then the else can go and we can unindent by one leve.


if attr.LogTrueSize != 0 && attr.LogSize >= attr.LogTrueSize {
// The log buffer already has the correct size.
break
} else {
// Only specify log size if log level is also specified. Setting size
// without level results in EINVAL. Level will be bumped to LogLevelBranch
// if the first load fails.
if opts.LogLevel != 0 {
attr.LogLevel = opts.LogLevel
attr.LogSize = internal.Between(opts.LogSizeStart, minVerifierLogSize, maxVerifierLogSize)
}

if attr.LogSize != 0 && !errors.Is(err, unix.ENOSPC) {
// Logging is enabled and the error is not ENOSPC, so we can infer
// that the log buffer is large enough.
break
}
attempts := 1
for {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit:

Suggested change
for {
for attempts := 0; attemps < maxVerifierAttempts ; attempts++ {

Putting the increment into the for loop has the benefit that it does the right thing when continuing, etc.

if attr.LogLevel != 0 {
logBuf = make([]byte, attr.LogSize)
attr.LogBuf = sys.NewSlicePointer(logBuf)
}

if attr.LogLevel == 0 {
// Logging is not enabled but loading the program failed. Enable
// basic logging.
attr.LogLevel = LogLevelBranch
}
fd, err = sys.ProgLoad(attr)
if err == nil {
return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil
}

// Make an educated guess how large the buffer should be by multiplying.
// Ensure the size doesn't overflow.
const factor = 2
logSize = internal.Between(logSize, minVerifierLogSize, maxVerifierLogSize/factor)
logSize *= factor
if !retryLogAttrs(attr, opts.LogSizeStart, err) {
break
}

if attr.LogTrueSize != 0 {
// The kernel has given us a hint how large the log buffer has to be.
logSize = attr.LogTrueSize
if attempts >= maxVerifierAttempts {
return nil, fmt.Errorf("load program: %w (bug: hit %d verifier attempts)", err, maxVerifierAttempts)
}
attempts++
}

logBuf = make([]byte, logSize)
attr.LogSize = logSize
attr.LogBuf = sys.NewSlicePointer(logBuf)
}

end := bytes.IndexByte(logBuf, 0)
Expand Down Expand Up @@ -511,6 +503,52 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er
return nil, internal.ErrorWithLog("load program", err, logBuf)
}

func retryLogAttrs(attr *sys.ProgLoadAttr, startSize uint32, err error) bool {
// ENOSPC means the log was enabled on the previous iteration, so we only
// need to grow the buffer.
if errors.Is(err, unix.ENOSPC) {
if attr.LogTrueSize != 0 {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't preserve the behaviour that we don't retry if LogSize >= LogTrueSize. I'm also not sure why this only happens on ENOSPC? LogTrueSize should always be populated?

// Kernel supports LogTrueSize and previous iteration undershot the buffer
// size. Try again with the given true size.
attr.LogSize = attr.LogTrueSize
return true
}

// ENOSPC means we've loaded the program before and the log buffer was too
// small.
if attr.LogSize == maxVerifierLogSize {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure you will ever reach this condition, because I don't think the kernel will return ENOSPC when using the max verifier log size. That said, it doesn't hurt to be defensive here.

// Maximum buffer size reached, don't grow or retry.
return false
}

// Ensure the size doesn't overflow.
const factor = 2
if attr.LogSize >= maxVerifierLogSize/factor {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this whole factor thing not very intuitive. This is to avoid overflow?

attr.LogSize = maxVerifierLogSize
return true
}

// Make an educated guess how large the buffer should be by multiplying. Due
// to int division, this rounds down odd sizes.
attr.LogSize = internal.Between(attr.LogSize, minVerifierLogSize, maxVerifierLogSize/factor)
attr.LogSize *= factor

return true
}

if attr.LogLevel == 0 {
// Loading the program failed, it wasn't a buffer-related error, and the log
// was disabled the previous iteration. Enable basic logging and retry.
attr.LogLevel = LogLevelBranch
attr.LogSize = internal.Between(startSize, minVerifierLogSize, maxVerifierLogSize)
return true
}

// Loading the program failed for a reason other than buffer size and the log
// was already enabled the previous iteration. Don't retry.
return false
}

// NewProgramFromFD creates a program from a raw fd.
//
// You should not use fd after calling this function.
Expand Down
75 changes: 75 additions & 0 deletions prog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,81 @@ func TestProgramVerifierLog(t *testing.T) {
prog.Close()
}

func TestProgramVerifierLogRetry(t *testing.T) {
someError := errors.New("not a buffer error")

t.Run("retry with oversized buffer", func(t *testing.T) {
// First load failure, without logging enabled. Retry with logging enabled.
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError)))
qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch))
qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize))

// Second failure with logging enabled. No buffer error, don't retry.
qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, someError)))
qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch))
qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize))
})

t.Run("retry with undersized buffer, no log_true_size", func(t *testing.T) {
// First load failure, without logging enabled. Retry with larger buffer.
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError)))

// Second failure, this time the kernel signals an undersized buffer. Retry
// with double the size.
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC)))
qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize*2))
})

t.Run("retry with undersized buffer, with log_true_size", func(t *testing.T) {
// First load failure, without logging enabled. Retry with larger buffer.
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError)))

// Second failure, the kernel signals undersized buffer and also sets
// log_true_size. Retry with the exact size required.
attr.LogTrueSize = 123
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC)))
qt.Assert(t, qt.Equals(attr.LogSize, 123))
})

t.Run("grow to maximum buffer size", func(t *testing.T) {
// Previous loads pushed the log size to (or above) half of the maximum,
// which would make it overflow on the next retry. Make sure the log size
// actually hits the maximum so we can bail out.
attr := &sys.ProgLoadAttr{LogLevel: LogLevelBranch, LogSize: maxVerifierLogSize / 2}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC)))
qt.Assert(t, qt.Equals(attr.LogSize, maxVerifierLogSize))

// Don't retry if the buffer is already at the maximum size.
qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, unix.ENOSPC)))
})

t.Run("start at maximum buffer size", func(t *testing.T) {
// The user requested a log buffer exceeding the maximum size, but no log
// level. Retry with the maximum size and default log level.
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, math.MaxUint32, unix.EINVAL)))
qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch))
qt.Assert(t, qt.Equals(attr.LogSize, maxVerifierLogSize))

// Log still doesn't fit maximum-size buffer. Don't retry.
qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, unix.ENOSPC)))
})

t.Run("ensure growth terminates within max attempts", func(t *testing.T) {
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
var terminated bool
for i := 1; i <= maxVerifierAttempts; i++ {
if !retryLogAttrs(attr, 0, syscall.ENOSPC) {
terminated = true
}
}
qt.Assert(t, qt.IsTrue(terminated))
})
}

func TestProgramWithUnsatisfiedMap(t *testing.T) {
coll, err := LoadCollectionSpec("testdata/loader-el.elf")
if err != nil {
Expand Down
Loading