diff --git a/core/controlcommands/commandqueue.go b/core/controlcommands/commandqueue.go index d14933ff..0c769dec 100644 --- a/core/controlcommands/commandqueue.go +++ b/core/controlcommands/commandqueue.go @@ -101,7 +101,7 @@ func (m *CommandQueue) Start() { } if err == nil && response == nil { log.WithField("partition", entry.cmd.GetEnvironmentId().String()). - Error("nil response") + Errorf("did not receive neither response nor error for %s", entry.cmd.GetName()) } entry.callback <- response @@ -198,6 +198,11 @@ func (m *CommandQueue) commit(command MesosCommand) (response MesosCommandRespon // Wait for goroutines to finish for i := 0; i < len(command.targets()); i++ { respSemaphore := <-semaphore + // for the sake of better error propagation, we treat a lack of response as a response with error, + // even though it's not technically the same. it can be surely done better, but it would require a larger refactoring. + if respSemaphore.err != nil && respSemaphore.response == nil { + respSemaphore.response = NewMesosCommandResponse(command, respSemaphore.err) + } responses[respSemaphore.receiver] = respSemaphore.response if respSemaphore.err != nil { sendErrorList = append(sendErrorList, respSemaphore.err) @@ -215,12 +220,11 @@ func (m *CommandQueue) commit(command MesosCommand) (response MesosCommandRespon } return }(), "\n")) - return } response = consolidateResponses(command, responses) log.WithField("partition", command.GetEnvironmentId().String()). Debug("responses consolidated, CommandQueue commit done") - return response, nil + return response, err } diff --git a/core/controlcommands/mesoscommandservent.go b/core/controlcommands/mesoscommandservent.go index 52708043..2185eea2 100644 --- a/core/controlcommands/mesoscommandservent.go +++ b/core/controlcommands/mesoscommandservent.go @@ -144,7 +144,7 @@ func (s *Servent) RunCommand(cmd MesosCommand, receiver MesosCommandTarget) (Mes // By the time we get here, ProcessResponse should have already added a Response to the // pending call, and removed it from servent.pending. case <-time.After(cmd.GetResponseTimeout()): - call.Error = fmt.Errorf("MesosCommand %s timed out for task %s", cmd.GetName(), receiver.TaskId.Value) + call.Error = fmt.Errorf("%s timed out for task %s", cmd.GetName(), receiver.TaskId.Value) log.WithPrefix("servent"). WithField("partition", cmd.GetEnvironmentId().String()). diff --git a/core/task/manager.go b/core/task/manager.go index d04964f2..dcff998b 100644 --- a/core/task/manager.go +++ b/core/task/manager.go @@ -754,7 +754,7 @@ func (m *Manager) configureTasks(envId uid.ID, tasks Tasks) error { close(notify) if response == nil { - return errors.New("nil response") + return fmt.Errorf("no response from Mesos to CONFIGURE transition request within %ds timeout", int(cmd.ResponseTimeout.Seconds())) } if response.IsMultiResponse() { @@ -765,14 +765,7 @@ func (m *Manager) configureTasks(envId uid.ID, tasks Tasks) error { task := m.GetTask(k.TaskId.Value) var taskDescription string if task != nil { - tci := task.GetTaskCommandInfo() - tciValue := "unknown command" - if tci.Value != nil { - tciValue = *tci.Value - } - - taskDescription = fmt.Sprintf("task '%s' on %s (id %s, name %s) failed with error: %s", tciValue, task.GetHostname(), task.GetTaskId(), task.GetName(), v.Error()) - + taskDescription = fmt.Sprintf("task '%s' on %s (id %s) failed with error: %s", task.GetParent().GetName(), task.GetHostname(), task.GetTaskId(), v.Error()) } else { taskDescription = fmt.Sprintf("unknown task (id %s) failed with error: %s", k.TaskId.Value, v.Error()) } diff --git a/core/task/task.go b/core/task/task.go index 89032ca2..825dcfc6 100644 --- a/core/task/task.go +++ b/core/task/task.go @@ -76,6 +76,7 @@ type parentRole interface { ConsolidatedVarStack() (varStack map[string]string, err error) CollectInboundChannels() []channel.Inbound SendEvent(event.Event) + GetName() string } type Traits struct {