Fix deadlock on cancelling healthcheck

tonistiigi · tonistiigi · commit 89b123473774 · 2016-11-15T20:10:16.000-08:00
Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
diff --git a/container/health.go b/container/health.go
@@ -42,10 +42,7 @@ func (s *Health) OpenMonitorChannel() chan struct{} {
 func (s *Health) CloseMonitorChannel() {
 	if s.stop != nil {
 		logrus.Debug("CloseMonitorChannel: waiting for probe to stop")
-		// This channel does not buffer. Once the write succeeds, the monitor
-		// has read the stop request and will not make any further updates
-		// to c.State.Health.
-		s.stop <- struct{}{}
+		close(s.stop)
 		s.stop = nil
 		logrus.Debug("CloseMonitorChannel done")
 	}
diff --git a/daemon/health.go b/daemon/health.go
@@ -107,10 +107,17 @@ func (p *cmdProbe) run(ctx context.Context, d *Daemon, container *container.Cont
 }
 
 // Update the container's Status.Health struct based on the latest probe's result.
-func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult) {
+func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult, done chan struct{}) {
 	c.Lock()
 	defer c.Unlock()
 
+	// probe may have been cancelled while waiting on lock. Ignore result then
+	select {
+	case <-done:
+		return
+	default:
+	}
+
 	retries := c.Config.Healthcheck.Retries
 	if retries <= 0 {
 		retries = defaultProbeRetries
@@ -183,7 +190,7 @@ func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe)
 				cancelProbe()
 				return
 			case result := <-results:
-				handleProbeResult(d, c, result)
+				handleProbeResult(d, c, result, stop)
 				// Stop timeout
 				cancelProbe()
 			case <-ctx.Done():
@@ -193,7 +200,7 @@ func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe)
 					Output:   fmt.Sprintf("Health check exceeded timeout (%v)", probeTimeout),
 					Start:    startTime,
 					End:      time.Now(),
-				})
+				}, stop)
 				cancelProbe()
 				// Wait for probe to exit (it might take a while to respond to the TERM
 				// signal and we don't want dying probes to pile up).
diff --git a/daemon/health_test.go b/daemon/health_test.go
@@ -80,7 +80,7 @@ func TestHealthStates(t *testing.T) {
 			Start:    startTime,
 			End:      startTime,
 			ExitCode: exitCode,
-		})
+		}, nil)
 	}
 
 	// starting -> failed -> success -> failed

Original file line number	Diff line number	Diff line change
`@@ -80,7 +80,7 @@ func TestHealthStates(t *testing.T) {`
`80`	`80`	`Start: startTime,`
`81`	`81`	`End: startTime,`
`82`	`82`	`ExitCode: exitCode,`
`83`		`- })`
	`83`	`+ }, nil)`
`84`	`84`	`}`
`85`	`85`
`86`	`86`	`// starting -> failed -> success -> failed`