Skip to content

Commit 89b1234

Browse files
committed Nov 16, 2016
Fix deadlock on cancelling healthcheck
Signed-off-by: Tonis Tiigi <[email protected]>
1 parent 27c20a7 commit 89b1234

File tree

3 files changed

+12
-8
lines changed

3 files changed

+12
-8
lines changed
 

‎container/health.go

+1 -4
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,7 @@ func (s *Health) OpenMonitorChannel() chan struct{} {
4242
func (s *Health) CloseMonitorChannel() {
4343
if s.stop != nil {
4444
logrus.Debug("CloseMonitorChannel: waiting for probe to stop")
45-
// This channel does not buffer. Once the write succeeds, the monitor
46-
// has read the stop request and will not make any further updates
47-
// to c.State.Health.
48-
s.stop <- struct{}{}
45+
close(s.stop)
4946
s.stop = nil
5047
logrus.Debug("CloseMonitorChannel done")
5148
}

‎daemon/health.go

+10 -3
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,17 @@ func (p *cmdProbe) run(ctx context.Context, d *Daemon, container *container.Cont
107107
}
108108

109109
// Update the container's Status.Health struct based on the latest probe's result.
110-
func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult) {
110+
func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult, done chan struct{}) {
111111
c.Lock()
112112
defer c.Unlock()
113113

114+
// probe may have been cancelled while waiting on lock. Ignore result then
115+
select {
116+
case <-done:
117+
return
118+
default:
119+
}
120+
114121
retries := c.Config.Healthcheck.Retries
115122
if retries <= 0 {
116123
retries = defaultProbeRetries
@@ -183,7 +190,7 @@ func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe)
183190
cancelProbe()
184191
return
185192
case result := <-results:
186-
handleProbeResult(d, c, result)
193+
handleProbeResult(d, c, result, stop)
187194
// Stop timeout
188195
cancelProbe()
189196
case <-ctx.Done():
@@ -193,7 +200,7 @@ func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe)
193200
Output: fmt.Sprintf("Health check exceeded timeout (%v)", probeTimeout),
194201
Start: startTime,
195202
End: time.Now(),
196-
})
203+
}, stop)
197204
cancelProbe()
198205
// Wait for probe to exit (it might take a while to respond to the TERM
199206
// signal and we don't want dying probes to pile up).

‎daemon/health_test.go

+1 -1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ func TestHealthStates(t *testing.T) {
8080
Start: startTime,
8181
End: startTime,
8282
ExitCode: exitCode,
83-
})
83+
}, nil)
8484
}
8585

8686
// starting -> failed -> success -> failed

0 commit comments

Comments
 (0)
Please sign in to comment.