Skip to content

Commit

Permalink
lxd/instance/drivers/qemu: Lock the vsock ID selection
Browse files Browse the repository at this point in the history
If multiple VMs are started at the same time (e.g. a parent and its imported clone) using the same vsock ID,
a race condition might occur in which the already cleared vsock ID isn't yet bound to the socket.
This leads to the next VM picking the exact same ID, since it doesn't appear to have been acquired.

Signed-off-by: Julian Pelizäus <[email protected]>
  • Loading branch information
roosterfish committed Jul 11, 2023
1 parent 6dc6fbf commit d5401c3
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions lxd/instance/drivers/driver_qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"sort"
"strconv"
"strings"
"sync"
"time"

"github.com/flosch/pongo2"
Expand Down Expand Up @@ -134,6 +135,8 @@ const qemuSparseUSBPorts = 8

var errQemuAgentOffline = fmt.Errorf("LXD VM agent isn't currently running")

// nextVsockIDMutex serializes the selection of a new vsock ID.
// nextVsockID locks it after picking a candidate ID, and a background goroutine
// (waitForVsockID) unlocks it once the candidate is no longer reported as free
// or the context passed to that goroutine is done.
var nextVsockIDMutex = sync.Mutex{}

type monitorHook func(m *qmp.Monitor) error

// qemuLoad creates a Qemu instance from the supplied InstanceArgs.
Expand Down Expand Up @@ -7477,6 +7480,8 @@ func (d *qemu) freeVsockID(vsockID uint32) bool {

// nextVsockID returns the next free vsock Context ID for the VM.
// It tries to acquire one randomly until the timeout is exceeded.
// Concurrent requests from different goroutines will wait on each other
// if a new vsock ID needs to be selected.
func (d *qemu) nextVsockID() (uint32, error) {
// Check if vsock ID from last VM start is present in volatile, then use that.
// This allows a running VM to be recovered after DB record deletion and that an agent connection still works
Expand All @@ -7502,6 +7507,23 @@ func (d *qemu) nextVsockID() (uint32, error) {

timeout := 5 * time.Second

// waitForVsockID holds nextVsockIDMutex (locked by the caller before this is
// started as a goroutine) until the given vsock ID is no longer reported as free
// by d.freeVsockID, or until ctx is done, and then releases the mutex.
// NOTE(review): there is no timeout besides ctx; if the ID is never claimed,
// the mutex stays locked until ctx is cancelled - confirm this is intended.
waitForVsockID := func(ctx context.Context, vsockID uint32) {
// The mutex was acquired by the caller; it is released when this goroutine returns.
defer nextVsockIDMutex.Unlock()
// Poll the state of the vsock ID every 50ms.
ticker := time.NewTicker(50 * time.Millisecond)
defer ticker.Stop()

for {
select {
case <-ctx.Done():
// Context is done: stop waiting and release the mutex.
return
case <-ticker.C:
// Presumably freeVsockID returns false once the ID has been bound,
// at which point another caller can no longer pick it.
if !d.freeVsockID(vsockID) {
return
}
}
}
}

// Try to find a new Context ID.
for start := time.Now(); time.Since(start) <= timeout; {
candidateVsockID := r.Uint32()
Expand All @@ -7511,6 +7533,8 @@ func (d *qemu) nextVsockID() (uint32, error) {
}

if d.freeVsockID(candidateVsockID) {
nextVsockIDMutex.Lock()
go waitForVsockID(d.state.ShutdownCtx, candidateVsockID)
return candidateVsockID, nil
}

Expand Down

0 comments on commit d5401c3

Please sign in to comment.