Skip to content

Commit

Permalink
lxd/instance/drivers/qemu: Occupy vsock Context ID through syscall
Browse files Browse the repository at this point in the history
When trying to acquire a new vsock Context ID, we want to occupy it right away to prevent collisions.
This helps for scenarios where two VMs have the same identical volatile.vsock_id (VM export/import) and
in case LXD is used in a nested environment.

If either the acquisition or the syscall fails, the next one gets selected.

Signed-off-by: Julian Pelizäus <[email protected]>
  • Loading branch information
roosterfish committed Jul 13, 2023
1 parent 7a41e66 commit 5b7a53b
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 51 deletions.
132 changes: 84 additions & 48 deletions lxd/instance/drivers/driver_qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ import (
"sort"
"strconv"
"strings"
"sync"
"syscall"
"time"
"unsafe"

"github.com/flosch/pongo2"
"github.com/gorilla/websocket"
Expand Down Expand Up @@ -134,6 +137,9 @@ const qemuSparseUSBPorts = 8

var errQemuAgentOffline = fmt.Errorf("LXD VM agent isn't currently running")

// Locks the vsock Context ID acquisition.
var vsockIDAcquistionLock = sync.Mutex{}

type monitorHook func(m *qmp.Monitor) error

// qemuLoad creates a Qemu instance from the supplied InstanceArgs.
Expand Down Expand Up @@ -1143,14 +1149,27 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error {

revert.Add(func() { _ = d.unmount() })

volatileSet := make(map[string]string)
// Define a set of files to open and pass their file descriptors to QEMU command.
fdFiles := make([]*os.File, 0)

// Ensure passed files are closed after start has returned (either because QEMU has started or on error).
defer func() {
for _, file := range fdFiles {
_ = file.Close()
}
}()

// New or existing vsock ID from volatile.
vsockID, err := d.nextVsockID()
vsockID, vsockF, err := d.nextVsockID()
if err != nil {
return err
}

// Add allocated QEMU vhost file descriptor.
vsockFD := d.addFileDescriptor(&fdFiles, vsockF)

volatileSet := make(map[string]string)

// Update vsock ID in volatile if needed for recovery (do this before UpdateBackupFile() call).
oldVsockID := d.localConfig["volatile.vsock_id"]
newVsockID := strconv.FormatUint(uint64(vsockID), 10)
Expand Down Expand Up @@ -1353,16 +1372,6 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error {
return err
}

// Define a set of files to open and pass their file descriptors to qemu command.
fdFiles := make([]*os.File, 0)

// Ensure passed files are closed after start has returned (either because qemu has started or on error).
defer func() {
for _, file := range fdFiles {
_ = file.Close()
}
}()

// Snapshot if needed.
snapName, expiry, err := d.getStartupSnapNameAndExpiry(d)
if err != nil {
Expand Down Expand Up @@ -1409,7 +1418,7 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error {
}

// Generate the QEMU configuration.
confFile, monHooks, err := d.generateQemuConfigFile(cpuInfo, mountInfo, qemuBus, devConfs, &fdFiles)
confFile, monHooks, err := d.generateQemuConfigFile(cpuInfo, mountInfo, qemuBus, vsockFD, devConfs, &fdFiles)
if err != nil {
op.Done(err)
return err
Expand Down Expand Up @@ -2825,7 +2834,7 @@ func (d *qemu) deviceBootPriorities() (map[string]int, error) {

// generateQemuConfigFile writes the qemu config file and returns its location.
// It writes the config file inside the VM's log path.
func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePools.MountInfo, busName string, devConfs []*deviceConfig.RunConfig, fdFiles *[]*os.File) (string, []monitorHook, error) {
func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePools.MountInfo, busName string, vsockFD int, devConfs []*deviceConfig.RunConfig, fdFiles *[]*os.File) (string, []monitorHook, error) {
var monHooks []monitorHook

cfg := qemuBase(&qemuBaseOpts{d.Architecture()})
Expand Down Expand Up @@ -2953,6 +2962,7 @@ func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePo
devAddr: devAddr,
multifunction: multi,
},
vsockFD: vsockFD,
vsockID: vsockID,
}

Expand Down Expand Up @@ -7450,74 +7460,100 @@ func (d *qemu) getVsockID() (uint32, error) {
return uint32(vsockID), nil
}

// freeVsockID returns true if the given vsockID is not yet acquired.
func (d *qemu) freeVsockID(vsockID uint32) bool {
c, err := lxdvsock.Dial(vsockID, shared.HTTPSDefaultPort)
// acquireVsockID tries to occupy the given vsock Context ID.
// If the ID is free it returns the corresponding file handle.
func (d *qemu) acquireVsockID(vsockID uint32) (*os.File, error) {
vsockIDAcquistionLock.Lock()
defer vsockIDAcquistionLock.Unlock()

revert := revert.New()
defer revert.Fail()

f, err := os.OpenFile("/dev/vhost-vsock", syscall.O_RDWR, 0)
if err != nil {
var unixErrno unix.Errno
return nil, fmt.Errorf("Failed to open vhost socket: %w", err)
}

if !errors.As(err, &unixErrno) {
return false
}
revert.Add(func() { _ = f.Close() })

if unixErrno != unix.ENODEV {
// Skip the vsockID if another syscall error was encountered.
return false
// 0x4008AF60 = VHOST_VSOCK_SET_GUEST_CID = _IOW(VHOST_VIRTIO, 0x60, __u64)
_, _, errno := unix.Syscall(unix.SYS_IOCTL, f.Fd(), 0x4008AF60, uintptr(unsafe.Pointer(&vsockID)))
if errno != 0 {
if !errors.Is(errno, unix.EADDRINUSE) {
return nil, fmt.Errorf("Failed ioctl syscall to vhost socket: %q", errno.Error())
}

// The syscall to the vsock device returned "no such device".
// This means the address (Context ID) is free.
return true
// vsock Context ID is already in use.
return nil, nil
}

// Address is already in use.
c.Close()
return false
revert.Success()
return f, nil
}

// nextVsockID returns the next free vsock Context ID for the VM.
// It tries to acquire one randomly until the timeout exceeds.
func (d *qemu) nextVsockID() (uint32, error) {
// acquireExistingVsockID tries to acquire an already existing vsock Context ID from volatile.
// It returns both the acquired ID and opened vsock file handle for QEMU.
func (d *qemu) acquireExistingVsockID() (uint32, *os.File, error) {
vsockID, err := d.getVsockID()
if err != nil {
return 0, nil, err
}

// Check if the vsockID from last VM start is still not acquired in case the VM was stopped.
f, err := d.acquireVsockID(vsockID)
if err != nil {
return 0, nil, err
}

return vsockID, f, nil
}

// nextVsockID tries to acquire the next free vsock Context ID for the VM.
// It returns both the acquired ID and opened vsock file handle for QEMU.
func (d *qemu) nextVsockID() (uint32, *os.File, error) {
// Check if vsock ID from last VM start is present in volatile, then use that.
// This allows a running VM to be recovered after DB record deletion and that an agent connection still works
// after the VM's instance ID has changed.
// Continue in case of error since the caller requires a valid vsockID in any case.
vsockID, err := d.getVsockID()
if err == nil {
// Check if the vsock ID from last VM start is still not acquired in case the VM was stopped.
if d.freeVsockID(vsockID) {
return vsockID, nil
}
vsockID, vsockF, _ := d.acquireExistingVsockID()
if vsockID != 0 && vsockF != nil {
return vsockID, vsockF, nil
}

// Ignore the error from before and start to acquire a new Context ID.
instanceUUID := uuid.Parse(d.localConfig["volatile.uuid"])
if instanceUUID == nil {
return 0, fmt.Errorf("Failed to parse instance UUID from volatile.uuid")
return 0, nil, fmt.Errorf("Failed to parse instance UUID from volatile.uuid")
}

r, err := util.GetStableRandomGenerator(instanceUUID.String())
if err != nil {
return 0, fmt.Errorf("Failed generating stable random seed from instance UUID %q: %w", instanceUUID, err)
return 0, nil, fmt.Errorf("Failed generating stable random seed from instance UUID %q: %w", instanceUUID, err)
}

timeout := 5 * time.Second
timeout := time.Now().Add(5 * time.Second)

// Try to find a new Context ID.
for start := time.Now(); time.Since(start) <= timeout; {
for {
if time.Now().After(timeout) {
return 0, nil, fmt.Errorf("Timeout exceeded whilst trying to acquire the next vsock Context ID")
}

candidateVsockID := r.Uint32()

if d.reservedVsockID(candidateVsockID) {
continue
}

if d.freeVsockID(candidateVsockID) {
return candidateVsockID, nil
vsockF, err := d.acquireVsockID(candidateVsockID)
if err != nil {
return 0, nil, err
}

continue
if vsockF != nil {
return candidateVsockID, vsockF, nil
}
}

return 0, fmt.Errorf("Timeout exceeded whilst trying to acquire the next vsock Context ID")
}

// InitPID returns the instance's current process ID.
Expand Down
6 changes: 4 additions & 2 deletions lxd/instance/drivers/driver_qemu_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,21 +281,23 @@ func TestQemuConfigTemplates(t *testing.T) {
opts qemuVsockOpts
expected string
}{{
qemuVsockOpts{qemuDevOpts{"pcie", "qemu_pcie0", "00.4", true}, 14},
qemuVsockOpts{qemuDevOpts{"pcie", "qemu_pcie0", "00.4", true}, 4, 14},
`# Vsock
[device "qemu_vsock"]
driver = "vhost-vsock-pci"
bus = "qemu_pcie0"
addr = "00.4"
multifunction = "on"
guest-cid = "14"
vhostfd = "4"
`,
}, {
qemuVsockOpts{qemuDevOpts{"ccw", "qemu_pcie0", "00.4", false}, 3},
qemuVsockOpts{qemuDevOpts{"ccw", "qemu_pcie0", "00.4", false}, 4, 3},
`# Vsock
[device "qemu_vsock"]
driver = "vhost-vsock-ccw"
guest-cid = "3"
vhostfd = "4"
`,
}}
for _, tc := range testCases {
Expand Down
4 changes: 3 additions & 1 deletion lxd/instance/drivers/driver_qemu_templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ func qemuSEV(opts *qemuSevOpts) []cfgSection {

type qemuVsockOpts struct {
dev qemuDevOpts
vsockFD int
vsockID uint32
}

Expand All @@ -340,7 +341,8 @@ func qemuVsock(opts *qemuVsockOpts) []cfgSection {
name: `device "qemu_vsock"`,
comment: "Vsock",
entries: append(qemuDeviceEntries(&entriesOpts),
cfgEntry{key: "guest-cid", value: fmt.Sprintf("%d", opts.vsockID)}),
cfgEntry{key: "guest-cid", value: fmt.Sprintf("%d", opts.vsockID)},
cfgEntry{key: "vhostfd", value: fmt.Sprintf("%d", opts.vsockFD)}),
}}
}

Expand Down

0 comments on commit 5b7a53b

Please sign in to comment.