diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index 76dbd48f677a..ffab9ee498ea 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -24,7 +24,10 @@ import ( "sort" "strconv" "strings" + "sync" + "syscall" "time" + "unsafe" "github.com/flosch/pongo2" "github.com/gorilla/websocket" @@ -134,6 +137,9 @@ const qemuSparseUSBPorts = 8 var errQemuAgentOffline = fmt.Errorf("LXD VM agent isn't currently running") +// Locks the vsock Context ID acquisition. +var vsockIDAcquistionLock = sync.Mutex{} + type monitorHook func(m *qmp.Monitor) error // qemuLoad creates a Qemu instance from the supplied InstanceArgs. @@ -1143,14 +1149,27 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error { revert.Add(func() { _ = d.unmount() }) - volatileSet := make(map[string]string) + // Define a set of files to open and pass their file descriptors to QEMU command. + fdFiles := make([]*os.File, 0) + + // Ensure passed files are closed after start has returned (either because QEMU has started or on error). + defer func() { + for _, file := range fdFiles { + _ = file.Close() + } + }() // New or existing vsock ID from volatile. - vsockID, err := d.nextVsockID() + vsockID, vsockF, err := d.nextVsockID() if err != nil { return err } + // Add allocated QEMU vhost file descriptor. + vsockFD := d.addFileDescriptor(&fdFiles, vsockF) + + volatileSet := make(map[string]string) + // Update vsock ID in volatile if needed for recovery (do this before UpdateBackupFile() call). oldVsockID := d.localConfig["volatile.vsock_id"] newVsockID := strconv.FormatUint(uint64(vsockID), 10) @@ -1353,16 +1372,6 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error { return err } - // Define a set of files to open and pass their file descriptors to qemu command. - fdFiles := make([]*os.File, 0) - - // Ensure passed files are closed after start has returned (either because qemu has started or on error). - defer func() { - for _, file := range fdFiles { - _ = file.Close() - } - }() - // Snapshot if needed. snapName, expiry, err := d.getStartupSnapNameAndExpiry(d) if err != nil { @@ -1409,7 +1418,7 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error { } // Generate the QEMU configuration. - confFile, monHooks, err := d.generateQemuConfigFile(cpuInfo, mountInfo, qemuBus, devConfs, &fdFiles) + confFile, monHooks, err := d.generateQemuConfigFile(cpuInfo, mountInfo, qemuBus, vsockFD, devConfs, &fdFiles) if err != nil { op.Done(err) return err @@ -2825,7 +2834,7 @@ func (d *qemu) deviceBootPriorities() (map[string]int, error) { // generateQemuConfigFile writes the qemu config file and returns its location. // It writes the config file inside the VM's log path. -func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePools.MountInfo, busName string, devConfs []*deviceConfig.RunConfig, fdFiles *[]*os.File) (string, []monitorHook, error) { +func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePools.MountInfo, busName string, vsockFD int, devConfs []*deviceConfig.RunConfig, fdFiles *[]*os.File) (string, []monitorHook, error) { var monHooks []monitorHook cfg := qemuBase(&qemuBaseOpts{d.Architecture()}) @@ -2953,6 +2962,7 @@ func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePo devAddr: devAddr, multifunction: multi, }, + vsockFD: vsockFD, vsockID: vsockID, } @@ -7450,74 +7460,100 @@ func (d *qemu) getVsockID() (uint32, error) { return uint32(vsockID), nil } -// freeVsockID returns true if the given vsockID is not yet acquired. -func (d *qemu) freeVsockID(vsockID uint32) bool { - c, err := lxdvsock.Dial(vsockID, shared.HTTPSDefaultPort) +// acquireVsockID tries to occupy the given vsock Context ID. +// If the ID is free it returns the corresponding file handle. +func (d *qemu) acquireVsockID(vsockID uint32) (*os.File, error) { + vsockIDAcquistionLock.Lock() + defer vsockIDAcquistionLock.Unlock() + + revert := revert.New() + defer revert.Fail() + + f, err := os.OpenFile("/dev/vhost-vsock", syscall.O_RDWR, 0) if err != nil { - var unixErrno unix.Errno + return nil, fmt.Errorf("Failed to open vhost socket: %w", err) + } - if !errors.As(err, &unixErrno) { - return false - } + revert.Add(func() { _ = f.Close() }) - if unixErrno != unix.ENODEV { - // Skip the vsockID if another syscall error was encountered. - return false + // 0x4008AF60 = VHOST_VSOCK_SET_GUEST_CID = _IOW(VHOST_VIRTIO, 0x60, __u64) + _, _, errno := unix.Syscall(unix.SYS_IOCTL, f.Fd(), 0x4008AF60, uintptr(unsafe.Pointer(&vsockID))) + if errno != 0 { + if !errors.Is(errno, unix.EADDRINUSE) { + return nil, fmt.Errorf("Failed ioctl syscall to vhost socket: %q", errno.Error()) } - // The syscall to the vsock device returned "no such device". - // This means the address (Context ID) is free. - return true + // vsock Context ID is already in use. + return nil, nil } - // Address is already in use. - c.Close() - return false + revert.Success() + return f, nil } -// nextVsockID returns the next free vsock Context ID for the VM. -// It tries to acquire one randomly until the timeout exceeds. -func (d *qemu) nextVsockID() (uint32, error) { +// acquireExistingVsockID tries to acquire an already existing vsock Context ID from volatile. +// It returns both the acquired ID and opened vsock file handle for QEMU. +func (d *qemu) acquireExistingVsockID() (uint32, *os.File, error) { + vsockID, err := d.getVsockID() + if err != nil { + return 0, nil, err + } + + // Check if the vsockID from last VM start is still not acquired in case the VM was stopped. + f, err := d.acquireVsockID(vsockID) + if err != nil { + return 0, nil, err + } + + return vsockID, f, nil +} + +// nextVsockID tries to acquire the next free vsock Context ID for the VM. +// It returns both the acquired ID and opened vsock file handle for QEMU. +func (d *qemu) nextVsockID() (uint32, *os.File, error) { // Check if vsock ID from last VM start is present in volatile, then use that. // This allows a running VM to be recovered after DB record deletion and that an agent connection still works // after the VM's instance ID has changed. // Continue in case of error since the caller requires a valid vsockID in any case. - vsockID, err := d.getVsockID() - if err == nil { - // Check if the vsock ID from last VM start is still not acquired in case the VM was stopped. - if d.freeVsockID(vsockID) { - return vsockID, nil - } + vsockID, vsockF, _ := d.acquireExistingVsockID() + if vsockID != 0 && vsockF != nil { + return vsockID, vsockF, nil } + // Ignore the error from before and start to acquire a new Context ID. instanceUUID := uuid.Parse(d.localConfig["volatile.uuid"]) if instanceUUID == nil { - return 0, fmt.Errorf("Failed to parse instance UUID from volatile.uuid") + return 0, nil, fmt.Errorf("Failed to parse instance UUID from volatile.uuid") } r, err := util.GetStableRandomGenerator(instanceUUID.String()) if err != nil { - return 0, fmt.Errorf("Failed generating stable random seed from instance UUID %q: %w", instanceUUID, err) + return 0, nil, fmt.Errorf("Failed generating stable random seed from instance UUID %q: %w", instanceUUID, err) } - timeout := 5 * time.Second + timeout := time.Now().Add(5 * time.Second) // Try to find a new Context ID. - for start := time.Now(); time.Since(start) <= timeout; { + for { + if time.Now().After(timeout) { + return 0, nil, fmt.Errorf("Timeout exceeded whilst trying to acquire the next vsock Context ID") + } + candidateVsockID := r.Uint32() if d.reservedVsockID(candidateVsockID) { continue } - if d.freeVsockID(candidateVsockID) { - return candidateVsockID, nil + vsockF, err := d.acquireVsockID(candidateVsockID) + if err != nil { + return 0, nil, err } - continue + if vsockF != nil { + return candidateVsockID, vsockF, nil + } } - - return 0, fmt.Errorf("Timeout exceeded whilst trying to acquire the next vsock Context ID") } // InitPID returns the instance's current process ID. diff --git a/lxd/instance/drivers/driver_qemu_config_test.go b/lxd/instance/drivers/driver_qemu_config_test.go index a50369722a3a..dcacfb6934ea 100644 --- a/lxd/instance/drivers/driver_qemu_config_test.go +++ b/lxd/instance/drivers/driver_qemu_config_test.go @@ -281,7 +281,7 @@ func TestQemuConfigTemplates(t *testing.T) { opts qemuVsockOpts expected string }{{ - qemuVsockOpts{qemuDevOpts{"pcie", "qemu_pcie0", "00.4", true}, 14}, + qemuVsockOpts{qemuDevOpts{"pcie", "qemu_pcie0", "00.4", true}, 4, 14}, `# Vsock [device "qemu_vsock"] driver = "vhost-vsock-pci" @@ -289,13 +289,15 @@ func TestQemuConfigTemplates(t *testing.T) { addr = "00.4" multifunction = "on" guest-cid = "14" + vhostfd = "4" `, }, { - qemuVsockOpts{qemuDevOpts{"ccw", "qemu_pcie0", "00.4", false}, 3}, + qemuVsockOpts{qemuDevOpts{"ccw", "qemu_pcie0", "00.4", false}, 4, 3}, `# Vsock [device "qemu_vsock"] driver = "vhost-vsock-ccw" guest-cid = "3" + vhostfd = "4" `, }} for _, tc := range testCases { diff --git a/lxd/instance/drivers/driver_qemu_templates.go b/lxd/instance/drivers/driver_qemu_templates.go index b559d450a1b9..adff4cf23ac4 100644 --- a/lxd/instance/drivers/driver_qemu_templates.go +++ b/lxd/instance/drivers/driver_qemu_templates.go @@ -326,6 +326,7 @@ func qemuSEV(opts *qemuSevOpts) []cfgSection { type qemuVsockOpts struct { dev qemuDevOpts + vsockFD int vsockID uint32 } @@ -340,7 +341,8 @@ func qemuVsock(opts *qemuVsockOpts) []cfgSection { name: `device "qemu_vsock"`, comment: "Vsock", entries: append(qemuDeviceEntries(&entriesOpts), - cfgEntry{key: "guest-cid", value: fmt.Sprintf("%d", opts.vsockID)}), + cfgEntry{key: "guest-cid", value: fmt.Sprintf("%d", opts.vsockID)}, + cfgEntry{key: "vhostfd", value: fmt.Sprintf("%d", opts.vsockFD)}), }} }