Skip to content

Commit

Permalink
Merge pull request #11993 from roosterfish/fix_vsock_startup_race
Browse files Browse the repository at this point in the history
QEMU: Occupy vsock Context ID through syscall
  • Loading branch information
tomponline authored Jul 13, 2023
2 parents f1bd9bb + 293f36f commit 1d792a6
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 61 deletions.
147 changes: 89 additions & 58 deletions lxd/instance/drivers/driver_qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"strconv"
"strings"
"time"
"unsafe"

"github.com/flosch/pongo2"
"github.com/gorilla/websocket"
Expand Down Expand Up @@ -1143,14 +1144,27 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error {

revert.Add(func() { _ = d.unmount() })

volatileSet := make(map[string]string)
// Define a set of files to open and pass their file descriptors to QEMU command.
fdFiles := make([]*os.File, 0)

// Ensure passed files are closed after start has returned (either because QEMU has started or on error).
defer func() {
for _, file := range fdFiles {
_ = file.Close()
}
}()

// New or existing vsock ID from volatile.
vsockID, err := d.nextVsockID()
vsockID, vsockF, err := d.nextVsockID()
if err != nil {
return err
}

// Add allocated QEMU vhost file descriptor.
vsockFD := d.addFileDescriptor(&fdFiles, vsockF)

volatileSet := make(map[string]string)

// Update vsock ID in volatile if needed for recovery (do this before UpdateBackupFile() call).
oldVsockID := d.localConfig["volatile.vsock_id"]
newVsockID := strconv.FormatUint(uint64(vsockID), 10)
Expand Down Expand Up @@ -1353,16 +1367,6 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error {
return err
}

// Define a set of files to open and pass their file descriptors to qemu command.
fdFiles := make([]*os.File, 0)

// Ensure passed files are closed after start has returned (either because qemu has started or on error).
defer func() {
for _, file := range fdFiles {
_ = file.Close()
}
}()

// Snapshot if needed.
snapName, expiry, err := d.getStartupSnapNameAndExpiry(d)
if err != nil {
Expand Down Expand Up @@ -1409,7 +1413,7 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error {
}

// Generate the QEMU configuration.
confFile, monHooks, err := d.generateQemuConfigFile(cpuInfo, mountInfo, qemuBus, devConfs, &fdFiles)
confFile, monHooks, err := d.generateQemuConfigFile(cpuInfo, mountInfo, qemuBus, vsockFD, devConfs, &fdFiles)
if err != nil {
op.Done(err)
return err
Expand Down Expand Up @@ -2825,7 +2829,7 @@ func (d *qemu) deviceBootPriorities() (map[string]int, error) {

// generateQemuConfigFile writes the qemu config file and returns its location.
// It writes the config file inside the VM's log path.
func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePools.MountInfo, busName string, devConfs []*deviceConfig.RunConfig, fdFiles *[]*os.File) (string, []monitorHook, error) {
func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePools.MountInfo, busName string, vsockFD int, devConfs []*deviceConfig.RunConfig, fdFiles *[]*os.File) (string, []monitorHook, error) {
var monHooks []monitorHook

cfg := qemuBase(&qemuBaseOpts{d.Architecture()})
Expand Down Expand Up @@ -2953,6 +2957,7 @@ func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePo
devAddr: devAddr,
multifunction: multi,
},
vsockFD: vsockFD,
vsockID: vsockID,
}

Expand Down Expand Up @@ -7434,90 +7439,116 @@ func (d *qemu) reservedVsockID(vsockID uint32) bool {
// getVsockID returns the vsock Context ID for the VM.
func (d *qemu) getVsockID() (uint32, error) {
existingVsockID, ok := d.localConfig["volatile.vsock_id"]
if ok {
vsockID, err := strconv.ParseUint(existingVsockID, 10, 32)
if err != nil {
return 0, fmt.Errorf("Failed to parse volatile.vsock_id: %q: %w", existingVsockID, err)
}
if !ok {
return 0, fmt.Errorf("Context ID not set in volatile.vsock_id")
}

if d.reservedVsockID(uint32(vsockID)) {
return 0, fmt.Errorf("Failed to use reserved vsock Context ID: %q", vsockID)
}
vsockID, err := strconv.ParseUint(existingVsockID, 10, 32)
if err != nil {
return 0, fmt.Errorf("Failed to parse volatile.vsock_id: %q: %w", existingVsockID, err)
}

return uint32(vsockID), nil
if d.reservedVsockID(uint32(vsockID)) {
return 0, fmt.Errorf("Failed to use reserved vsock Context ID: %q", vsockID)
}

return 0, fmt.Errorf("Context ID not set in volatile.vsock_id")
return uint32(vsockID), nil
}

// freeVsockID returns true if the given vsockID is not yet acquired.
func (d *qemu) freeVsockID(vsockID uint32) bool {
c, err := lxdvsock.Dial(vsockID, shared.HTTPSDefaultPort)
// acquireVsockID tries to occupy the given vsock Context ID.
// If the ID is free it returns the corresponding file handle.
func (d *qemu) acquireVsockID(vsockID uint32) (*os.File, error) {
revert := revert.New()
defer revert.Fail()

vsockF, err := os.OpenFile("/dev/vhost-vsock", os.O_RDWR, 0)
if err != nil {
var unixErrno unix.Errno
return nil, fmt.Errorf("Failed to open vhost socket: %w", err)
}

if !errors.As(err, &unixErrno) {
return false
}
revert.Add(func() { _ = vsockF.Close() })

if unixErrno != unix.ENODEV {
// Skip the vsockID if another syscall error was encountered.
return false
// The vsock Context ID cannot be supplied as type uint32.
vsockIDInt := int(vsockID)

// 0x4008AF60 = VHOST_VSOCK_SET_GUEST_CID = _IOW(VHOST_VIRTIO, 0x60, __u64)
_, _, errno := unix.Syscall(unix.SYS_IOCTL, vsockF.Fd(), 0x4008AF60, uintptr(unsafe.Pointer(&vsockIDInt)))
if errno != 0 {
if !errors.Is(errno, unix.EADDRINUSE) {
return nil, fmt.Errorf("Failed ioctl syscall to vhost socket: %q", errno.Error())
}

// The syscall to the vsock device returned "no such device".
// This means the address (Context ID) is free.
return true
// vsock Context ID is already in use.
return nil, nil
}

// Address is already in use.
c.Close()
return false
revert.Success()
return vsockF, nil
}

// acquireExistingVsockID tries to acquire an already existing vsock Context ID from volatile.
// It returns both the acquired ID and opened vsock file handle for QEMU.
func (d *qemu) acquireExistingVsockID() (uint32, *os.File, error) {
vsockID, err := d.getVsockID()
if err != nil {
return 0, nil, err
}

// Check if the vsockID from last VM start is still not acquired in case the VM was stopped.
f, err := d.acquireVsockID(vsockID)
if err != nil {
return 0, nil, err
}

return vsockID, f, nil
}

// nextVsockID returns the next free vsock Context ID for the VM.
// It tries to acquire one randomly until the timeout exceeds.
func (d *qemu) nextVsockID() (uint32, error) {
// nextVsockID tries to acquire the next free vsock Context ID for the VM.
// It returns both the acquired ID and opened vsock file handle for QEMU.
func (d *qemu) nextVsockID() (uint32, *os.File, error) {
// Check if vsock ID from last VM start is present in volatile, then use that.
// This allows a running VM to be recovered after DB record deletion and that an agent connection still works
// after the VM's instance ID has changed.
// Continue in case of error since the caller requires a valid vsockID in any case.
vsockID, err := d.getVsockID()
if err == nil {
// Check if the vsock ID from last VM start is still not acquired in case the VM was stopped.
if d.freeVsockID(vsockID) {
return vsockID, nil
}
vsockID, vsockF, _ := d.acquireExistingVsockID()
if vsockID != 0 && vsockF != nil {
return vsockID, vsockF, nil
}

// Ignore the error from before and start to acquire a new Context ID.
instanceUUID := uuid.Parse(d.localConfig["volatile.uuid"])
if instanceUUID == nil {
return 0, fmt.Errorf("Failed to parse instance UUID from volatile.uuid")
return 0, nil, fmt.Errorf("Failed to parse instance UUID from volatile.uuid")
}

r, err := util.GetStableRandomGenerator(instanceUUID.String())
if err != nil {
return 0, fmt.Errorf("Failed generating stable random seed from instance UUID %q: %w", instanceUUID, err)
return 0, nil, fmt.Errorf("Failed generating stable random seed from instance UUID %q: %w", instanceUUID, err)
}

timeout := 5 * time.Second
timeout := time.Now().Add(5 * time.Second)

// Try to find a new Context ID.
for start := time.Now(); time.Since(start) <= timeout; {
for {
if time.Now().After(timeout) {
return 0, nil, fmt.Errorf("Timeout exceeded whilst trying to acquire the next vsock Context ID")
}

candidateVsockID := r.Uint32()

if d.reservedVsockID(candidateVsockID) {
continue
}

if d.freeVsockID(candidateVsockID) {
return candidateVsockID, nil
vsockF, err := d.acquireVsockID(candidateVsockID)
if err != nil {
return 0, nil, err
}

continue
if vsockF != nil {
return candidateVsockID, vsockF, nil
}
}

return 0, fmt.Errorf("Timeout exceeded whilst trying to acquire the next vsock Context ID")
}

// InitPID returns the instance's current process ID.
Expand Down
6 changes: 4 additions & 2 deletions lxd/instance/drivers/driver_qemu_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,21 +281,23 @@ func TestQemuConfigTemplates(t *testing.T) {
opts qemuVsockOpts
expected string
}{{
qemuVsockOpts{qemuDevOpts{"pcie", "qemu_pcie0", "00.4", true}, 14},
qemuVsockOpts{qemuDevOpts{"pcie", "qemu_pcie0", "00.4", true}, 4, 14},
`# Vsock
[device "qemu_vsock"]
driver = "vhost-vsock-pci"
bus = "qemu_pcie0"
addr = "00.4"
multifunction = "on"
guest-cid = "14"
vhostfd = "4"
`,
}, {
qemuVsockOpts{qemuDevOpts{"ccw", "qemu_pcie0", "00.4", false}, 3},
qemuVsockOpts{qemuDevOpts{"ccw", "qemu_pcie0", "00.4", false}, 4, 3},
`# Vsock
[device "qemu_vsock"]
driver = "vhost-vsock-ccw"
guest-cid = "3"
vhostfd = "4"
`,
}}
for _, tc := range testCases {
Expand Down
4 changes: 3 additions & 1 deletion lxd/instance/drivers/driver_qemu_templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ func qemuSEV(opts *qemuSevOpts) []cfgSection {

type qemuVsockOpts struct {
dev qemuDevOpts
vsockFD int
vsockID uint32
}

Expand All @@ -340,7 +341,8 @@ func qemuVsock(opts *qemuVsockOpts) []cfgSection {
name: `device "qemu_vsock"`,
comment: "Vsock",
entries: append(qemuDeviceEntries(&entriesOpts),
cfgEntry{key: "guest-cid", value: fmt.Sprintf("%d", opts.vsockID)}),
cfgEntry{key: "guest-cid", value: fmt.Sprintf("%d", opts.vsockID)},
cfgEntry{key: "vhostfd", value: fmt.Sprintf("%d", opts.vsockFD)}),
}}
}

Expand Down

0 comments on commit 1d792a6

Please sign in to comment.