Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QEMU: Occupy vsock Context ID through syscall #11993

Merged
merged 2 commits into from
Jul 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 89 additions & 58 deletions lxd/instance/drivers/driver_qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"strconv"
"strings"
"time"
"unsafe"

"github.com/flosch/pongo2"
"github.com/gorilla/websocket"
Expand Down Expand Up @@ -1143,14 +1144,27 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error {

revert.Add(func() { _ = d.unmount() })

volatileSet := make(map[string]string)
// Define a set of files to open and pass their file descriptors to QEMU command.
fdFiles := make([]*os.File, 0)

// Ensure passed files are closed after start has returned (either because QEMU has started or on error).
defer func() {
for _, file := range fdFiles {
_ = file.Close()
}
}()

// New or existing vsock ID from volatile.
vsockID, err := d.nextVsockID()
vsockID, vsockF, err := d.nextVsockID()
if err != nil {
return err
}

// Add allocated QEMU vhost file descriptor.
vsockFD := d.addFileDescriptor(&fdFiles, vsockF)

volatileSet := make(map[string]string)

// Update vsock ID in volatile if needed for recovery (do this before UpdateBackupFile() call).
oldVsockID := d.localConfig["volatile.vsock_id"]
newVsockID := strconv.FormatUint(uint64(vsockID), 10)
Expand Down Expand Up @@ -1353,16 +1367,6 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error {
return err
}

// Define a set of files to open and pass their file descriptors to qemu command.
fdFiles := make([]*os.File, 0)

// Ensure passed files are closed after start has returned (either because qemu has started or on error).
defer func() {
for _, file := range fdFiles {
_ = file.Close()
}
}()

// Snapshot if needed.
snapName, expiry, err := d.getStartupSnapNameAndExpiry(d)
if err != nil {
Expand Down Expand Up @@ -1409,7 +1413,7 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error {
}

// Generate the QEMU configuration.
confFile, monHooks, err := d.generateQemuConfigFile(cpuInfo, mountInfo, qemuBus, devConfs, &fdFiles)
confFile, monHooks, err := d.generateQemuConfigFile(cpuInfo, mountInfo, qemuBus, vsockFD, devConfs, &fdFiles)
if err != nil {
op.Done(err)
return err
Expand Down Expand Up @@ -2825,7 +2829,7 @@ func (d *qemu) deviceBootPriorities() (map[string]int, error) {

// generateQemuConfigFile writes the qemu config file and returns its location.
// It writes the config file inside the VM's log path.
func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePools.MountInfo, busName string, devConfs []*deviceConfig.RunConfig, fdFiles *[]*os.File) (string, []monitorHook, error) {
func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePools.MountInfo, busName string, vsockFD int, devConfs []*deviceConfig.RunConfig, fdFiles *[]*os.File) (string, []monitorHook, error) {
var monHooks []monitorHook

cfg := qemuBase(&qemuBaseOpts{d.Architecture()})
Expand Down Expand Up @@ -2953,6 +2957,7 @@ func (d *qemu) generateQemuConfigFile(cpuInfo *cpuTopology, mountInfo *storagePo
devAddr: devAddr,
multifunction: multi,
},
vsockFD: vsockFD,
vsockID: vsockID,
}

Expand Down Expand Up @@ -7434,90 +7439,116 @@ func (d *qemu) reservedVsockID(vsockID uint32) bool {
// getVsockID returns the vsock Context ID for the VM.
func (d *qemu) getVsockID() (uint32, error) {
existingVsockID, ok := d.localConfig["volatile.vsock_id"]
if ok {
vsockID, err := strconv.ParseUint(existingVsockID, 10, 32)
if err != nil {
return 0, fmt.Errorf("Failed to parse volatile.vsock_id: %q: %w", existingVsockID, err)
}
if !ok {
return 0, fmt.Errorf("Context ID not set in volatile.vsock_id")
}

if d.reservedVsockID(uint32(vsockID)) {
return 0, fmt.Errorf("Failed to use reserved vsock Context ID: %q", vsockID)
}
vsockID, err := strconv.ParseUint(existingVsockID, 10, 32)
if err != nil {
return 0, fmt.Errorf("Failed to parse volatile.vsock_id: %q: %w", existingVsockID, err)
}

return uint32(vsockID), nil
if d.reservedVsockID(uint32(vsockID)) {
return 0, fmt.Errorf("Failed to use reserved vsock Context ID: %q", vsockID)
}

return 0, fmt.Errorf("Context ID not set in volatile.vsock_id")
return uint32(vsockID), nil
}

// freeVsockID returns true if the given vsockID is not yet acquired.
func (d *qemu) freeVsockID(vsockID uint32) bool {
c, err := lxdvsock.Dial(vsockID, shared.HTTPSDefaultPort)
// acquireVsockID tries to occupy the given vsock Context ID.
// If the ID is free it returns the corresponding file handle.
func (d *qemu) acquireVsockID(vsockID uint32) (*os.File, error) {
revert := revert.New()
defer revert.Fail()

vsockF, err := os.OpenFile("/dev/vhost-vsock", os.O_RDWR, 0)
if err != nil {
var unixErrno unix.Errno
return nil, fmt.Errorf("Failed to open vhost socket: %w", err)
}

if !errors.As(err, &unixErrno) {
return false
}
revert.Add(func() { _ = vsockF.Close() })

if unixErrno != unix.ENODEV {
// Skip the vsockID if another syscall error was encountered.
return false
// The vsock Context ID cannot be supplied as type uint32.
vsockIDInt := int(vsockID)

// 0x4008AF60 = VHOST_VSOCK_SET_GUEST_CID = _IOW(VHOST_VIRTIO, 0x60, __u64)
_, _, errno := unix.Syscall(unix.SYS_IOCTL, vsockF.Fd(), 0x4008AF60, uintptr(unsafe.Pointer(&vsockIDInt)))
if errno != 0 {
if !errors.Is(errno, unix.EADDRINUSE) {
return nil, fmt.Errorf("Failed ioctl syscall to vhost socket: %q", errno.Error())
}

// The syscall to the vsock device returned "no such device".
// This means the address (Context ID) is free.
return true
// vsock Context ID is already in use.
return nil, nil
}

// Address is already in use.
c.Close()
return false
revert.Success()
return vsockF, nil
}

// acquireExistingVsockID tries to acquire an already existing vsock Context ID from volatile.
// It returns both the acquired ID and opened vsock file handle for QEMU.
func (d *qemu) acquireExistingVsockID() (uint32, *os.File, error) {
vsockID, err := d.getVsockID()
if err != nil {
return 0, nil, err
}

// Check if the vsockID from last VM start is still not acquired in case the VM was stopped.
f, err := d.acquireVsockID(vsockID)
if err != nil {
return 0, nil, err
}

return vsockID, f, nil
}

// nextVsockID returns the next free vsock Context ID for the VM.
// It tries to acquire one randomly until the timeout exceeds.
func (d *qemu) nextVsockID() (uint32, error) {
// nextVsockID tries to acquire the next free vsock Context ID for the VM.
// It returns both the acquired ID and opened vsock file handle for QEMU.
func (d *qemu) nextVsockID() (uint32, *os.File, error) {
// Check if vsock ID from last VM start is present in volatile, then use that.
// This allows a running VM to be recovered after DB record deletion and that an agent connection still works
// after the VM's instance ID has changed.
// Continue in case of error since the caller requires a valid vsockID in any case.
vsockID, err := d.getVsockID()
if err == nil {
// Check if the vsock ID from last VM start is still not acquired in case the VM was stopped.
if d.freeVsockID(vsockID) {
return vsockID, nil
}
vsockID, vsockF, _ := d.acquireExistingVsockID()
if vsockID != 0 && vsockF != nil {
return vsockID, vsockF, nil
}

// Ignore the error from before and start to acquire a new Context ID.
instanceUUID := uuid.Parse(d.localConfig["volatile.uuid"])
if instanceUUID == nil {
return 0, fmt.Errorf("Failed to parse instance UUID from volatile.uuid")
return 0, nil, fmt.Errorf("Failed to parse instance UUID from volatile.uuid")
}

r, err := util.GetStableRandomGenerator(instanceUUID.String())
if err != nil {
return 0, fmt.Errorf("Failed generating stable random seed from instance UUID %q: %w", instanceUUID, err)
return 0, nil, fmt.Errorf("Failed generating stable random seed from instance UUID %q: %w", instanceUUID, err)
}

timeout := 5 * time.Second
timeout := time.Now().Add(5 * time.Second)

// Try to find a new Context ID.
for start := time.Now(); time.Since(start) <= timeout; {
for {
if time.Now().After(timeout) {
return 0, nil, fmt.Errorf("Timeout exceeded whilst trying to acquire the next vsock Context ID")
}

candidateVsockID := r.Uint32()

if d.reservedVsockID(candidateVsockID) {
continue
}

if d.freeVsockID(candidateVsockID) {
return candidateVsockID, nil
vsockF, err := d.acquireVsockID(candidateVsockID)
if err != nil {
return 0, nil, err
}

continue
if vsockF != nil {
return candidateVsockID, vsockF, nil
}
}

return 0, fmt.Errorf("Timeout exceeded whilst trying to acquire the next vsock Context ID")
}

// InitPID returns the instance's current process ID.
Expand Down
6 changes: 4 additions & 2 deletions lxd/instance/drivers/driver_qemu_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,21 +281,23 @@ func TestQemuConfigTemplates(t *testing.T) {
opts qemuVsockOpts
expected string
}{{
qemuVsockOpts{qemuDevOpts{"pcie", "qemu_pcie0", "00.4", true}, 14},
qemuVsockOpts{qemuDevOpts{"pcie", "qemu_pcie0", "00.4", true}, 4, 14},
`# Vsock
[device "qemu_vsock"]
driver = "vhost-vsock-pci"
bus = "qemu_pcie0"
addr = "00.4"
multifunction = "on"
guest-cid = "14"
vhostfd = "4"
`,
}, {
qemuVsockOpts{qemuDevOpts{"ccw", "qemu_pcie0", "00.4", false}, 3},
qemuVsockOpts{qemuDevOpts{"ccw", "qemu_pcie0", "00.4", false}, 4, 3},
`# Vsock
[device "qemu_vsock"]
driver = "vhost-vsock-ccw"
guest-cid = "3"
vhostfd = "4"
`,
}}
for _, tc := range testCases {
Expand Down
4 changes: 3 additions & 1 deletion lxd/instance/drivers/driver_qemu_templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ func qemuSEV(opts *qemuSevOpts) []cfgSection {

type qemuVsockOpts struct {
dev qemuDevOpts
vsockFD int
vsockID uint32
}

Expand All @@ -340,7 +341,8 @@ func qemuVsock(opts *qemuVsockOpts) []cfgSection {
name: `device "qemu_vsock"`,
comment: "Vsock",
entries: append(qemuDeviceEntries(&entriesOpts),
cfgEntry{key: "guest-cid", value: fmt.Sprintf("%d", opts.vsockID)}),
cfgEntry{key: "guest-cid", value: fmt.Sprintf("%d", opts.vsockID)},
cfgEntry{key: "vhostfd", value: fmt.Sprintf("%d", opts.vsockFD)}),
}}
}

Expand Down