From 2db71d66896a007a4f3d1560446d91f1c4e82ccb Mon Sep 17 00:00:00 2001 From: Albin Kerouanton Date: Wed, 24 Dec 2025 08:38:15 +0100 Subject: [PATCH] Use virtiofs to support bind-mounts Transform bind mounts received by the shim into virtiofs mounts, and update the OCI spec to let the VM-side shim mount from these virtiofs. File bind-mounts are supported by mounting their parent directory into the VM. A docs file is added to explain how it works, and the security implications of file bind-mounts. Signed-off-by: Albin Kerouanton --- cmd/vminitd/bind_mounts.go | 77 ++++++++++++++ cmd/vminitd/bind_mounts_test.go | 105 +++++++++++++++++++ cmd/vminitd/main.go | 6 ++ docs/bind-mounts.md | 58 +++++++++++ internal/shim/task/mount.go | 71 +++++++++++++ internal/shim/task/mount_test.go | 173 +++++++++++++++++++++++++++++++ internal/shim/task/service.go | 34 ++---- 7 files changed, 497 insertions(+), 27 deletions(-) create mode 100644 cmd/vminitd/bind_mounts.go create mode 100644 cmd/vminitd/bind_mounts_test.go create mode 100644 docs/bind-mounts.md create mode 100644 internal/shim/task/mount_test.go diff --git a/cmd/vminitd/bind_mounts.go b/cmd/vminitd/bind_mounts.go new file mode 100644 index 0000000..778a4ed --- /dev/null +++ b/cmd/vminitd/bind_mounts.go @@ -0,0 +1,77 @@ +//go:build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package main + +import ( + "context" + "fmt" + "os" + "strings" + + "github.com/containerd/containerd/v2/core/mount" + "github.com/containerd/log" +) + +type bindMounts []bindMount + +type bindMount struct { + tag string + target string +} + +func (b *bindMounts) String() string { + ss := make([]string, 0, len(*b)) + for _, bm := range *b { + ss = append(ss, bm.tag+":"+bm.target) + } + return strings.Join(ss, ",") +} + +func (b *bindMounts) Set(value string) error { + tag, target, ok := strings.Cut(value, ":") + if !ok || len(tag) == 0 || len(target) == 0 { + return fmt.Errorf("invalid bind mount %q: expected format: tag:target", value) + } + *b = append(*b, bindMount{ + tag: tag, + target: target, + }) + return nil +} + +func (b *bindMounts) mountAll(ctx context.Context) error { + for _, bm := range *b { + log.G(ctx).WithFields(log.Fields{ + "tag": bm.tag, + "target": bm.target, + }).Info("mounting virtiofs filesystem") + + if err := os.MkdirAll(bm.target, 0700); err != nil { + return fmt.Errorf("failed to create bind mount target directory %s: %w", bm.target, err) + } + if err := mount.All([]mount.Mount{{ + Type: "virtiofs", + Source: bm.tag, + Target: bm.target, + }}, "/"); err != nil { + return err + } + } + return nil +} diff --git a/cmd/vminitd/bind_mounts_test.go b/cmd/vminitd/bind_mounts_test.go new file mode 100644 index 0000000..b861d1a --- /dev/null +++ b/cmd/vminitd/bind_mounts_test.go @@ -0,0 +1,105 @@ +//go:build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseBindMounts(t *testing.T) { + testcases := []struct { + name string + inputs []string + want bindMounts + wantStr string + }{ + { + name: "single bind mount", + inputs: []string{"foo:/mnt/foo"}, + want: bindMounts{ + {tag: "foo", target: "/mnt/foo"}, + }, + wantStr: "foo:/mnt/foo", + }, + { + name: "multiple bind mounts", + inputs: []string{"foo:/mnt/foo", "bar:/mnt/bar"}, + want: bindMounts{ + {tag: "foo", target: "/mnt/foo"}, + {tag: "bar", target: "/mnt/bar"}, + }, + wantStr: "foo:/mnt/foo,bar:/mnt/bar", + }, + { + name: "bind mount with nested path", + inputs: []string{"config:/mnt/etc/config"}, + want: bindMounts{ + {tag: "config", target: "/mnt/etc/config"}, + }, + wantStr: "config:/mnt/etc/config", + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + var b bindMounts + for _, input := range tc.inputs { + err := b.Set(input) + assert.NoError(t, err) + } + assert.Equal(t, tc.want, b) + // Try to convert back the parsed struct into a string to check if it matches the expected output. 
+ assert.Equal(t, tc.wantStr, b.String()) + }) + } +} + +func TestParseBindMountsError(t *testing.T) { + testcases := []struct { + name string + input string + }{ + { + name: "missing target", + input: "foo", + }, + { + name: "empty tag", + input: ":foo", + }, + { + name: "empty target", + input: "foo:", + }, + { + name: "empty string", + input: "", + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + var b bindMounts + err := b.Set(tc.input) + assert.Error(t, err) + }) + } +} diff --git a/cmd/vminitd/main.go b/cmd/vminitd/main.go index c2a95d1..85d6cb7 100644 --- a/cmd/vminitd/main.go +++ b/cmd/vminitd/main.go @@ -64,6 +64,7 @@ func main() { flag.IntVar(&config.StreamPort, "vsock-stream-port", 1025, "vsock port to listen for streams on") flag.IntVar(&config.VSockContextID, "vsock-cid", 0, "vsock context ID for vsock listen") flag.Var(&config.Networks, "network", "network interfaces to set up") + flag.Var(&config.Mounts, "mount", "mounts to set up") args := os.Args[1:] // Strip "tsi_hijack" added by libkrun if len(args) > 0 && args[0] == "tsi_hijack" { @@ -193,6 +194,10 @@ func systemInit(ctx context.Context, config ServiceConfig) (func(context.Context return nil, err } + if err := config.Mounts.mountAll(ctx); err != nil { + return nil, err + } + config.Shutdown.RegisterCallback(func(ctx context.Context) error { return dhcpReleaser() }) @@ -263,6 +268,7 @@ type ServiceConfig struct { RPCPort int StreamPort int Networks networks + Mounts bindMounts Shutdown shutdown.Service Debug bool } diff --git a/docs/bind-mounts.md b/docs/bind-mounts.md new file mode 100644 index 0000000..e208496 --- /dev/null +++ b/docs/bind-mounts.md @@ -0,0 +1,58 @@ +# Bind Mounts + +Nerdbox supports bind mounts from the host into containers running inside the +VM. Bind mounts are implemented using virtiofs to share host paths with the VM, +which then bind-mounts them into containers. 
+ +## How It Works + +When a bind mount is specified in the container spec: + +1. The shim transforms the bind mount into a virtiofs share +2. The host path is shared with the VM via virtiofs with a unique tag (`bind-{hash}`, where `{hash}` is derived from the mount's destination path in the container) +3. Inside the VM, virtiofs is mounted at a temporary location (`/mnt/bind-{hash}`) +4. The container runtime bind-mounts from that location into the container + +## Directory Bind Mounts + +For directory bind mounts, the directory is shared directly via virtiofs: + +``` +Host: /host/data/ → virtiofs share → VM: /mnt/bind-{hash}/ → Container: /container/data/ +``` + +## File Bind Mounts + +When bind-mounting a single file, nerdbox shares the **parent directory** of +the file via virtiofs, then bind-mounts the specific file into the container: + +``` +Host: /host/config/app.yaml + ↓ +virtiofs shares: /host/config/ (parent directory) + ↓ +VM: /mnt/bind-{hash}/app.yaml + ↓ +Container: /container/app.yaml +``` + +### Security Implications + +When using file bind mounts, be aware that the **entire parent directory** is +exposed to the VM, not just the single file. 
This has security implications if +the VM is considered a security boundary: + +- All files in the parent directory become accessible to the VM +- If an attacker compromises the VM, they can access any file in that directory +- Sensitive files that happen to be siblings of the bind-mounted file are exposed + +**Recommendations:** + +- Avoid bind-mounting files from directories containing secrets, credentials, + or sensitive data +- If the VM is treated as a security boundary, audit what gets exposed when + using file bind mounts +- Place files intended for bind-mounting in dedicated directories with no other + sensitive content +- Consider using directory bind mounts with only the necessary files instead of + file bind mounts diff --git a/internal/shim/task/mount.go b/internal/shim/task/mount.go index e436951..accebc2 100644 --- a/internal/shim/task/mount.go +++ b/internal/shim/task/mount.go @@ -18,13 +18,17 @@ package task import ( "context" + "crypto/sha256" "fmt" + "os" + "path/filepath" "strings" "github.com/containerd/containerd/api/types" "github.com/containerd/errdefs" "github.com/containerd/log" + "github.com/containerd/nerdbox/internal/shim/task/bundle" "github.com/containerd/nerdbox/internal/vm" ) @@ -146,3 +150,70 @@ func filterOptions(options []string) []string { } return filtered } + +type bindMounter struct { + mounts []bindMount +} + +type bindMount struct { + tag string + hostSrc string + vmTarget string +} + +func (bm *bindMounter) FromBundle(ctx context.Context, b *bundle.Bundle) error { + for i, m := range b.Spec.Mounts { + if m.Type != "bind" { + continue + } + + log.G(ctx).WithField("mount", m).Debug("transforming bind mount into a virtiofs mount") + + fi, err := os.Stat(m.Source) + if err != nil { + return fmt.Errorf("failed to stat bind mount source %s: %w", m.Source, err) + } + + hash := sha256.Sum256([]byte(m.Destination)) + tag := fmt.Sprintf("bind-%x", hash[:8]) + vmTarget := "/mnt/" + tag + + // For files, share the parent directory via 
virtiofs since virtiofs + // operates on directories. The spec source points to the file within + // the mounted directory. + hostSrc := m.Source + specSrc := vmTarget + if !fi.IsDir() { + hostSrc = filepath.Dir(m.Source) + specSrc = filepath.Join(vmTarget, filepath.Base(m.Source)) + } + + transformed := bindMount{ + tag: tag, + hostSrc: hostSrc, + vmTarget: vmTarget, + } + + bm.mounts = append(bm.mounts, transformed) + b.Spec.Mounts[i].Source = specSrc + } + + return nil +} + +func (bm *bindMounter) SetupVM(ctx context.Context, vmi vm.Instance) error { + for _, m := range bm.mounts { + if err := vmi.AddFS(ctx, m.tag, m.hostSrc); err != nil { + return err + } + } + return nil +} + +func (bm *bindMounter) InitArgs() []string { + args := make([]string, 0, len(bm.mounts)) + for _, m := range bm.mounts { + args = append(args, "-mount="+m.tag+":"+m.vmTarget) + } + return args +} diff --git a/internal/shim/task/mount_test.go b/internal/shim/task/mount_test.go new file mode 100644 index 0000000..fdea7ea --- /dev/null +++ b/internal/shim/task/mount_test.go @@ -0,0 +1,173 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package task + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/assert" + + "github.com/containerd/nerdbox/internal/shim/task/bundle" +) + +func TestBindMountsProvider(t *testing.T) { + tmpDir := t.TempDir() + + // Create a test file + testfile := filepath.Join(tmpDir, "testfile.txt") + f, err := os.Create(testfile) + assert.NoError(t, err) + f.Close() + + // Create a test directory + testdirData := filepath.Join(tmpDir, "testdir", "data") + assert.NoError(t, os.MkdirAll(testdirData, 0755)) + testdirConfig := filepath.Join(tmpDir, "testdir", "config") + assert.NoError(t, os.MkdirAll(testdirConfig, 0755)) + + testcases := []struct { + name string + mounts []specs.Mount + wantMounts []bindMount + wantSpecSources []string // expected sources in the OCI spec after transformation + wantInitArgs []string + }{ + { + name: "no mounts", + mounts: nil, + wantMounts: nil, + wantSpecSources: nil, + wantInitArgs: []string{}, + }, + { + name: "no bind mounts", + mounts: []specs.Mount{ + {Type: "tmpfs", Source: "tmpfs", Destination: "/tmp"}, + {Type: "proc", Source: "proc", Destination: "/proc"}, + }, + wantMounts: nil, + wantSpecSources: []string{"tmpfs", "proc"}, + wantInitArgs: []string{}, + }, + { + name: "single bind mount", + mounts: []specs.Mount{ + {Type: "bind", Source: testdirData, Destination: "/container/data"}, + }, + wantMounts: []bindMount{ + { + tag: "bind-8c5eaa445dd84f17", + hostSrc: testdirData, + vmTarget: "/mnt/bind-8c5eaa445dd84f17", + }, + }, + wantSpecSources: []string{"/mnt/bind-8c5eaa445dd84f17"}, + wantInitArgs: []string{"-mount=bind-8c5eaa445dd84f17:/mnt/bind-8c5eaa445dd84f17"}, + }, + { + name: "multiple bind mounts", + mounts: []specs.Mount{ + {Type: "bind", Source: testdirData, Destination: "/container/data"}, + {Type: "bind", Source: testdirConfig, Destination: "/container/config"}, + }, + wantMounts: []bindMount{ + { + tag: 
"bind-8c5eaa445dd84f17", + hostSrc: testdirData, + vmTarget: "/mnt/bind-8c5eaa445dd84f17", + }, + { + tag: "bind-529984c9ac58b7ec", + hostSrc: testdirConfig, + vmTarget: "/mnt/bind-529984c9ac58b7ec", + }, + }, + wantSpecSources: []string{ + "/mnt/bind-8c5eaa445dd84f17", + "/mnt/bind-529984c9ac58b7ec", + }, + wantInitArgs: []string{ + "-mount=bind-8c5eaa445dd84f17:/mnt/bind-8c5eaa445dd84f17", + "-mount=bind-529984c9ac58b7ec:/mnt/bind-529984c9ac58b7ec", + }, + }, + { + name: "mixed mount types", + mounts: []specs.Mount{ + {Type: "tmpfs", Source: "tmpfs", Destination: "/tmp"}, + {Type: "bind", Source: testdirData, Destination: "/container/data"}, + {Type: "proc", Source: "proc", Destination: "/proc"}, + }, + wantMounts: []bindMount{ + { + tag: "bind-8c5eaa445dd84f17", + hostSrc: testdirData, + vmTarget: "/mnt/bind-8c5eaa445dd84f17", + }, + }, + wantSpecSources: []string{ + "tmpfs", + "/mnt/bind-8c5eaa445dd84f17", + "proc", + }, + wantInitArgs: []string{"-mount=bind-8c5eaa445dd84f17:/mnt/bind-8c5eaa445dd84f17"}, + }, + { + name: "single file bind mount", + mounts: []specs.Mount{ + {Type: "bind", Source: testfile, Destination: "/container/testfile"}, + }, + wantMounts: []bindMount{ + { + tag: "bind-6dace5108a719565", + hostSrc: tmpDir, + vmTarget: "/mnt/bind-6dace5108a719565", + }, + }, + wantSpecSources: []string{"/mnt/bind-6dace5108a719565/testfile.txt"}, + wantInitArgs: []string{"-mount=bind-6dace5108a719565:/mnt/bind-6dace5108a719565"}, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + b := &bundle.Bundle{ + Spec: specs.Spec{ + Mounts: tc.mounts, + }, + } + + bm := &bindMounter{} + err := bm.FromBundle(context.Background(), b) + assert.NoError(t, err) + assert.Equal(t, tc.wantMounts, bm.mounts) + + // Verify that the spec sources were transformed + for i, wantSource := range tc.wantSpecSources { + assert.Equal(t, wantSource, b.Spec.Mounts[i].Source) + } + + // Verify the args passed to vminitd + args := bm.InitArgs() + 
assert.Equal(t, tc.wantInitArgs, args) + }) + } +} diff --git a/internal/shim/task/service.go b/internal/shim/task/service.go index 6f041f4..9959895 100644 --- a/internal/shim/task/service.go +++ b/internal/shim/task/service.go @@ -136,32 +136,6 @@ func (s *service) shutdown(ctx context.Context) error { return errors.Join(errs...) } -// transformBindMounts transforms bind mounts -func transformBindMounts(ctx context.Context, b *bundle.Bundle) error { - for i, m := range b.Spec.Mounts { - if m.Type == "bind" { - filename := filepath.Base(m.Source) - // Check that the bind is from a path with the bundle id - if filepath.Base(filepath.Dir(m.Source)) != filepath.Base(b.Path) { - log.G(ctx).WithFields(log.Fields{ - "source": m.Source, - "name": filename, - }).Debug("ignoring bind mount") - continue - } - - buf, err := os.ReadFile(m.Source) - if err != nil { - return fmt.Errorf("failed to read mount file %q: %w", filename, err) - } - b.Spec.Mounts[i].Source = filename - b.AddExtraFile(filename, buf) - } - } - - return nil -} - // Create a new initial process and container with the underlying OCI runtime func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) { log.G(ctx).WithFields(log.Fields{ @@ -186,10 +160,11 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * var nwpr networksProvider var ctrNetCfg ctrNetConfig + var bm bindMounter // Load the OCI bundle and apply transformers to get the bundle that'll be // set up on the VM side. 
b, err := bundle.Load(ctx, r.Bundle, - transformBindMounts, + bm.FromBundle, nwpr.FromBundle, ctrNetCfg.fromBundle, func(ctx context.Context, b *bundle.Bundle) error { @@ -225,9 +200,14 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * return nil, errgrpc.ToGRPC(err) } + if err := bm.SetupVM(ctx, vmi); err != nil { + return nil, errgrpc.ToGRPC(err) + } + prestart := time.Now() if err := vmi.Start(ctx, vm.WithInitArgs(nwpr.InitArgs()...), + vm.WithInitArgs(bm.InitArgs()...), ); err != nil { return nil, errgrpc.ToGRPC(err) }