Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
rootless: add rootless cgroup manager
The rootless cgroup manager acts as a no-op for all set and apply
operations, and is only used for rootless setups. Currently it is far
too simple (we need to add opportunistic cgroup management), but it is
good enough as a first pass at a no-op cgroup manager.

Signed-off-by: Aleksa Sarai <[email protected]>
  • Loading branch information
cyphar committed Mar 23, 2017
commit baeef298582869504e73651e2b0fb78b156e5783
24 changes: 5 additions & 19 deletions libcontainer/cgroups/fs/apply_raw.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}, nil
}

// parentPath returns the cgroup directory of the current process for the given
// subsystem, re-rooted under mountpoint: the directory reported by
// cgroups.GetThisCgroupDir is made relative to root and then joined onto
// mountpoint.
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
	// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
	// process could be in a container that shares a pid namespace with the
	// host, and /proc/1/cgroup could point to a whole other world of cgroups.
	initPath, err := cgroups.GetThisCgroupDir(subsystem)
	if err != nil {
		return "", err
	}
	// This is needed for nested containers, because in /proc/self/cgroup we
	// see paths from the host, which don't exist in the container.
	relDir, err := filepath.Rel(root, initPath)
	if err != nil {
		return "", err
	}
	return filepath.Join(mountpoint, relDir), nil
}

func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err
Expand All @@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
}

parentPath, err := raw.parentPath(subsystem, mnt, root)
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
// process could be in a container that shares a pid namespace with the host,
// and /proc/1/cgroup could point to a whole other world of cgroups.
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
if err != nil {
return "", err
}
Expand Down
128 changes: 128 additions & 0 deletions libcontainer/cgroups/rootless/rootless.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// +build linux

package rootless

import (
"fmt"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)

// subsystems lists the cgroup subsystems whose paths Apply tries to resolve.
//
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
// needlessly. We should probably export this list.
var subsystems = []subsystem{
	&fs.CpusetGroup{},
	&fs.DevicesGroup{},
	&fs.MemoryGroup{},
	&fs.CpuGroup{},
	&fs.CpuacctGroup{},
	&fs.PidsGroup{},
	&fs.BlkioGroup{},
	&fs.HugetlbGroup{},
	&fs.NetClsGroup{},
	&fs.NetPrioGroup{},
	&fs.PerfEventGroup{},
	&fs.FreezerGroup{},
	&fs.NameGroup{GroupName: "name=systemd"},
}

// subsystem is the minimal view of a cgroup subsystem needed by this package;
// it is the element type of the subsystems list.
type subsystem interface {
	// Name returns the name of the subsystem.
	Name() string

	// GetStats returns the stats, as 'stats', corresponding to the cgroup
	// under 'path'.
	GetStats(path string, stats *cgroups.Stats) error
}

// Manager is the noop cgroup manager used for rootless containers, because we
// currently cannot manage cgroups if we are in a rootless setup. This manager
// is chosen by factory if we are in rootless mode. We error out if any cgroup
// options are set in the config -- this may change in the future with upcoming
// kernel features like the cgroup namespace.
type Manager struct {
	// Cgroups is the cgroup configuration for the container. Apply returns
	// immediately without doing anything when it is nil.
	Cgroups *configs.Cgroup
	// Paths maps a subsystem name to the cgroup path resolved for it. It is
	// overwritten by Apply and returned by GetPaths.
	Paths map[string]string
}

// Apply does not move pid into any cgroup. When a cgroup configuration is
// present it only records, for every entry in subsystems, the cgroup path
// that cgroups.GetOwnCgroupPath resolves, and stores the result in m.Paths.
// It returns an error if the configuration requests explicit cgroup paths.
func (m *Manager) Apply(pid int) error {
	// Without any cgroup settings there is nothing to record.
	if m.Cgroups == nil {
		return nil
	}

	// Explicit cgroup paths cannot be honoured.
	// TODO(cyphar): Implement the case where the runner of a rootless container
	//               owns their own cgroup, which would allow us to set up a
	//               cgroup for each path.
	if m.Cgroups.Paths != nil {
		return fmt.Errorf("cannot change cgroup path in rootless container")
	}

	// Collect the path of each subsystem we are able to resolve.
	resolved := make(map[string]string)
	for _, sys := range subsystems {
		name := sys.Name()

		// Subsystems whose path cannot be resolved are deliberately skipped.
		if path, err := cgroups.GetOwnCgroupPath(name); err == nil {
			resolved[name] = path
		}
	}

	m.Paths = resolved
	return nil
}

// GetPaths returns the manager's Paths map as-is (no copy is made).
func (m *Manager) GetPaths() map[string]string {
	return m.Paths
}

// Set does not write any cgroup settings. It only re-runs config validation
// on container and returns the validator's result.
func (m *Manager) Set(container *configs.Config) error {
	// Validation is repeated here because someone might decide to update a
	// rootless container after it was created.
	validator := validate.New()
	return validator.Validate(container)
}

// GetPids returns the result of cgroups.GetPids for the "devices" cgroup path
// resolved by cgroups.GetOwnCgroupPath. m.Paths is not consulted.
func (m *Manager) GetPids() ([]int, error) {
	devicesPath, err := cgroups.GetOwnCgroupPath("devices")
	if err != nil {
		return nil, err
	}

	return cgroups.GetPids(devicesPath)
}

// GetAllPids returns the result of cgroups.GetAllPids for the "devices" cgroup
// path resolved by cgroups.GetOwnCgroupPath. m.Paths is not consulted.
func (m *Manager) GetAllPids() ([]int, error) {
	devicesPath, err := cgroups.GetOwnCgroupPath("devices")
	if err != nil {
		return nil, err
	}

	return cgroups.GetAllPids(devicesPath)
}

// GetStats is unsupported by the rootless manager: it always returns a nil
// *cgroups.Stats together with an error.
func (m *Manager) GetStats() (*cgroups.Stats, error) {
	// TODO(cyphar): We can make this work if we figure out a way to allow usage
	//               of cgroups with a rootless container. While this doesn't
	//               actually require write access to a cgroup directory, the
	//               statistics are not useful if they can be affected by
	//               non-container processes.
	err := fmt.Errorf("cannot get cgroup stats in rootless container")
	return nil, err
}

// Freeze is unsupported by the rootless manager: it ignores state and always
// returns an error.
func (m *Manager) Freeze(state configs.FreezerState) error {
	// TODO(cyphar): We can make this work if we figure out a way to allow usage
	//               of cgroups with a rootless container.
	err := fmt.Errorf("cannot use freezer cgroup in rootless container")
	return err
}

// Destroy is a no-op that always returns nil.
func (m *Manager) Destroy() error {
	// We don't have to do anything here because we didn't do any setup.
	return nil
}
2 changes: 1 addition & 1 deletion libcontainer/cgroups/systemd/apply_systemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
return "", err
}

initPath, err := cgroups.GetInitCgroupDir(subsystem)
initPath, err := cgroups.GetInitCgroup(subsystem)
if err != nil {
return "", err
}
Expand Down
41 changes: 37 additions & 4 deletions libcontainer/cgroups/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ type Mount struct {
Subsystems []string
}

func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
Expand Down Expand Up @@ -203,8 +203,8 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil
}

// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
// GetOwnCgroup returns the relative path to the cgroup the calling process is running in.
func GetOwnCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
Expand All @@ -213,8 +213,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}

func GetInitCgroupDir(subsystem string) (string, error) {
// GetOwnCgroupPath looks up the calling process's cgroup for subsystem with
// GetOwnCgroup and converts it into a path using getCgroupPathHelper.
func GetOwnCgroupPath(subsystem string) (string, error) {
	own, err := GetOwnCgroup(subsystem)
	if err != nil {
		return "", err
	}
	return getCgroupPathHelper(subsystem, own)
}

func GetInitCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
Expand All @@ -223,6 +231,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}

func GetInitCgroupPath(subsystem string) (string, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this, since it's not used? I doubt there will be any usage of it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need it now, but the systemd cgroup manager does use GetInitCgroup. I can drop it if you prefer, this is more for the benefit of users of libcontainer.

Copy link
Contributor

@hqhq hqhq Mar 23, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's probably erroneous; maybe nobody is using the systemd cgroup manager inside a container. I'm OK with keeping it, so we'll say no to any subsequent PR that tries to remove this unused function :)

cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}

return getCgroupPathHelper(subsystem, cgroup)
}

func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is kind of subtle, can you keep the comments?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only comment that makes sense now is the one on the filepath.Rel call. Is that the one you want me to keep?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this function, yes. And maybe leave the other comment in raw.path() to explain why we use GetOwnCgroupPath instead of GetInitCgroupPath; a lot of people used to ask me about these cgroup path functions...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. Not sure why I removed them in the first place.

mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
if err != nil {
return "", err
}

// This is needed for nested containers, because in /proc/self/cgroup we
// see paths from the host, which don't exist in the container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}

return filepath.Join(mnt, relCgroup), nil
}

func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
if err != nil {
Expand Down
8 changes: 8 additions & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -520,10 +520,18 @@ func (c *linuxContainer) Resume() error {
}

// NotifyOOM passes the cgroup manager's paths to notifyOnOOM and returns its
// result. For rootless containers it returns an error instead.
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
	// XXX(cyphar): This requires cgroups.
	if c.config.Rootless {
		return nil, fmt.Errorf("cannot get OOM notifications from rootless container")
	}

	paths := c.cgroupManager.GetPaths()
	return notifyOnOOM(paths)
}

// NotifyMemoryPressure passes the cgroup manager's paths and level to
// notifyMemoryPressure and returns its result. For rootless containers it
// returns an error instead.
func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
	// XXX(cyphar): This requires cgroups.
	if c.config.Rootless {
		return nil, fmt.Errorf("cannot get memory pressure notifications from rootless container")
	}

	paths := c.cgroupManager.GetPaths()
	return notifyMemoryPressure(paths, level)
}

Expand Down
22 changes: 22 additions & 0 deletions libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
Expand Down Expand Up @@ -73,6 +74,20 @@ func Cgroupfs(l *LinuxFactory) error {
return nil
}

// RootlessCgroups is an option func that configures a LinuxFactory to return
// containers that use the "rootless" cgroup manager, which fails any
// operation that cannot be performed by an unprivileged user. It should only
// be used in conjunction with rootless containers. It always returns nil.
func RootlessCgroups(l *LinuxFactory) error {
	newManager := func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
		mgr := &rootless.Manager{Cgroups: config, Paths: paths}
		return mgr
	}

	l.NewCgroupsManager = newManager
	return nil
}

// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
Expand Down Expand Up @@ -169,6 +184,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := os.Chown(containerRoot, uid, gid); err != nil {
return nil, newGenericError(err, SystemError)
}
if config.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{
id: id,
root: containerRoot,
Expand All @@ -195,6 +213,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors,
}
// We have to use the RootlessManager.
if state.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{
initProcess: r,
initProcessStartTime: state.InitProcessStartTime,
Expand Down
20 changes: 8 additions & 12 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,15 +254,14 @@ func (p *initProcess) start() error {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
}
p.setExternalDescriptors(fds)
if !p.container.config.Rootless {
// Do this before syncing with child so that no children can escape the
// cgroup. We can't do this if we're not running as root.
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
// Do this before syncing with child so that no children can escape the
// cgroup. This is safe even when we are not running as root, because in
// that case the rootless cgroup manager is being used.
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
defer func() {
if err != nil && !p.container.config.Rootless {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
}
Expand All @@ -281,11 +280,8 @@ func (p *initProcess) start() error {
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type {
case procReady:
// We can't set cgroups if we're in a rootless container.
if !p.container.config.Rootless {
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
}
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
Expand Down
2 changes: 1 addition & 1 deletion libcontainer/rootfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
var binds []*configs.Mount

for _, mm := range mounts {
dir, err := mm.GetThisCgroupDir(cgroupPaths)
dir, err := mm.GetOwnCgroup(cgroupPaths)
if err != nil {
return nil, err
}
Expand Down