github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/cgroups/devices/systemd.go (about) 1 package devices 2 3 import ( 4 "bufio" 5 "fmt" 6 "os" 7 "strconv" 8 "strings" 9 10 systemdDbus "github.com/coreos/go-systemd/v22/dbus" 11 "github.com/godbus/dbus/v5" 12 "github.com/sirupsen/logrus" 13 14 "github.com/opencontainers/runc/libcontainer/configs" 15 "github.com/opencontainers/runc/libcontainer/devices" 16 ) 17 18 // systemdProperties takes the configured device rules and generates a 19 // corresponding set of systemd properties to configure the devices correctly. 20 func systemdProperties(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error) { 21 if r.SkipDevices { 22 return nil, nil 23 } 24 25 properties := []systemdDbus.Property{ 26 // Always run in the strictest white-list mode. 27 newProp("DevicePolicy", "strict"), 28 // Empty the DeviceAllow array before filling it. 29 newProp("DeviceAllow", []deviceAllowEntry{}), 30 } 31 32 // Figure out the set of rules. 33 configEmu := emulator{} 34 for _, rule := range r.Devices { 35 if err := configEmu.Apply(*rule); err != nil { 36 return nil, fmt.Errorf("unable to apply rule for systemd: %w", err) 37 } 38 } 39 // systemd doesn't support blacklists. So we log a warning, and tell 40 // systemd to act as a deny-all whitelist. This ruleset will be replaced 41 // with our normal fallback code. This may result in spurious errors, but 42 // the only other option is to error out here. 43 if configEmu.IsBlacklist() { 44 // However, if we're dealing with an allow-all rule then we can do it. 45 if configEmu.IsAllowAll() { 46 return allowAllDevices(), nil 47 } 48 logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule") 49 return properties, nil 50 } 51 52 // Now generate the set of rules we actually need to apply. Unlike the 53 // normal devices cgroup, in "strict" mode systemd defaults to a deny-all 54 // whitelist which is the default for devices.Emulator. 55 finalRules, err := configEmu.Rules() 56 if err != nil { 57 return nil, fmt.Errorf("unable to get simplified rules for systemd: %w", err) 58 } 59 var deviceAllowList []deviceAllowEntry 60 for _, rule := range finalRules { 61 if !rule.Allow { 62 // Should never happen. 63 return nil, fmt.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule) 64 } 65 switch rule.Type { 66 case devices.BlockDevice, devices.CharDevice: 67 default: 68 // Should never happen. 69 return nil, fmt.Errorf("invalid device type for DeviceAllow: %v", rule.Type) 70 } 71 72 entry := deviceAllowEntry{ 73 Perms: string(rule.Permissions), 74 } 75 76 // systemd has a fairly odd (though understandable) syntax here, and 77 // because of the OCI configuration format we have to do quite a bit of 78 // trickery to convert things: 79 // 80 // * Concrete rules with non-wildcard major/minor numbers have to use 81 // /dev/{block,char}/MAJOR:minor paths. Before v240, systemd uses 82 // stat(2) on such paths to look up device properties, meaning we 83 // cannot add whitelist rules for devices that don't exist. Since v240, 84 // device properties are parsed from the path string. 85 // 86 // However, path globbing is not supported for path-based rules so we 87 // need to handle wildcards in some other manner. 88 // 89 // * If systemd older than v240 is used, wildcard-minor rules 90 // have to specify a "device group name" (the second column 91 // in /proc/devices). 92 // 93 // * Wildcard (major and minor) rules can just specify a glob with the 94 // type ("char-*" or "block-*"). 95 // 96 // The only type of rule we can't handle is wildcard-major rules, and 97 // so we'll give a warning in that case (note that the fallback code 98 // will insert any rules systemd couldn't handle). What amazing fun. 99 100 if rule.Major == devices.Wildcard { 101 // "_ *:n _" rules aren't supported by systemd. 102 if rule.Minor != devices.Wildcard { 103 logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule) 104 continue 105 } 106 107 // "_ *:* _" rules just wildcard everything. 108 prefix, err := groupPrefix(rule.Type) 109 if err != nil { 110 return nil, err 111 } 112 entry.Path = prefix + "*" 113 } else if rule.Minor == devices.Wildcard { 114 if sdVer >= 240 { 115 // systemd v240+ allows for {block,char}-MAJOR syntax. 116 prefix, err := groupPrefix(rule.Type) 117 if err != nil { 118 return nil, err 119 } 120 entry.Path = prefix + strconv.FormatInt(rule.Major, 10) 121 } else { 122 // For older systemd, "_ n:* _" rules require a device group from /proc/devices. 123 group, err := findDeviceGroup(rule.Type, rule.Major) 124 if err != nil { 125 return nil, fmt.Errorf("unable to find device '%v/%d': %w", rule.Type, rule.Major, err) 126 } 127 if group == "" { 128 // Couldn't find a group. 129 logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule) 130 continue 131 } 132 entry.Path = group 133 } 134 } else { 135 // "_ n:m _" rules are just a path in /dev/{block,char}/. 136 switch rule.Type { 137 case devices.BlockDevice: 138 entry.Path = fmt.Sprintf("/dev/block/%d:%d", rule.Major, rule.Minor) 139 case devices.CharDevice: 140 entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor) 141 } 142 if sdVer < 240 { 143 // Old systemd versions use stat(2) on path to find out device major:minor 144 // numbers and type. If the path doesn't exist, it will not add the rule, 145 // emitting a warning instead. 146 // Since all of this logic is best-effort anyway (we manually set these 147 // rules separately to systemd) we can safely skip entries that don't 148 // have a corresponding path. 149 if _, err := os.Stat(entry.Path); err != nil { 150 continue 151 } 152 } 153 } 154 deviceAllowList = append(deviceAllowList, entry) 155 } 156 157 properties = append(properties, newProp("DeviceAllow", deviceAllowList)) 158 return properties, nil 159 } 160 161 func newProp(name string, units interface{}) systemdDbus.Property { 162 return systemdDbus.Property{ 163 Name: name, 164 Value: dbus.MakeVariant(units), 165 } 166 } 167 168 func groupPrefix(ruleType devices.Type) (string, error) { 169 switch ruleType { 170 case devices.BlockDevice: 171 return "block-", nil 172 case devices.CharDevice: 173 return "char-", nil 174 default: 175 return "", fmt.Errorf("device type %v has no group prefix", ruleType) 176 } 177 } 178 179 // findDeviceGroup tries to find the device group name (as listed in 180 // /proc/devices) with the type prefixed as required for DeviceAllow, for a 181 // given (type, major) combination. If more than one device group exists, an 182 // arbitrary one is chosen. 183 func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) { 184 fh, err := os.Open("/proc/devices") 185 if err != nil { 186 return "", err 187 } 188 defer fh.Close() 189 190 prefix, err := groupPrefix(ruleType) 191 if err != nil { 192 return "", err 193 } 194 ruleMajorStr := strconv.FormatInt(ruleMajor, 10) + " " 195 196 scanner := bufio.NewScanner(fh) 197 var currentType devices.Type 198 for scanner.Scan() { 199 // We need to strip spaces because the first number is column-aligned. 200 line := strings.TrimSpace(scanner.Text()) 201 202 // Handle the "header" lines. 203 switch line { 204 case "Block devices:": 205 currentType = devices.BlockDevice 206 continue 207 case "Character devices:": 208 currentType = devices.CharDevice 209 continue 210 case "": 211 continue 212 } 213 214 // Skip lines unrelated to our type. 215 if currentType != ruleType { 216 continue 217 } 218 219 group := strings.TrimPrefix(line, ruleMajorStr) 220 if len(group) < len(line) { // got it 221 return prefix + group, nil 222 } 223 } 224 if err := scanner.Err(); err != nil { 225 return "", fmt.Errorf("reading /proc/devices: %w", err) 226 } 227 // Couldn't find the device group. 228 return "", nil 229 } 230 231 // DeviceAllow is the dbus type "a(ss)" which means we need a struct 232 // to represent it in Go. 233 type deviceAllowEntry struct { 234 Path string 235 Perms string 236 } 237 238 func allowAllDevices() []systemdDbus.Property { 239 // Setting mode to auto and removing all DeviceAllow rules 240 // results in allowing access to all devices. 241 return []systemdDbus.Property{ 242 newProp("DevicePolicy", "auto"), 243 newProp("DeviceAllow", []deviceAllowEntry{}), 244 } 245 }