github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/cgroups/devices/devicefilter.go (about) 1 // Implements creation of eBPF device filter program. 2 // 3 // Based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c 4 // 5 // Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano) 6 // agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397 7 package devices 8 9 import ( 10 "errors" 11 "fmt" 12 "math" 13 "strconv" 14 15 "github.com/cilium/ebpf/asm" 16 "github.com/opencontainers/runc/libcontainer/devices" 17 "golang.org/x/sys/unix" 18 ) 19 20 const ( 21 // license string format is same as kernel MODULE_LICENSE macro 22 license = "Apache" 23 ) 24 25 // deviceFilter returns eBPF device filter program and its license string. 26 func deviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) { 27 // Generate the minimum ruleset for the device rules we are given. While we 28 // don't care about minimum transitions in cgroupv2, using the emulator 29 // gives us a guarantee that the behaviour of devices filtering is the same 30 // as cgroupv1, including security hardenings to avoid misconfiguration 31 // (such as punching holes in wildcard rules). 32 emu := new(emulator) 33 for _, rule := range rules { 34 if err := emu.Apply(*rule); err != nil { 35 return nil, "", err 36 } 37 } 38 cleanRules, err := emu.Rules() 39 if err != nil { 40 return nil, "", err 41 } 42 43 p := &program{ 44 defaultAllow: emu.IsBlacklist(), 45 } 46 p.init() 47 48 for idx, rule := range cleanRules { 49 if rule.Type == devices.WildcardDevice { 50 // We can safely skip over wildcard entries because there should 51 // only be one (at most) at the very start to instruct cgroupv1 to 52 // go into allow-list mode. However we do double-check this here. 53 if idx != 0 || rule.Allow != emu.IsBlacklist() { 54 return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString()) 55 } 56 continue 57 } 58 if rule.Allow == p.defaultAllow { 59 // There should be no rules which have an action equal to the 60 // default action, the emulator removes those. 61 return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString()) 62 } 63 if err := p.appendRule(rule); err != nil { 64 return nil, "", err 65 } 66 } 67 return p.finalize(), license, nil 68 } 69 70 type program struct { 71 insts asm.Instructions 72 defaultAllow bool 73 blockID int 74 } 75 76 func (p *program) init() { 77 // struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 78 /* 79 u32 access_type 80 u32 major 81 u32 minor 82 */ 83 // R2 <- type (lower 16 bit of u32 access_type at R1[0]) 84 p.insts = append(p.insts, 85 asm.LoadMem(asm.R2, asm.R1, 0, asm.Word), 86 asm.And.Imm32(asm.R2, 0xFFFF)) 87 88 // R3 <- access (upper 16 bit of u32 access_type at R1[0]) 89 p.insts = append(p.insts, 90 asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), 91 // RSh: bitwise shift right 92 asm.RSh.Imm32(asm.R3, 16)) 93 94 // R4 <- major (u32 major at R1[4]) 95 p.insts = append(p.insts, 96 asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) 97 98 // R5 <- minor (u32 minor at R1[8]) 99 p.insts = append(p.insts, 100 asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) 101 } 102 103 // appendRule rule converts an OCI rule to the relevant eBPF block and adds it 104 // to the in-progress filter program. In order to operate properly, it must be 105 // called with a "clean" rule list (generated by devices.Emulator.Rules() -- 106 // with any "a" rules removed). 107 func (p *program) appendRule(rule *devices.Rule) error { 108 if p.blockID < 0 { 109 return errors.New("the program is finalized") 110 } 111 112 var bpfType int32 113 switch rule.Type { 114 case devices.CharDevice: 115 bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) 116 case devices.BlockDevice: 117 bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) 118 default: 119 // We do not permit 'a', nor any other types we don't know about. 120 return fmt.Errorf("invalid type %q", string(rule.Type)) 121 } 122 if rule.Major > math.MaxUint32 { 123 return fmt.Errorf("invalid major %d", rule.Major) 124 } 125 if rule.Minor > math.MaxUint32 { 126 return fmt.Errorf("invalid minor %d", rule.Major) 127 } 128 hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1 129 hasMinor := rule.Minor >= 0 130 bpfAccess := int32(0) 131 for _, r := range rule.Permissions { 132 switch r { 133 case 'r': 134 bpfAccess |= unix.BPF_DEVCG_ACC_READ 135 case 'w': 136 bpfAccess |= unix.BPF_DEVCG_ACC_WRITE 137 case 'm': 138 bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD 139 default: 140 return fmt.Errorf("unknown device access %v", r) 141 } 142 } 143 // If the access is rwm, skip the check. 144 hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) 145 146 var ( 147 blockSym = "block-" + strconv.Itoa(p.blockID) 148 nextBlockSym = "block-" + strconv.Itoa(p.blockID+1) 149 prevBlockLastIdx = len(p.insts) - 1 150 ) 151 p.insts = append(p.insts, 152 // if (R2 != bpfType) goto next 153 asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), 154 ) 155 if hasAccess { 156 p.insts = append(p.insts, 157 // if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next 158 asm.Mov.Reg32(asm.R1, asm.R3), 159 asm.And.Imm32(asm.R1, bpfAccess), 160 asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym), 161 ) 162 } 163 if hasMajor { 164 p.insts = append(p.insts, 165 // if (R4 != major) goto next 166 asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym), 167 ) 168 } 169 if hasMinor { 170 p.insts = append(p.insts, 171 // if (R5 != minor) goto next 172 asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym), 173 ) 174 } 175 p.insts = append(p.insts, acceptBlock(rule.Allow)...) 176 // set blockSym to the first instruction we added in this iteration 177 p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].WithSymbol(blockSym) 178 p.blockID++ 179 return nil 180 } 181 182 func (p *program) finalize() asm.Instructions { 183 var v int32 184 if p.defaultAllow { 185 v = 1 186 } 187 blockSym := "block-" + strconv.Itoa(p.blockID) 188 p.insts = append(p.insts, 189 // R0 <- v 190 asm.Mov.Imm32(asm.R0, v).WithSymbol(blockSym), 191 asm.Return(), 192 ) 193 p.blockID = -1 194 return p.insts 195 } 196 197 func acceptBlock(accept bool) asm.Instructions { 198 var v int32 199 if accept { 200 v = 1 201 } 202 return []asm.Instruction{ 203 // R0 <- v 204 asm.Mov.Imm32(asm.R0, v), 205 asm.Return(), 206 } 207 }