#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This script will take any number of trace files generated by strace(1)
# and output a system call filtering policy suitable for use with Minijail.

"""Tool to generate a minijail seccomp filter from strace or audit output."""

import argparse
import collections
import os
import re
import sys

# auparse may not be installed and is currently optional.
try:
    import auparse
except ImportError:
    auparse = None


NOTICE = """# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

# Policy expression emitted for syscalls that are allowed unconditionally.
ALLOW = '1'

# This ignores any leading PID tag and trailing <unfinished ...>, and extracts
# the syscall name and the argument list.
LINE_RE = re.compile(r'^\s*(?:\[[^]]*\]|\d+)?\s*([a-zA-Z0-9_]+)\(([^)<]*)')

# Syscalls that are reported as 'socketcall' when the trace is judged (from
# its filename) to come from a 32-bit x86 target.
SOCKETCALLS = {
    'accept', 'bind', 'connect', 'getpeername', 'getsockname', 'getsockopt',
    'listen', 'recv', 'recvfrom', 'recvmsg', 'send', 'sendmsg', 'sendto',
    'setsockopt', 'shutdown', 'socket', 'socketpair',
}

# List of private ARM syscalls. These can be found in any ARM specific unistd.h
# such as Linux's arch/arm/include/uapi/asm/unistd.h.
PRIVATE_ARM_SYSCALLS = {
    983041: 'ARM_breakpoint',
    983042: 'ARM_cacheflush',
    983043: 'ARM_usr26',
    983044: 'ARM_usr32',
    983045: 'ARM_set_tls',
}

# Describes which argument of a syscall is inspected (arg_index) and the set
# of distinct values that were observed for it (value_set).
ArgInspectionEntry = collections.namedtuple('ArgInspectionEntry',
                                            ('arg_index', 'value_set'))


# pylint: disable=too-few-public-methods
class BucketInputFiles(argparse.Action):
    """Buckets input files using simple content based heuristics.

    Attributes:
      audit_logs: Mutually exclusive list of audit log filenames.
      traces: Mutually exclusive list of strace log filenames.
    """
    def __call__(self, parser, namespace, values, option_string=None):
        """Sorts each filename in |values| into audit logs or strace logs.

        The result is stored on |namespace| as the 'audit_logs' and 'traces'
        attributes. A missing file is reported through parser.error().
        """
        audit_logs = []
        traces = []

        strace_line_re = re.compile(r'[a-z]+[0-9]*\(.+\) += ')
        audit_line_re = re.compile(r'type=(SYSCALL|SECCOMP)')

        for filename in values:
            if not os.path.exists(filename):
                parser.error(f'Input file {filename} not found.')
            with open(filename, mode='r', encoding='utf8') as input_file:
                # Iterate lazily: the first recognizable line decides the
                # bucket, so there is no need to load the whole log.
                for line in input_file:
                    if strace_line_re.search(line):
                        traces.append(filename)
                        break
                    if audit_line_re.search(line):
                        audit_logs.append(filename)
                        break
                else:
                    # Treat it as an strace log to retain legacy behavior and
                    # also just in case the strace regex is imperfect.
                    traces.append(filename)

        setattr(namespace, 'audit_logs', audit_logs)
        setattr(namespace, 'traces', traces)
# pylint: enable=too-few-public-methods


def parse_args(argv):
    """Returns the parsed CLI arguments for this tool.

    Args:
      argv: List of command line arguments (without the program name).

    Returns:
      The argparse namespace. Besides the declared options it carries the
      'audit_logs' and 'traces' lists filled in by BucketInputFiles.
      Inconsistent combinations of audit logs and --audit-comm are rejected
      through parser.error().
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--verbose', action='store_true',
                        help='output informational messages to stderr')
    parser.add_argument('--frequency', type=argparse.FileType('w'),
                        help='frequency file')
    parser.add_argument('--policy', type=argparse.FileType('w'),
                        default=sys.stdout, help='policy file')
    parser.add_argument('input-logs', action=BucketInputFiles,
                        help='strace and/or audit logs', nargs='+')
    parser.add_argument('--audit-comm', type=str, metavar='PROCESS_NAME',
                        help='relevant process name from the audit.log files')
    opts = parser.parse_args(argv)

    if opts.audit_logs and not auparse:
        parser.error('Python bindings for the audit subsystem were not found.\n'
                     'Please install the python3-audit (sometimes python-audit)'
                     ' package for your distro to process audit logs: '
                     f'{opts.audit_logs}')

    if opts.audit_logs and not opts.audit_comm:
        parser.error('--audit-comm is required when using audit logs as input:'
                     f' {opts.audit_logs}')

    if not opts.audit_logs and opts.audit_comm:
        parser.error('--audit-comm was specified yet none of the input files '
                     'matched our hueristic for an audit log')

    return opts


def get_seccomp_bpf_filter(syscall, entry):
    """Returns a minijail seccomp-bpf filter expression for the syscall.

    Args:
      syscall: Name of the syscall the expression is generated for.
      entry: ArgInspectionEntry with the inspected argument index and the
          set of values observed for that argument.

    Returns:
      The ' || '-joined comparison atoms. Observed values are emitted in
      sorted order so that the generated policy is reproducible.
    """
    arg_index = entry.arg_index
    arg_values = entry.value_set
    atoms = []
    if syscall in ('mmap', 'mmap2', 'mprotect') and arg_index == 2:
        # See if there is at least one instance of any of these syscalls trying
        # to map memory with both PROT_EXEC and PROT_WRITE. If there isn't, we
        # can craft a concise expression to forbid this.
        write_and_exec = {'PROT_EXEC', 'PROT_WRITE'}
        for arg_value in arg_values:
            flags = {flag.strip() for flag in arg_value.split('|')}
            if write_and_exec.issubset(flags):
                break
        else:
            atoms.extend(['arg2 in ~PROT_EXEC', 'arg2 in ~PROT_WRITE'])
            arg_values = set()
    # Sets of strings iterate in a per-process order; sort for stable output.
    atoms.extend(f'arg{arg_index} == {arg_value}'
                 for arg_value in sorted(arg_values))
    return ' || '.join(atoms)


def parse_trace_file(trace_filename, syscalls, arg_inspection):
    """Parses one file produced by strace.

    Args:
      trace_filename: Path of the strace log. The name is also used as a
          heuristic: 'i386', or 'x86' without '64', selects socketcall
          folding.
      syscalls: Mapping of syscall name to hit count; updated in place.
      arg_inspection: Mapping of syscall name to ArgInspectionEntry; the
          value_set of matching entries is updated in place.
    """
    uses_socketcall = ('i386' in trace_filename or
                       ('x86' in trace_filename and
                        '64' not in trace_filename))

    with open(trace_filename, encoding='utf8') as trace_file:
        for line in trace_file:
            matches = LINE_RE.match(line)
            if not matches:
                continue

            syscall, args = matches.groups()
            if uses_socketcall and syscall in SOCKETCALLS:
                syscall = 'socketcall'

            # strace omits the 'ARM_' prefix on all private ARM syscalls. Add
            # it manually here as a workaround. These syscalls are exclusive
            # to ARM so we don't need to predicate this on a trace_filename
            # based heuristic for the arch.
            if f'ARM_{syscall}' in PRIVATE_ARM_SYSCALLS.values():
                syscall = f'ARM_{syscall}'

            syscalls[syscall] += 1

            args = [arg.strip() for arg in args.split(',')]

            if syscall in arg_inspection:
                arg_value = args[arg_inspection[syscall].arg_index]
                arg_inspection[syscall].value_set.add(arg_value)


def parse_audit_log(audit_log, audit_comm, syscalls, arg_inspection):
    """Parses one audit.log file generated by the Linux audit subsystem.

    Args:
      audit_log: Path of the audit log file.
      audit_comm: Process name ('comm' field) whose records are relevant.
      syscalls: Mapping of syscall name to hit count; updated in place.
      arg_inspection: Mapping of syscall name to ArgInspectionEntry; the
          value_set of matching entries is updated in place.

    Raises:
      ValueError: If the file does not parse as an audit log, or a relevant
          record lacks the 'syscall' field or the inspected argument field.
    """

    unknown_syscall_re = re.compile(r'unknown-syscall\((?P<syscall_num>\d+)\)')

    au = auparse.AuParser(auparse.AUSOURCE_FILE, audit_log)
    # Quick validity check for whether this parses as a valid audit log. The
    # first event should have at least one record.
    if not au.first_record():
        # audit_log is a filename (str), not a file object.
        raise ValueError(f'Unable to parse audit log file {audit_log}')

    # Iterate through events where _any_ contained record matches
    # ((type == SECCOMP || type == SYSCALL) && comm == audit_comm).
    au.search_add_item('type', '=', 'SECCOMP', auparse.AUSEARCH_RULE_CLEAR)
    au.search_add_item('type', '=', 'SYSCALL', auparse.AUSEARCH_RULE_OR)
    au.search_add_item('comm', '=', f'"{audit_comm}"',
                       auparse.AUSEARCH_RULE_AND)

    # auparse_find_field(3) will ignore preceding fields in the record and
    # at the same time happily cross record boundaries when looking for the
    # field. This helper method always seeks the cursor back to the first
    # field in the record and stops searching before crossing over to the
    # next record; making the search far less error prone.
    # Also implicitly seeks the internal 'cursor' to the matching field
    # for any subsequent calls like auparse_interpret_field.
    def _find_field_in_current_record(name):
        au.first_field()
        while True:
            if au.get_field_name() == name:
                return au.get_field_str()
            if not au.next_field():
                return None

    while au.search_next_event():
        # The event may have multiple records. Loop through all.
        au.first_record()
        for _ in range(au.get_num_records()):
            event_type = _find_field_in_current_record('type')
            comm = _find_field_in_current_record('comm')
            # Some of the records in this event may not be relevant
            # despite the event-specific search filter. Skip those.
            if (event_type not in ('SECCOMP', 'SYSCALL') or
                    comm != f'"{audit_comm}"'):
                au.next_record()
                continue

            if not _find_field_in_current_record('syscall'):
                raise ValueError(f'Could not find field "syscall" in event of '
                                 f'type {event_type}')
            # Interpret the syscall field that's under our 'cursor' following
            # the find. Interpreting fields yields human friendly names
            # instead of integers. E.g '16' -> 'ioctl'.
            syscall = au.interpret_field()

            # TODO(crbug/1172449): Add these syscalls to upstream
            # audit-userspace and remove this workaround.
            # This is redundant but safe for non-ARM architectures due to the
            # disjoint set of private syscall numbers.
            match = unknown_syscall_re.match(syscall)
            if match:
                syscall_num = int(match.group('syscall_num'))
                syscall = PRIVATE_ARM_SYSCALLS.get(syscall_num, syscall)

            if ((syscall in arg_inspection and event_type == 'SECCOMP') or
                    (syscall not in arg_inspection and
                     event_type == 'SYSCALL')):
                # Skip SECCOMP records for syscalls that require argument
                # inspection. Similarly, skip SYSCALL records for syscalls
                # that do not require argument inspection. Technically such
                # records wouldn't exist per our setup instructions but audit
                # sometimes lets a few records slip through.
                au.next_record()
                continue

            if event_type == 'SYSCALL':
                arg_field_name = f'a{arg_inspection[syscall].arg_index}'
                if not _find_field_in_current_record(arg_field_name):
                    raise ValueError(
                        f'Could not find field "{arg_field_name}" '
                        f'in event of type {event_type}')
                # Interpret the arg field that's under our 'cursor' following
                # the find. This may yield a more human friendly name.
                # E.g '5401' -> 'TCGETS'.
                arg_inspection[syscall].value_set.add(au.interpret_field())

            syscalls[syscall] += 1
            au.next_record()


def main(argv=None):
    """Main entrypoint.

    Args:
      argv: Command line arguments without the program name; defaults to
          sys.argv[1:].
    """

    if argv is None:
        argv = sys.argv[1:]

    opts = parse_args(argv)

    try:
        syscalls = collections.defaultdict(int)

        arg_inspection = {
            'socket': ArgInspectionEntry(0, set()),    # int domain
            'ioctl': ArgInspectionEntry(1, set()),     # int request
            'prctl': ArgInspectionEntry(0, set()),     # int option
            'mmap': ArgInspectionEntry(2, set()),      # int prot
            'mmap2': ArgInspectionEntry(2, set()),     # int prot
            'mprotect': ArgInspectionEntry(2, set()),  # int prot
        }

        if opts.verbose:
            # Print an informational message to stderr in case the filetype
            # detection heuristics are wonky.
            print('Generating a seccomp policy using these input files:',
                  file=sys.stderr)
            print(f'Strace logs: {opts.traces}', file=sys.stderr)
            print(f'Audit logs: {opts.audit_logs}', file=sys.stderr)

        for trace_filename in opts.traces:
            parse_trace_file(trace_filename, syscalls, arg_inspection)

        for audit_log in opts.audit_logs:
            parse_audit_log(audit_log, opts.audit_comm, syscalls,
                            arg_inspection)

        # Add the basic set if they are not yet present.
        basic_set = [
            'restart_syscall', 'exit', 'exit_group', 'rt_sigreturn',
        ]
        for basic_syscall in basic_set:
            if basic_syscall not in syscalls:
                syscalls[basic_syscall] = 1

        # If a frequency file isn't used then sort the syscalls based on
        # frequency to make the common case fast (by checking frequent calls
        # earlier). Otherwise, sort alphabetically to make it easier for
        # humans to see which calls are in use (and if necessary manually add
        # a new syscall to the list).
        if opts.frequency is None:
            # The sort is stable, so ties keep first-seen order.
            sorted_syscalls = sorted(syscalls, key=syscalls.get, reverse=True)
        else:
            sorted_syscalls = sorted(syscalls)

        print(NOTICE, file=opts.policy)
        if opts.frequency is not None:
            print(NOTICE, file=opts.frequency)

        for syscall in sorted_syscalls:
            if syscall in arg_inspection:
                arg_filter = get_seccomp_bpf_filter(syscall,
                                                    arg_inspection[syscall])
            else:
                arg_filter = ALLOW
            print(f'{syscall}: {arg_filter}', file=opts.policy)
            if opts.frequency is not None:
                print(f'{syscall}: {syscalls[syscall]}', file=opts.frequency)
    finally:
        # argparse.FileType opened these for us; close (and thereby flush)
        # them explicitly, but never close the process' standard streams.
        for stream in (opts.policy, opts.frequency):
            if (stream is not None and stream is not sys.stdout
                    and stream is not sys.stderr):
                stream.close()


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))