code-intelligence.com/cifuzz@v0.40.0/third-party/minijail/tools/parser.py (about)

     1  #!/usr/bin/env python3
     2  # -*- coding: utf-8 -*-
     3  #
     4  # Copyright (C) 2018 The Android Open Source Project
     5  #
     6  # Licensed under the Apache License, Version 2.0 (the "License");
     7  # you may not use this file except in compliance with the License.
     8  # You may obtain a copy of the License at
     9  #
    10  #      http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  # Unless required by applicable law or agreed to in writing, software
    13  # distributed under the License is distributed on an "AS IS" BASIS,
    14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  # See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  """A parser for the Minijail policy file."""
    18  
    19  from __future__ import absolute_import
    20  from __future__ import division
    21  from __future__ import print_function
    22  
    23  import collections
    24  import itertools
    25  import os.path
    26  import re
    27  
    28  try:
    29      import bpf
    30  except ImportError:
    31      from minijail import bpf
    32  
    33  
# A lexical token produced by ParserState.tokenize.
#
# type:   name of the matched group from _TOKEN_SPECIFICATION (or 'INVALID'
#         when used for error diagnostics on unmatched input).
# value:  the exact text that was matched.
# filename/line/line_number/column: source location, used by ParseException
#         to render a caret pointing at the offending text.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])

# A regex that can tokenize a Minijail policy file line.
# Order matters: alternatives are tried left to right, so multi-character
# operators ('||', '&&') must precede their single-character prefixes
# ('|', '&'), and keyword-like patterns use word boundaries so they do not
# clip the front of longer identifiers.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default\b'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    ('DENYLIST', r'@denylist$'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b|\buser-notify\b'
    ),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)
# Fold the specification into one pattern with a named group per token type;
# match.lastgroup then identifies which kind of token was matched.
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
    73  
    74  
    75  class ParseException(Exception):
    76      """An exception that is raised when parsing fails."""
    77  
    78      # pylint: disable=too-many-arguments
    79      def __init__(self,
    80                   message,
    81                   filename,
    82                   *,
    83                   line='',
    84                   line_number=1,
    85                   token=None):
    86          if token:
    87              line = token.line
    88              line_number = token.line_number
    89              column = token.column
    90              length = len(token.value)
    91          else:
    92              column = len(line)
    93              length = 1
    94  
    95          message = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
    96                                         message)
    97          message += '\n    %s' % line
    98          message += '\n    %s%s' % (' ' * column, '^' * length)
    99          super().__init__(message)
   100  
   101  
class ParserState:
    """Stores the state of the Parser to provide better diagnostics."""

    def __init__(self, filename):
        # Name of the file this state tracks ("<memory>" for the base frame).
        self._filename = filename
        # Most recently read logical line, stripped of its trailing newline.
        self._line = ''
        # 1-based number of the line being processed (0 = nothing read yet).
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParserException with the provided message.

        If *token* is given the exception's caret marks that token's text;
        otherwise it points just past the end of the current line.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Yield the list of tokens for each logical line in *lines*.

        This is a generator: lines ending in a backslash are continued onto
        the following physical line, blank/comment-only lines yield nothing,
        and any text the tokenizer regex cannot match raises via error().
        """
        tokens = []

        for line_number, line in enumerate(lines):
            self._line_number = line_number + 1
            self._line = line.rstrip('\r\n')

            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                # Any gap between consecutive matches is untokenizable text.
                if token.start() != last_end:
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:token.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this logic
                # elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            # Trailing text after the final match is also untokenizable.
            if last_end != len(self._line):
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be free
                # to modify it.
                yield tokens[::]
            tokens.clear()
   177  
   178  
Atom = collections.namedtuple('Atom', ['argument_index', 'op', 'value'])
"""A single boolean comparison within a filter expression.

argument_index is the syscall argument number (the N in argN), op is the
comparison operator text, and value is the already-evaluated constant.
"""

Filter = collections.namedtuple('Filter', ['expression', 'action'])
"""The result of parsing a DNF filter expression, with its action.

Since the expression is in Disjunctive Normal Form, it is composed of two levels
of lists, one for disjunctions and the inner one for conjunctions. The elements
of the inner list are Atoms.  An expression of None means the action applies
unconditionally.
"""

Syscall = collections.namedtuple('Syscall', ['name', 'number'])
"""A system call."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ['syscalls', 'filters', 'token'])
"""The result of parsing a filter statement.

Statements have a list of syscalls, and an associated list of filters that will
be evaluated sequentially when any of the syscalls is invoked.
"""

FilterStatement = collections.namedtuple('FilterStatement',
                                         ['syscall', 'frequency', 'filters'])
"""The filter list for a particular syscall.

This is a mapping from one syscall to a list of filters that are evaluated
sequentially. The last filter is always an unconditional action.
"""

ParsedPolicy = collections.namedtuple('ParsedPolicy',
                                      ['default_action', 'filter_statements'])
"""The result of parsing a minijail .policy file."""
   212  
   213  
   214  # pylint: disable=too-few-public-methods
   215  class PolicyParser:
   216      """A parser for the Minijail seccomp policy file format."""
   217  
   218      def __init__(self,
   219                   arch,
   220                   *,
   221                   kill_action,
   222                   include_depth_limit=10,
   223                   override_default_action=None,
   224                   denylist=False):
   225          self._parser_states = [ParserState("<memory>")]
   226          self._kill_action = kill_action
   227          self._include_depth_limit = include_depth_limit
   228          if denylist:
   229              self._default_action = bpf.Allow()
   230          else:
   231              self._default_action = self._kill_action
   232          self._override_default_action = override_default_action
   233          self._frequency_mapping = collections.defaultdict(int)
   234          self._arch = arch
   235          self._denylist = denylist
   236  
    @property
    def _parser_state(self):
        # The innermost state: parsing an @include or frequency file pushes a
        # new ParserState, so the top of the stack is the current file.
        return self._parser_states[-1]
   240  
   241      # single-constant = identifier
   242      #                 | numeric-constant
   243      #                 ;
   244      def _parse_single_constant(self, token):
   245          if token.type == 'IDENTIFIER':
   246              if token.value not in self._arch.constants:
   247                  self._parser_state.error('invalid constant', token=token)
   248              single_constant = self._arch.constants[token.value]
   249          elif token.type == 'NUMERIC_CONSTANT':
   250              try:
   251                  single_constant = int(token.value, base=0)
   252              except ValueError:
   253                  self._parser_state.error('invalid constant', token=token)
   254          else:
   255              self._parser_state.error('invalid constant', token=token)
   256          if single_constant > self._arch.max_unsigned:
   257              self._parser_state.error('unsigned overflow', token=token)
   258          elif single_constant < self._arch.min_signed:
   259              self._parser_state.error('signed underflow', token=token)
   260          elif single_constant < 0:
   261              # This converts the constant to an unsigned representation of the
   262              # same value, since BPF only uses unsigned values.
   263              single_constant = self._arch.truncate_word(single_constant)
   264          return single_constant
   265  
   266      # constant = [ '~' ] , '(' , value , ')'
   267      #          | [ '~' ] , single-constant
   268      #          ;
   269      def _parse_constant(self, tokens):
   270          negate = False
   271          if tokens[0].type == 'BITWISE_COMPLEMENT':
   272              negate = True
   273              tokens.pop(0)
   274              if not tokens:
   275                  self._parser_state.error('empty complement')
   276              if tokens[0].type == 'BITWISE_COMPLEMENT':
   277                  self._parser_state.error(
   278                      'invalid double complement', token=tokens[0])
   279          if tokens[0].type == 'LPAREN':
   280              last_open_paren = tokens.pop(0)
   281              single_value = self.parse_value(tokens)
   282              if not tokens or tokens[0].type != 'RPAREN':
   283                  self._parser_state.error(
   284                      'unclosed parenthesis', token=last_open_paren)
   285          else:
   286              single_value = self._parse_single_constant(tokens[0])
   287          tokens.pop(0)
   288          if negate:
   289              single_value = self._arch.truncate_word(~single_value)
   290          return single_value
   291  
   292      # value = constant , [ { '|' , constant } ]
   293      #       ;
   294      def parse_value(self, tokens):
   295          """Parse constants separated bitwise OR operator |.
   296  
   297          Constants can be:
   298  
   299          - A number that can be parsed with int(..., base=0)
   300          - A named constant expression.
   301          - A parenthesized, valid constant expression.
   302          - A valid constant expression prefixed with the unary bitwise
   303            complement operator ~.
   304          - A series of valid constant expressions separated by bitwise
   305            OR operator |.
   306  
   307          If there is an error parsing any of the constants, the whole process
   308          fails.
   309          """
   310  
   311          value = 0
   312          while tokens:
   313              value |= self._parse_constant(tokens)
   314              if not tokens or tokens[0].type != 'BITWISE_OR':
   315                  break
   316              tokens.pop(0)
   317          else:
   318              self._parser_state.error('empty constant')
   319          return value
   320  
   321      # atom = argument , op , value
   322      #      ;
   323      def _parse_atom(self, tokens):
   324          if not tokens:
   325              self._parser_state.error('missing argument')
   326          argument = tokens.pop(0)
   327          if argument.type != 'ARGUMENT':
   328              self._parser_state.error('invalid argument', token=argument)
   329  
   330          if not tokens:
   331              self._parser_state.error('missing operator')
   332          operator = tokens.pop(0)
   333          if operator.type != 'OP':
   334              self._parser_state.error('invalid operator', token=operator)
   335  
   336          value = self.parse_value(tokens)
   337          argument_index = int(argument.value[3:])
   338          if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
   339              self._parser_state.error('invalid argument', token=argument)
   340          return Atom(argument_index, operator.value, value)
   341  
   342      # clause = atom , [ { '&&' , atom } ]
   343      #        ;
   344      def _parse_clause(self, tokens):
   345          atoms = []
   346          while tokens:
   347              atoms.append(self._parse_atom(tokens))
   348              if not tokens or tokens[0].type != 'AND':
   349                  break
   350              tokens.pop(0)
   351          else:
   352              self._parser_state.error('empty clause')
   353          return atoms
   354  
   355      # argument-expression = clause , [ { '||' , clause } ]
   356      #                   ;
   357      def parse_argument_expression(self, tokens):
   358          """Parse a argument expression in Disjunctive Normal Form.
   359  
   360          Since BPF disallows back jumps, we build the basic blocks in reverse
   361          order so that all the jump targets are known by the time we need to
   362          reference them.
   363          """
   364  
   365          clauses = []
   366          while tokens:
   367              clauses.append(self._parse_clause(tokens))
   368              if not tokens or tokens[0].type != 'OR':
   369                  break
   370              tokens.pop(0)
   371          else:
   372              self._parser_state.error('empty argument expression')
   373          return clauses
   374  
   375      # default-action = 'kill-process'
   376      #                | 'kill-thread'
   377      #                | 'kill'
   378      #                | 'trap'
   379      #                | 'user-notify'
   380      #                ;
   381      def _parse_default_action(self, tokens):
   382          if not tokens:
   383              self._parser_state.error('missing default action')
   384          action_token = tokens.pop(0)
   385          if action_token.type != 'ACTION':
   386              return self._parser_state.error(
   387                  'invalid default action', token=action_token)
   388          if action_token.value == 'kill-process':
   389              return bpf.KillProcess()
   390          if action_token.value == 'kill-thread':
   391              return bpf.KillThread()
   392          if action_token.value == 'kill':
   393              return self._kill_action
   394          if action_token.value == 'trap':
   395              return bpf.Trap()
   396          if action_token.value == 'user-notify':
   397              return bpf.UserNotify()
   398          return self._parser_state.error(
   399              'invalid permissive default action', token=action_token)
   400  
   401      # action = 'allow' | '1'
   402      #        | 'kill-process'
   403      #        | 'kill-thread'
   404      #        | 'kill'
   405      #        | 'trap'
   406      #        | 'trace'
   407      #        | 'log'
   408      #        | 'user-notify'
   409      #        | 'return' , single-constant
   410      #        ;
   411      def parse_action(self, tokens):
   412          if not tokens:
   413              self._parser_state.error('missing action')
   414          action_token = tokens.pop(0)
   415          # denylist policies must specify a return for every line.
   416          if self._denylist:
   417              if action_token.type != 'RETURN':
   418                  self._parser_state.error('invalid denylist policy')
   419  
   420          if action_token.type == 'ACTION':
   421              if action_token.value == 'allow':
   422                  return bpf.Allow()
   423              if action_token.value == 'kill':
   424                  return self._kill_action
   425              if action_token.value == 'kill-process':
   426                  return bpf.KillProcess()
   427              if action_token.value == 'kill-thread':
   428                  return bpf.KillThread()
   429              if action_token.value == 'trap':
   430                  return bpf.Trap()
   431              if action_token.value == 'trace':
   432                  return bpf.Trace()
   433              if action_token.value == 'user-notify':
   434                  return bpf.UserNotify()
   435              if action_token.value == 'log':
   436                  return bpf.Log()
   437          elif action_token.type == 'NUMERIC_CONSTANT':
   438              constant = self._parse_single_constant(action_token)
   439              if constant == 1:
   440                  return bpf.Allow()
   441          elif action_token.type == 'RETURN':
   442              if not tokens:
   443                  self._parser_state.error('missing return value')
   444              return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
   445          return self._parser_state.error('invalid action', token=action_token)
   446  
   447      # single-filter = action
   448      #               | argument-expression , [ ';' , action ]
   449      #               | '!','(', argument-expression, [ ';', action ], ')'
   450      #               ;
   451      def _parse_single_filter(self, tokens):
   452          if not tokens:
   453              self._parser_state.error('missing filter')
   454          if tokens[0].type == 'ARGUMENT':
   455  	    # Only argument expressions can start with an ARGUMENT token.
   456              argument_expression = self.parse_argument_expression(tokens)
   457              if tokens and tokens[0].type == 'SEMICOLON':
   458                  tokens.pop(0)
   459                  action = self.parse_action(tokens)
   460              else:
   461                  action = bpf.Allow()
   462              return Filter(argument_expression, action)
   463          else:
   464              return Filter(None, self.parse_action(tokens))
   465  
   466      # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
   467      #        | single-filter
   468      #        ;
   469      def parse_filter(self, tokens):
   470          """Parse a filter and return a list of Filter objects."""
   471          if not tokens:
   472              self._parser_state.error('missing filter')
   473          filters = []
   474          if tokens[0].type == 'LBRACE':
   475              opening_brace = tokens.pop(0)
   476              while tokens:
   477                  filters.append(self._parse_single_filter(tokens))
   478                  if not tokens or tokens[0].type != 'COMMA':
   479                      break
   480                  tokens.pop(0)
   481              if not tokens or tokens[0].type != 'RBRACE':
   482                  self._parser_state.error('unclosed brace', token=opening_brace)
   483              tokens.pop(0)
   484          else:
   485              filters.append(self._parse_single_filter(tokens))
   486          return filters
   487  
   488      # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
   489      #                ;
   490      def _parse_key_value_pair(self, tokens):
   491          if not tokens:
   492              self._parser_state.error('missing key')
   493          key = tokens.pop(0)
   494          if key.type != 'IDENTIFIER':
   495              self._parser_state.error('invalid key', token=key)
   496          if not tokens:
   497              self._parser_state.error('missing equal')
   498          if tokens[0].type != 'EQUAL':
   499              self._parser_state.error('invalid equal', token=tokens[0])
   500          tokens.pop(0)
   501          value_list = []
   502          while tokens:
   503              value = tokens.pop(0)
   504              if value.type != 'IDENTIFIER':
   505                  self._parser_state.error('invalid value', token=value)
   506              value_list.append(value.value)
   507              if not tokens or tokens[0].type != 'COMMA':
   508                  break
   509              tokens.pop(0)
   510          else:
   511              self._parser_state.error('empty value')
   512          return (key.value, value_list)
   513  
   514      # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
   515      #          ;
   516      def _parse_metadata(self, tokens):
   517          if not tokens:
   518              self._parser_state.error('missing opening bracket')
   519          opening_bracket = tokens.pop(0)
   520          if opening_bracket.type != 'LBRACKET':
   521              self._parser_state.error(
   522                  'invalid opening bracket', token=opening_bracket)
   523          metadata = {}
   524          while tokens:
   525              first_token = tokens[0]
   526              key, value = self._parse_key_value_pair(tokens)
   527              if key in metadata:
   528                  self._parser_state.error(
   529                      'duplicate metadata key: "%s"' % key, token=first_token)
   530              metadata[key] = value
   531              if not tokens or tokens[0].type != 'SEMICOLON':
   532                  break
   533              tokens.pop(0)
   534          if not tokens or tokens[0].type != 'RBRACKET':
   535              self._parser_state.error('unclosed bracket', token=opening_bracket)
   536          tokens.pop(0)
   537          return metadata
   538  
   539      # syscall-descriptor = syscall-name , [ metadata ]
   540      #                    | syscall-group-name , [ metadata ]
   541      #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse one syscall descriptor and return an iterable of Syscalls.

        A descriptor is either a plain syscall name or a 'group@namespace'
        syscall-group reference, optionally followed by a metadata block.
        Returns an empty tuple when metadata restricts the descriptor to
        architectures other than the current one.
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` as a syscall name is a special case since kill is also a valid
        # action and actions have precedence over identifiers in the tokenizer.
        if (syscall_descriptor.type != 'IDENTIFIER' and
            syscall_descriptor.value != 'kill'):
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            # An 'arch' key filters this descriptor to specific architectures.
            if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            # Expand the group to one Syscall per member name.
            return (Syscall(name, self._arch.syscalls[name])
                    for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )
   579  
   580      # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
   581      #                       ':' , filter
   582      #                  | syscall-descriptor , ':' , filter
   583      #                  ;
   584      def parse_filter_statement(self, tokens):
   585          """Parse a filter statement and return a ParsedFilterStatement."""
   586          if not tokens:
   587              self._parser_state.error('empty filter statement')
   588          syscall_descriptors = []
   589          if tokens[0].type == 'LBRACE':
   590              opening_brace = tokens.pop(0)
   591              while tokens:
   592                  syscall_descriptors.extend(
   593                      self._parse_syscall_descriptor(tokens))
   594                  if not tokens or tokens[0].type != 'COMMA':
   595                      break
   596                  tokens.pop(0)
   597              if not tokens or tokens[0].type != 'RBRACE':
   598                  self._parser_state.error('unclosed brace', token=opening_brace)
   599              tokens.pop(0)
   600          else:
   601              syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
   602          if not tokens:
   603              self._parser_state.error('missing colon')
   604          if tokens[0].type != 'COLON':
   605              self._parser_state.error('invalid colon', token=tokens[0])
   606          # Given that there can be multiple syscalls and filters in a single
   607          # filter statement, use the colon token as the anchor for error location
   608          # purposes.
   609          colon_token = tokens.pop(0)
   610          parsed_filter = self.parse_filter(tokens)
   611          if not syscall_descriptors:
   612              return None
   613          return ParsedFilterStatement(
   614              tuple(syscall_descriptors), parsed_filter, colon_token)
   615  
   616      # include-statement = '@include' , posix-path
   617      #                   ;
   618      def _parse_include_statement(self, tokens):
   619          if not tokens:
   620              self._parser_state.error('empty filter statement')
   621          if tokens[0].type != 'INCLUDE':
   622              self._parser_state.error('invalid include', token=tokens[0])
   623          tokens.pop(0)
   624          if not tokens:
   625              self._parser_state.error('empty include path')
   626          include_path = tokens.pop(0)
   627          if include_path.type != 'PATH':
   628              self._parser_state.error(
   629                  'invalid include path', token=include_path)
   630          if len(self._parser_states) == self._include_depth_limit:
   631              self._parser_state.error('@include statement nested too deep')
   632          include_filename = os.path.normpath(
   633              os.path.join(
   634                  os.path.dirname(self._parser_state.filename),
   635                  include_path.value))
   636          if not os.path.isfile(include_filename):
   637              self._parser_state.error(
   638                  'Could not @include %s' % include_filename, token=include_path)
   639          return self._parse_policy_file(include_filename)
   640  
    def _parse_frequency_file(self, filename):
        """Parse a frequency file of 'syscall: count' lines.

        Returns a defaultdict(int) keyed by Syscall with accumulated counts.
        A fresh ParserState is pushed for the duration so errors point at the
        frequency file rather than the policy that referenced it.
        """
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename) as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    syscall_numbers = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    # base=0 accepts decimal, hex (0x...) and octal (0o...)
                    # literals; negative counts are rejected.
                    number_value = int(number.value, base=0)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    # A group descriptor may expand to several syscalls; each
                    # one accumulates this line's count.
                    for syscall_number in syscall_numbers:
                        frequency_mapping[syscall_number] += number_value
            return frequency_mapping
        finally:
            self._parser_states.pop()
   671  
   672      # frequency-statement = '@frequency' , posix-path
   673      #                      ;
   674      def _parse_frequency_statement(self, tokens):
   675          if not tokens:
   676              self._parser_state.error('empty frequency statement')
   677          if tokens[0].type != 'FREQUENCY':
   678              self._parser_state.error('invalid frequency', token=tokens[0])
   679          tokens.pop(0)
   680          if not tokens:
   681              self._parser_state.error('empty frequency path')
   682          frequency_path = tokens.pop(0)
   683          if frequency_path.type != 'PATH':
   684              self._parser_state.error(
   685                  'invalid frequency path', token=frequency_path)
   686          frequency_filename = os.path.normpath(
   687              os.path.join(
   688                  os.path.dirname(self._parser_state.filename),
   689                  frequency_path.value))
   690          if not os.path.isfile(frequency_filename):
   691              self._parser_state.error(
   692                  'Could not open frequency file %s' % frequency_filename,
   693                  token=frequency_path)
   694          return self._parse_frequency_file(frequency_filename)
   695  
   696      # default-statement = '@default' , default-action
   697      #                   ;
   698      def _parse_default_statement(self, tokens):
   699          if not tokens:
   700              self._parser_state.error('empty default statement')
   701          if tokens[0].type != 'DEFAULT':
   702              self._parser_state.error('invalid default', token=tokens[0])
   703          tokens.pop(0)
   704          if not tokens:
   705              self._parser_state.error('empty action')
   706          return self._parse_default_action(tokens)
   707  
    def _parse_policy_file(self, filename):
        """Parse a single policy file into a list of filter statements.

        Pushes a fresh ParserState for this filename so diagnostics are
        reported against the correct file, then dispatches each tokenized
        line on its leading token type (@include, @frequency, @default,
        @denylist, or a plain filter statement).

        Returns:
            The list of parsed filter statements; statements from
            @include'd files are expanded inline into the same list.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            # Becomes True once the @denylist header has been seen in this
            # file; required (and only legal) when compiling with --denylist.
            denylist_header = False
            with open(filename) as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    if tokens[0].type == 'INCLUDE':
                        # Included files contribute their statements inline.
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        # Frequencies accumulate across all @frequency files
                        # referenced by the policy.
                        for syscall_number, frequency in self._parse_frequency_statement(
                                tokens).items():
                            self._frequency_mapping[
                                syscall_number] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    elif tokens[0].type == 'DENYLIST':
                        # @denylist is a bare header token; consume it.
                        tokens.pop()
                        if not self._denylist:
                            self._parser_state.error('policy is denylist, but '
                                                     'flag --denylist not '
                                                     'passed in.')
                        else:
                            denylist_header = True
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    # Every statement parser consumes the tokens it uses;
                    # anything left over on the line is a syntax error.
                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            if self._denylist and not denylist_header:
                self._parser_state.error('policy must contain @denylist flag to'
                                         ' be compiled with --denylist flag.')
            return statements
        finally:
            # Always pop the parser state, even on error, so that nested
            # @include parsing unwinds to the including file's state.
            self._parser_states.pop()
   751  
   752      def parse_file(self, filename):
   753          """Parse a file and return the list of FilterStatements."""
   754          self._frequency_mapping = collections.defaultdict(int)
   755          try:
   756              statements = [x for x in self._parse_policy_file(filename)]
   757          except RecursionError:
   758              raise ParseException(
   759                  'recursion limit exceeded',
   760                  filename,
   761                  line=self._parser_states[-1].line)
   762  
   763          # Collapse statements into a single syscall-to-filter-list, remembering
   764          # the token for each filter for better diagnostics.
   765          syscall_filter_mapping = {}
   766          syscall_filter_definitions = {}
   767          filter_statements = []
   768          for syscalls, filters, token in statements:
   769              for syscall in syscalls:
   770                  if syscall not in syscall_filter_mapping:
   771                      filter_statements.append(
   772                          FilterStatement(
   773                              syscall, self._frequency_mapping.get(syscall, 1),
   774                              []))
   775                      syscall_filter_mapping[syscall] = filter_statements[-1]
   776                      syscall_filter_definitions[syscall] = []
   777                  for filt in filters:
   778                      syscall_filter_mapping[syscall].filters.append(filt)
   779                      syscall_filter_definitions[syscall].append(token)
   780          default_action = self._override_default_action or self._default_action
   781          for filter_statement in filter_statements:
   782              unconditional_actions_suffix = list(
   783                  itertools.dropwhile(lambda filt: filt.expression is not None,
   784                                      filter_statement.filters))
   785              if len(unconditional_actions_suffix) == 1:
   786                  # The last filter already has an unconditional action, no need
   787                  # to add another one.
   788                  continue
   789              if len(unconditional_actions_suffix) > 1:
   790                  previous_definition_token = syscall_filter_definitions[
   791                      filter_statement.syscall][
   792                          -len(unconditional_actions_suffix)]
   793                  current_definition_token = syscall_filter_definitions[
   794                      filter_statement.syscall][
   795                          -len(unconditional_actions_suffix) + 1]
   796                  raise ParseException(
   797                      ('Syscall %s (number %d) already had '
   798                       'an unconditional action applied') %
   799                      (filter_statement.syscall.name,
   800                       filter_statement.syscall.number),
   801                      filename=current_definition_token.filename,
   802                      token=current_definition_token) from ParseException(
   803                          'Previous definition',
   804                          filename=previous_definition_token.filename,
   805                          token=previous_definition_token)
   806              assert not unconditional_actions_suffix
   807              filter_statement.filters.append(
   808                  Filter(expression=None, action=default_action))
   809          return ParsedPolicy(default_action, filter_statements)