github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/tools/syz-headerparser/headerlib/struct_walker.py (about)

     1  # Copyright 2017 syzkaller project authors. All rights reserved.
     2  # Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  '''
     5  This module provides classes which implement AST traversal in order to extract
     6  items belonging to a struct.
     7  '''
     8  
     9  import collections
    10  import logging
    11  
    12  from pycparser import c_ast
    13  from .header_preprocessor import HeaderFilePreprocessor
    14  
    15  
    16  class StructWalkerException(Exception):
    17      pass
    18  
    19  
    20  class StructWalker(c_ast.NodeVisitor):
    21      '''
    22      Given an ast obtained by parsing a header file, return a hierarchy
    23      dictionary. The ast is expected to be of type pycparser.c_ast.FileAST.
    24  
    25      Usage :
    26  
    27      >>> import tempfile
    28      >>> t = tempfile.NamedTemporaryFile()
    29      >>> contents = """
    30      ... #define STRUCT_SIZE 1337
    31      ... struct ARRAY_OF_POINTERS_CONTAINER {
    32      ... 	unsigned int *ptr[10];
    33      ... 	int **n;
    34      ... };
    35      ... struct ARRAY_CONTAINER {
    36      ... 	  int g[10];
    37      ... 		  int h[20][30];
    38      ... };
    39      ... struct REGULAR_STRUCT {
    40      ... 	int x;
    41      ... 	char *y;
    42      ... 	void *ptr;
    43      ... };
    44      ... struct STRUCT_WITH_STRUCT_PTR {
    45      ... 	struct REGULAR_STRUCT *struct_ptr;
    46      ... 	int z;
    47      ... };
    48      ... struct STRUCT_WITH_STRUCT_INST {
    49      ... 	struct REGULAR_STRUCT regular_struct_inst;
    50      ... 	int a;
    51      ... };
    52      ... struct STRUCT_WITH_STRUCT_ARRAY {
    53      ... 	struct REGULAR_STRUCT regular_struct_array[100];
    54      ... 	int b;
    55      ... };
    56      ... struct STRUCT_WITH_ANONYMOUS_STRUCT {
    57      ... 	struct {
    58      ... 		int g;
    59      ... 		int h;
    60      ... 		int i;
    61      ... 	} anonymous_struct;
    62      ... };
    63      ... struct STRUCT_WITH_ANONYMOUS_UNION {
    64      ... 	union {
    65      ... 		int t;
    66      ... 		char r[100];
    67      ... 	} anonymous_union;
    68      ... };
    69      ... struct STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO {
    70      ... 	struct REGULAR_STRUCT regular_struct_array[STRUCT_SIZE];
    71      ... };
    72      ... struct STRUCT_WITH_2D_ARRAY_INST {
    73      ... 	struct REGULAR_STRUCT regular_struct_array_2D[10][10];
    74      ... };
    75      ... struct NESTED_ANONYMOUS_STRUCT {
    76      ... 	struct {
    77      ... 		int x;
    78      ... 		struct {
    79      ... 			int y;
    80      ... 			int z;
    81      ... 		} level_2;
    82      ... 	} level_1;
    83      ... };
    84      ... """
    85  	>>> t.write(contents) ; t.flush()
    86      >>> struct_walker = StructWalker(filenames=[t.name])
    87      >>> local_hierarchy = struct_walker.generate_local_hierarchy()
    88      >>> for k in local_hierarchy:
    89      ...     print k
    90      ...     print local_hierarchy[k]
    91      ARRAY_OF_POINTERS_CONTAINER
    92      [('unsigned int*[10]', 'ptr'), ('int**', 'n')]
    93      STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO
    94      [('struct REGULAR_STRUCT[1337]', 'regular_struct_array')]
    95      STRUCT_WITH_2D_ARRAY_INST
    96      [('struct REGULAR_STRUCT[10][10]', 'regular_struct_array_2D')]
    97      STRUCT_WITH_STRUCT_ARRAY
    98      [('struct REGULAR_STRUCT[100]', 'regular_struct_array'), ('int', 'b')]
    99      NESTED_ANONYMOUS_STRUCT
   100      [('int', 'level_1.x'), ('int', 'level_1.level_2.y'), ('int', 'level_1.level_2.z')]
   101      STRUCT_WITH_ANONYMOUS_STRUCT
   102      [('int', 'anonymous_struct.g'), ('int', 'anonymous_struct.h'), ('int', 'anonymous_struct.i')]
   103      STRUCT_WITH_ANONYMOUS_UNION
   104      [('int', 'anonymous_union.t'), ('char[100]', 'anonymous_union.r')]
   105      STRUCT_WITH_STRUCT_INST
   106      [('struct REGULAR_STRUCT', 'regular_struct_inst'), ('int', 'a')]
   107      ARRAY_CONTAINER
   108      [('int[10]', 'g'), ('int[20][30]', 'h')]
   109      REGULAR_STRUCT
   110      [('int', 'x'), ('char*', 'y'), ('void*', 'ptr')]
   111      STRUCT_WITH_STRUCT_PTR
   112      [('struct REGULAR_STRUCT*', 'struct_ptr'), ('int', 'z')]
   113      '''
   114  
   115      def __init__(self, ast=None, filenames=[], include_lines='', loglvl=logging.INFO):
   116          super(StructWalker, self).__init__()
   117          self.ast = ast
   118          self.filenames = filenames
   119  
   120          if not filenames and not ast:
   121              raise StructWalkerException('Specify either "filename" or "ast" to create'
   122                                          'StructParser object')
   123  
   124          if not self.ast:
   125              self.ast = HeaderFilePreprocessor(self.filenames, include_lines=include_lines,
   126                                                loglvl=loglvl).get_ast()
   127  
   128          self.include_lines = include_lines
   129          self.local_structs_hierarchy = {}
   130          self._setuplogging(loglvl)
   131  
   132      def _setuplogging(self, loglvl):
   133          self.logger = logging.getLogger(self.__class__.__name__)
   134          formatter = logging.Formatter('DEBUG:%(name)s:%(message)s')
   135          sh = logging.StreamHandler()
   136          sh.setFormatter(formatter)
   137          sh.setLevel(loglvl)
   138          self.logger.addHandler(sh)
   139          self.logger.setLevel(loglvl)
   140  
   141      def _format_item(self, processed_item):
   142          fmt_type = processed_item['type']
   143          fmt_type = ' '.join(fmt_type)
   144  
   145          self.logger.debug('_format_item : %s', processed_item)
   146  
   147          if 'is_ptr' in processed_item and 'is_fnptr' not in processed_item:
   148              fmt_type = '%s%s' % (fmt_type, '*' * processed_item['is_ptr'])
   149  
   150          if 'is_array' in processed_item and 'array_size' in processed_item:
   151              size_str = str(processed_item['array_size']).replace(', ', '][')
   152              fmt_type = '%s%s' % (fmt_type, size_str)
   153  
   154          fmt_identifier = processed_item['identifier']
   155  
   156          return [(fmt_type, fmt_identifier)]
   157  
   158      def _recursive_process_item(self, item_ast, processed_item, parent):
   159          self.logger.debug('--- _recursive_process_item : %s', type(item_ast))
   160          if isinstance(item_ast, c_ast.Decl):
   161              processed_item['identifier'] = item_ast.name
   162              return self._recursive_process_item(item_ast.type, processed_item, item_ast)
   163  
   164          elif isinstance(item_ast, c_ast.TypeDecl):
   165              return self._recursive_process_item(item_ast.type, processed_item, item_ast)
   166  
   167          elif isinstance(item_ast, c_ast.IdentifierType):
   168              if len(item_ast.names) > 0:
   169                  processed_item['type'] = item_ast.names
   170                  return self._format_item(processed_item)
   171  
   172          elif (isinstance(item_ast, c_ast.Struct) or
   173                isinstance(item_ast, c_ast.Union)):
   174              if not item_ast.name:
   175                  nodename, _items_list = self._traverse_ast(item_ast, toplevel=False)
   176                  try:
   177                      items_list = [(i[0], '%s.%s' % (parent.declname, i[1])) for i in _items_list]
   178                  except AttributeError as e:
   179                      self.logger.info('-- Encountered anonymous_struct/anonymous_union with no name')
   180                      raise StructWalkerException('Encountered anonymous_struct/anonymous_union with no name')
   181  
   182                  return items_list
   183              else:
   184                  processed_item['type'] = ['struct %s' % (item_ast.name)]
   185                  return self._format_item(processed_item)
   186  
   187          elif isinstance(item_ast, c_ast.PtrDecl):
   188              if 'is_ptr' not in processed_item:
   189                  processed_item['is_ptr'] = 0
   190              processed_item['is_ptr'] = processed_item['is_ptr'] + 1
   191              return self._recursive_process_item(item_ast.type, processed_item, item_ast)
   192  
   193          elif isinstance(item_ast, c_ast.ArrayDecl):
   194              processed_item['is_array'] = True
   195              if 'array_size' not in processed_item:
   196                  processed_item['array_size'] = []
   197              processed_item['array_size'].append(int(item_ast.dim.value))
   198              return self._recursive_process_item(item_ast.type, processed_item, item_ast)
   199  
   200          elif isinstance(item_ast, c_ast.Enum):
   201              processed_item['type'] = ['enum %s' % (item_ast.name)]
   202              return self._format_item(processed_item)
   203  
   204          elif isinstance(item_ast, c_ast.FuncDecl):
   205              processed_item['is_fnptr'] = True
   206              processed_item['type'] = ['void (*)()']
   207              return self._format_item(processed_item)
   208  
   209      def _traverse_ast(self, node, toplevel=True):
   210          items_list = []
   211  
   212          # Argument structs are used as types, hence anonymous top-level
   213          # structs are ignored.
   214          if toplevel and not node.name:
   215              return None
   216  
   217          if not node.children():
   218              return None
   219  
   220          self.logger.debug('>>> Struct name = %s, coord: %s', node.name, node.coord)
   221          for child in node.children():
   222              item = self._recursive_process_item(child[1], {}, None)
   223              items_list.extend(item)
   224  
   225          self.logger.debug('_traverse_ast returns: %s', str((node.name, items_list)))
   226          return (node.name, items_list)
   227  
   228      def visit_Struct(self, node, *a):
   229          if node.name in self.local_structs_hierarchy:
   230              self.logger.info('Encountered %s again. Ignoring.', repr(node.name))
   231              return
   232  
   233          try:
   234              desc = self._traverse_ast(node)
   235          except StructWalkerException as e:
   236              self.logger.info('-- Exception raised by StructWalkerException in %s,'
   237                               'inspect manually.',
   238                               repr(node.name))
   239              self.logger.info(str(e))
   240              return
   241  
   242          if not desc:
   243              return
   244  
   245          struct_name, struct_items = desc
   246          self.local_structs_hierarchy[struct_name] = struct_items
   247  
   248      def generate_local_hierarchy(self):
   249          self.visit(self.ast)
   250          return self.local_structs_hierarchy