github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/jenkins/docker_diff.py (about)

     1  #!/usr/bin/env python
     2  
     3  # Copyright 2016 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  """Output the differences between two Docker images.
    18  
    19  Usage:
    20    python docker_diff.py [--deep=path] <image_1> <image_2>
    21  """
    22  
    23  import argparse
    24  import json
    25  import logging
    26  import os
    27  import shutil
    28  import subprocess
    29  import tarfile
    30  import tempfile
    31  
    32  
    33  def call(cmd, **kwargs):
    34      """run call with args."""
    35      logging.info('exec %s', ' '.join(cmd))
    36      return subprocess.call(cmd, **kwargs)
    37  
    38  
    39  def check_call(cmd):
    40      """run check_call with args."""
    41      logging.info('exec %s', ' '.join(cmd))
    42      return subprocess.check_call(cmd)
    43  
    44  
    45  def dockerfile_layers(tarball):
    46      '''Given a `docker save` tarball, return the layer metadata in order.'''
    47  
    48      layer_by_parent = {}
    49  
    50      for member in tarball.getmembers():
    51          if member.name.endswith('/json'):
    52              layer = json.load(tarball.extractfile(member))
    53              layer_by_parent[layer.get('parent')] = layer
    54  
    55      # assemble layers by following parent pointers
    56      layers = []
    57      parent = None  # base image has no parent
    58      while parent in layer_by_parent:
    59          layer = layer_by_parent[parent]
    60          layers.append(layer)
    61          parent = layer['id']
    62  
    63      return layers
    64  
    65  
    66  def is_whiteout(fname):
    67      """Check if whiteout."""
    68      return fname.startswith('.wh.') or '/.wh.' in fname
    69  
    70  
    71  def extract_layers(tarball, layers, outdir):
    72      '''Extract docker layers to a specific directory (fake a union mount).'''
    73      for layer in layers:
    74          obj = tarball.extractfile('%s/layer.tar' % layer['id'])
    75          with tarfile.open(fileobj=obj) as fp:
    76              # Complication: .wh. files indicate deletions.
    77              # https://github.com/docker/docker/blob/master/image/spec/v1.md
    78              members = fp.getmembers()
    79              members_good = [m for m in members if not is_whiteout(m.name)]
    80  
    81              fp.extractall(outdir, members_good)
    82  
    83              for member in members:
    84                  name = member.name
    85                  if is_whiteout(name):
    86                      path = os.path.join(outdir, name.replace('.wh.', ''))
    87                      if os.path.isdir(path):
    88                          shutil.rmtree(path)
    89                      elif os.path.exists(path):
    90                          os.unlink(path)
    91  
    92  
    93  def docker_diff(image_a, image_b, tmpdir, deep):
    94      """Diff two docker images."""
    95  
    96      # dump images for inspection
    97      tf_a_path = '%s/a.tar' % tmpdir
    98      tf_b_path = '%s/b.tar' % tmpdir
    99  
   100      check_call(['docker', 'save', '-o', tf_a_path, image_a])
   101      check_call(['docker', 'save', '-o', tf_b_path, image_b])
   102  
   103      tf_a = tarfile.open(tf_a_path)
   104      tf_b = tarfile.open(tf_b_path)
   105  
   106      # find layers in order
   107      layers_a = dockerfile_layers(tf_a)
   108      layers_b = dockerfile_layers(tf_b)
   109  
   110      # minor optimization: skip identical layers
   111      common = len(os.path.commonprefix([layers_a, layers_b]))
   112  
   113      tf_a_out = '%s/a' % tmpdir
   114      tf_b_out = '%s/b' % tmpdir
   115  
   116      extract_layers(tf_a, layers_a[common:], tf_a_out)
   117      extract_layers(tf_b, layers_b[common:], tf_b_out)
   118  
   119      # actually compare the resulting directories
   120  
   121      # just show whether something changed (OS upgrades change a lot)
   122      call(['diff', '-qr', 'a', 'b'], cwd=tmpdir)
   123  
   124      if deep:
   125          # if requested, do a more in-depth content diff as well.
   126          call([
   127              'diff', '-rU5',
   128              os.path.join('a', deep),
   129              os.path.join('b', deep)],
   130               cwd=tmpdir)
   131  
   132  
   133  def main():
   134      """Run docker_diff."""
   135      logging.basicConfig(level=logging.INFO)
   136      parser = argparse.ArgumentParser()
   137      parser.add_argument('--deep', help='Show full differences for specific directory')
   138      parser.add_argument('image_a')
   139      parser.add_argument('image_b')
   140      options = parser.parse_args()
   141  
   142      tmpdir = tempfile.mkdtemp(prefix='docker_diff_')
   143      try:
   144          docker_diff(options.image_a, options.image_b, tmpdir, options.deep)
   145      finally:
   146          shutil.rmtree(tmpdir)
   147  
   148  
# Run the diff only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()