github.com/westcoastroms/westcoastroms-build@v0.0.0-20190928114312-2350e5a73030/build/make/tools/generate-notice-files.py

github.com/westcoastroms/westcoastroms-build@v0.0.0-20190928114312-2350e5a73030/build/make/tools/generate-notice-files.py (about)

     1  #!/usr/bin/env python
     2  #
     3  # Copyright (C) 2012 The Android Open Source Project
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #      http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  """
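Usage: generate-notice-files --text-output [plain text output file] \
               --html-output [html output file] \
               --xml-output [xml output file] \
               -t [file title] -s [directory of notices] \
               -i [included sub directory] -e [excluded sub directory]

Generate the Android notice files: a plain text file and, optionally,
HTML and XML versions. --html-output, --xml-output, -i and -e are optional;
-i and -e may be given more than once.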
    23  
    24  -h to display this usage message and exit.
    25  """
    26  from collections import defaultdict
    27  import argparse
    28  import hashlib
    29  import itertools
    30  import os
    31  import os.path
    32  import re
    33  import sys
    34  
    35  MD5_BLOCKSIZE = 1024 * 1024
    36  HTML_ESCAPE_TABLE = {
    37      "&": "&amp;",
    38      '"': "&quot;",
    39      "'": "&apos;",
    40      ">": "&gt;",
    41      "<": "&lt;",
    42      }
    43  
    44  def hexify(s):
    45      return ("%02x"*len(s)) % tuple(map(ord, s))
    46  
    47  def md5sum(filename):
    48      """Calculate an MD5 of the file given by FILENAME,
    49      and return hex digest as a string.
    50      Output should be compatible with md5sum command"""
    51  
    52      f = open(filename, "rb")
    53      sum = hashlib.md5()
    54      while 1:
    55          block = f.read(MD5_BLOCKSIZE)
    56          if not block:
    57              break
    58          sum.update(block)
    59      f.close()
    60      return hexify(sum.digest())
    61  
    62  
    63  def html_escape(text):
    64      """Produce entities within text."""
    65      return "".join(HTML_ESCAPE_TABLE.get(c,c) for c in text)
    66  
# Inline stylesheet that combine_notice_files_html() writes between the
# "<html><head>" and "</head>" lines of the generated HTML file.
HTML_OUTPUT_CSS="""
<style type="text/css">
body { padding: 0; font-family: sans-serif; }
.same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }
.label { font-weight: bold; }
.file-list { margin-left: 1em; color: blue; }
</style>
"""
    75  
    76  def combine_notice_files_html(file_hash, input_dir, output_filename):
    77      """Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME."""
    78  
    79      SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
    80  
    81      # Set up a filename to row id table (anchors inside tables don't work in
    82      # most browsers, but href's to table row ids do)
    83      id_table = {}
    84      id_count = 0
    85      for value in file_hash:
    86          for filename in value:
    87               id_table[filename] = id_count
    88          id_count += 1
    89  
    90      # Open the output file, and output the header pieces
    91      output_file = open(output_filename, "wb")
    92  
    93      print >> output_file, "<html><head>"
    94      print >> output_file, HTML_OUTPUT_CSS
    95      print >> output_file, '</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">'
    96  
    97      # Output our table of contents
    98      print >> output_file, '<div class="toc">'
    99      print >> output_file, "<ul>"
   100  
   101      # Flatten the list of lists into a single list of filenames
   102      sorted_filenames = sorted(itertools.chain.from_iterable(file_hash))
   103  
   104      # Print out a nice table of contents
   105      for filename in sorted_filenames:
   106          stripped_filename = SRC_DIR_STRIP_RE.sub(r"\1", filename)
   107          print >> output_file, '<li><a href="#id%d">%s</a></li>' % (id_table.get(filename), stripped_filename)
   108  
   109      print >> output_file, "</ul>"
   110      print >> output_file, "</div><!-- table of contents -->"
   111      # Output the individual notice file lists
   112      print >>output_file, '<table cellpadding="0" cellspacing="0" border="0">'
   113      for value in file_hash:
   114          print >> output_file, '<tr id="id%d"><td class="same-license">' % id_table.get(value[0])
   115          print >> output_file, '<div class="label">Notices for file(s):</div>'
   116          print >> output_file, '<div class="file-list">'
   117          for filename in value:
   118              print >> output_file, "%s <br/>" % (SRC_DIR_STRIP_RE.sub(r"\1", filename))
   119          print >> output_file, "</div><!-- file-list -->"
   120          print >> output_file
   121          print >> output_file, '<pre class="license-text">'
   122          print >> output_file, html_escape(open(value[0]).read())
   123          print >> output_file, "</pre><!-- license-text -->"
   124          print >> output_file, "</td></tr><!-- same-license -->"
   125          print >> output_file
   126          print >> output_file
   127          print >> output_file
   128  
   129      # Finish off the file output
   130      print >> output_file, "</table>"
   131      print >> output_file, "</body></html>"
   132      output_file.close()
   133  
   134  def combine_notice_files_text(file_hash, input_dir, output_filename, file_title):
   135      """Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME."""
   136  
   137      SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
   138      output_file = open(output_filename, "wb")
   139      print >> output_file, file_title
   140      for value in file_hash:
   141        print >> output_file, "============================================================"
   142        print >> output_file, "Notices for file(s):"
   143        for filename in value:
   144          print >> output_file, SRC_DIR_STRIP_RE.sub(r"\1", filename)
   145        print >> output_file, "------------------------------------------------------------"
   146        print >> output_file, open(value[0]).read()
   147      output_file.close()
   148  
   149  def combine_notice_files_xml(files_with_same_hash, input_dir, output_filename):
   150      """Combine notice files in FILE_HASH and output a XML version to OUTPUT_FILENAME."""
   151  
   152      SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
   153  
   154      # Set up a filename to row id table (anchors inside tables don't work in
   155      # most browsers, but href's to table row ids do)
   156      id_table = {}
   157      for file_key in files_with_same_hash.keys():
   158          for filename in files_with_same_hash[file_key]:
   159               id_table[filename] = file_key
   160  
   161      # Open the output file, and output the header pieces
   162      output_file = open(output_filename, "wb")
   163  
   164      print >> output_file, '<?xml version="1.0" encoding="utf-8"?>'
   165      print >> output_file, "<licenses>"
   166  
   167      # Flatten the list of lists into a single list of filenames
   168      sorted_filenames = sorted(id_table.keys())
   169  
   170      # Print out a nice table of contents
   171      for filename in sorted_filenames:
   172          stripped_filename = SRC_DIR_STRIP_RE.sub(r"\1", filename)
   173          print >> output_file, '<file-name contentId="%s">%s</file-name>' % (id_table.get(filename), stripped_filename)
   174  
   175      print >> output_file
   176      print >> output_file
   177  
   178      processed_file_keys = []
   179      # Output the individual notice file lists
   180      for filename in sorted_filenames:
   181          file_key = id_table.get(filename)
   182          if file_key in processed_file_keys:
   183              continue
   184          processed_file_keys.append(file_key)
   185  
   186          print >> output_file, '<file-content contentId="%s"><![CDATA[%s]]></file-content>' % (file_key, html_escape(open(filename).read()))
   187          print >> output_file
   188  
   189      # Finish off the file output
   190      print >> output_file, "</licenses>"
   191      output_file.close()
   192  
   193  def get_args():
   194      parser = argparse.ArgumentParser()
   195      parser.add_argument(
   196          '--text-output', required=True,
   197          help='The text output file path.')
   198      parser.add_argument(
   199          '--html-output',
   200          help='The html output file path.')
   201      parser.add_argument(
   202          '--xml-output',
   203          help='The xml output file path.')
   204      parser.add_argument(
   205          '-t', '--title', required=True,
   206          help='The file title.')
   207      parser.add_argument(
   208          '-s', '--source-dir', required=True,
   209          help='The directory containing notices.')
   210      parser.add_argument(
   211          '-i', '--included-subdirs', action='append',
   212          help='The sub directories which should be included.')
   213      parser.add_argument(
   214          '-e', '--excluded-subdirs', action='append',
   215          help='The sub directories which should be excluded.')
   216      return parser.parse_args()
   217  
   218  def main(argv):
   219      args = get_args()
   220  
   221      txt_output_file = args.text_output
   222      html_output_file = args.html_output
   223      xml_output_file = args.xml_output
   224      file_title = args.title
   225      included_subdirs = []
   226      excluded_subdirs = []
   227      if args.included_subdirs is not None:
   228          included_subdirs = args.included_subdirs
   229      if args.excluded_subdirs is not None:
   230          excluded_subdirs = args.excluded_subdirs
   231  
   232      # Find all the notice files and md5 them
   233      input_dir = os.path.normpath(args.source_dir)
   234      files_with_same_hash = defaultdict(list)
   235      for root, dir, files in os.walk(input_dir):
   236          for file in files:
   237              matched = True
   238              if len(included_subdirs) > 0:
   239                  matched = False
   240                  for subdir in included_subdirs:
   241                      if root.startswith(input_dir + '/' + subdir):
   242                          matched = True
   243                          break
   244              elif len(excluded_subdirs) > 0:
   245                  for subdir in excluded_subdirs:
   246                      if root.startswith(input_dir + '/' + subdir):
   247                          matched = False
   248                          break
   249              if matched and file.endswith(".txt"):
   250                  filename = os.path.join(root, file)
   251                  file_md5sum = md5sum(filename)
   252                  files_with_same_hash[file_md5sum].append(filename)
   253  
   254      filesets = [sorted(files_with_same_hash[md5]) for md5 in sorted(files_with_same_hash.keys())]
   255  
   256      combine_notice_files_text(filesets, input_dir, txt_output_file, file_title)
   257  
   258      if html_output_file is not None:
   259          combine_notice_files_html(filesets, input_dir, html_output_file)
   260  
   261      if xml_output_file is not None:
   262          combine_notice_files_xml(files_with_same_hash, input_dir, xml_output_file)
   263  
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main(sys.argv)