#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
A script to pull licenses for Python.
The script is executed within Docker.
"""
import csv
import json
import logging
import os
import shutil
import subprocess
import sys
import tempfile
import traceback
import yaml

from urllib.request import urlopen, Request
from urllib.parse import urlparse
from urllib.parse import urljoin
from tenacity import retry
from tenacity import stop_after_attempt
from tenacity import wait_exponential

LICENSE_DIR = '/opt/apache/beam/third_party_licenses'


def run_bash_command(command):
    """
    Run a command and return what it wrote to stdout.

    :param command: command line as a single string; it is split on
        whitespace, so arguments must not contain spaces.
    :return: the command's stdout decoded as UTF-8.
    :raises subprocess.CalledProcessError: if the command exits non-zero.
    """
    return subprocess.check_output(command.split()).decode('utf-8')


def run_pip_licenses():
    """
    Invoke the ``pip-licenses`` tool and return its parsed JSON output.

    :return: the decoded JSON document; the rest of this script iterates it
        and reads the 'Name', 'URL', 'License' and 'LicenseFile' keys of
        each entry.
    """
    command = 'pip-licenses --with-license-file --with-urls --from=mixed --ignore apache-beam --format=json'
    dependencies = run_bash_command(command)
    return json.loads(dependencies)


@retry(stop=stop_after_attempt(3))
def copy_license_files(dep):
    """
    Copy the license file that pip-licenses located for a dependency into
    ``LICENSE_DIR/<lower-cased name>/LICENSE``.

    :param dep: one pip-licenses entry; 'LicenseFile' and 'Name' are read.
    :return: True when the file was copied, False when pip-licenses reported
        the license file as unknown.

    Any failure is logged and re-raised so that the retry decorator can make
    another attempt (three attempts in total).
    """
    source_license_file = dep['LicenseFile']
    if source_license_file.lower() == 'unknown':
        return False
    name = dep['Name'].lower()
    dest_dir = os.path.join(LICENSE_DIR, name)
    dest_file = os.path.join(dest_dir, 'LICENSE')
    try:
        # exist_ok matters because this function is retried: a directory left
        # behind by a failed attempt must not make every later attempt fail.
        os.makedirs(dest_dir, exist_ok=True)
        shutil.copy(source_license_file, dest_file)
        logging.debug(
            'Successfully pulled license for {dep} with pip-licenses.'.format(
                dep=name))
        return True
    except Exception:
        logging.error(
            'Failed to copy from {source} to {dest}'.format(
                source=source_license_file, dest=dest_file))
        traceback.print_exc()
        raise


def _download(url, dest_path):
    """Stream the content at ``url`` into the file ``dest_path``."""
    request = Request(url, headers={'User-Agent': 'Apache Beam'})
    # Both the HTTP response and the output file are closed on exit, also
    # when the transfer fails part-way through.
    with urlopen(request) as response, open(dest_path, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)


@retry(
    reraise=True,
    wait=wait_exponential(multiplier=2),
    stop=stop_after_attempt(5))
def pull_from_url(dep, configs):
    """
    Download the LICENSE (and optional NOTICE) file configured for a
    dependency.

    :param dep: name of a dependency
    :param configs: a dict from dep_urls_py.yaml
    :return: True when ``dep`` has an entry in ``configs`` and its files were
        placed under ``LICENSE_DIR/<dep>``; False when ``dep`` has no entry.

    Files are downloaded into a temporary directory first and the directory
    is copied to its final location only once every download succeeded, so a
    failed attempt leaves no partial files in the final directory. A
    'license' value of 'skip' downloads no LICENSE file but still creates
    the destination directory. Failures are logged and re-raised; the retry
    decorator makes up to five attempts with exponential back-off.
    """
    if dep not in configs:
        return False

    config = configs[dep]
    dest_dir = os.path.join(LICENSE_DIR, dep)
    cur_temp_dir = tempfile.mkdtemp()

    try:
        if config['license'] == 'skip':
            print('Skip pulling license for ', dep)
        else:
            _download(config['license'], os.path.join(cur_temp_dir, 'LICENSE'))
            logging.debug(
                'Successfully pulled license for {dep} from {url}.'.format(
                    dep=dep, url=config['license']))

        # notice is optional.
        if 'notice' in config:
            _download(config['notice'], os.path.join(cur_temp_dir, 'NOTICE'))

        shutil.copytree(cur_temp_dir, dest_dir)
        return True
    except Exception:
        logging.error(
            'Error occurred when pull license for {dep} from {url}.'.format(
                dep=dep, url=config))
        traceback.print_exc()
        raise
    finally:
        # The temporary directory is removed on success and on failure.
        shutil.rmtree(cur_temp_dir)


def license_url(name, project_url, dep_config):
    """
    Gets the license URL for a dependency, either from the parsed yaml or,
    if it is github, by looking for a license file in the repo.

    :param name: dependency name; it is lower-cased before the yaml lookup.
    :param project_url: project home page reported for the dependency.
    :param dep_config: parsed yaml; its 'pip_dependencies' mapping is read.
    :return: the configured 'license' value when the dependency is listed;
        otherwise, for github.com projects, the first raw URL on the
        ``master`` branch that answers with HTTP 200; otherwise
        ``project_url`` unchanged.
    """
    configs = dep_config['pip_dependencies']
    key = name.lower()
    if key in configs:
        return configs[key]['license']

    parsed = urlparse(project_url)
    if parsed.netloc != "github.com":
        return project_url

    raw = "https://raw.githubusercontent.com"
    path = parsed.path
    if not path.endswith("/"):
        path = path + "/"

    candidates = (
        "LICENSE", "LICENSE.txt", "LICENSE.md", "LICENSE.rst", "COPYING")
    for license_name in candidates:
        url = raw + urljoin(path, "master/" + license_name)
        try:
            with urlopen(url) as response:
                if response.getcode() == 200:
                    return url
        except Exception:
            # Probing is best effort: a missing file or a network error just
            # means the next candidate is tried. KeyboardInterrupt and
            # SystemExit are deliberately not swallowed.
            logging.debug('No license file found at %s', url)
    return project_url


def save_license_list(csv_filename, dependencies, dep_config):
    """
    Save the names, URLs, and license type for python dependency licenses in
    a CSV file.

    :param csv_filename: path of the CSV file to (over)write.
    :param dependencies: pip-licenses entries; 'Name', 'URL' and 'License'
        are read from each one.
    :param dep_config: parsed yaml, passed through to ``license_url``.
    """
    # newline='' is what the csv module documentation asks for, so that the
    # writer alone decides the line terminator.
    with open(csv_filename, mode='w', newline='') as f:
        writer = csv.writer(f)
        for dep in dependencies:
            url = license_url(dep['Name'], dep['URL'], dep_config)
            writer.writerow([dep['Name'], url, dep['License']])


def main():
    """
    Write the license CSV and collect a license file for every dependency.

    :raises RuntimeError: when a license could neither be copied from the
        installed package nor downloaded from a configured URL for at least
        one dependency.
    """
    no_licenses = []
    logging.getLogger().setLevel(logging.INFO)

    # NOTE(review): yaml.full_load is kept as in the original; if this file
    # could ever come from an untrusted source, prefer yaml.safe_load.
    with open('/tmp/license_scripts/dep_urls_py.yaml') as config_file:
        dep_config = yaml.full_load(config_file)

    dependencies = run_pip_licenses()
    csv_filename = os.path.join(LICENSE_DIR, 'python-licenses.csv')
    save_license_list(csv_filename, dependencies, dep_config)

    # add licenses for pip installed packages.
    # try to pull licenses with pip-licenses tool first, if no license pulled,
    # then pull from URLs.
    for dep in dependencies:
        dep_name = dep['Name'].lower()
        if copy_license_files(dep):
            continue
        if pull_from_url(dep_name, dep_config['pip_dependencies']):
            continue
        no_licenses.append(dep_name)

    if no_licenses:
        py_ver = '%d.%d' % (sys.version_info[0], sys.version_info[1])
        how_to = (
            'These licenses were not able to be pulled automatically. '
            'Please search code source of the dependencies on the internet '
            'and add urls to RAW license file at sdks/python/container/'
            'license_scripts/dep_urls_py.yaml for each missing license '
            'and rerun the test. If no such urls can be found, you need '
            'to manually add LICENSE and NOTICE (if available) files at '
            'sdks/python/container/license_scripts/manual_licenses/{dep}/ '
            'and add entries to sdks/python/container/license_scripts/'
            'dep_urls_py.yaml.')
        raise RuntimeError(
            'Could not retrieve licences for packages {license_list} in '
            'Python{py_ver} environment. \n {how_to}'.format(
                py_ver=py_ver,
                license_list=sorted(no_licenses),
                how_to=how_to))
    else:
        logging.info(
            'Successfully pulled licenses for {n} dependencies'.format(
                n=len(dependencies)))


if __name__ == "__main__":
    main()