github.com/apache/beam/sdks/v2@v2.48.2/python/container/piputil.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package main
    17  
    18  import (
    19  	"bufio"
    20  	"bytes"
    21  	"errors"
    22  	"fmt"
    23  	"io/ioutil"
    24  	"log"
    25  	"os"
    26  	"path/filepath"
    27  	"strings"
    28  
    29  	"github.com/apache/beam/sdks/v2/go/pkg/beam/util/execx"
    30  )
    31  
    32  // pipInstallRequirements installs the given requirement, if present.
    33  func pipInstallRequirements(files []string, dir, name string) error {
    34  	for _, file := range files {
    35  		if file == name {
    36  			// We run the install process in two rounds in order to avoid as much
    37  			// as possible PyPI downloads. In the first round the --find-links
    38  			// option will make sure that only things staged in the worker will be
    39  			// used without following their dependencies.
    40  			args := []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--disable-pip-version-check", "--no-index", "--no-deps", "--find-links", dir}
    41  			if err := execx.Execute("python", args...); err != nil {
    42  				fmt.Println("Some packages could not be installed solely from the requirements cache. Installing packages from PyPI.")
    43  			}
    44  			// The second install round opens up the search for packages on PyPI and
    45  			// also installs dependencies. The key is that if all the packages have
    46  			// been installed in the first round then this command will be a no-op.
    47  			args = []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--disable-pip-version-check", "--find-links", dir}
    48  			return execx.Execute("python", args...)
    49  		}
    50  	}
    51  	return nil
    52  }
    53  
    54  // pipInstallPackage installs the given package, if present.
    55  func pipInstallPackage(files []string, dir, name string, force, optional bool, extras []string) error {
    56  	for _, file := range files {
    57  		if file == name {
    58  			var packageSpec = name
    59  			if extras != nil {
    60  				packageSpec += "[" + strings.Join(extras, ",") + "]"
    61  			}
    62  			if force {
    63  				// We only use force reinstallation for packages specified using the
    64  				// --extra_package flag.  In this case, we always want to use the
    65  				// user-specified package, overwriting any existing package already
    66  				// installed.  At the same time, we want to avoid reinstalling any
    67  				// dependencies.  The "pip install" command doesn't have a clean way to do
    68  				// this, so we do this in two steps.
    69  				//
    70  				// First, we use the three flags "--upgrade --force-reinstall --no-deps"
    71  				// to "pip install" so as to force the package to be reinstalled, while
    72  				// avoiding reinstallation of dependencies.  Note now that if any needed
    73  				// dependencies were not installed, they will still be missing.
    74  				//
    75  				// Next, we run "pip install" on the package without these flags.  Since the
    76  				// installed version will match the package specified, the package itself
    77  				// will not be reinstalled, but its dependencies will now be resolved and
    78  				// installed if necessary.  This achieves our goal outlined above.
    79  				args := []string{"-m", "pip", "install", "--disable-pip-version-check", "--upgrade", "--force-reinstall", "--no-deps",
    80  					filepath.Join(dir, packageSpec)}
    81  				err := execx.Execute("python", args...)
    82  				if err != nil {
    83  					return err
    84  				}
    85  				args = []string{"-m", "pip", "install", "--disable-pip-version-check", filepath.Join(dir, packageSpec)}
    86  				return execx.Execute("python", args...)
    87  			}
    88  
    89  			// Case when we do not perform a forced reinstall.
    90  			args := []string{"-m", "pip", "install", "--disable-pip-version-check", filepath.Join(dir, packageSpec)}
    91  			return execx.Execute("python", args...)
    92  		}
    93  	}
    94  	if optional {
    95  		return nil
    96  	}
    97  	return errors.New("package '" + name + "' not found")
    98  }
    99  
   100  // installExtraPackages installs all the packages declared in the extra
   101  // packages manifest file.
   102  func installExtraPackages(files []string, extraPackagesFile, dir string) error {
   103  	// First check that extra packages manifest file is present.
   104  	for _, file := range files {
   105  		if file != extraPackagesFile {
   106  			continue
   107  		}
   108  
   109  		// Found the manifest. Install extra packages.
   110  		manifest, err := ioutil.ReadFile(filepath.Join(dir, extraPackagesFile))
   111  		if err != nil {
   112  			return fmt.Errorf("failed to read extra packages manifest file: %v", err)
   113  		}
   114  
   115  		s := bufio.NewScanner(bytes.NewReader(manifest))
   116  		s.Split(bufio.ScanLines)
   117  
   118  		for s.Scan() {
   119  			extraPackage := s.Text()
   120  			log.Printf("Installing extra package: %s", extraPackage)
   121  			if err = pipInstallPackage(files, dir, extraPackage, true, false, nil); err != nil {
   122  				return fmt.Errorf("failed to install extra package %s: %v", extraPackage, err)
   123  			}
   124  		}
   125  		return nil
   126  	}
   127  	return nil
   128  }
   129  
   130  func findBeamSdkWhl(files []string, acceptableWhlSpecs []string) string {
   131  	for _, file := range files {
   132  		if strings.HasPrefix(file, "apache_beam") {
   133  			for _, s := range acceptableWhlSpecs {
   134  				if strings.HasSuffix(file, s) {
   135  					log.Printf("Found Apache Beam SDK wheel: %v", file)
   136  					return file
   137  				}
   138  			}
   139  		}
   140  	}
   141  	return ""
   142  }
   143  
   144  // InstallSdk installs Beam SDK: First, we try to find a compiled
   145  // wheel distribution of Apache Beam among staged files. If we find it, we
   146  // assume that the pipleine was started with the Beam SDK found in the wheel
   147  // file, and we try to install it. If not successful, we fall back to installing
   148  // SDK from source tarball provided in sdkSrcFile.
   149  func installSdk(files []string, workDir string, sdkSrcFile string, acceptableWhlSpecs []string, required bool) error {
   150  	sdkWhlFile := findBeamSdkWhl(files, acceptableWhlSpecs)
   151  	if sdkWhlFile != "" {
   152  		err := pipInstallPackage(files, workDir, sdkWhlFile, false, false, []string{"gcp"})
   153  		if err == nil {
   154  			return nil
   155  		}
   156  		log.Printf("Could not install Apache Beam SDK from a wheel: %v, proceeding to install SDK from source tarball.", err)
   157  	}
   158  	if !required {
   159  		_, err := os.Stat(filepath.Join(workDir, sdkSrcFile))
   160  		if os.IsNotExist(err) {
   161  			return nil
   162  		}
   163  	}
   164  	err := pipInstallPackage(files, workDir, sdkSrcFile, false, false, []string{"gcp"})
   165  	return err
   166  }