github.com/apache/beam/sdks/v2@v2.48.2/python/container/piputil.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package main 17 18 import ( 19 "bufio" 20 "bytes" 21 "errors" 22 "fmt" 23 "io/ioutil" 24 "log" 25 "os" 26 "path/filepath" 27 "strings" 28 29 "github.com/apache/beam/sdks/v2/go/pkg/beam/util/execx" 30 ) 31 32 // pipInstallRequirements installs the given requirement, if present. 33 func pipInstallRequirements(files []string, dir, name string) error { 34 for _, file := range files { 35 if file == name { 36 // We run the install process in two rounds in order to avoid as much 37 // as possible PyPI downloads. In the first round the --find-links 38 // option will make sure that only things staged in the worker will be 39 // used without following their dependencies. 40 args := []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--disable-pip-version-check", "--no-index", "--no-deps", "--find-links", dir} 41 if err := execx.Execute("python", args...); err != nil { 42 fmt.Println("Some packages could not be installed solely from the requirements cache. Installing packages from PyPI.") 43 } 44 // The second install round opens up the search for packages on PyPI and 45 // also installs dependencies. The key is that if all the packages have 46 // been installed in the first round then this command will be a no-op. 47 args = []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--disable-pip-version-check", "--find-links", dir} 48 return execx.Execute("python", args...) 49 } 50 } 51 return nil 52 } 53 54 // pipInstallPackage installs the given package, if present. 55 func pipInstallPackage(files []string, dir, name string, force, optional bool, extras []string) error { 56 for _, file := range files { 57 if file == name { 58 var packageSpec = name 59 if extras != nil { 60 packageSpec += "[" + strings.Join(extras, ",") + "]" 61 } 62 if force { 63 // We only use force reinstallation for packages specified using the 64 // --extra_package flag. In this case, we always want to use the 65 // user-specified package, overwriting any existing package already 66 // installed. At the same time, we want to avoid reinstalling any 67 // dependencies. The "pip install" command doesn't have a clean way to do 68 // this, so we do this in two steps. 69 // 70 // First, we use the three flags "--upgrade --force-reinstall --no-deps" 71 // to "pip install" so as to force the package to be reinstalled, while 72 // avoiding reinstallation of dependencies. Note now that if any needed 73 // dependencies were not installed, they will still be missing. 74 // 75 // Next, we run "pip install" on the package without these flags. Since the 76 // installed version will match the package specified, the package itself 77 // will not be reinstalled, but its dependencies will now be resolved and 78 // installed if necessary. This achieves our goal outlined above. 79 args := []string{"-m", "pip", "install", "--disable-pip-version-check", "--upgrade", "--force-reinstall", "--no-deps", 80 filepath.Join(dir, packageSpec)} 81 err := execx.Execute("python", args...) 82 if err != nil { 83 return err 84 } 85 args = []string{"-m", "pip", "install", "--disable-pip-version-check", filepath.Join(dir, packageSpec)} 86 return execx.Execute("python", args...) 87 } 88 89 // Case when we do not perform a forced reinstall. 90 args := []string{"-m", "pip", "install", "--disable-pip-version-check", filepath.Join(dir, packageSpec)} 91 return execx.Execute("python", args...) 92 } 93 } 94 if optional { 95 return nil 96 } 97 return errors.New("package '" + name + "' not found") 98 } 99 100 // installExtraPackages installs all the packages declared in the extra 101 // packages manifest file. 102 func installExtraPackages(files []string, extraPackagesFile, dir string) error { 103 // First check that extra packages manifest file is present. 104 for _, file := range files { 105 if file != extraPackagesFile { 106 continue 107 } 108 109 // Found the manifest. Install extra packages. 110 manifest, err := ioutil.ReadFile(filepath.Join(dir, extraPackagesFile)) 111 if err != nil { 112 return fmt.Errorf("failed to read extra packages manifest file: %v", err) 113 } 114 115 s := bufio.NewScanner(bytes.NewReader(manifest)) 116 s.Split(bufio.ScanLines) 117 118 for s.Scan() { 119 extraPackage := s.Text() 120 log.Printf("Installing extra package: %s", extraPackage) 121 if err = pipInstallPackage(files, dir, extraPackage, true, false, nil); err != nil { 122 return fmt.Errorf("failed to install extra package %s: %v", extraPackage, err) 123 } 124 } 125 return nil 126 } 127 return nil 128 } 129 130 func findBeamSdkWhl(files []string, acceptableWhlSpecs []string) string { 131 for _, file := range files { 132 if strings.HasPrefix(file, "apache_beam") { 133 for _, s := range acceptableWhlSpecs { 134 if strings.HasSuffix(file, s) { 135 log.Printf("Found Apache Beam SDK wheel: %v", file) 136 return file 137 } 138 } 139 } 140 } 141 return "" 142 } 143 144 // InstallSdk installs Beam SDK: First, we try to find a compiled 145 // wheel distribution of Apache Beam among staged files. If we find it, we 146 // assume that the pipleine was started with the Beam SDK found in the wheel 147 // file, and we try to install it. If not successful, we fall back to installing 148 // SDK from source tarball provided in sdkSrcFile. 149 func installSdk(files []string, workDir string, sdkSrcFile string, acceptableWhlSpecs []string, required bool) error { 150 sdkWhlFile := findBeamSdkWhl(files, acceptableWhlSpecs) 151 if sdkWhlFile != "" { 152 err := pipInstallPackage(files, workDir, sdkWhlFile, false, false, []string{"gcp"}) 153 if err == nil { 154 return nil 155 } 156 log.Printf("Could not install Apache Beam SDK from a wheel: %v, proceeding to install SDK from source tarball.", err) 157 } 158 if !required { 159 _, err := os.Stat(filepath.Join(workDir, sdkSrcFile)) 160 if os.IsNotExist(err) { 161 return nil 162 } 163 } 164 err := pipInstallPackage(files, workDir, sdkSrcFile, false, false, []string{"gcp"}) 165 return err 166 }