k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/cluster/gce/windows/k8s-node-setup.psm1 (about)

     1  # Copyright 2019 The Kubernetes Authors.
     2  #
     3  # Licensed under the Apache License, Version 2.0 (the "License");
     4  # you may not use this file except in compliance with the License.
     5  # You may obtain a copy of the License at
     6  #
     7  #     http://www.apache.org/licenses/LICENSE-2.0
     8  #
     9  # Unless required by applicable law or agreed to in writing, software
    10  # distributed under the License is distributed on an "AS IS" BASIS,
    11  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  # See the License for the specific language governing permissions and
    13  # limitations under the License.
    14  
    15  <#
    16  .SYNOPSIS
    17    Library for configuring Windows nodes and joining them to the cluster.
    18  
    19  .NOTES
    20    This module depends on common.psm1.
    21  
    22    Some portions copied / adapted from
    23    https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1.
    24  
    25  .EXAMPLE
    26    Suggested usage for dev/test:
    27      [Net.ServicePointManager]::SecurityProtocol = `
    28          [Net.SecurityProtocolType]::Tls12
    29      Invoke-WebRequest `
    30          https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/k8s-node-setup.psm1 `
    31          -OutFile C:\k8s-node-setup.psm1
    32      Invoke-WebRequest `
    33          https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/configure.ps1 `
    34          -OutFile C:\configure.ps1
    35      Import-Module -Force C:\k8s-node-setup.psm1  # -Force to override existing
    36      # Execute functions manually or run configure.ps1.
    37  #>
    38  
    39  # IMPORTANT PLEASE NOTE:
    40  # Any time the file structure in the `windows` directory changes, `windows/BUILD`
    41  # and `k8s.io/release/lib/releaselib.sh` must be manually updated with the changes.
    42  # We HIGHLY recommend not changing the file structure, because consumers of
    43  # Kubernetes releases depend on the release structure remaining stable.
    44  
    45  # TODO: update scripts for these style guidelines:
    46  #  - Remove {} around variable references unless actually needed for clarity.
    47  #  - Always use single-quoted strings unless actually interpolating variables
    48  #    or using escape characters.
    49  #  - Use "approved verbs":
    50  #    https://docs.microsoft.com/en-us/powershell/developer/cmdlet/approved-verbs-for-windows-powershell-commands
    51  #  - Document functions using proper syntax:
    52  #    https://technet.microsoft.com/en-us/library/hh847834(v=wps.620).aspx
    53  
    54  $GCE_METADATA_SERVER = "169.254.169.254"
        # (above) IP address of the GCE metadata server. Functions in this module use
        # it to check/add a /32 route and to query instance metadata over HTTP.
    55  # The "management" interface is used by the kubelet and by Windows pods to talk
    56  # to the rest of the Kubernetes cluster *without NAT*. This interface does not
    57  # exist until an initial HNS network has been created on the Windows node - see
    58  # Add_InitialHnsNetwork().
        # The value is a wildcard pattern; Get_MgmtNetAdapter matches adapter names
        # against it with -like.
    59  $MGMT_ADAPTER_NAME = "vEthernet (Ethernet*"
        # crictl release version and the SHA256 of its download. Presumably consumed
        # by a crictl install function elsewhere in this module - TODO confirm.
    60  $CRICTL_VERSION = 'v1.30.0'
    61  $CRICTL_SHA256 = '43d37d94c0dc03830c0988049537fc22fe4b0ad4273ec9066e03586dc8920eb0'
    62  
    63  Import-Module -Force C:\common.psm1
    64  
    65  # Emits $Message to the console, tagged as a TODO item.
    66  function Log_Todo {
    67    param ([parameter(Mandatory=$true)] [string]$Message)
    68    # Prefix the caller's text so that TODO entries stand out in the log.
    69    $todo_text = 'TODO: ' + $Message
    70    Log-Output $todo_text
    71  }
    72  
    73  # Reports that the feature described by $Message is not implemented yet.
    74  # The text is passed to Log-Output with -Fatal, which exits the script.
    75  function Log_NotImplemented {
    76    param ([parameter(Mandatory=$true)] [string]$Message)
    77    # Assemble the text first, then hand it over together with -Fatal.
    78    $failure_text = 'Not implemented yet: ' + $Message
    79    Log-Output $failure_text -Fatal
    80  }
    81  
    82  # Ensures a route to the GCE metadata server exists. When the lookup fails
    83  # the IPv4 routes are logged with -Fatal; otherwise nothing is emitted.
    84  function Verify_GceMetadataServerRouteIsPresent {
    85    $metadata_prefix = "$GCE_METADATA_SERVER/32"
    86    Try {
    87      # -ErrorAction Stop turns a failed lookup into a catchable exception.
    88      $null = Get-NetRoute -AddressFamily IPv4 -ErrorAction 'Stop' `
    89          -DestinationPrefix $metadata_prefix
    90    } Catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
    91      $routes = Get-NetRoute -AddressFamily IPv4 | Out-String
    92      Log-Output -Fatal `
    93          ("GCE metadata server route is not present as expected.`n" + $routes)
    94    }
    95  }
    96  
    97  # Polls for the route to the GCE metadata server. Returns as soon as the
    98  # route is NOT present, or after the timeout has expired.
    99  function WaitFor_GceMetadataServerRouteToBeRemoved {
   100    $timeout = 60
   101    $poll_interval = 2
   102    $metadata_prefix = "$GCE_METADATA_SERVER/32"
   103    Log-Output ("Waiting up to ${timeout} seconds for GCE metadata server " +
   104                "route to be removed")
   105    for ($waited = 0; $waited -lt $timeout; $waited += $poll_interval) {
   106      Try {
   107        # With -ErrorAction Stop a failed lookup raises an exception, which
   108        # is the signal that the route is gone.
   109        $null = Get-NetRoute -AddressFamily IPv4 -ErrorAction 'Stop' `
   110            -DestinationPrefix $metadata_prefix
   111      } Catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
   112        break
   113      }
   114      # Route still present: pause before the next check.
   115      Start-Sleep $poll_interval
   116    }
   117  }
   118  
   119  # Adds a route to the GCE metadata server on every network adapter.
   120  function Add_GceMetadataServerRoute {
   121    # Why every adapter: before HNS is set up the Windows VM has a
   122    # "vEthernet (nat)" interface and an "Ethernet" interface, and the metadata
   123    # server route lives on the Ethernet interface. Once the HNS network is
   124    # added, a "vEthernet (Ethernet)" interface appears and seems to subsume the
   125    # Ethernet interface's routes (adding routes on Ethernet at that point only
   126    # yields "New-NetRoute : Element not found" errors). The cause is unknown,
   127    # and since the right target is hard to determine the route is simply
   128    # attempted on all adapters.
   129    $metadata_prefix = "${GCE_METADATA_SERVER}/32"
   130    foreach ($adapter in Get-NetAdapter) {
   131      # Failures on individual adapters are expected, so errors are ignored.
   132      $null = New-NetRoute -ErrorAction Ignore `
   133          -InterfaceIndex $adapter.InterfaceIndex `
   134          -DestinationPrefix $metadata_prefix
   135    }
   136  }
   137  
   138  # Returns a PowerShell object (Major, Minor, Build, Revision) for the Windows version.
   139  function Get_WindowsVersion {
   140    # The registry is read rather than `[System.Environment]::OSVersion.Version`, since it gets the OS revision/patch number correctly (https://superuser.com/a/1160428/652018).
   141    $key = 'Registry::HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion'
   142    $fields = [ordered]@{ Major = 'CurrentMajorVersionNumber'; Minor = 'CurrentMinorVersionNumber'; Build = 'CurrentBuild'; Revision = 'UBR' }
   143    $win_ver = New-Object -TypeName PSObject
   144    foreach ($name in $fields.Keys) {
   145      $value = (Get-ItemProperty -Path $key -Name $fields[$name]).($fields[$name])
   146      $win_ver | Add-Member -MemberType NoteProperty -Name $name -Value $value
   147    }
   148    return $win_ver
   149  }
   150  
   151  # Logs debugging details (Windows version, installed hotfixes and the GCE
   152  # image) to the console. Best effort: a thrown exception is discarded.
   153  function Dump-DebugInfoToConsole {
   154    Try {
   155      $version_text = Get_WindowsVersion | Out-String
   156      $hotfix_text = Get-Hotfix | Out-String
   157      $image_text = Get-InstanceMetadata 'image' | Out-String
   158      Log-Output ("Windows version:`n" + $version_text)
   159      Log-Output ("Installed hotfixes:`n" + $hotfix_text)
   160      Log-Output ("GCE Windows image:`n" + $image_text)
   161    } Catch { }
   162  }
   163  
   164  # Configures Windows Defender preferences, if the feature is installed.
   165  function Configure-WindowsDefender {
   166    $defender = Get-WindowsFeature -Name 'Windows-Defender'
   167    if (-not $defender.Installed) { return }
   168    # The final Get-MpPreference call emits the resulting preferences.
   169    Log-Output "Configuring Windows Defender preferences"
   170    Set-MpPreference -SubmitSamplesConsent NeverSend
   171    Log-Output "Disabling Windows Defender sample submission"
   172    Set-MpPreference -MAPSReporting Disabled
   173    Log-Output "Disabling Windows Defender Microsoft Active Protection Service Reporting"
   174    Log-Output "Defender Preferences"
   175    Get-MpPreference
   176  }
   177  
   178  # Converts the kube-env string, given as simple Yaml ("KEY: value" records;
   179  # lines that start with whitespace continue the previous record).
   180  # Returns: a PowerShell Hashtable object containing the key-value pairs from
   181  #   kube-env. A record without a ':' separator is stored with an empty value.
   182  function ConvertFrom_Yaml_KubeEnv {
   183    param (
   184      [parameter(Mandatory=$true)] [string]$kube_env_str
   185    )
   186    $kube_env_table = @{}
   187    $currentLine = $null
   188    switch -regex (${kube_env_str} -split '\r?\n') {
   189        '^(\S.*)' {
   190            # record start pattern: flush the previous record; "$val" maps a missing value ($null) to ''
   191            if ($null -ne $currentLine) {
   192                $key, $val = $currentLine -split ":",2
   193                $kube_env_table[$key] = "$val".Trim("'", " ", "`"")
   194            }
   195            $currentLine = $matches.1
   196            continue
   197        }
   198  
   199        '^(\s+.*)' {
   200            # line that starts with whitespace: continuation of the current record
   201            $currentLine += $matches.1
   202            continue
   203        }
   204    }
   205  
   206    # Handle the last record if any; "$val" again guards against a missing ':'
   207    if ($currentLine) {
   208        $key, $val = $currentLine -split ":",2
   209        $kube_env_table[$key] = "$val".Trim("'", " ", "`"")
   210    }
   211  
   212    return ${kube_env_table}
   213  }
   214  
   215  # Fetches the kube-env from the instance metadata and logs its entries,
   216  # leaving out every entry whose name contains CERT, KEY or TOKEN.
   217  # Returns: a PowerShell Hashtable object containing the key-value pairs from
   218  #   kube-env.
   219  function Fetch-KubeEnv {
   220    # Testing / debugging:
   221    # First:
   222    #   ${kube_env} = Get-InstanceMetadataAttribute 'kube-env'
   223    # or:
   224    #   ${kube_env} = [IO.File]::ReadAllText(".\kubeEnv.txt")
   225    # ${kube_env_table} = ConvertFrom_Yaml_KubeEnv ${kube_env}
   226    # ${kube_env_table}
   227    # ${kube_env_table}.GetType()
   228  
   229    # The type of kube_env is a powershell String.
   230    $kube_env = Get-InstanceMetadataAttribute 'kube-env'
   231    $kube_env_table = ConvertFrom_Yaml_KubeEnv ${kube_env}
   232    # Entries whose name contains one of these markers hold secrets.
   233    $secret_markers = @('CERT', 'KEY', 'TOKEN')
   234    Log-Output "Logging kube-env key-value pairs except CERT, KEY and TOKEN values"
   235    foreach ($entry in $kube_env_table.GetEnumerator()) {
   236      $hits = $secret_markers | Where-Object { $entry.Name.Contains($_) }
   237      if (-not $hits) { Log-Output "$($entry.Name): $($entry.Value)" }
   238    }
   239    return ${kube_env_table}
   240  }
   241  
   242  # Persists the environment variable $Key = $Value at the Machine scope, so
   243  # that it is present for all new shells after a reboot.
   244  function Set_MachineEnvironmentVar {
   245    param ([parameter(Mandatory=$true)] [string]$Key,
   246           [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Value)
   247    # Name the target scope explicitly before handing it to .NET.
   248    $machine_scope = [System.EnvironmentVariableTarget]::Machine
   249    [System.Environment]::SetEnvironmentVariable($Key, $Value, $machine_scope)
   250  }
   251  
   252  # Sets the environment variable $Key to $Value in the current shell.
   253  function Set_CurrentShellEnvironmentVar {
   254    param (
   255      [parameter(Mandatory=$true)] [string]$Key,
   256      [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Value
   257    )
   258    # Process scope is the current shell. $Value is stored literally: quotes, '$' and backticks in it are never evaluated as PowerShell code.
   259    [Environment]::SetEnvironmentVariable($Key, $Value, "Process")
   260  }
   261  
   262  # Sets environment variables used by Kubernetes binaries and by other functions
   263  # in this module. Depends on numerous ${kube_env} keys.
        #
        # ${kube_env} is not a parameter: it is read from an enclosing scope and is
        # used as a Hashtable (ContainsKey / string indexer). Every entry of the table
        # built below is logged and then set both at Machine scope and in the current
        # shell. Keys missing from ${kube_env} index to $null.
   264  function Set-EnvironmentVars {
          # Prefer the Windows-specific runtime endpoint; otherwise log and fall back
          # to the generic CONTAINER_RUNTIME_ENDPOINT key.
   265    if ($kube_env.ContainsKey('WINDOWS_CONTAINER_RUNTIME_ENDPOINT')) {
   266        $container_runtime_endpoint = ${kube_env}['WINDOWS_CONTAINER_RUNTIME_ENDPOINT']
   267    } else {
   268        Log-Output "ERROR: WINDOWS_CONTAINER_RUNTIME_ENDPOINT not set in kube-env, falling back in CONTAINER_RUNTIME_ENDPOINT"
   269        $container_runtime_endpoint = ${kube_env}['CONTAINER_RUNTIME_ENDPOINT']
   270    }
   271    # Turning the kube-env values into environment variables is not required but
   272    # it makes debugging this script easier, and it also makes the syntax a lot
   273    # easier (${env:K8S_DIR} can be expanded within a string but
   274    # ${kube_env}['K8S_DIR'] cannot be afaik).
          # Left side: environment variable name. Right side: its value. Several names
          # differ from their kube-env key (e.g. KUBELET_CONFIG <- KUBELET_CONFIG_FILE).
   275    $env_vars = @{
   276      "K8S_DIR" = ${kube_env}['K8S_DIR']
   277      # Typically 'C:\etc\kubernetes\node\bin' (not just 'C:\etc\kubernetes\node')
   278      "NODE_DIR" = ${kube_env}['NODE_DIR']
   279      "CNI_DIR" = ${kube_env}['CNI_DIR']
   280      "CNI_CONFIG_DIR" = ${kube_env}['CNI_CONFIG_DIR']
   281      "WINDOWS_CNI_STORAGE_PATH" = ${kube_env}['WINDOWS_CNI_STORAGE_PATH']
   282      "WINDOWS_CNI_VERSION" = ${kube_env}['WINDOWS_CNI_VERSION']
   283      "CSI_PROXY_STORAGE_PATH" = ${kube_env}['CSI_PROXY_STORAGE_PATH']
   284      "CSI_PROXY_VERSION" = ${kube_env}['CSI_PROXY_VERSION']
   285      "CSI_PROXY_FLAGS" = ${kube_env}['CSI_PROXY_FLAGS']
   286      "ENABLE_CSI_PROXY" = ${kube_env}['ENABLE_CSI_PROXY']
   287      "PKI_DIR" = ${kube_env}['PKI_DIR']
   288      "CA_FILE_PATH" = ${kube_env}['CA_FILE_PATH']
   289      "KUBELET_CONFIG" = ${kube_env}['KUBELET_CONFIG_FILE']
   290      "BOOTSTRAP_KUBECONFIG" = ${kube_env}['BOOTSTRAP_KUBECONFIG_FILE']
   291      "KUBECONFIG" = ${kube_env}['KUBECONFIG_FILE']
   292      "KUBEPROXY_KUBECONFIG" = ${kube_env}['KUBEPROXY_KUBECONFIG_FILE']
   293      "LOGS_DIR" = ${kube_env}['LOGS_DIR']
   294      "MANIFESTS_DIR" = ${kube_env}['MANIFESTS_DIR']
   295      "INFRA_CONTAINER" = ${kube_env}['WINDOWS_INFRA_CONTAINER']
   296      "WINDOWS_ENABLE_PIGZ" = ${kube_env}['WINDOWS_ENABLE_PIGZ']
   297      "WINDOWS_ENABLE_HYPERV" = ${kube_env}['WINDOWS_ENABLE_HYPERV']
   298      "ENABLE_NODE_PROBLEM_DETECTOR" = ${kube_env}['ENABLE_NODE_PROBLEM_DETECTOR']
   299      "NODEPROBLEMDETECTOR_KUBECONFIG_FILE" = ${kube_env}['WINDOWS_NODEPROBLEMDETECTOR_KUBECONFIG_FILE']
   300      "ENABLE_AUTH_PROVIDER_GCP" = ${kube_env}['ENABLE_AUTH_PROVIDER_GCP']
   301      "AUTH_PROVIDER_GCP_STORAGE_PATH" = ${kube_env}['AUTH_PROVIDER_GCP_STORAGE_PATH']
   302      "AUTH_PROVIDER_GCP_VERSION" = ${kube_env}['AUTH_PROVIDER_GCP_VERSION']
   303      "AUTH_PROVIDER_GCP_HASH_WINDOWS_AMD64" = ${kube_env}['AUTH_PROVIDER_GCP_HASH_WINDOWS_AMD64']
   304      "AUTH_PROVIDER_GCP_WINDOWS_BIN_DIR" = ${kube_env}['AUTH_PROVIDER_GCP_WINDOWS_BIN_DIR']
   305      "AUTH_PROVIDER_GCP_WINDOWS_CONF_FILE" = ${kube_env}['AUTH_PROVIDER_GCP_WINDOWS_CONF_FILE']
   306  
            # Derived values: the current Path with NODE_DIR appended, the fixed
            # network type, and the kubelet cert/key locations inside PKI_DIR.
   307      "Path" = ${env:Path} + ";" + ${kube_env}['NODE_DIR']
   308      "KUBE_NETWORK" = "l2bridge".ToLower()
   309      "KUBELET_CERT_PATH" = ${kube_env}['PKI_DIR'] + '\kubelet.crt'
   310      "KUBELET_KEY_PATH" = ${kube_env}['PKI_DIR'] + '\kubelet.key'
   311  
   312      "CONTAINER_RUNTIME_ENDPOINT" = $container_runtime_endpoint
   313  
   314      'LICENSE_DIR' = 'C:\Program Files\Google\Compute Engine\THIRD_PARTY_NOTICES'
   315    }
   316  
   317    # Set the environment variables in two ways: permanently on the machine (only
   318    # takes effect after a reboot), and in the current shell.
          # Note: @{} is a plain Hashtable, so the entries are not necessarily
          # processed in the order they are written above.
   319    $env_vars.GetEnumerator() | ForEach-Object{
   320      $message = "Setting environment variable: " + $_.key + " = " + $_.value
   321      Log-Output ${message}
   322      Set_MachineEnvironmentVar $_.key $_.value
   323      Set_CurrentShellEnvironmentVar $_.key $_.value
   324    }
   325  }
   326  
   327  # Configures various settings and prerequisites needed for the rest of the
   328  # functions in this module and the Kubernetes binaries to operate properly.
        # In order: disables and stops the Windows Update service, selects TLS 1.2
        # for .NET web requests, then configures Windows Defender.
   329  function Set-PrerequisiteOptions {
   330    # Windows updates cause the node to reboot at arbitrary times.
   331    Log-Output "Disabling Windows Update service"
          # 'wuauserv' is the Windows Update service: first block it from starting,
          # then stop the running instance.
          # NOTE(review): the exit codes of the sc.exe calls are not checked.
   332    & sc.exe config wuauserv start=disabled
   333    & sc.exe stop wuauserv
   334    Write-VerboseServiceInfoToConsole -Service 'wuauserv' -Delay 1
   335  
   336    # Use TLS 1.2: needed for Invoke-WebRequest downloads from github.com.
   337    [Net.ServicePointManager]::SecurityProtocol = `
   338        [Net.SecurityProtocolType]::Tls12
   339  
   340    Configure-WindowsDefender
   341  }
   342  
   343  # Creates the directories where other functions in this module will read and
   344  # write data. Each directory gets its own `mkdir -Force` call.
   345  # Note: C:\tmp is required for running certain kubernetes tests.
   346  #       C:\var\log is used by kubelet to stored container logs and also
   347  #       hard-coded in the fluentd/stackdriver config for log collection.
   348  function Create-Directories {
   349    Log-Output "Creating ${env:K8S_DIR} and its subdirectories."
   350    ForEach ($dir in ("${env:K8S_DIR}", "${env:NODE_DIR}", "${env:LOGS_DIR}",
   351      "${env:CNI_DIR}", "${env:CNI_CONFIG_DIR}", "${env:MANIFESTS_DIR}",
   352      "${env:PKI_DIR}", "${env:LICENSE_DIR}", "C:\tmp", "C:\var\log")) {
   353      mkdir -Force $dir
   354    }
   355  }
   356  
   357  # Fetches the external helper script (hns.psm1) that other functions in this
   358  # module rely on, unless ShouldWrite-File declines to write it.
   359  function Download-HelperScripts {
   360    $hns_module = "${env:K8S_DIR}\hns.psm1"
   361    $hns_url = 'https://storage.googleapis.com/gke-release/winnode/config/sdn/master/hns.psm1'
   362    # Guard clause: leave right away when the file must not be written.
   363    if (-not (ShouldWrite-File $hns_module)) { return }
   364    MustDownload-File -OutFile $hns_module -URLs $hns_url
   365  }
   366  
   367  # Downloads the Kubernetes binaries from kube-env's NODE_BINARY_TAR_URL and
   368  # puts them in a subdirectory of $env:K8S_DIR.
   369  #
   370  # Required ${kube_env} keys:
   371  #   NODE_BINARY_TAR_URL
        # Optional ${kube_env} keys:
        #   NODE_BINARY_TAR_HASH: passed to MustDownload-File as -Hash when present.
        #
        # ${kube_env} is read from an enclosing scope; it is not a parameter.
   372  function DownloadAndInstall-KubernetesBinaries {
   373    # Assume that presence of kubelet.exe indicates that the kubernetes binaries
   374    # were already previously downloaded to this node.
   375    if (-not (ShouldWrite-File ${env:NODE_DIR}\kubelet.exe)) {
   376      return
   377    }
   378  
   379    $tmp_dir = 'C:\k8s_tmp'
   380    New-Item -Force -ItemType 'directory' $tmp_dir | Out-Null
   381  
          # NODE_BINARY_TAR_URL may list several comma-separated URLs; the local file
          # name is taken from the first one.
   382    $urls = ${kube_env}['NODE_BINARY_TAR_URL'].Split(",")
   383    $filename = Split-Path -leaf $urls[0]
   384    $hash = $null
   385    if ($kube_env.ContainsKey('NODE_BINARY_TAR_HASH')) {
   386      $hash = ${kube_env}['NODE_BINARY_TAR_HASH']
   387    }
   388    MustDownload-File -Hash $hash -OutFile $tmp_dir\$filename -URLs $urls
   389  
          # The archive is expected to unpack to kubernetes\node\bin and
          # kubernetes\LICENSES, which are moved into place below.
          # NOTE(review): the exit code of tar is not checked.
   390    tar xzvf $tmp_dir\$filename -C $tmp_dir
   391    Move-Item -Force $tmp_dir\kubernetes\node\bin\* ${env:NODE_DIR}\
   392    Move-Item -Force `
   393        $tmp_dir\kubernetes\LICENSES ${env:LICENSE_DIR}\LICENSES_kubernetes
   394  
   395    # Clean up the temporary directory
   396    Remove-Item -Force -Recurse $tmp_dir
   397  }
   398  
   399  # Fetches csi-proxy.exe from kube-env's CSI_PROXY_STORAGE_PATH and
   400  # CSI_PROXY_VERSION and places it in $env:NODE_DIR. Nothing happens unless
   401  # ENABLE_CSI_PROXY is "true" and ShouldWrite-File approves the target path.
   402  # Note: for now the installation is skipped for non-test clusters.
   403  # Required ${kube_env} keys:
   404  #   CSI_PROXY_STORAGE_PATH and CSI_PROXY_VERSION
   405  function DownloadAndInstall-CSIProxyBinaries {
   406    if ("${env:ENABLE_CSI_PROXY}" -ne "true") { return }
   407    if (-not (ShouldWrite-File ${env:NODE_DIR}\csi-proxy.exe)) { return }
   408    # Stage the download in a scratch directory, then move it into place.
   409    $binary = 'csi-proxy.exe'
   410    $staging_dir = 'C:\k8s_tmp'
   411    $staged_binary = "$staging_dir\$binary"
   412    $source_url = "${env:CSI_PROXY_STORAGE_PATH}/${env:CSI_PROXY_VERSION}/$binary"
   413    New-Item -Force -ItemType 'directory' $staging_dir | Out-Null
   414    MustDownload-File -OutFile $staged_binary -URLs $source_url
   415    Move-Item -Force $staged_binary "${env:NODE_DIR}\$binary"
   416    # Clean up the temporary directory
   417    Remove-Item -Force -Recurse $staging_dir
   418  }
   419  
   420  function Start-CSIProxy {
          # Registers csi-proxy.exe from $env:NODE_DIR as the 'csiproxy' Windows
          # service and starts it. Does nothing unless $env:ENABLE_CSI_PROXY equals
          # "true".
   421    if ("${env:ENABLE_CSI_PROXY}" -eq "true") {
   422      Log-Output "Creating CSI Proxy Service"
            # Service command line: log to a file under $env:LOGS_DIR instead of
            # stderr, followed by any extra flags from $env:CSI_PROXY_FLAGS.
   423      $flags = "-windows-service -log_file=${env:LOGS_DIR}\csi-proxy.log -logtostderr=false ${env:CSI_PROXY_FLAGS}"
   424      & sc.exe create csiproxy binPath= "${env:NODE_DIR}\csi-proxy.exe $flags"
            # Failure action: restart/10000 (presumably a 10000 ms delay - TODO confirm
            # against the sc.exe documentation).
   425      & sc.exe failure csiproxy reset= 0 actions= restart/10000
   426      Log-Output "Starting CSI Proxy Service"
   427      & sc.exe start csiproxy
   428      Write-VerboseServiceInfoToConsole -Service 'csiproxy' -Delay 1
   429    }
   430  }
   431  
   432  # Converts an IPv4 address into its decimal (unsigned 32-bit) value.
   433  # TODO(pjh): this is copied from
   434  # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
   435  # See if the "management subnet" can be fetched or constructed another way.
   436  function ConvertTo_DecimalIP
   437  {
   438    param(
   439      [parameter(Mandatory = $true, Position = 0)] [Net.IPAddress] $IPAddress
   440    )
   441    # The first byte is weighted by 256^3, the next by 256^2, and so on.
   442    $octets = $IPAddress.GetAddressBytes()
   443    $total = 0
   444    for ($pos = 0; $pos -lt $octets.Length; $pos++) {
   445      $total += $octets[$pos] * [Math]::Pow(256, 3 - $pos)
   446    }
   447    return [UInt32]$total
   448  }
   449  
   450  # Converts a decimal (unsigned 32-bit) IPv4 value to dotted-decimal text.
   451  # TODO(pjh): this is copied from
   452  # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
   453  # See if the "management subnet" can be fetched or constructed another way.
   454  function ConvertTo_DottedDecimalIP
   455  {
   456    param(
   457      [parameter(Mandatory = $true, Position = 0)] [Uint32] $IPAddress
   458    )
   459    $octets = @()
   460    foreach ($power in 3..0) {
   461      $unit = [Math]::Pow(256, $power)
   462      $low = $IPAddress % $unit
   463      $octets += ($IPAddress - $low) / $unit
   464      $IPAddress = $low
   465    }
   466    return [String]::Join(".", $octets)
   467  }
   468  
   469  # Returns the prefix length of a subnet mask, i.e. the count of its 1 bits.
   470  # TODO(pjh): this is copied from
   471  # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
   472  # See if the "management subnet" can be fetched or constructed another way.
   473  function ConvertTo_MaskLength
   474  {
   475    param(
   476      [parameter(Mandatory = $True, Position = 0)] [Net.IPAddress] $SubnetMask
   477    )
   478    $ones = 0
   479    foreach ($octet in $SubnetMask.GetAddressBytes()) {
   480      # Binary text of the byte with its zeros removed: one char per set bit.
   481      $ones += ([Convert]::ToString($octet, 2) -replace '0').Length
   482    }
   483    return $ones
   484  }
   485  
   486  # Looks up the "management" network adapter, i.e. the interface through
   487  # which the Windows pods+kubelet talk to the rest of the Kubernetes cluster.
   488  #
   489  # Throws when no adapter matches; Add_InitialHnsNetwork() must run first.
   490  function Get_MgmtNetAdapter {
   491    $candidates = Get-NetAdapter | Where-Object { $_.Name -like $MGMT_ADAPTER_NAME }
   492    if ($candidates) {
   493      return $candidates
   494    }
   495    # No adapter name matched the management wildcard pattern.
   496    Throw ("Failed to find a suitable network adapter, check your network " +
   497           "settings.")
   498  }
   499  
   500  # Decodes the base64 $Data string and writes it as binary to $File. Does
   501  # nothing if $File already exists and $REDO_STEPS is not set.
        # The exists/$REDO_STEPS decision is delegated to ShouldWrite-File, which is
        # defined outside this file.
        #
        # Parameters:
        #   Data: base64-encoded content.
        #   File: destination path; the decoded bytes are written with WriteAllBytes.
   502  function Write_PkiData {
   503    param (
   504      [parameter(Mandatory=$true)] [string] $Data,
   505      [parameter(Mandatory=$true)] [string] $File
   506    )
   507  
   508    if (-not (ShouldWrite-File $File)) {
   509      return
   510    }
   511  
   512    # This command writes out a PEM certificate file, analogous to "base64
   513    # --decode" on Linux. See https://stackoverflow.com/a/51914136/1230197.
   514    [IO.File]::WriteAllBytes($File, [Convert]::FromBase64String($Data))
          # No file permissions are set here; the call below only logs a reminder.
   515    Log_Todo ("need to set permissions correctly on ${File}; not sure what the " +
   516              "Windows equivalent of 'umask 077' is")
   517    # Linux: owned by root, rw by user only.
   518    #   -rw------- 1 root root 1.2K Oct 12 00:56 ca-certificates.crt
   519    #   -rw------- 1 root root 1.3K Oct 12 00:56 kubelet.crt
   520    #   -rw------- 1 root root 1.7K Oct 12 00:56 kubelet.key
   521    # Windows:
   522    #   https://docs.microsoft.com/en-us/dotnet/api/system.io.fileattributes
   523    #   https://docs.microsoft.com/en-us/dotnet/api/system.io.fileattributes
          # NOTE(review): the two links above are identical; the second was probably
          # meant to point somewhere else - TODO confirm.
   524  }
   525  
   526  # Creates the node PKI files in $env:PKI_DIR.
   527  #
   528  # Required ${kube_env} keys:
   529  #   CA_CERT
   530  #   KUBELET_CERT
   531  #   KUBELET_KEY
   532  # A missing key is reported through Log-Output -Fatal. (This comment used to
   533  # describe KUBELET_CERT and KUBELET_KEY as optional; the code does not.)
        #
        # ${kube_env} is read from an enclosing scope; it is not a parameter. The
        # function ends by emitting the listing of $env:PKI_DIR.
   534  function Create-NodePki {
   535    Log-Output 'Creating node pki files'
   536  
          # CA bundle -> $env:CA_FILE_PATH
   537    if ($kube_env.ContainsKey('CA_CERT')) {
   538      $CA_CERT_BUNDLE = ${kube_env}['CA_CERT']
   539      Write_PkiData "${CA_CERT_BUNDLE}" ${env:CA_FILE_PATH}
   540    }
   541    else {
   542      Log-Output -Fatal 'CA_CERT not present in kube-env'
   543    }
   544  
          # Kubelet client certificate -> $env:KUBELET_CERT_PATH
   545    if ($kube_env.ContainsKey('KUBELET_CERT')) {
   546      $KUBELET_CERT = ${kube_env}['KUBELET_CERT']
   547      Write_PkiData "${KUBELET_CERT}" ${env:KUBELET_CERT_PATH}
   548    }
   549    else {
   550      Log-Output -Fatal 'KUBELET_CERT not present in kube-env'
   551    }
          # Kubelet client key -> $env:KUBELET_KEY_PATH
   552    if ($kube_env.ContainsKey('KUBELET_KEY')) {
   553      $KUBELET_KEY = ${kube_env}['KUBELET_KEY']
   554      Write_PkiData "${KUBELET_KEY}" ${env:KUBELET_KEY_PATH}
   555    }
   556    else {
   557      Log-Output -Fatal 'KUBELET_KEY not present in kube-env'
   558    }
   559  
   560    Get-ChildItem ${env:PKI_DIR}
   561  }
   562  
   563  # Creates the bootstrap kubelet kubeconfig at $env:BOOTSTRAP_KUBECONFIG.
   564  # https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/
   565  #
   566  # Create-NodePki() must be called first.
   567  #
   568  # Required ${kube_env} keys:
   569  #   KUBERNETES_MASTER_NAME: the apiserver IP address.
        #
        # Does nothing when ShouldWrite-File declines the target path. ${kube_env} is
        # read from an enclosing scope; it is not a parameter.
   570  function Write_BootstrapKubeconfig {
   571    if (-not (ShouldWrite-File ${env:BOOTSTRAP_KUBECONFIG})) {
   572      return
   573    }
   574  
   575    # TODO(mtaufen): is user "kubelet" correct? Other examples use e.g.
   576    # "system:node:$(hostname)".
   577  
   578    $apiserverAddress = ${kube_env}['KUBERNETES_MASTER_NAME']
   579    New-Item -Force -ItemType file ${env:BOOTSTRAP_KUBECONFIG} | Out-Null
          # The single-quoted text below is a literal template. The upper-case
          # placeholders in it (KUBELET_CERT_PATH, KUBELET_KEY_PATH, APISERVER_ADDRESS,
          # CA_FILE_PATH) are substituted by the chained replace() calls that follow it.
   580    Set-Content ${env:BOOTSTRAP_KUBECONFIG} `
   581  'apiVersion: v1
   582  kind: Config
   583  users:
   584  - name: kubelet
   585    user:
   586      client-certificate: KUBELET_CERT_PATH
   587      client-key: KUBELET_KEY_PATH
   588  clusters:
   589  - name: local
   590    cluster:
   591      server: https://APISERVER_ADDRESS
   592      certificate-authority: CA_FILE_PATH
   593  contexts:
   594  - context:
   595      cluster: local
   596      user: kubelet
   597    name: service-account-context
   598  current-context: service-account-context'.`
   599      replace('KUBELET_CERT_PATH', ${env:KUBELET_CERT_PATH}).`
   600      replace('KUBELET_KEY_PATH', ${env:KUBELET_KEY_PATH}).`
   601      replace('APISERVER_ADDRESS', ${apiserverAddress}).`
   602      replace('CA_FILE_PATH', ${env:CA_FILE_PATH})
          # Log the file that was just written.
   603    Log-Output ("kubelet bootstrap kubeconfig:`n" +
   604                "$(Get-Content -Raw ${env:BOOTSTRAP_KUBECONFIG})")
   605  }
   606  
   607  # Fetches the kubelet kubeconfig from the metadata server and writes it to
   608  # $env:KUBECONFIG. Does nothing when ShouldWrite-File declines that path.
   609  #
   610  # Create-NodePki() must be called first.
   611  function Write_KubeconfigFromMetadata {
   612    if (-not (ShouldWrite-File ${env:KUBECONFIG})) {
   613      return
   614    }
   615    # $null is compared on the left below so that -eq tests for null instead of filtering a collection.
   616    $kubeconfig = Get-InstanceMetadataAttribute 'kubeconfig'
   617    if ($null -eq $kubeconfig) {
   618      Log-Output `
   619          "kubeconfig metadata key not found, can't write ${env:KUBECONFIG}" `
   620          -Fatal
   621    }
   622    Set-Content ${env:KUBECONFIG} $kubeconfig
   623    Log-Output ("kubelet kubeconfig from metadata (non-bootstrap):`n" +
   624                "$(Get-Content -Raw ${env:KUBECONFIG})")
   625  }
   626  
   627  # Creates the kubelet kubeconfig. As written, this only calls
   628  # Write_BootstrapKubeconfig, so the file is created at $env:BOOTSTRAP_KUBECONFIG.
   629  #
   630  # Create-NodePki() must be called first.
   631  #
   632  # Required ${kube_env} keys:
   633  #   KUBERNETES_MASTER_NAME: the apiserver IP address.
        #
        # NOTE(review): Write_KubeconfigFromMetadata (which writes $env:KUBECONFIG) is
        # not called from here.
   634  function Create-KubeletKubeconfig {
   635    Write_BootstrapKubeconfig
   636  }
   637  
   638  # Creates the kubeconfig user file for applications that communicate with Kubernetes.
   639  #
   640  # Create-NodePki() must be called first.
   641  #
   642  # Required ${kube_env} keys:
   643  #   CA_CERT
   644  #   KUBERNETES_MASTER_NAME
        #
        # Parameters:
        #   Name:  user name written into the kubeconfig (also used in the log line).
        #   Path:  file to create; nothing is done when ShouldWrite-File declines it.
        #   Token: bearer token written into the user entry.
        #
        # ${kube_env} is read from an enclosing scope; it is not a parameter.
   645  function Create-Kubeconfig {
   646    param (
   647      [parameter(Mandatory=$true)] [string]$Name,
   648      [parameter(Mandatory=$true)] [string]$Path,
   649      [parameter(Mandatory=$true)] [string]$Token
   650    )
   651    if (-not (ShouldWrite-File $Path)) {
   652      return
   653    }
   654  
   655    New-Item -Force -ItemType file $Path | Out-Null
   656  
   657    # In configure-helper.sh kubelet kubeconfig uses certificate-authority while
   658    # kubeproxy kubeconfig uses certificate-authority-data, ugh. Does it matter?
   659    # Use just one or the other for consistency?
          # The single-quoted text below is a literal template. Its placeholders
          # (APP_NAME, APP_TOKEN, CA_CERT, APISERVER_ADDRESS) are substituted, in that
          # order, by the chained replace() calls that follow it.
   660    Set-Content $Path `
   661  'apiVersion: v1
   662  kind: Config
   663  users:
   664  - name: APP_NAME
   665    user:
   666      token: APP_TOKEN
   667  clusters:
   668  - name: local
   669    cluster:
   670      server: https://APISERVER_ADDRESS
   671      certificate-authority-data: CA_CERT
   672  contexts:
   673  - context:
   674      cluster: local
   675      user: APP_NAME
   676    name: service-account-context
   677  current-context: service-account-context'.`
   678    replace('APP_NAME', $Name).`
   679    replace('APP_TOKEN', $Token).`
   680    replace('CA_CERT', ${kube_env}['CA_CERT']).`
   681    replace('APISERVER_ADDRESS', ${kube_env}['KUBERNETES_MASTER_NAME'])
   682  
          # NOTE(review): this logs the whole file, which includes the token.
   683    Log-Output ("${Name} kubeconfig:`n" +
   684                "$(Get-Content -Raw ${Path})")
   685  }
   686  
   687  # Writes the kubeconfig for the 'kube-proxy' user to $env:KUBEPROXY_KUBECONFIG
   688  # by delegating to Create-Kubeconfig.
   689  #
   690  # Create-NodePki() must be called first.
   691  # Required ${kube_env} keys:
   692  #   CA_CERT
   693  #   KUBE_PROXY_TOKEN
   694  function Create-KubeproxyKubeconfig {
   695    $proxy_token = ${kube_env}['KUBE_PROXY_TOKEN']
   696    Create-Kubeconfig -Name 'kube-proxy' -Token $proxy_token `
   697      -Path ${env:KUBEPROXY_KUBECONFIG}
   698  }
   699  
   700  # Queries the metadata server for this GCE instance's first IP alias range and returns it trimmed.
   701  function Get_IpAliasRange {
   702    $metadata_root = "http://${GCE_METADATA_SERVER}/computeMetadata/v1/instance/"
   703    $alias_url = $metadata_root + "network-interfaces/0/ip-aliases/0"
   704    $web_client = New-Object Net.WebClient
   705    $web_client.Headers.Add('Metadata-Flavor', 'Google')
   706    return ($web_client.DownloadString($alias_url)).Trim()
   707  }
   708  
   709  # Retrieves the pod CIDR and sets it in $env:POD_CIDR.
   710  function Set-PodCidr {
   711    while($true) {
   712      $pod_cidr = Get_IpAliasRange
   713      if (-not $?) {
   714        Log-Output ${pod_cIDR}
   715        Log-Output "Retrying Get_IpAliasRange..."
   716        Start-Sleep -sec 1
   717        continue
   718      }
   719      break
   720    }
   721  
   722    Log-Output "fetched pod CIDR (same as IP alias range): ${pod_cidr}"
   723    Set_MachineEnvironmentVar "POD_CIDR" ${pod_cidr}
   724    Set_CurrentShellEnvironmentVar "POD_CIDR" ${pod_cidr}
   725  }
   726  
   727  # Adds an initial HNS network on the Windows node which forces the creation of
   728  # a virtual switch and the "management" interface that will be used to
   729  # communicate with the rest of the Kubernetes cluster without NAT.
   730  #
   731  # Note that adding the initial HNS network may cause connectivity to the GCE
   732  # metadata server to be lost due to a Windows bug.
   733  # Configure-HostNetworkingService() restores connectivity, look there for
   734  # details.
   735  #
   736  # Download-HelperScripts() must have been called first.
   737  function Add_InitialHnsNetwork {
   738    $INITIAL_HNS_NETWORK = 'External'
   739  
   740    # This comes from
   741    # https://github.com/Microsoft/SDN/blob/master/Kubernetes/flannel/l2bridge/start.ps1#L74
   742    # (or
   743    # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L206).
   744    #
   745    # daschott noted on Slack: "L2bridge networks require an external vSwitch.
   746    # The first network ("External") with hardcoded values in the script is just
   747    # a placeholder to create an external vSwitch. This is purely for convenience
   748    # to be able to remove/modify the actual HNS network ("cbr0") or rejoin the
   749    # nodes without a network blip. Creating a vSwitch takes time, causes network
   750    # blips, and it makes it more likely to hit the issue where flanneld is
   751    # stuck, so we want to do this as rarely as possible."
   752    $hns_network = Get-HnsNetwork | Where-Object Name -eq $INITIAL_HNS_NETWORK
   753    if ($hns_network) {
   754      if ($REDO_STEPS) {
   755        Log-Output ("Warning: initial '$INITIAL_HNS_NETWORK' HNS network " +
   756                    "already exists, removing it and recreating it")
   757        $hns_network | Remove-HnsNetwork
   758        $hns_network = $null
   759      }
   760      else {
   761        Log-Output ("Skip: initial '$INITIAL_HNS_NETWORK' HNS network " +
   762                    "already exists, not recreating it")
   763        return
   764      }
   765    }
   766    Log-Output ("Creating initial HNS network to force creation of " +
   767                "${MGMT_ADAPTER_NAME} interface")
   768    # Note: RDP connection will hiccup when running this command.
   769    New-HNSNetwork `
   770        -Type "L2Bridge" `
   771        -AddressPrefix "192.168.255.0/30" `
   772        -Gateway "192.168.255.1" `
   773        -Name $INITIAL_HNS_NETWORK `
   774        -Verbose
   775  }
   776  
   777  # Get the network in uint32 for the given cidr
   778  function Get_NetworkDecimal_From_CIDR([string] $cidr) {
   779    $network, [int]$subnetlen = $cidr.Split('/')
   780    $decimal_network = ConvertTo_DecimalIP($network)
   781    return $decimal_network
   782  }
   783  
   784  # Get gateway ip string (the first address) based on pod cidr.
   785  # For Windows nodes the pod gateway IP address is the first address in the pod
   786  # CIDR for the host.
   787  function Get_Gateway_From_CIDR([string] $cidr) {
   788    $network=Get_NetworkDecimal_From_CIDR($cidr)
   789    $gateway=ConvertTo_DottedDecimalIP($network+1)
   790    return $gateway
   791  }
   792  
   793  # Get endpoint gateway ip string (the second address) based on pod cidr.
   794  # For Windows nodes the pod gateway IP address is the first address in the pod
   795  # CIDR for the host, but from inside containers it's the second address.
   796  function Get_Endpoint_Gateway_From_CIDR([string] $cidr) {
   797    $network=Get_NetworkDecimal_From_CIDR($cidr)
   798    $gateway=ConvertTo_DottedDecimalIP($network+2)
   799    return $gateway
   800  }
   801  
   802  # Get pod IP range start based (the third address) on pod cidr
   803  # We reserve the first two in the cidr range for gateways. Start the cidr
   804  # range from the third so that IPAM does not allocate those IPs to pods.
   805  function Get_PodIP_Range_Start([string] $cidr) {
   806    $network=Get_NetworkDecimal_From_CIDR($cidr)
   807    $start=ConvertTo_DottedDecimalIP($network+3)
   808    return $start
   809  }
   810  
   811  # Configures HNS on the Windows node to enable Kubernetes networking:
   812  #   - Creates the "management" interface associated with an initial HNS network.
   813  #   - Creates the HNS network $env:KUBE_NETWORK for pod networking.
   814  #   - Creates an HNS endpoint for pod networking.
   815  #   - Adds necessary routes on the management interface.
   816  #   - Verifies that the GCE metadata server connection remains intact.
   817  #
   818  # Prerequisites:
   819  #   $env:POD_CIDR is set (by Set-PodCidr).
   820  #   Download-HelperScripts() has been called.
   821  function Configure-HostNetworkingService {
   822    Import-Module -Force ${env:K8S_DIR}\hns.psm1
   823  
   824    Add_InitialHnsNetwork
   825  
   826    $pod_gateway = Get_Gateway_From_CIDR(${env:POD_CIDR})
   827    $pod_endpoint_gateway = Get_Endpoint_Gateway_From_CIDR(${env:POD_CIDR})
   828    Log-Output ("Setting up Windows node HNS networking: " +
   829                "podCidr = ${env:POD_CIDR}, podGateway = ${pod_gateway}, " +
   830                "podEndpointGateway = ${pod_endpoint_gateway}")
   831  
   832    $hns_network = Get-HnsNetwork | Where-Object Name -eq ${env:KUBE_NETWORK}
   833    if ($hns_network) {
   834      if ($REDO_STEPS) {
   835        Log-Output ("Warning: ${env:KUBE_NETWORK} HNS network already exists, " +
   836                    "removing it and recreating it")
   837        $hns_network | Remove-HnsNetwork
   838        $hns_network = $null
   839      }
   840      else {
   841        Log-Output "Skip: ${env:KUBE_NETWORK} HNS network already exists"
   842      }
   843    }
   844    $created_hns_network = $false
   845    if (-not $hns_network) {
   846      # Note: RDP connection will hiccup when running this command.
   847      $hns_network = New-HNSNetwork `
   848          -Type "L2Bridge" `
   849          -AddressPrefix ${env:POD_CIDR} `
   850          -Gateway ${pod_gateway} `
   851          -Name ${env:KUBE_NETWORK} `
   852          -Verbose
   853      $created_hns_network = $true
   854    }
   855    # This name of endpoint is referred in pkg/proxy/winkernel/proxier.go as part of
   856    # kube-proxy as well. A health check port for every service that is specified as
   857    # "externalTrafficPolicy: local" will be added on the endpoint.
   858    # PLEASE KEEP THEM CONSISTENT!!!
   859    $endpoint_name = "cbr0"
   860  
   861    $vnic_name = "vEthernet (${endpoint_name})"
   862  
   863    $hns_endpoint = Get-HnsEndpoint | Where-Object Name -eq $endpoint_name
   864    # Note: we don't expect to ever enter this block currently - while the HNS
   865    # network does seem to persist across reboots, the HNS endpoints do not.
   866    if ($hns_endpoint) {
   867      if ($REDO_STEPS) {
   868        Log-Output ("Warning: HNS endpoint $endpoint_name already exists, " +
   869                    "removing it and recreating it")
   870        $hns_endpoint | Remove-HnsEndpoint
   871        $hns_endpoint = $null
   872      }
   873      else {
   874        Log-Output "Skip: HNS endpoint $endpoint_name already exists"
   875      }
   876    }
   877    if (-not $hns_endpoint) {
   878      $hns_endpoint = New-HnsEndpoint `
   879          -NetworkId ${hns_network}.Id `
   880          -Name ${endpoint_name} `
   881          -IPAddress ${pod_endpoint_gateway} `
   882          -Gateway "0.0.0.0" `
   883          -Verbose
   884      # TODO(pjh): find out: why is this always CompartmentId 1?
   885      Attach-HnsHostEndpoint `
   886          -EndpointID ${hns_endpoint}.Id `
   887          -CompartmentID 1 `
   888          -Verbose
   889      netsh interface ipv4 set interface "${vnic_name}" forwarding=enabled
   890    }
   891  
   892    Try {
   893      Get-HNSPolicyList | Remove-HnsPolicyList
   894    } Catch { }
   895  
   896    # Add a route from the management NIC to the pod CIDR.
   897    #
   898    # When a packet from a Kubernetes service backend arrives on the destination
   899    # Windows node, the reverse SNAT will be applied and the source address of
   900    # the packet gets replaced from the pod IP to the service VIP. The packet
   901    # will then leave the VM and return back through hairpinning.
   902    #
   903    # When IP alias is enabled, IP forwarding is disabled for anti-spoofing;
   904    # the packet with the service VIP will get blocked and be lost. With this
   905    # route, the packet will be routed to the pod subnetwork, and not leave the
   906    # VM.
   907    $mgmt_net_adapter = Get_MgmtNetAdapter
   908    New-NetRoute `
   909        -ErrorAction Ignore `
   910        -InterfaceAlias ${mgmt_net_adapter}.ifAlias `
   911        -DestinationPrefix ${env:POD_CIDR} `
   912        -NextHop "0.0.0.0" `
   913        -Verbose
   914  
   915    if ($created_hns_network) {
   916      # There is an HNS bug where the route to the GCE metadata server will be
   917      # removed when the HNS network is created:
   918      # https://github.com/Microsoft/hcsshim/issues/299#issuecomment-425491610.
   919      # The behavior here is very unpredictable: the route may only be removed
   920      # after some delay, or it may appear to be removed then you'll add it back
   921      # but then it will be removed once again. So, we first wait a long
   922      # unfortunate amount of time to ensure that things have quiesced, then we
   923      # wait until we're sure the route is really gone before re-adding it again.
   924      Log-Output "Waiting 45 seconds for host network state to quiesce"
   925      Start-Sleep 45
   926      WaitFor_GceMetadataServerRouteToBeRemoved
   927      Log-Output "Re-adding the GCE metadata server route"
   928      Add_GceMetadataServerRoute
   929    }
   930    Verify_GceMetadataServerRouteIsPresent
   931  
   932    Log-Output "Host network setup complete"
   933  }
   934  
   935  function Configure-GcePdTools {
   936    if (ShouldWrite-File ${env:K8S_DIR}\GetGcePdName.dll) {
   937      MustDownload-File -OutFile ${env:K8S_DIR}\GetGcePdName.dll `
   938        -URLs "https://storage.googleapis.com/gke-release/winnode/config/gce-tools/master/GetGcePdName/GetGcePdName.dll"
   939    }
   940    if (-not (Test-Path $PsHome\profile.ps1)) {
   941      New-Item -path $PsHome\profile.ps1 -type file
   942    }
   943  
   944    Add-Content $PsHome\profile.ps1 `
   945    '$modulePath = "K8S_DIR\GetGcePdName.dll"
   946    Unblock-File $modulePath
   947    Import-Module -Name $modulePath'.replace('K8S_DIR', ${env:K8S_DIR})
   948  }
   949  
   950  # Setup cni network for containerd.
   951  function Prepare-CniNetworking {
   952      Configure_Containerd_CniNetworking
   953  }
   954  
   955  # Obtain the host dns conf and save it to a file so that kubelet/CNI
   956  # can use it to configure dns suffix search list for pods.
   957  # The value of DNS server is ignored right now because the pod will
   958  # always only use cluster DNS service, but for consistency, we still
   959  # parsed them here in the same format as Linux resolv.conf.
   960  # This function must be called after Configure-HostNetworkingService.
   961  function Configure-HostDnsConf {
   962    $net_adapter = Get_MgmtNetAdapter
   963    $server_ips = (Get-DnsClientServerAddress `
   964            -InterfaceAlias ${net_adapter}.Name).ServerAddresses
   965    $search_list = (Get-DnsClient).ConnectionSpecificSuffixSearchList
   966    $conf = ""
   967    ForEach ($ip in $server_ips)  {
   968      $conf = $conf + "nameserver $ip`r`n"
   969    }
   970    $conf = $conf + "search $search_list"
   971    # Do not put hostdns.conf into the CNI config directory so as to
   972    # avoid the container runtime treating it as CNI config.
   973    $hostdns_conf = "${env:CNI_DIR}\hostdns.conf"
   974    New-Item -Force -ItemType file ${hostdns_conf} | Out-Null
   975    Set-Content ${hostdns_conf} $conf
   976    Log-Output "HOST dns conf:`n$(Get-Content -Raw ${hostdns_conf})"
   977  }
   978  
   979  # Fetches the kubelet config from the instance metadata and puts it at
   980  # $env:KUBELET_CONFIG.
   981  function Configure-Kubelet {
   982    if (-not (ShouldWrite-File ${env:KUBELET_CONFIG})) {
   983      return
   984    }
   985  
   986    # The Kubelet config is built by build-kubelet-config() in
   987    # cluster/gce/util.sh, and stored in the metadata server under the
   988    # 'kubelet-config' key.
   989    $kubelet_config = Get-InstanceMetadataAttribute 'kubelet-config'
   990    Set-Content ${env:KUBELET_CONFIG} $kubelet_config
   991    Log-Output "Kubelet config:`n$(Get-Content -Raw ${env:KUBELET_CONFIG})"
   992  }
   993  
   994  # Sets up the kubelet and kube-proxy arguments and starts them as native
   995  # Windows services.
   996  #
   997  # Required ${kube_env} keys:
   998  #   KUBELET_ARGS
   999  #   KUBEPROXY_ARGS
  1000  #   CLUSTER_IP_RANGE
  1001  function Start-WorkerServices {
  1002    # Compute kubelet args
  1003    $kubelet_args_str = ${kube_env}['KUBELET_ARGS']
  1004    $kubelet_args = $kubelet_args_str.Split(" ")
  1005    Log-Output "kubelet_args from metadata: ${kubelet_args}"
  1006  
  1007    # To join GCE instances to AD, we need to shorten their names, as NetBIOS name
  1008    # must be <= 15 characters, and GKE generated names are longer than that.
  1009    # To perform the join in an automated way, it's preferable to apply the rename
  1010    # and domain join in the GCESysprep step. However, after sysprep is complete
  1011    # and the machine restarts, kubelet bootstrapping should not use the shortened
  1012    # computer name, and instead use the instance's name by using --hostname-override,
  1013    # otherwise kubelet and kube-proxy will not be able to run properly.
  1014    $instance_name = "$(Get-InstanceMetadata 'name' | Out-String)"
  1015    $default_kubelet_args = @(`
  1016        "--pod-infra-container-image=${env:INFRA_CONTAINER}",
  1017        "--hostname-override=${instance_name}"
  1018    )
  1019  
  1020    $kubelet_args = ${default_kubelet_args} + ${kubelet_args}
  1021    Log-Output 'Using bootstrap kubeconfig for authentication'
  1022    $kubelet_args = (${kubelet_args} +
  1023                     "--bootstrap-kubeconfig=${env:BOOTSTRAP_KUBECONFIG}")
  1024    Log-Output "Final kubelet_args: ${kubelet_args}"
  1025  
  1026    # Compute kube-proxy args
  1027    $kubeproxy_args_str = ${kube_env}['KUBEPROXY_ARGS']
  1028    $kubeproxy_args = $kubeproxy_args_str.Split(" ")
  1029    Log-Output "kubeproxy_args from metadata: ${kubeproxy_args}"
  1030  
  1031    # kubeproxy is started on Linux nodes using
  1032    # kube-manifests/kubernetes/gci-trusty/kube-proxy.manifest, which is
  1033    # generated by start-kube-proxy in configure-helper.sh and contains e.g.:
  1034    #   kube-proxy --master=https://35.239.84.171
  1035    #   --kubeconfig=/var/lib/kube-proxy/kubeconfig --cluster-cidr=10.64.0.0/14
  1036    #   --oom-score-adj=-998 --v=2
  1037    #   --iptables-sync-period=1m --iptables-min-sync-period=10s
  1038    #   --ipvs-sync-period=1m --ipvs-min-sync-period=10s
  1039    # And also with various volumeMounts and "securityContext: privileged: true".
  1040    $default_kubeproxy_args = @(`
  1041        "--kubeconfig=${env:KUBEPROXY_KUBECONFIG}",
  1042        "--cluster-cidr=$(${kube_env}['CLUSTER_IP_RANGE'])",
  1043        "--hostname-override=${instance_name}"
  1044    )
  1045  
  1046    $kubeproxy_args = ${default_kubeproxy_args} + ${kubeproxy_args}
  1047    Log-Output "Final kubeproxy_args: ${kubeproxy_args}"
  1048  
  1049    # TODO(pjh): kubelet is emitting these messages:
  1050    # I1023 23:44:11.761915    2468 kubelet.go:274] Adding pod path:
  1051    # C:\etc\kubernetes
  1052    # I1023 23:44:11.775601    2468 file.go:68] Watching path
  1053    # "C:\\etc\\kubernetes"
  1054    # ...
  1055    # E1023 23:44:31.794327    2468 file.go:182] Can't process manifest file
  1056    # "C:\\etc\\kubernetes\\hns.psm1": C:\etc\kubernetes\hns.psm1: couldn't parse
  1057    # as pod(yaml: line 10: did not find expected <document start>), please check
  1058    # config file.
  1059    #
  1060    # Figure out how to change the directory that the kubelet monitors for new
  1061    # pod manifests.
  1062  
  1063    # We configure the service to restart on failure, after 10s wait. We reset
  1064    # the restart count to 0 each time, so we re-use our restart/10000 action on
  1065    # each failure. Note it currently restarts even when explicitly stopped, you
  1066    # have to delete the service entry to *really* kill it (e.g. `sc.exe delete
  1067    # kubelet`). See issue #72900.
  1068    if (Get-Process | Where-Object Name -eq "kubelet") {
  1069      Log-Output -Fatal `
  1070          "A kubelet process is already running, don't know what to do"
  1071    }
  1072    Log-Output "Creating kubelet service"
  1073    & sc.exe create kubelet binPath= "${env:NODE_DIR}\kube-log-runner.exe -log-file=${env:LOGS_DIR}\kubelet.log ${env:NODE_DIR}\kubelet.exe ${kubelet_args}" start= demand
  1074    & sc.exe failure kubelet reset= 0 actions= restart/10000
  1075    Log-Output "Starting kubelet service"
  1076    & sc.exe start kubelet
  1077  
  1078    Log-Output "Waiting 10 seconds for kubelet to stabilize"
  1079    Start-Sleep 10
  1080    Write-VerboseServiceInfoToConsole -Service 'kubelet'
  1081  
  1082    if (Get-Process | Where-Object Name -eq "kube-proxy") {
  1083      Log-Output -Fatal `
  1084          "A kube-proxy process is already running, don't know what to do"
  1085    }
  1086    Log-Output "Creating kube-proxy service"
  1087    & sc.exe create kube-proxy binPath= "${env:NODE_DIR}\kube-log-runner.exe -log-file=${env:LOGS_DIR}\kube-proxy.log ${env:NODE_DIR}\kube-proxy.exe ${kubeproxy_args}" start= demand
  1088    & sc.exe failure kube-proxy reset= 0 actions= restart/10000
  1089    Log-Output "Starting kube-proxy service"
  1090    & sc.exe start kube-proxy
  1091    Write-VerboseServiceInfoToConsole -Service 'kube-proxy' -Delay 1
  1092  
  1093    # F1020 23:08:52.000083    9136 server.go:361] unable to load in-cluster
  1094    # configuration, KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT must be
  1095    # defined
  1096    # TODO(pjh): still getting errors like these in kube-proxy log:
  1097    # E1023 04:03:58.143449    4840 reflector.go:205] k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129: Failed to list *core.Endpoints: Get https://35.239.84.171/api/v1/endpoints?limit=500&resourceVersion=0: dial tcp 35.239.84.171:443: connectex: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.
  1098    # E1023 04:03:58.150266    4840 reflector.go:205] k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129: Failed to list *core.Service: Get https://35.239.84.171/api/v1/services?limit=500&resourceVersion=0: dial tcp 35.239.84.171:443: connectex: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.
  1099    WaitFor_KubeletAndKubeProxyReady
  1100    Verify_GceMetadataServerRouteIsPresent
  1101    Log-Output "Kubernetes components started successfully"
  1102  }
  1103  
  1104  # Stop and unregister both kubelet & kube-proxy services.
  1105  function Unregister-WorkerServices {
  1106    & sc.exe delete kube-proxy
  1107    & sc.exe delete kubelet
  1108  }
  1109  
  1110  # Wait for kubelet and kube-proxy to be ready within 10s.
  1111  function WaitFor_KubeletAndKubeProxyReady {
  1112    $waited = 0
  1113    $timeout = 10
  1114    while (((Get-Service kube-proxy).Status -ne 'Running' -or (Get-Service kubelet).Status -ne 'Running') -and $waited -lt $timeout) {
  1115      Start-Sleep 1
  1116      $waited++
  1117    }
  1118  
  1119    # Timeout occurred
  1120    if ($waited -ge $timeout) {
  1121      Log-Output "$(Get-Service kube* | Out-String)"
  1122      Throw ("Timeout while waiting ${timeout} seconds for kubelet and kube-proxy services to start")
  1123    }
  1124  }
  1125  
  1126  # Runs 'kubectl get nodes'.
  1127  # Runs additional verification commands to ensure node successfully joined cluster
  1128  # and that it connects to the API Server.
  1129  function Verify-WorkerServices {
  1130    $timeout = 12
  1131    $retries = 0
  1132    $retryDelayInSeconds = 5
  1133    
  1134    Log-Output ("Testing node connection to API server...")
  1135    do {
  1136        $retries++
  1137        $nodes_list = & "${env:NODE_DIR}\kubectl.exe" get nodes -o=custom-columns=:.metadata.name -A | Out-String
  1138        $host_status = & "${env:NODE_DIR}\kubectl.exe" get nodes (hostname) -o=custom-columns=:.status.conditions[4].type | Out-String
  1139        Start-Sleep $retryDelayInSeconds
  1140    } while (((-Not $nodes_list) -or (-Not $nodes_list.contains((hostname))) -or (-Not $host_status.contains("Ready")))-and ($retries -le $timeout))
  1141    
  1142    If (-Not $nodes_list){
  1143        Throw ("Node: '$(hostname)' failed to connect to API server")
  1144    
  1145    }ElseIf (-Not $nodes_list.contains((hostname))) {
  1146        Throw ("Node: '$(hostname)' failed to join the cluster; NODES: '`n $($nodes_list)'")
  1147  
  1148    }ELseIf (-Not $host_status.contains("Ready")) {
  1149        Throw ("Node: '$(hostname)' is not in Ready state")
  1150    }
  1151    
  1152    Log-Output ("Node: $(hostname) successfully joined cluster `n NODES: `n $($nodes_list)")
  1153    Verify_GceMetadataServerRouteIsPresent
  1154  
  1155  }
  1156  
  1157  # Downloads the Windows crictl package and installs its contents (e.g.
  1158  # crictl.exe) in $env:NODE_DIR.
  1159  function DownloadAndInstall-Crictl {
  1160    if (-not (ShouldWrite-File ${env:NODE_DIR}\crictl.exe)) {
  1161      return
  1162    }
  1163    $CRI_TOOLS_GCS_BUCKET = 'k8s-artifacts-cri-tools'
  1164    $url = ('https://storage.googleapis.com/' + $CRI_TOOLS_GCS_BUCKET +
  1165            '/release/' + $CRICTL_VERSION + '/crictl-' + $CRICTL_VERSION +
  1166            '-windows-amd64.tar.gz')
  1167    MustDownload-File `
  1168        -URLs $url `
  1169        -OutFile ${env:NODE_DIR}\crictl.tar.gz `
  1170        -Hash $CRICTL_SHA256 `
  1171        -Algorithm SHA256
  1172    tar xzvf ${env:NODE_DIR}\crictl.tar.gz -C ${env:NODE_DIR}
  1173  }
  1174  
  1175  # Sets crictl configuration values.
  1176  function Configure-Crictl {
  1177    if (${env:CONTAINER_RUNTIME_ENDPOINT}) {
  1178      & "${env:NODE_DIR}\crictl.exe" config runtime-endpoint `
  1179          ${env:CONTAINER_RUNTIME_ENDPOINT}
  1180    }
  1181  }
  1182  
  1183  # Pulls the infra/pause container image onto the node so that it will be
  1184  # immediately available when the kubelet tries to run pods.
  1185  # TODO(pjh): downloading the container container image may take a few minutes;
  1186  # figure out how to run this in the background while perform the rest of the
  1187  # node startup steps!
  1188  # Pull-InfraContainer must be called AFTER Verify-WorkerServices.
  1189  function Pull-InfraContainer {
  1190    $name, $label = ${env:INFRA_CONTAINER} -split ':',2
  1191    if (-not ("$(& crictl images)" -match "$name.*$label")) {
  1192      & crictl pull ${env:INFRA_CONTAINER}
  1193      if (!$?) {
  1194        throw "Error running 'crictl pull ${env:INFRA_CONTAINER}'"
  1195      }
  1196    }
  1197    $inspect = "$(& crictl inspecti ${env:INFRA_CONTAINER} | Out-String)"
  1198    Log-Output "Infra/pause container:`n$inspect"
  1199  }
  1200  
  1201  # Setup the containerd on the node.
  1202  function Setup-ContainerRuntime {
  1203    Install-Pigz
  1204    Install_Containerd
  1205    Configure_Containerd
  1206    Start_Containerd
  1207  }
  1208  
  1209  function Test-ContainersFeatureInstalled {
  1210    return (Get-WindowsFeature Containers).Installed
  1211  }
  1212  
  1213  # After this function returns, the computer must be restarted to complete
  1214  # the installation!
  1215  function Install-ContainersFeature {
  1216    Log-Output "Installing Windows 'Containers' feature"
  1217    Install-WindowsFeature Containers
  1218  }
  1219  
  1220  # Verifies if Hyper-V should be enabled in the node
  1221  function Test-ShouldEnableHyperVFeature {
  1222    return "${env:WINDOWS_ENABLE_HYPERV}" -eq "true"
  1223  }
  1224  
  1225  # Check if Hyper-V feature is enabled
  1226  function Test-HyperVFeatureEnabled {
  1227    return ((Get-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V).State -eq 'Enabled')
  1228  }
  1229  
  1230  # After this function returns, the computer must be restarted to complete
  1231  # the installation!
  1232  function Enable-HyperVFeature {
  1233    Log-Output "Enabling Windows 'HyperV' feature"
  1234    Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V -All -NoRestart
  1235    Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V-Management-PowerShell -All -NoRestart
  1236  }
  1237  
  1238  # Configures the TCP/IP parameters to be in sync with the GCP recommendation.
  1239  # Not setting these values correctly can cause network issues for connections
  1240  # that live longer than 10 minutes.
  1241  # See: https://cloud.google.com/compute/docs/troubleshooting/general-tips#idle-connections
  1242  function Set-WindowsTCPParameters {
  1243    Set-ItemProperty -Force -Confirm:$false -Path `
  1244      'HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters' `
  1245      -Name 'KeepAliveInterval' -Type Dword -Value 1000
  1246    Set-ItemProperty -Force -Confirm:$false `
  1247      -Path 'HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters' `
  1248      -Name 'KeepAliveTime' -Type Dword -Value 60000
  1249    Set-ItemProperty -Force -Confirm:$false `
  1250      -Path 'HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters' `
  1251      -Name 'TcpMaxDataRetransmissions' -Type Dword -Value 10
  1252  
  1253    Log-Output 'TCP/IP Parameters'
  1254    Get-ItemProperty -Path 'HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters'
  1255  }
  1256  
  1257  # Writes a CNI config file under $env:CNI_CONFIG_DIR for containerd.
  1258  #
  1259  # Prerequisites:
  1260  #   $env:POD_CIDR is set (by Set-PodCidr).
  1261  #   The "management" interface exists (Configure-HostNetworkingService).
  1262  #   The HNS network for pod networking has been configured
  1263  #     (Configure-HostNetworkingService).
  1264  #   Containerd is installed (Install_Containerd).
  1265  #
  1266  # Required ${kube_env} keys:
  1267  #   DNS_SERVER_IP
  1268  #   DNS_DOMAIN
  1269  #   SERVICE_CLUSTER_IP_RANGE
  1270  function Configure_Containerd_CniNetworking {
  1271    $l2bridge_conf = "${env:CNI_CONFIG_DIR}\l2bridge.conf"
  1272    if (-not (ShouldWrite-File ${l2bridge_conf})) {
  1273      return
  1274    }
  1275  
  1276    $mgmt_ip = (Get_MgmtNetAdapter |
  1277                Get-NetIPAddress -AddressFamily IPv4).IPAddress
  1278  
  1279    $pod_gateway = Get_Endpoint_Gateway_From_CIDR(${env:POD_CIDR})
  1280  
  1281    # Explanation of the CNI config values:
  1282    #   POD_CIDR: the pod CIDR assigned to this node.
  1283    #   POD_GATEWAY: the gateway IP.
  1284    #   MGMT_IP: the IP address assigned to the node's primary network interface
  1285    #     (i.e. the internal IP of the GCE VM).
  1286    #   SERVICE_CIDR: the CIDR used for kubernetes services.
  1287    #   DNS_SERVER_IP: the cluster's DNS server IP address.
  1288    #   DNS_DOMAIN: the cluster's DNS domain, e.g. "cluster.local".
  1289    #
  1290    # OutBoundNAT ExceptionList: No SNAT for CIDRs in the list, the same as default GKE non-masquerade destination ranges listed at https://cloud.google.com/kubernetes-engine/docs/how-to/ip-masquerade-agent#default-non-masq-dests
  1291  
  1292    New-Item -Force -ItemType file ${l2bridge_conf} | Out-Null
  1293    Set-Content ${l2bridge_conf} `
  1294  '{
  1295    "cniVersion":  "0.2.0",
  1296    "name":  "l2bridge",
  1297    "type":  "sdnbridge",
  1298    "master": "Ethernet",
  1299    "capabilities":  {
  1300      "portMappings":  true,
  1301      "dns": true
  1302    },
  1303    "ipam":  {
  1304      "subnet": "POD_CIDR",
  1305      "routes": [
  1306        {
  1307          "GW": "POD_GATEWAY"
  1308        }
  1309      ]
  1310    },
  1311    "dns":  {
  1312      "Nameservers":  [
  1313        "DNS_SERVER_IP"
  1314      ],
  1315      "Search": [
  1316        "DNS_DOMAIN"
  1317      ]
  1318    },
  1319    "AdditionalArgs": [
  1320      {
  1321        "Name":  "EndpointPolicy",
  1322        "Value":  {
  1323          "Type":  "OutBoundNAT",
  1324          "Settings": {
  1325            "Exceptions":  [
  1326              "169.254.0.0/16",
  1327              "10.0.0.0/8",
  1328              "172.16.0.0/12",
  1329              "192.168.0.0/16",
  1330              "100.64.0.0/10",
  1331              "192.0.0.0/24",
  1332              "192.0.2.0/24",
  1333              "192.88.99.0/24",
  1334              "198.18.0.0/15",
  1335              "198.51.100.0/24",
  1336              "203.0.113.0/24",
  1337              "240.0.0.0/4"
  1338            ]
  1339          }
  1340        }
  1341      },
  1342      {
  1343        "Name":  "EndpointPolicy",
  1344        "Value":  {
  1345          "Type":  "SDNRoute",
  1346          "Settings": {
  1347            "DestinationPrefix":  "SERVICE_CIDR",
  1348            "NeedEncap":  true
  1349          }
  1350        }
  1351      },
  1352      {
  1353        "Name":  "EndpointPolicy",
  1354        "Value":  {
  1355          "Type":  "SDNRoute",
  1356          "Settings": {
  1357            "DestinationPrefix":  "MGMT_IP/32",
  1358            "NeedEncap":  true
  1359          }
  1360        }
  1361      }
  1362    ]
  1363  }'.replace('POD_CIDR', ${env:POD_CIDR}).`
  1364    replace('POD_GATEWAY', ${pod_gateway}).`
  1365    replace('DNS_SERVER_IP', ${kube_env}['DNS_SERVER_IP']).`
  1366    replace('DNS_DOMAIN', ${kube_env}['DNS_DOMAIN']).`
  1367    replace('MGMT_IP', ${mgmt_ip}).`
  1368    replace('SERVICE_CIDR', ${kube_env}['SERVICE_CLUSTER_IP_RANGE'])
  1369  
  1370    Log-Output "containerd CNI config:`n$(Get-Content -Raw ${l2bridge_conf})"
  1371  }
  1372  
  1373  # Download and install containerd and CNI binaries into $env:NODE_DIR.
  1374  function Install_Containerd {
  1375    # Assume that presence of containerd.exe indicates that all containerd
  1376    # binaries were already previously downloaded to this node.
  1377    if (-not (ShouldWrite-File ${env:NODE_DIR}\containerd.exe)) {
  1378      return
  1379    }
  1380  
  1381    $tmp_dir = 'C:\containerd_tmp'
  1382    New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
  1383  
  1384    # TODO(ibrahimab) Change this to a gcs bucket with CI maintained and accessible by community.
  1385    $version = '1.6.2'
  1386    $tar_url = ("https://github.com/containerd/containerd/releases/download/v${version}/" +
  1387                "cri-containerd-cni-${version}-windows-amd64.tar.gz")
  1388    $sha_url = $tar_url + ".sha256sum"
  1389    MustDownload-File -URLs $sha_url -OutFile $tmp_dir\sha256sum
  1390    $sha = $(Get-Content $tmp_dir\sha256sum).Split(" ")[0].ToUpper()
  1391  
  1392    MustDownload-File `
  1393        -URLs $tar_url `
  1394        -OutFile $tmp_dir\containerd.tar.gz `
  1395        -Hash $sha `
  1396        -Algorithm SHA256
  1397  
  1398    tar xzvf $tmp_dir\containerd.tar.gz -C $tmp_dir
  1399    Move-Item -Force $tmp_dir\cni\bin\*.exe "${env:CNI_DIR}\"
  1400    Move-Item -Force $tmp_dir\*.exe "${env:NODE_DIR}\"
  1401    Remove-Item -Force -Recurse $tmp_dir
  1402  
  1403    # Exclusion for Defender.
  1404    Add-MpPreference -ExclusionProcess "${env:NODE_DIR}\containerd.exe"
  1405  }
  1406  
  1407  # Lookup the path of containerd config if exists, else returns a default.
  1408  function Get_Containerd_ConfigPath {
  1409    $service = Get-WMIObject -Class Win32_Service -Filter  "Name='containerd'"
  1410    if (!($service -eq $null) -and
  1411        $service.PathName -match ".*\s--config\s*(\S+).*" -and
  1412        $matches.Count -eq 2) {
  1413      return $matches[1]
  1414    } else {
  1415      return 'C:\Program Files\containerd\config.toml'
  1416    }
  1417  }
  1418  
  1419  # Generates the containerd config.toml file.
  1420  function Configure_Containerd {
  1421    $config_path = Get_Containerd_ConfigPath
  1422    $config_dir = [System.IO.Path]::GetDirectoryName($config_path)
  1423    New-Item $config_dir -ItemType 'directory' -Force | Out-Null
  1424    Set-Content ${config_path} @"
  1425  [plugins.scheduler]
  1426    schedule_delay = '0s'
  1427    startup_delay = '0s'
  1428  [plugins.cri]
  1429    sandbox_image = 'INFRA_CONTAINER_IMAGE'
  1430  [plugins.cri.containerd]
  1431    snapshotter = 'windows'
  1432    default_runtime_name = 'runhcs-wcow-process'
  1433    disable_snapshot_annotations = true
  1434    discard_unpacked_layers = true
  1435  [plugins.cri.cni]
  1436    bin_dir = 'CNI_BIN_DIR'
  1437    conf_dir = 'CNI_CONF_DIR'
  1438  "@.replace('INFRA_CONTAINER_IMAGE', ${env:INFRA_CONTAINER}).`
  1439      replace('CNI_BIN_DIR', "${env:CNI_DIR}").`
  1440      replace('CNI_CONF_DIR', "${env:CNI_CONFIG_DIR}")
  1441  }
  1442  
  1443  # Register if needed and start containerd service.
  1444  function Start_Containerd {
  1445    # Do the registration only if the containerd service does not exist.
  1446    if ((Get-WMIObject -Class Win32_Service -Filter  "Name='containerd'") -eq $null) {
  1447      Log-Output "Creating containerd service"
  1448      & containerd.exe --register-service --log-file "${env:LOGS_DIR}/containerd.log"
  1449    }
  1450  
  1451    Log-Output "Starting containerd service"
  1452    Restart-Service containerd
  1453  }
  1454  
# Pigz Resources
# Directory the Pigz archive is downloaded and extracted into; Install-Pigz
# also adds it to the machine PATH and treats its existence as "installed".
$PIGZ_ROOT = 'C:\pigz'
# Pigz release to install; interpolated into the download URL below.
$PIGZ_VERSION = '2.3.1'
# Download location of the release archive (a .zip, despite the TAR name).
$PIGZ_TAR_URL = "https://storage.googleapis.com/gke-release/winnode/pigz/prod/gke_windows/pigz/release/5/20201104-134221/pigz-$PIGZ_VERSION.zip"
# Expected digest of the archive; Install-Pigz passes it to MustDownload-File
# together with -Algorithm SHA512.
$PIGZ_TAR_HASH = '5a6f8f5530acc85ea51797f58c1409e5af6b69e55da243ffc608784cf14fec0cd16f74cc61c564d69e1a267750aecfc1e4c53b5219ff5f893b42a7576306f34c'
  1460  
  1461  # Install Pigz (https://github.com/madler/pigz) into Windows for improved image
  1462  # extraction performance.
  1463  function Install-Pigz {
  1464    if ("${env:WINDOWS_ENABLE_PIGZ}" -eq "true") {
  1465      if (-not (Test-Path $PIGZ_ROOT)) {
  1466        Log-Output "Installing Pigz $PIGZ_VERSION"
  1467        New-Item -Path $PIGZ_ROOT -ItemType Directory
  1468        MustDownload-File `
  1469          -Url $PIGZ_TAR_URL `
  1470          -OutFile "$PIGZ_ROOT\pigz-$PIGZ_VERSION.zip" `
  1471          -Hash $PIGZ_TAR_HASH `
  1472          -Algorithm SHA512
  1473        Expand-Archive -Path "$PIGZ_ROOT\pigz-$PIGZ_VERSION.zip" `
  1474          -DestinationPath $PIGZ_ROOT
  1475        Remove-Item -Path "$PIGZ_ROOT\pigz-$PIGZ_VERSION.zip"
  1476        # Containerd search for unpigz.exe on the first container image
  1477        # pull request after the service is started. If unpigz.exe is in the
  1478        # Windows path it'll use it instead of the default unzipper.
  1479        # See: https://github.com/containerd/containerd/issues/1896
  1480        Add-MachineEnvironmentPath -Path $PIGZ_ROOT
  1481        # Add process exclusion for Windows Defender to boost performance.
  1482        Add-MpPreference -ExclusionProcess "$PIGZ_ROOT\unpigz.exe"
  1483        Log-Output "Installed Pigz $PIGZ_VERSION"
  1484      } else {
  1485        Log-Output "Pigz already installed."
  1486      }
  1487    }
  1488  }
  1489  
# Node Problem Detector Resources
# Name of the Windows service created by Configure-NodeProblemDetector.
$NPD_SERVICE = "node-problem-detector"
# Version installed when ${kube_env}['NODE_PROBLEM_DETECTOR_VERSION'] is empty.
$DEFAULT_NPD_VERSION = '0.8.10-gke0.1'
# Download base URL used when
# ${kube_env}['NODE_PROBLEM_DETECTOR_RELEASE_PATH'] is empty.
$DEFAULT_NPD_RELEASE_PATH = 'https://storage.googleapis.com/gke-release/winnode'
# Expected digest paired with $DEFAULT_NPD_VERSION; the download is verified
# with -Algorithm SHA512.
$DEFAULT_NPD_HASH = '97ddfe3544da9e02a1cfb55d24f329eb29d606fca7fbbf800415d5de9dbc29a00563f8e0d1919595c8e316fd989d45b09b13c07be528841fc5fd37e21d016a2d'
  1495  
  1496  # Install Node Problem Detector (NPD).
  1497  # NPD analyzes the host for problems that can disrupt workloads.
  1498  # https://github.com/kubernetes/node-problem-detector
  1499  function DownloadAndInstall-NodeProblemDetector {
  1500    if ("${env:ENABLE_NODE_PROBLEM_DETECTOR}" -eq "standalone") {
  1501      if (ShouldWrite-File "${env:NODE_DIR}\node-problem-detector.exe") {
  1502        $npd_version = $DEFAULT_NPD_VERSION
  1503        $npd_hash = $DEFAULT_NPD_HASH
  1504        if (-not [string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_VERSION'])) {
  1505          $npd_version = ${kube_env}['NODE_PROBLEM_DETECTOR_VERSION']
  1506          $npd_hash = ${kube_env}['NODE_PROBLEM_DETECTOR_TAR_HASH']
  1507        }
  1508        $npd_release_path = $DEFAULT_NPD_RELEASE_PATH
  1509        if (-not [string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_RELEASE_PATH'])) {
  1510          $npd_release_path = ${kube_env}['NODE_PROBLEM_DETECTOR_RELEASE_PATH']
  1511        }
  1512  
  1513        $npd_tar = "node-problem-detector-v${npd_version}-windows_amd64.tar.gz"
  1514  
  1515        Log-Output "Downloading ${npd_tar}."
  1516  
  1517        $npd_dir = "${env:K8S_DIR}\node-problem-detector"
  1518        New-Item -Path $npd_dir -ItemType Directory -Force -Confirm:$false
  1519  
  1520        MustDownload-File `
  1521            -URLs "${npd_release_path}/node-problem-detector/${npd_tar}" `
  1522            -Hash $npd_hash `
  1523            -Algorithm SHA512 `
  1524            -OutFile "${npd_dir}\${npd_tar}"
  1525  
  1526        tar xzvf "${npd_dir}\${npd_tar}" -C $npd_dir
  1527        Move-Item "${npd_dir}\bin\*" "${env:NODE_DIR}\" -Force -Confirm:$false
  1528        Remove-Item "${npd_dir}\bin" -Force -Confirm:$false
  1529        Remove-Item "${npd_dir}\${npd_tar}" -Force -Confirm:$false
  1530      }
  1531      else {
  1532          Log-Output "Node Problem Detector already installed."
  1533      }
  1534    }
  1535  }
  1536  
  1537  # Creates the node-problem-detector user kubeconfig file at
  1538  # $env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE (if defined).
  1539  #
  1540  # Create-NodePki() must be called first.
  1541  #
  1542  # Required ${kube_env} keys:
  1543  #   CA_CERT
  1544  #   NODE_PROBLEM_DETECTOR_TOKEN
  1545  function Create-NodeProblemDetectorKubeConfig {
  1546    if (-not [string]::IsNullOrEmpty(${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE})) {
  1547      Create-Kubeconfig -Name 'node-problem-detector' `
  1548        -Path ${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE} `
  1549        -Token ${kube_env}['NODE_PROBLEM_DETECTOR_TOKEN']
  1550    }
  1551  }
  1552  
  1553  # Configures NPD to run with the bundled monitor configs and report against the Kubernetes api server.
  1554  function Configure-NodeProblemDetector {
  1555    $npd_bin = "${env:NODE_DIR}\node-problem-detector.exe"
  1556    if ("${env:ENABLE_NODE_PROBLEM_DETECTOR}" -eq "standalone" -and (Test-Path $npd_bin)) {
  1557      $npd_svc = Get-Service -Name $NPD_SERVICE -ErrorAction SilentlyContinue
  1558      if ($npd_svc -eq $null) {
  1559        $npd_dir = "${env:K8S_DIR}\node-problem-detector"
  1560        $npd_logs_dir = "${env:LOGS_DIR}\node-problem-detector"
  1561  
  1562        New-Item -Path $npd_logs_dir -Type Directory -Force -Confirm:$false
  1563  
  1564        $flags = ''
  1565        if ([string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS'])) {
  1566          $system_log_monitors = @()
  1567          $system_stats_monitors = @()
  1568          $custom_plugin_monitors = @()
  1569  
  1570          # Custom Plugin Monitors
  1571          $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-kubelet.json")
  1572          $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-kubeproxy.json")
  1573          $custom_plugin_monitors += @("${npd_dir}\config\windows-defender-monitor.json")
  1574  
  1575          # System Stats Monitors
  1576          $system_stats_monitors += @("${npd_dir}\config\windows-system-stats-monitor.json")
  1577  
  1578          # NPD Configuration for CRI monitor
  1579          $system_log_monitors += @("${npd_dir}\config\windows-containerd-monitor-filelog.json")
  1580          $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-containerd.json")
  1581  
  1582          $flags="--v=2 --port=20256 --log_dir=${npd_logs_dir}"
  1583          if ($system_log_monitors.count -gt 0) {
  1584            $flags+=" --config.system-log-monitor={0}" -f ($system_log_monitors -join ",")
  1585          }
  1586          if ($system_stats_monitors.count -gt 0) {
  1587            $flags+=" --config.system-stats-monitor={0}" -f ($system_stats_monitors -join ",")
  1588          }
  1589          if ($custom_plugin_monitors.count -gt 0) {
  1590            $flags+=" --config.custom-plugin-monitor={0}" -f ($custom_plugin_monitors -join ",")
  1591          }
  1592        }
  1593        else {
  1594          $flags = ${kube_env}['NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS']
  1595        }
  1596        $kubernetes_master_name = ${kube_env}['KUBERNETES_MASTER_NAME']
  1597        $flags = "${flags} --apiserver-override=`"https://${kubernetes_master_name}?inClusterConfig=false&auth=${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE}`""
  1598  
  1599        Log-Output "Creating service: ${NPD_SERVICE}"
  1600        Log-Output "${npd_bin} ${flags}"
  1601        sc.exe create $NPD_SERVICE binpath= "${npd_bin} ${flags}" displayName= "Node Problem Detector"
  1602        sc.exe failure $NPD_SERVICE reset= 30 actions= restart/5000
  1603        sc.exe start $NPD_SERVICE
  1604  
  1605        Write-VerboseServiceInfoToConsole -Service $NPD_SERVICE
  1606      }
  1607      else {
  1608        Log-Output "${NPD_SERVICE} already configured."
  1609      }
  1610    }
  1611  }
  1612  
# TODO(pjh): move the logging agent code below into a separate
# module; it was put here temporarily to avoid disrupting the file layout in
# the K8s release machinery.

# Fluent Bit (td-agent-bit) release downloaded by
# DownloadAndInstall-LoggingAgents.
$LOGGINGAGENT_VERSION = '1.8.10'
# Directory the extracted agent is moved to; its conf\ subdirectory receives
# the generated configuration files.
$LOGGINGAGENT_ROOT = 'C:\fluent-bit'
# Windows service name of the agent.
$LOGGINGAGENT_SERVICE = 'fluent-bit'
# Wildcard pattern matched (-Like) against process command lines when the
# agent has to be force-killed by Restart-LogService.
$LOGGINGAGENT_CMDLINE = '*fluent-bit.exe*'

# Release of the Fluent Bit Stackdriver exporter to download.
$LOGGINGEXPORTER_VERSION = 'v0.17.0'
# Directory the exporter executable is downloaded into.
$LOGGINGEXPORTER_ROOT = 'C:\flb-exporter'
# Windows service name of the exporter.
$LOGGINGEXPORTER_SERVICE = 'flb-exporter'
# Wildcard pattern matched (-Like) against process command lines when the
# exporter has to be force-killed by Restart-LogService.
$LOGGINGEXPORTER_CMDLINE = '*flb-exporter.exe*'
# Expected digest of the exporter executable; verified with -Algorithm SHA256.
$LOGGINGEXPORTER_HASH = 'c808c9645d84b06b89932bd707d51a9d1d0b451b5a702a5f9b2b4462c8be6502'
  1626  
  1627  # Restart Logging agent or starts it if it is not currently running
  1628  function Restart-LoggingAgent {
  1629    if (IsStackdriverAgentInstalled) {
  1630        Restart-StackdriverAgent
  1631        return
  1632    }
  1633  
  1634     Restart-LogService $LOGGINGEXPORTER_SERVICE $LOGGINGEXPORTER_CMDLINE
  1635     Restart-LogService $LOGGINGAGENT_SERVICE $LOGGINGAGENT_CMDLINE
  1636  }
  1637  
  1638  # Restarts the service, or starts it if it is not currently
  1639  # running. A standard `Restart-Service` may fail because
  1640  # the process is sometimes unstoppable, so this function works around it
  1641  # by killing the processes.
  1642  function Restart-LogService([string]$service, [string]$cmdline) {
  1643    Stop-Service -NoWait -ErrorAction Ignore $service
  1644  
  1645    # Wait (if necessary) for service to stop.
  1646    $timeout = 10
  1647    $stopped = (Get-service $service).Status -eq 'Stopped'
  1648    for ($i = 0; $i -lt $timeout -and !($stopped); $i++) {
  1649        Start-Sleep 1
  1650        $stopped = (Get-service $service).Status -eq 'Stopped'
  1651    }
  1652  
  1653    if ((Get-service $service).Status -ne 'Stopped') {
  1654      # Force kill the processes.
  1655      Stop-Process -Force -PassThru -Id (Get-WmiObject win32_process |
  1656        Where CommandLine -Like $cmdline).ProcessId
  1657  
  1658      # Wait until process has stopped.
  1659      $waited = 0
  1660      $log_period = 10
  1661      $timeout = 60
  1662      while ((Get-service $service).Status -ne 'Stopped' -and $waited -lt $timeout) {
  1663        Start-Sleep 1
  1664        $waited++
  1665  
  1666        if ($waited % $log_period -eq 0) {
  1667          Log-Output "Waiting for ${service} service to stop"
  1668        }
  1669      }
  1670  
  1671      # Timeout occurred
  1672      if ($waited -ge $timeout) {
  1673        Throw ("Timeout while waiting for ${service} service to stop")
  1674      }
  1675    }
  1676  
  1677    Start-Service $service
  1678  }
  1679  
  1680  # Check whether the logging agent is installed by whether it's registered as service
  1681  function IsLoggingAgentInstalled {
  1682    $logging_status = (Get-Service $LOGGINGAGENT_SERVICE -ErrorAction Ignore).Status
  1683    return -not [string]::IsNullOrEmpty($logging_status)
  1684  }
  1685  
  1686  # Installs the logging agent according to https://docs.fluentbit.io/manual/installation/windows#
  1687  # Also installs fluent bit stackdriver exporter
  1688  function Install-LoggingAgent {
  1689    if (IsStackdriverAgentInstalled) {
  1690      # Remove the existing storage.json file if it exists. This is a workaround
  1691      # for the bug where the logging agent cannot start up if the file is
  1692      # corrupted.
  1693      Remove-Item `
  1694        -Force `
  1695        -ErrorAction Ignore `
  1696        ("$STACKDRIVER_ROOT\LoggingAgent\Main\pos\winevtlog.pos\worker0\" +
  1697         "storage.json")
  1698      Log-Output ("Skip: Stackdriver logging agent is already installed")
  1699      return
  1700    }
  1701  
  1702    if (IsLoggingAgentInstalled) {
  1703      # Note: we should reinstall the agent if $REDO_STEPS is true
  1704      # here, but we don't know how to run the installer without it prompting
  1705      # when logging agent is already installed. We dumped the strings in the
  1706      # installer binary and searched for flags to do this but found nothing. Oh
  1707      # well.
  1708      Log-Output ("Skip: Fluentbit logging agent is already installed")
  1709      return
  1710    }
  1711  
  1712    DownloadAndInstall-LoggingAgents
  1713    Create-LoggingAgentServices
  1714  }
  1715  
  1716  function DownloadAndInstall-LoggingAgents {
  1717    # Install Logging agent if not present
  1718    if (ShouldWrite-File $LOGGINGAGENT_ROOT\td-agent-bit-${LOGGINGAGENT_VERSION}-win64) {
  1719        $install_dir = 'C:\flb-installers'
  1720        $url = ("https://storage.googleapis.com/gke-release/winnode/fluentbit/td-agent-bit-${LOGGINGAGENT_VERSION}-win64.zip")
  1721  
  1722        Log-Output 'Downloading Logging agent'
  1723        New-Item $install_dir -ItemType 'directory' -Force | Out-Null
  1724        MustDownload-File -OutFile $install_dir\td.zip -URLs $url
  1725  
  1726        cd $install_dir
  1727        Log-Output 'Extracting Logging agent'
  1728        Expand-Archive td.zip
  1729        mv .\td\td-agent-bit-${LOGGINGAGENT_VERSION}-win64\ $LOGGINGAGENT_ROOT
  1730        cd C:\
  1731        Remove-Item -Force -Recurse $install_dir
  1732    }
  1733  
  1734    # Download Logging exporter if needed
  1735    if (ShouldWrite-File $LOGGINGEXPORTER_ROOT\flb-exporter.exe) {
  1736        $url = ("https://storage.googleapis.com/gke-release/winnode/fluentbit-exporter/${LOGGINGEXPORTER_VERSION}/flb-exporter-${LOGGINGEXPORTER_VERSION}.exe")
  1737        Log-Output 'Downloading logging exporter'
  1738        New-Item $LOGGINGEXPORTER_ROOT -ItemType 'directory' -Force | Out-Null
  1739        MustDownload-File `
  1740            -OutFile $LOGGINGEXPORTER_ROOT\flb-exporter.exe `
  1741            -URLs $url `
  1742            -Hash $LOGGINGEXPORTER_HASH `
  1743            -Algorithm SHA256
  1744    }
  1745  }
  1746  
  1747  function Create-LoggingAgentServices {
  1748    cd $LOGGINGAGENT_ROOT
  1749  
  1750    Log-Output "Creating service: ${LOGGINGAGENT_SERVICE}"
  1751    sc.exe create $LOGGINGAGENT_SERVICE binpath= "${LOGGINGAGENT_ROOT}\bin\fluent-bit.exe -c \fluent-bit\conf\fluent-bit.conf"
  1752    sc.exe failure $LOGGINGAGENT_SERVICE reset= 30 actions= restart/5000
  1753    Write-VerboseServiceInfoToConsole -Service $LOGGINGAGENT_SERVICE
  1754  
  1755    Log-Output "Creating service: ${LOGGINGEXPORTER_SERVICE}"
  1756    sc.exe create  $LOGGINGEXPORTER_SERVICE  binpath= "${LOGGINGEXPORTER_ROOT}\flb-exporter.exe --kubernetes-separator=_ --stackdriver-resource-model=k8s --enable-pod-label-discovery --logtostderr --winsvc  --pod-label-dot-replacement=_"
  1757    sc.exe failure $LOGGINGEXPORTER_SERVICE reset= 30 actions= restart/5000
  1758    Write-VerboseServiceInfoToConsole -Service $LOGGINGEXPORTER_SERVICE
  1759  }
  1760  
  1761  # Writes the logging configuration file for Logging agent. Restart-LoggingAgent
  1762  # should then be called to pick up the new configuration.
  1763  function Configure-LoggingAgent {
  1764    if (IsStackdriverAgentInstalled) {
  1765        Configure-StackdriverAgent
  1766        return
  1767    }
  1768  
  1769    $fluentbit_config_file = "$LOGGINGAGENT_ROOT\conf\fluent-bit.conf"
  1770    $FLUENTBIT_CONFIG | Out-File -FilePath $fluentbit_config_file -Encoding ASCII
  1771    Log-Output "Wrote logging config to $fluentbit_config_file"
  1772  
  1773    $fluentbit_parser_file = "$LOGGINGAGENT_ROOT\conf\parsers.conf"
  1774    $PARSERS_CONFIG | Out-File -FilePath $fluentbit_parser_file -Encoding ASCII
  1775  
  1776    # Create directory for all the log position files.
  1777    New-Item -Type Directory -Path "/var/run/google-fluentbit/pos-files/" -Force | Out-Null
  1778  
  1779    Log-Output "Wrote logging config to $fluentbit_parser_file"
  1780  }
  1781  
# Fluent Bit main config file; Configure-LoggingAgent writes it to
# $LOGGINGAGENT_ROOT\conf\fluent-bit.conf.
  1783  $FLUENTBIT_CONFIG = @'
  1784  [SERVICE]
  1785      Flush         5
  1786      Grace         120
  1787      Log_Level     info
  1788      Log_File      /var/log/fluentbit.log
  1789      Daemon        off
  1790      Parsers_File  parsers.conf
  1791      HTTP_Server   off
  1792      HTTP_Listen   0.0.0.0
  1793      HTTP_PORT     2020
  1794      plugins_file plugins.conf
  1795  
  1796      # Storage
  1797      # =======
  1798      # Fluent Bit can use memory and filesystem buffering based mechanisms
  1799      #
  1800      # - https://docs.fluentbit.io/manual/administration/buffering-and-storage
  1801      #
  1802      # storage metrics
  1803      # ---------------
  1804      # publish storage pipeline metrics in '/api/v1/storage'. The metrics are
  1805      # exported only if the 'http_server' option is enabled.
  1806      #
  1807      # storage.metrics on
  1808  
  1809      # storage.path
  1810      # ------------
  1811      # absolute file system path to store filesystem data buffers (chunks).
  1812      #
  1813      # storage.path /tmp/storage
  1814  
  1815      # storage.sync
  1816      # ------------
  1817      # configure the synchronization mode used to store the data into the
  1818      # filesystem. It can take the values normal or full.
  1819      #
  1820      # storage.sync normal
  1821  
  1822      # storage.checksum
  1823      # ----------------
  1824      # enable the data integrity check when writing and reading data from the
  1825      # filesystem. The storage layer uses the CRC32 algorithm.
  1826      #
  1827      # storage.checksum off
  1828  
  1829      # storage.backlog.mem_limit
  1830      # -------------------------
  1831      # if storage.path is set, Fluent Bit will look for data chunks that were
  1832      # not delivered and are still in the storage layer, these are called
  1833      # backlog data. This option configure a hint of maximum value of memory
  1834      # to use when processing these records.
  1835      #
  1836      # storage.backlog.mem_limit 5M
  1837  
  1838  [INPUT]
  1839      Name         winlog
  1840      Interval_Sec 2
  1841      # Channels Setup,Windows PowerShell
  1842      Channels     application,system,security
  1843      Tag          winevt.raw
  1844      DB           /var/run/google-fluentbit/pos-files/winlog.db
  1845  
  1846  # Json Log Example:
  1847  # {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
  1848  [INPUT]
  1849      Name             tail
  1850      Alias            kube_containers
  1851      Tag              kube_<namespace_name>_<pod_name>_<container_name>
  1852      Tag_Regex        (?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-
  1853      Mem_Buf_Limit    5MB
  1854      Skip_Long_Lines  On
  1855      Refresh_Interval 5
  1856      Path             C:\var\log\containers\*.log
  1857      DB               /var/run/google-fluentbit/pos-files/flb_kube.db
  1858  
  1859  [FILTER]
  1860      Name         parser
  1861      Match        kube_*
  1862      Key_Name     log
  1863      Reserve_Data True
  1864      Parser       docker
  1865      Parser       containerd
  1866  
  1867  # Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
  1868  # Example:
  1869  # I0716 02:08:55.559351    3356 log_spam.go:42] Command line arguments:
  1870  [INPUT]
  1871      Name             tail
  1872      Alias            node-problem-detector
  1873      Tag              node-problem-detector
  1874      Mem_Buf_Limit    5MB
  1875      Skip_Long_Lines  On
  1876      Refresh_Interval 5
  1877      Path             C:\etc\kubernetes\logs\node-problem-detector\*.log.INFO*
  1878      DB               /var/run/google-fluentbit/pos-files/node-problem-detector.db
  1879      Multiline        On
  1880      Parser_Firstline glog
  1881  
  1882  # Example:
  1883  # I0928 03:15:50.440223    4880 main.go:51] Starting CSI-Proxy Server ...
  1884  [INPUT]
  1885      Name             tail
  1886      Alias            csi-proxy
  1887      Tag              csi-proxy
  1888      Mem_Buf_Limit    5MB
  1889      Skip_Long_Lines  On
  1890      Refresh_Interval 5
  1891      Path             /etc/kubernetes/logs/csi-proxy.log
  1892      DB               /var/run/google-fluentbit/pos-files/csi-proxy.db
  1893      Multiline        On
  1894      Parser_Firstline glog
  1895  
  1896  # I1118 21:26:53.975789       6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
  1897  [INPUT]
  1898      Name             tail
  1899      Alias            kube-proxy
  1900      Tag              kube-proxy
  1901      Mem_Buf_Limit    5MB
  1902      Skip_Long_Lines  On
  1903      Refresh_Interval 5
  1904      Path             /etc/kubernetes/logs/kube-proxy.log
  1905      DB               /var/run/google-fluentbit/pos-files/kube-proxy.db
  1906      Multiline        On
  1907      Parser_Firstline glog
  1908  
  1909  # Example:
  1910  # time="2019-12-10T21:27:59.836946700Z" level=info msg="loading plugin \"io.containerd.grpc.v1.cri\"..." type=io.containerd.grpc.v1
  1911  [INPUT]
  1912      Name             tail
  1913      Alias            container-runtime
  1914      Tag              container-runtime
  1915      Mem_Buf_Limit    5MB
  1916      Skip_Long_Lines  On
  1917      Refresh_Interval 5
  1918      Path             /etc/kubernetes/logs/containerd.log
  1919      DB               /var/run/google-fluentbit/pos-files/container-runtime.db
  1920      # TODO: Add custom parser for containerd logs once format is settled.
  1921  
  1922  # Example:
  1923  # I0204 07:32:30.020537    3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
  1924  [INPUT]
  1925      Name             tail
  1926      Alias            kubelet
  1927      Tag              kubelet
  1928      Mem_Buf_Limit    5MB
  1929      Skip_Long_Lines  On
  1930      Refresh_Interval 5
  1931      Path             /etc/kubernetes/logs/kubelet.log
  1932      DB               /var/run/google-fluentbit/pos-files/kubelet.db
  1933      Multiline        On
  1934      Parser_Firstline glog
  1935  
  1936  [FILTER]
  1937      Name        modify
  1938      Match       *
  1939      Hard_rename log message
  1940  
  1941  [FILTER]
  1942      Name        modify
  1943      Match       winevt.raw
  1944      Hard_rename Message message
  1945  
  1946  [FILTER]
  1947      Name         parser
  1948      Match        kube_*
  1949      Key_Name     message
  1950      Reserve_Data True
  1951      Parser       glog
  1952      Parser       json
  1953  
  1954  [OUTPUT]
  1955      Name        http
  1956      Match       *
  1957      Host        127.0.0.1
  1958      Port        2021
  1959      URI         /logs
  1960      header_tag  FLUENT-TAG
  1961      Format      msgpack
  1962      Retry_Limit 2
  1963  '@
  1964  
# Fluent Bit parsers config file; Configure-LoggingAgent writes it to
# $LOGGINGAGENT_ROOT\conf\parsers.conf.
  1966  $PARSERS_CONFIG = @'
  1967  [PARSER]
  1968      Name        docker
  1969      Format      json
  1970      Time_Key    time
  1971      Time_Format %Y-%m-%dT%H:%M:%S.%L%z
  1972  
  1973  [PARSER]
  1974      Name        containerd
  1975      Format      regex
  1976      Regex       ^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$
  1977      Time_Key    time
  1978      Time_Format %Y-%m-%dT%H:%M:%S.%L%z
  1979  
  1980  [PARSER]
  1981      Name        json
  1982      Format      json
  1983  
  1984  [PARSER]
  1985      Name        syslog
  1986      Format      regex
  1987      Regex       ^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$
  1988      Time_Key    time
  1989      Time_Format %b %d %H:%M:%S
  1990  
  1991  [PARSER]
  1992      Name        glog
  1993      Format      regex
  1994      Regex       ^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source_file>[^ \]]+)\:(?<source_line>\d+)\]\s(?<message>.*)$
  1995      Time_Key    time
  1996      Time_Format %m%d %H:%M:%S.%L
  1997  
  1998  [PARSER]
  1999      Name        network-log
  2000      Format      json
  2001      Time_Key    timestamp
  2002      Time_Format %Y-%m-%dT%H:%M:%S.%L%z
  2003  
  2004  [PARSER]
  2005      Name        syslog-rfc5424
  2006      Format      regex
  2007      Regex       ^\<(?<pri>[0-9]{1,5})\>1 (?<time>[^ ]+) (?<host>[^ ]+) (?<ident>[^ ]+) (?<pid>[-0-9]+) (?<msgid>[^ ]+) (?<extradata>(\[(.*?)\]|-)) (?<message>.+)$
  2008      Time_Key    time
  2009      Time_Format %Y-%m-%dT%H:%M:%S.%L%z
  2010      Time_Keep   On
  2011  
  2012  [PARSER]
  2013      Name        syslog-rfc3164-local
  2014      Format      regex
  2015      Regex       ^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$
  2016      Time_Key    time
  2017      Time_Format %b %d %H:%M:%S
  2018      Time_Keep   On
  2019  
  2020  [PARSER]
  2021      Name        syslog-rfc3164
  2022      Format      regex
  2023      Regex       /^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/
  2024      Time_Key    time
  2025      Time_Format %b %d %H:%M:%S
  2026      Time_Keep   On
  2027  
  2028  [PARSER]
  2029      Name    kube-custom
  2030      Format  regex
  2031      Regex   (?<tag>[^.]+)?\.?(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$
  2032  '@
  2033  
  2034  
# ----------- Stackdriver logging setup --------------------------
# This section will be deprecated soon.
#

# Root directory under which the Stackdriver logging agent's files
# (LoggingAgent\config.d, LoggingAgent\Main\pos\...) are located.
$STACKDRIVER_ROOT = 'C:\Program Files (x86)\Stackdriver'
  2040  
  2041  # Restarts the Stackdriver logging agent, or starts it if it is not currently
  2042  # running. A standard `Restart-Service StackdriverLogging` may fail because
  2043  # StackdriverLogging sometimes is unstoppable, so this function works around it
  2044  # by killing the processes.
  2045  function Restart-StackdriverAgent {
  2046    Stop-Service -NoWait -ErrorAction Ignore StackdriverLogging
  2047  
  2048    # Wait (if necessary) for service to stop.
  2049    $timeout = 10
  2050    $stopped = (Get-service StackdriverLogging).Status -eq 'Stopped'
  2051    for ($i = 0; $i -lt $timeout -and !($stopped); $i++) {
  2052        Start-Sleep 1
  2053        $stopped = (Get-service StackdriverLogging).Status -eq 'Stopped'
  2054    }
  2055  
  2056    if ((Get-service StackdriverLogging).Status -ne 'Stopped') {
  2057      # Force kill the processes.
  2058      Stop-Process -Force -PassThru -Id (Get-WmiObject win32_process |
  2059        Where CommandLine -Like '*Stackdriver/logging*').ProcessId
  2060  
  2061      # Wait until process has stopped.
  2062      $waited = 0
  2063      $log_period = 10
  2064      $timeout = 60
  2065      while ((Get-service StackdriverLogging).Status -ne 'Stopped' -and $waited -lt $timeout) {
  2066        Start-Sleep 1
  2067        $waited++
  2068  
  2069        if ($waited % $log_period -eq 0) {
  2070          Log-Output "Waiting for StackdriverLogging service to stop"
  2071        }
  2072      }
  2073  
  2074      # Timeout occurred
  2075      if ($waited -ge $timeout) {
  2076        Throw ("Timeout while waiting for StackdriverLogging service to stop")
  2077      }
  2078    }
  2079  
  2080    Start-Service StackdriverLogging
  2081  }
  2082  
  2083  # Check whether the logging agent is installed by whether it's registered as service
  2084  function IsStackdriverAgentInstalled {
  2085    $stackdriver_status = (Get-Service StackdriverLogging -ErrorAction Ignore).Status
  2086    return -not [string]::IsNullOrEmpty($stackdriver_status)
  2087  }
  2088  
  2089  # Writes the logging configuration file for Stackdriver. Restart-LoggingAgent
  2090  # should then be called to pick up the new configuration.
  2091  function Configure-StackdriverAgent {
  2092    $fluentd_config_dir = "$STACKDRIVER_ROOT\LoggingAgent\config.d"
  2093    $fluentd_config_file = "$fluentd_config_dir\k8s_containers.conf"
  2094  
  2095    # Create a configuration file for kubernetes containers.
  2096    # The config.d directory should have already been created automatically, but
  2097    # try creating again just in case.
  2098    New-Item $fluentd_config_dir -ItemType 'directory' -Force | Out-Null
  2099  
  2100    $config = $FLUENTD_CONFIG.replace('NODE_NAME', (hostname))
  2101    $config | Out-File -FilePath $fluentd_config_file -Encoding ASCII
  2102    Log-Output "Wrote fluentd logging config to $fluentd_config_file"
  2103  
  2104    # Configure StackdriverLogging to automatically restart on failure after 10
  2105    # seconds. The logging agent may die die to various disruptions but can be
  2106    # resumed.
  2107    sc.exe failure StackdriverLogging reset= 0 actions= restart/1000/restart/10000
  2108    Write-VerboseServiceInfoToConsole -Service 'StackdriverLogging'
  2109  }
  2110  
# Fluentd configuration template that Configure-StackdriverAgent writes out as
# k8s_containers.conf in the logging agent's config.d directory.
# The NODE_NAME placeholder must be replaced with the node's name (hostname).
$FLUENTD_CONFIG = @'
# This configuration file for Fluentd is used to watch changes to kubernetes
# container logs in the directory /var/lib/docker/containers/ and submit the
# log records to Google Cloud Logging using the cloud-logging plugin.
#
# Example
# =======
# A line in the Docker log file might look like this JSON:
#
# {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
#  "stream":"stderr",
#   "time":"2014-09-25T21:15:03.499185026Z"}
#
# The original tag is derived from the log file's location.
# For example a Docker container's logs might be in the directory:
#  /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
# and in the file:
#  997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
# where 997599971ee6... is the Docker ID of the running container.
# The Kubernetes kubelet makes a symbolic link to this file on the host
# machine in the /var/log/containers directory which includes the pod name,
# the namespace name and the Kubernetes container name:
#    synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
#    ->
#    /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
# The /var/log directory on the host is mapped to the /var/log directory in the container
# running this instance of Fluentd and we end up collecting the file:
#   /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
# This results in the tag:
#  var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
# where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
# namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
# the container ID.
# The record reformer is used to extract pod_name, namespace_name and
# container_name from the tag and set them in a local_resource_id in the
# format of:
# 'k8s_container.<NAMESPACE_NAME>.<POD_NAME>.<CONTAINER_NAME>'.
# The reformer also changes the tags to 'stderr' or 'stdout' based on the
# value of 'stream'.
# local_resource_id is later used by google_cloud plugin to determine the
# monitored resource to ingest logs against.
# Json Log Example:
# {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
# CRI Log Example:
# 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here
<source>
  @type tail
  path /var/log/containers/*.log
  pos_file /var/log/gcp-containers.log.pos
  # Tags at this point are in the format of:
  # reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log
  tag reform.*
  read_from_head true
  <parse>
    @type multi_format
    <pattern>
      format json
      time_key time
      time_format %Y-%m-%dT%H:%M:%S.%NZ
      keep_time_key
    </pattern>
    <pattern>
      format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
      time_format %Y-%m-%dT%H:%M:%S.%N%:z
    </pattern>
  </parse>
</source>
# Example:
# I0204 07:32:30.020537    3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
<source>
  @type tail
  format multiline
  multiline_flush_interval 5s
  format_firstline /^\w\d{4}/
  format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
  time_format %m%d %H:%M:%S.%N
  path /etc/kubernetes/logs/kubelet.log
  pos_file /etc/kubernetes/logs/gcp-kubelet.log.pos
  tag kubelet
</source>
# Example:
# I1118 21:26:53.975789       6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
<source>
  @type tail
  format multiline
  multiline_flush_interval 5s
  format_firstline /^\w\d{4}/
  format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
  time_format %m%d %H:%M:%S.%N
  path /etc/kubernetes/logs/kube-proxy.log
  pos_file /etc/kubernetes/logs/gcp-kube-proxy.log.pos
  tag kube-proxy
</source>
# Example:
# I0928 03:15:50.440223    4880 main.go:51] Starting CSI-Proxy Server ...
<source>
  @type tail
  format multiline
  multiline_flush_interval 5s
  format_firstline /^\w\d{4}/
  format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
  time_format %m%d %H:%M:%S.%N
  path /etc/kubernetes/logs/csi-proxy.log
  pos_file /etc/kubernetes/logs/gcp-csi-proxy.log.pos
  tag csi-proxy
</source>
# Example:
# time="2019-12-10T21:27:59.836946700Z" level=info msg="loading plugin \"io.containerd.grpc.v1.cri\"..." type=io.containerd.grpc.v1
<source>
  @type tail
  format multiline
  multiline_flush_interval 5s
  format_firstline /^time=/
  format1 /^time="(?<time>[^ ]*)" level=(?<severity>\w*) (?<message>.*)/
  time_format %Y-%m-%dT%H:%M:%S.%N%z
  path /etc/kubernetes/logs/containerd.log
  pos_file /etc/kubernetes/logs/gcp-containerd.log.pos
  tag container-runtime
</source>
<match reform.**>
  @type record_reformer
  enable_ruby true
  <record>
    # Extract local_resource_id from tag for 'k8s_container' monitored
    # resource. The format is:
    # 'k8s_container.<namespace_name>.<pod_name>.<container_name>'.
    "logging.googleapis.com/local_resource_id" ${"k8s_container.#{tag_suffix[4].rpartition('.')[0].split('_')[1]}.#{tag_suffix[4].rpartition('.')[0].split('_')[0]}.#{tag_suffix[4].rpartition('.')[0].split('_')[2].rpartition('-')[0]}"}
    # Rename the field 'log' to a more generic field 'message'. This way the
    # fluent-plugin-google-cloud knows to flatten the field as textPayload
    # instead of jsonPayload after extracting 'time', 'severity' and
    # 'stream' from the record.
    message ${record['log']}
    # If 'severity' is not set, assume stderr is ERROR and stdout is INFO.
    severity ${record['severity'] || if record['stream'] == 'stderr' then 'ERROR' else 'INFO' end}
  </record>
  tag ${if record['stream'] == 'stderr' then 'raw.stderr' else 'raw.stdout' end}
  remove_keys stream,log
</match>
# TODO: detect exceptions and forward them as one log entry using the
# detect_exceptions plugin
# This section is exclusive for k8s_container logs. These logs come with
# 'raw.stderr' or 'raw.stdout' tags.
<match {raw.stderr,raw.stdout}>
  @type google_cloud
  # Try to detect JSON formatted log entries.
  detect_json true
  # Allow log entries from multiple containers to be sent in the same request.
  split_logs_by_tag false
  # Set the buffer type to file to improve the reliability and reduce the memory consumption
  buffer_type file
  buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
  # Set queue_full action to block because we want to pause gracefully
  # in case of the off-the-limits load instead of throwing an exception
  buffer_queue_full_action block
  # Set the chunk limit conservatively to avoid exceeding the recommended
  # chunk size of 5MB per write request.
  buffer_chunk_limit 512k
  # Cap the combined memory usage of this buffer and the one below to
  # 512KiB/chunk * (6 + 2) chunks = 4 MiB
  buffer_queue_limit 6
  # Never wait more than 5 seconds before flushing logs in the non-error case.
  flush_interval 5s
  # Never wait longer than 30 seconds between retries.
  max_retry_wait 30
  # Disable the limit on the number of retries (retry forever).
  disable_retry_limit
  # Use multiple threads for processing.
  num_threads 2
  use_grpc true
  # Skip timestamp adjustment as this is in a controlled environment with
  # known timestamp format. This helps with CPU usage.
  adjust_invalid_timestamps false
</match>
# Attach local_resource_id for 'k8s_node' monitored resource.
<filter **>
  @type record_transformer
  enable_ruby true
  <record>
    "logging.googleapis.com/local_resource_id" ${"k8s_node.NODE_NAME"}
  </record>
</filter>
'@
  2294  
  2295  # Downloads the out-of-tree kubelet image credential provider binaries.
  2296  function DownloadAndInstall-AuthProviderGcpBinary {
  2297    if ("${env:ENABLE_AUTH_PROVIDER_GCP}" -eq "true") {
  2298      $filename = 'auth-provider-gcp.exe'
  2299      if (ShouldWrite-File ${env:AUTH_PROVIDER_GCP_WINDOWS_BIN_DIR}\$filename) {
  2300        Log-Output "Installing auth provider gcp binaries"
  2301        $tmp_dir = 'C:\k8s_tmp'
  2302        New-Item -Force -ItemType 'directory' $tmp_dir | Out-Null
  2303        $url = "${env:AUTH_PROVIDER_GCP_STORAGE_PATH}/${env:AUTH_PROVIDER_GCP_VERSION}/windows_amd64/$filename"
  2304        MustDownload-File -Hash $AUTH_PROVIDER_GCP_HASH_WINDOWS_AMD64 -Algorithm SHA512 -OutFile $tmp_dir\$filename -URLs $url
  2305        Move-Item -Force $tmp_dir\$filename ${env:AUTH_PROVIDER_GCP_WINDOWS_BIN_DIR}
  2306        Remove-Item -Force -Recurse $tmp_dir
  2307      } else {
  2308        Log-Output "Skipping auth provider gcp binaries installation, auth-provider-gcp.exe file already exists."
  2309      }
  2310    }
  2311  }
  2312  
  2313  # Creates config file for the out-of-tree kubelet image credential provider.
  2314  function Create-AuthProviderGcpConfig {
  2315    if ("${env:ENABLE_AUTH_PROVIDER_GCP}" -eq "true") {
  2316      if (ShouldWrite-File ${env:AUTH_PROVIDER_GCP_WINDOWS_CONF_FILE}) {
  2317        Log-Output "Creating auth provider gcp config file"
  2318        Set-Content ${env:AUTH_PROVIDER_GCP_WINDOWS_CONF_FILE} @'
  2319  kind: CredentialProviderConfig
  2320  apiVersion: kubelet.config.k8s.io/v1
  2321  providers:
  2322    - name: auth-provider-gcp.exe
  2323      apiVersion: credentialprovider.kubelet.k8s.io/v1
  2324      matchImages:
  2325      - "container.cloud.google.com"
  2326      - "gcr.io"
  2327      - "*.gcr.io"
  2328      - "*.pkg.dev"
  2329      args:
  2330      - get-credentials
  2331      - --v=3
  2332      defaultCacheDuration: 1m
  2333  '@
  2334      } else {
  2335        Log-Output "Skipping auth provider gcp config file creation, it already exists"
  2336      }
  2337    }
  2338  }
  2339  
  2340  
  2341  # Export all public functions:
  2342  Export-ModuleMember -Function *-*