github.com/apache/beam/sdks/v2@v2.48.2/java/testing/tpcds/build.gradle (about)

     1  /*
     2   * Licensed to the Apache Software Foundation (ASF) under one
     3   * or more contributor license agreements.  See the NOTICE file
     4   * distributed with this work for additional information
     5   * regarding copyright ownership.  The ASF licenses this file
     6   * to you under the Apache License, Version 2.0 (the
     7   * License); you may not use this file except in compliance
     8   * with the License.  You may obtain a copy of the License at
     9   *
    10   *     http://www.apache.org/licenses/LICENSE-2.0
    11   *
    12   * Unless required by applicable law or agreed to in writing, software
    13   * distributed under the License is distributed on an AS IS BASIS,
    14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15   * See the License for the specific language governing permissions and
    16   * limitations under the License.
    17   */
    18  
    19  plugins { id 'org.apache.beam.module' }
    20  applyJavaNature(
    21          automaticModuleName: 'org.apache.beam.sdk.tpcds',
    22          exportJavadoc: false,
    23          archivesBaseName: 'beam-sdks-java-tpcds',
    24  )
    25  
    26  description = "Apache Beam :: SDKs :: Java :: TPC-DS"
    27  
    28  // When running via Gradle, this property can be used to pass commandline arguments
    29  // to the TPD-DS run
    30  def tpcdsArgsProperty = "tpcds.args"
    31  
    32  // When running via Gradle, this property sets the runner dependency
    33  def tpcdsRunnerProperty = "tpcds.runner"
    34  def tpcdsRunnerDependency = project.findProperty(tpcdsRunnerProperty)
    35          ?: ":runners:direct-java"
    36  def isSpark = tpcdsRunnerDependency.startsWith(":runners:spark:")
    37  def isDataflowRunner = ":runners:google-cloud-dataflow-java".equals(tpcdsRunnerDependency)
    38  def runnerConfiguration = ":runners:direct-java".equals(tpcdsRunnerDependency) ? "shadow" : null
    39  
    40  if (isDataflowRunner) {
    41      /*
    42       * We need to rely on manually specifying these evaluationDependsOn to ensure that
    43       * the following projects are evaluated before we evaluate this project. This is because
    44       * we are attempting to reference a property from the project directly.
    45       */
    46      evaluationDependsOn(":runners:google-cloud-dataflow-java:worker")
    47  }
    48  
    49  configurations {
    50      // A configuration for running the TPC-DS launcher directly from Gradle, which
    51      // uses Gradle to put the appropriate dependencies on the Classpath rather than
    52      // bundling them into a fat jar
    53      gradleRun
    54  }
    55  
    56  dependencies {
    57      implementation enforcedPlatform(library.java.google_cloud_platform_libraries_bom)
    58  
    59      // TODO(https://github.com/apache/beam/issues/21156): Determine how to build without this dependency
    60      provided "org.immutables:value:2.8.8"
    61      permitUnusedDeclared "org.immutables:value:2.8.8"
    62      implementation library.java.avro
    63      implementation library.java.joda_time
    64      implementation library.java.vendored_guava_26_0_jre
    65      implementation library.java.vendored_calcite_1_28_0
    66      implementation library.java.commons_csv
    67      implementation library.java.slf4j_api
    68      implementation "com.googlecode.json-simple:json-simple:1.1.1"
    69      implementation "com.alibaba:fastjson:1.2.69"
    70      implementation project(":sdks:java:extensions:sql")
    71      implementation project(":sdks:java:extensions:sql:zetasql")
    72      implementation project(":sdks:java:io:parquet")
    73      implementation project(":sdks:java:extensions:google-cloud-platform-core")
    74      implementation project(":sdks:java:testing:test-utils")
    75      permitUnusedDeclared project(":sdks:java:extensions:google-cloud-platform-core")
    76      implementation project(":sdks:java:io:google-cloud-platform")
    77      permitUnusedDeclared project(":sdks:java:io:google-cloud-platform")
    78      implementation project(":runners:google-cloud-dataflow-java")
    79      implementation project(path: ":sdks:java:core", configuration: "shadow")
    80      testRuntimeOnly library.java.slf4j_jdk14
    81      testImplementation project(path: ":sdks:java:io:google-cloud-platform", configuration: "testRuntimeMigration")
    82      testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration")
    83      gradleRun project(project.path)
    84      gradleRun project(path: tpcdsRunnerDependency, configuration: runnerConfiguration)
    85  }
    86  
    87  if (isSpark) {
    88      configurations.gradleRun {
    89        exclude group: "org.slf4j", module: "slf4j-jdk14"
    90      }
    91  }
    92  
    93  // Execute the TPC-DS queries or suites via Gradle.
    94  //
    95  // Parameters:
    96  //   -Ptpcds.runner
    97  //       Specify a runner subproject, such as ":runners:spark:3" or ":runners:flink:1.13"
    98  //       Defaults to ":runners:direct-java"
    99  //
   100  //   -Ptpcds.args
   101  //       Specify the command line for invoking org.apache.beam.sdk.tpcds.BeamTpcds
   102  task run(type: JavaExec) {
   103      def tpcdsArgsStr = project.findProperty(tpcdsArgsProperty) ?: ""
   104      def tpcdsArgsList = new ArrayList<String>()
   105      Collections.addAll(tpcdsArgsList, tpcdsArgsStr.split())
   106  
   107      if (isDataflowRunner) {
   108          dependsOn ":runners:google-cloud-dataflow-java:worker:shadowJar"
   109  
   110          def dataflowWorkerJar = project.findProperty('dataflowWorkerJar') ?:
   111                  project(":runners:google-cloud-dataflow-java:worker")
   112                          .shadowJar.archivePath
   113          // Provide job with a customizable worker jar.
   114          // With legacy worker jar, containerImage is set to empty (i.e. to use the internal build).
   115          // More context and discussions can be found in PR#6694.
   116          tpcdsArgsList.add("--dataflowWorkerJar=${dataflowWorkerJar}".toString())
   117          tpcdsArgsList.add('--workerHarnessContainerImage=')
   118      }
   119  
   120    if(isSpark) {
   121      // Disable UI
   122      systemProperty "spark.ui.enabled", "false"
   123      systemProperty "spark.ui.showConsoleProgress", "false"
   124  
   125      // For transparency, be explicit about configuration of local Spark
   126      tpcdsArgsList.add("--sparkMaster=local[4]")
   127      // Dataset runner only
   128      systemProperty "spark.sql.shuffle.partitions", "4"
   129    }
   130  
   131      mainClass = "org.apache.beam.sdk.tpcds.BeamTpcds"
   132      classpath = configurations.gradleRun
   133      args tpcdsArgsList.toArray()
   134  }