github.com/apache/beam/sdks/v2@v2.48.2/java/testing/nexmark/build.gradle (about)

     1  /*
     2   * Licensed to the Apache Software Foundation (ASF) under one
     3   * or more contributor license agreements.  See the NOTICE file
     4   * distributed with this work for additional information
     5   * regarding copyright ownership.  The ASF licenses this file
     6   * to you under the Apache License, Version 2.0 (the
     7   * License); you may not use this file except in compliance
     8   * with the License.  You may obtain a copy of the License at
     9   *
    10   *     http://www.apache.org/licenses/LICENSE-2.0
    11   *
    12   * Unless required by applicable law or agreed to in writing, software
    13   * distributed under the License is distributed on an AS IS BASIS,
    14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15   * See the License for the specific language governing permissions and
    16   * limitations under the License.
    17   */
    18  
    19  plugins { id 'org.apache.beam.module' }
    20  applyJavaNature(
    21      automaticModuleName: 'org.apache.beam.sdk.nexmark',
    22      exportJavadoc: false,
    23      archivesBaseName: 'beam-sdks-java-nexmark',
    24  )
    25  
    26  description = "Apache Beam :: SDKs :: Java :: Nexmark"
    27  
    28  // When running via Gradle, this property can be used to pass commandline arguments
    29  // to the nexmark launch
    30  def nexmarkArgsProperty = "nexmark.args"
    31  // When running via Gradle, this property can be set to "true" to enable profiling for
    32  // the nexmark pipeline. Currently only works for the Dataflow runner.
    33  def nexmarkProfilingProperty = "nexmark.profile"
    34  
    35  // When running via Gradle, this property sets the runner dependency
    36  def nexmarkRunnerProperty = "nexmark.runner"
    37  def nexmarkRunnerDependency = project.findProperty(nexmarkRunnerProperty)
    38          ?: ":runners:direct-java"
    39  def nexmarkRunnerVersionProperty = "nexmark.runner.version"
    40  def nexmarkRunnerVersion = project.findProperty(nexmarkRunnerVersionProperty)
    41  def isSparkRunner = nexmarkRunnerDependency.startsWith(":runners:spark:")
    42  def isDataflowRunner = ":runners:google-cloud-dataflow-java".equals(nexmarkRunnerDependency)
    43  def isDataflowRunnerV2 = isDataflowRunner && "V2".equals(nexmarkRunnerVersion)
    44  def runnerConfiguration = ":runners:direct-java".equals(nexmarkRunnerDependency) ? "shadow" : null
    45  
    46  if (isDataflowRunner) {
    47    /*
    48     * We need to rely on manually specifying these evaluationDependsOn to ensure that
    49     * the following projects are evaluated before we evaluate this project. This is because
    50     * we are attempting to reference a property from the project directly.
    51     */
    52    if (isDataflowRunnerV2) {
    53      evaluationDependsOn(":runners:google-cloud-dataflow-java")
    54    } else {
    55      evaluationDependsOn(":runners:google-cloud-dataflow-java:worker")
    56    }
    57  }
    58  
    59  configurations {
    60    // A configuration for running the Nexmark launcher directly from Gradle, which
    61    // uses Gradle to put the appropriate dependencies on the Classpath rather than
    62    // bundling them into a fat jar
    63    gradleRun
    64  }
    65  
    66  dependencies {
    67    implementation library.java.vendored_guava_26_0_jre
    68    implementation project(path: ":sdks:java:core", configuration: "shadow")
    69    implementation project(":sdks:java:io:google-cloud-platform")
    70    implementation project(":sdks:java:extensions:avro")
    71    implementation project(":sdks:java:extensions:google-cloud-platform-core")
    72    implementation project(":sdks:java:extensions:sql")
    73    implementation project(":sdks:java:extensions:sql:zetasql")
    74    implementation project(":sdks:java:io:kafka")
    75    implementation project(":sdks:java:testing:test-utils")
    76    implementation library.java.google_api_client
    77    implementation library.java.junit
    78    implementation library.java.hamcrest
    79    implementation library.java.google_api_services_bigquery
    80    implementation library.java.jackson_core
    81    implementation library.java.jackson_annotations
    82    implementation library.java.jackson_databind
    83    implementation library.java.jackson_datatype_joda
    84    implementation library.java.avro
    85    implementation library.java.joda_time
    86    implementation library.java.slf4j_api
    87    implementation library.java.kafka_clients
    88    compileOnly library.java.error_prone_annotations
    89    testRuntimeOnly library.java.slf4j_jdk14
    90    testImplementation project(path: ":sdks:java:io:google-cloud-platform", configuration: "testRuntimeMigration")
    91    testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration")
    92    gradleRun project(project.path)
    93    gradleRun project(path: nexmarkRunnerDependency, configuration: runnerConfiguration)
    94  }
    95  
    96  if (isSparkRunner) {
    97    configurations.gradleRun {
    98      // Using Spark runner causes a StackOverflowError if slf4j-jdk14 is on the classpath
    99      exclude group: "org.slf4j", module: "slf4j-jdk14"
   100    }
   101  }
   102  
   103  def getNexmarkArgs = {
   104    def nexmarkArgsStr =  project.findProperty(nexmarkArgsProperty) ?: ""
   105    def nexmarkArgsList = new ArrayList<String>()
   106    Collections.addAll(nexmarkArgsList, nexmarkArgsStr.split())
   107  
   108    if (isDataflowRunner) {
   109      if (isDataflowRunnerV2) {
   110        nexmarkArgsList.add("--experiments=beam_fn_api,use_unified_worker,use_runner_v2,shuffle_mode=service")
   111        def sdkContainerImage = project.findProperty('sdkContainerImage') ?: project(":runners:google-cloud-dataflow-java").dockerJavaImageName
   112        nexmarkArgsList.add("--sdkContainerImage=${sdkContainerImage}")
   113  
   114        // TODO(https://github.com/apache/beam/issues/20880) enable all queries once issues with runner V2 is fixed.
   115        if (nexmarkArgsList.contains("--streaming=true")) {
   116          nexmarkArgsList.add("--skipQueries=AVERAGE_PRICE_FOR_CATEGORY,AVERAGE_SELLING_PRICE_BY_SELLER,WINNING_BIDS,BOUNDED_SIDE_INPUT_JOIN,SESSION_SIDE_INPUT_JOIN,PORTABILITY_BATCH") // 4, 6, 9, 13, 14, 15
   117        } else {
   118          nexmarkArgsList.add("--skipQueries=LOCAL_ITEM_SUGGESTION,AVERAGE_PRICE_FOR_CATEGORY,AVERAGE_SELLING_PRICE_BY_SELLER,HIGHEST_BID,WINNING_BIDS,SESSION_SIDE_INPUT_JOIN,BOUNDED_SIDE_INPUT_JOIN") // 3, 4, 6, 7, 9, 13, 14, 15
   119        }
   120      } else {
   121        def dataflowWorkerJar = project.findProperty('dataflowWorkerJar') ?: project(":runners:google-cloud-dataflow-java:worker").shadowJar.archivePath
   122        // Provide job with a customizable worker jar.
   123        // With legacy worker jar, containerImage is set to empty (i.e. to use the internal build).
   124        // More context and discussions can be found in PR#6694.
   125        nexmarkArgsList.add("--dataflowWorkerJar=${dataflowWorkerJar}".toString())
   126        nexmarkArgsList.add('--workerHarnessContainerImage=')
   127  
   128        def nexmarkProfile =  project.findProperty(nexmarkProfilingProperty) ?: ""
   129        if (nexmarkProfile.equals("true")) {
   130          nexmarkArgsList.add('--profilingAgentConfiguration={ "APICurated": true }')
   131        }
   132      }
   133    }
   134  
   135    if(isSparkRunner) {
   136      // For transparency, be explicit about configuration of local Spark
   137      nexmarkArgsList.add("--sparkMaster=local[4]")
   138    }
   139  
   140    return nexmarkArgsList
   141  }
   142  
   143  // Execute the Nexmark queries or suites via Gradle.
   144  //
   145  // Parameters:
   146  //   -Pnexmark.runner
   147  //       Specify a runner subproject, such as ":runners:spark:3" or ":runners:flink:1.13"
   148  //       Defaults to ":runners:direct-java"
   149  //
   150  //   -Pnexmark.args
   151  //       Specify the command line for invoking org.apache.beam.sdk.nexmark.Main
   152  task run(type: JavaExec) {
   153    def nexmarkArgsList = getNexmarkArgs()
   154    if (isDataflowRunner) {
   155      if (isDataflowRunnerV2) {
   156        dependsOn ":runners:google-cloud-dataflow-java:buildAndPushDockerJavaContainer"
   157        finalizedBy ":runners:google-cloud-dataflow-java:cleanUpDockerJavaImages"
   158      } else {
   159        dependsOn ":runners:google-cloud-dataflow-java:worker:shadowJar"
   160      }
   161    }
   162    if(isSparkRunner) {
   163      // Disable UI
   164      systemProperty "spark.ui.enabled", "false"
   165      systemProperty "spark.ui.showConsoleProgress", "false"
   166      // Dataset runner only
   167      systemProperty "spark.sql.shuffle.partitions", "4"
   168    }
   169  
   170    mainClass = "org.apache.beam.sdk.nexmark.Main"
   171    classpath = configurations.gradleRun
   172    args nexmarkArgsList.toArray()
   173  }