github.com/apache/beam/sdks/v2@v2.48.2/java/io/hadoop-format/build.gradle

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import groovy.json.JsonOutput
import java.util.stream.Collectors

plugins { id 'org.apache.beam.module' }
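// applyJavaNature, provideIntegrationTestingDependencies and
// enableJavaPerformanceTesting below are conventions contributed by the
// 'org.apache.beam.module' plugin (Beam's BeamModulePlugin): they apply the
// standard Beam Java build configuration, pull in the shared
// integration-testing dependencies, and add performance-testing support.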
applyJavaNature(
  automaticModuleName: 'org.apache.beam.sdk.io.hadoop.format',
)
provideIntegrationTestingDependencies()
enableJavaPerformanceTesting()

description = "Apache Beam :: SDKs :: Java :: IO :: Hadoop Format"
ext.summary = "IO to read data from sources and to write data to sinks that implement Hadoop MapReduce Format."

def hadoopVersions = [
    "285": "2.8.5",
    "292": "2.9.2",
    "2102": "2.10.2",
    "324": "3.2.4",
]

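// Each key is a digits-only shorthand for the Hadoop version on the right;
// it is used as the suffix of a dedicated dependency configuration (e.g.
// "hadoopVersion285") and of the per-version test tasks defined below (e.g.
// "runHadoopFormatIO285Test").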
hadoopVersions.each {kv -> configurations.create("hadoopVersion$kv.key")}

def elastic_search_version = "7.12.0"

// Ban dependencies from the test runtime classpath
configurations.testRuntimeClasspath {
  // Prevent a StackOverflow because of wiring LOG4J -> SLF4J -> LOG4J
  exclude group: "org.slf4j", module: "log4j-over-slf4j"
}

// Force the use of the old version of JAMM that Cassandra relies on
configurations.all {
  resolutionStrategy {
    exclude group: "io.github.stephankoelle", module: "jamm"
  }
}

dependencies {
  implementation project(path: ":sdks:java:core", configuration: "shadow")
  implementation library.java.vendored_guava_26_0_jre
  implementation library.java.slf4j_api
  implementation project(":sdks:java:io:hadoop-common")
  implementation library.java.joda_time
  provided library.java.hadoop_common
  provided library.java.hadoop_hdfs
  permitUnusedDeclared library.java.hadoop_hdfs
  provided library.java.hadoop_hdfs_client
  provided library.java.hadoop_mapreduce_client_core
  // Ensure that the older version of JAMM that cassandra relies on appears
  // on the classpath before the one provided by :sdks:java:core shadowTest.
  testImplementation "com.github.jbellis:jamm:0.3.0"
  testImplementation project(path: ":sdks:java:core", configuration: "shadowTest")
  testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration")
  testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration")
  testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration")
  testImplementation project(":sdks:java:io:jdbc")
  testImplementation "org.elasticsearch.plugin:transport-netty4-client:$elastic_search_version"
  testImplementation library.java.testcontainers_elasticsearch
  testImplementation "org.elasticsearch.client:elasticsearch-rest-high-level-client:$elastic_search_version"
  testImplementation "org.elasticsearch:elasticsearch:$elastic_search_version"
  testImplementation ("org.elasticsearch:elasticsearch-hadoop:$elastic_search_version") {
    // TODO(https://issues.apache.org/jira/browse/BEAM-3715)
    // These are all optional deps of elasticsearch-hadoop. Why do they have to be excluded?
    exclude group: "cascading", module: "cascading-local"
    exclude group: "cascading", module: "cascading-hadoop"
    exclude group: "org.apache.hive", module: "hive-service"
    exclude group: "org.apache.pig", module: "pig"
    exclude group: "org.apache.spark", module: "spark-core_2.10"
    exclude group: "org.apache.spark", module: "spark-streaming_2.10"
    exclude group: "org.apache.spark", module: "spark-sql_2.10"
    exclude group: "org.apache.storm", module: "storm-core"
  }
  testImplementation "org.apache.httpcomponents:httpclient:4.5.13"
  testImplementation library.java.commons_lang3
  testImplementation library.java.commons_io
  testImplementation library.java.cassandra_driver_core
  testImplementation library.java.cassandra_driver_mapping
  testImplementation "org.apache.cassandra:cassandra-all:3.11.8"
  testImplementation library.java.hadoop_common
  testImplementation library.java.hadoop_hdfs
  testImplementation library.java.hadoop_mapreduce_client_core
  testImplementation library.java.postgres
  testImplementation library.java.junit
  testImplementation library.java.hamcrest
  testImplementation library.java.testcontainers_postgresql
  testImplementation library.java.netty_all
  testRuntimeOnly library.java.slf4j_jdk14
  testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")

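  // Give each hadoopVersionXXX configuration its own copy of the Hadoop
  // artifacts, so the per-version test tasks below can swap Hadoop on the
  // classpath without touching the default test configuration.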
  hadoopVersions.each {kv ->
    "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-common:$kv.value"
    "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
    "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-hdfs:$kv.value"
    "hadoopVersion$kv.key" "org.apache.httpcomponents:httpclient:4.5.13"
  }
}

hadoopVersions.each {kv ->
  configurations."hadoopVersion$kv.key" {
    resolutionStrategy {
      force "org.apache.hadoop:hadoop-common:$kv.value"
      force "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
      force "org.apache.hadoop:hadoop-hdfs:$kv.value"
      // Without forcing httpclient, httpcore resolves to a version below
      // 4.4.9, which has an incompatible API.
      force "org.apache.httpcomponents:httpclient:4.5.13"
    }
  }
}
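
// To inspect what a given matrix configuration actually resolves to, Gradle's
// built-in dependency report can be pointed at it, e.g. (assuming this
// module's standard project path in the Beam build):
//   ./gradlew :sdks:java:io:hadoop-format:dependencies --configuration hadoopVersion324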

// Hadoop dependencies require an old version of Guava (BEAM-11626)
configurations.all (Configuration it) -> {
  // error-prone requires a newer Guava; don't override it for annotation processing
  // https://github.com/google/error-prone/issues/2745
  if (it.name == "annotationProcessor" || it.name == "testAnnotationProcessor") {
    return
  }
  resolutionStrategy {
    force 'com.google.guava:guava:25.1-jre'
  }
}

// The cassandra.yaml file currently assumes "target/..." exists.
// TODO: Update cassandra.yaml to inject new properties representing
// the root path. Also migrate cassandra.yaml to use any open ports
// instead of a static port.
task createTargetDirectoryForCassandra() {
  doLast {
    if (!project.file("target").exists()) {
      project.file("target").mkdirs()
    }
  }
}
test.dependsOn createTargetDirectoryForCassandra

task hadoopVersionsTest(group: "Verification") {
  description = "Runs Hadoop format tests with different Hadoop versions"
  dependsOn createTaskNames(hadoopVersions, "Test")
  dependsOn createTaskNames(hadoopVersions, "IT")
  dependsOn createTaskNames(hadoopVersions, "ElasticIT")
}
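
// Aggregate entry point for the whole version matrix; a typical invocation
// (again assuming the standard :sdks:java:io:hadoop-format project path):
//   ./gradlew :sdks:java:io:hadoop-format:hadoopVersionsTest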

hadoopVersions.each {kv ->
  task "runHadoopFormatIO${kv.key}ElasticIT"(type: Test, group: "Verification") {
    description = "Runs HadoopFormatIOElasticIT with Hadoop '${kv.value}'"
    outputs.upToDateWhen { false }
    testClassesDirs = sourceSets.test.output.classesDirs
    classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
    def pipelineOptions = JsonOutput.toJson(["--withTestcontainers=true"])
    systemProperty "beamTestPipelineOptions", pipelineOptions
    include '**/HadoopFormatIOElasticIT.class'
  }
}
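
// The loops above and below stamp out one Test task per matrix entry, e.g.
// runHadoopFormatIO285ElasticIT through runHadoopFormatIO324ElasticIT.
// Putting the versioned configuration first on the classpath lets its forced
// Hadoop artifacts shadow those on the default test runtime classpath.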

hadoopVersions.each {kv ->
  task "runHadoopFormatIO${kv.key}IT"(type: Test, group: "Verification") {
    description = "Runs HadoopFormatIOIT with Hadoop '${kv.value}'"
    outputs.upToDateWhen { false }
    testClassesDirs = sourceSets.test.output.classesDirs
    classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath

    def pipelineOptions = JsonOutput.toJson([
        "--postgresServerName=dummy_value",
        "--postgresUsername=postgres",
        "--postgresDatabaseName=postgres",
        "--postgresPassword=postgres",
        "--numberOfRecords=1000",
        "--withTestcontainers=true",
        "--postgresSsl=false",
    ])
    systemProperty "beamTestPipelineOptions", pipelineOptions
    include '**/HadoopFormatIOIT.class'
  }
}
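
// Note: --postgresServerName is a dummy placeholder; since
// --withTestcontainers=true is set, the test presumably provisions its own
// PostgreSQL container and substitutes the real host at runtime.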

hadoopVersions.each {kv ->
  task "runHadoopFormatIO${kv.key}Test"(type: Test, group: "Verification") {
    description = "Runs HadoopFormatIO tests with Hadoop '${kv.value}'"
    classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
    outputs.upToDateWhen { false }
    include '**/*Test.class'
    // Cassandra test cannot run in parallel
    exclude '**/HadoopFormatIOCassandraTest.class'
  }
}

static def createTaskNames(Map<String, String> hadoopVersions, String suffix) {
  return hadoopVersions.keySet().stream()
      .map {num -> "runHadoopFormatIO$num$suffix"}
      .collect(Collectors.toList())
}
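
// For example, createTaskNames(hadoopVersions, "Test") yields
// ["runHadoopFormatIO285Test", "runHadoopFormatIO292Test",
//  "runHadoopFormatIO2102Test", "runHadoopFormatIO324Test"].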