github.com/apache/beam/sdks/v2@v2.48.2/java/io/hadoop-format/build.gradle

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import groovy.json.JsonOutput
import java.util.stream.Collectors

plugins { id 'org.apache.beam.module' }
applyJavaNature(
  automaticModuleName: 'org.apache.beam.sdk.io.hadoop.format',
)
provideIntegrationTestingDependencies()
enableJavaPerformanceTesting()

description = "Apache Beam :: SDKs :: Java :: IO :: Hadoop Format"
ext.summary = "IO to read data from sources and to write data to sinks that implement Hadoop MapReduce Format."

def hadoopVersions = [
  "285": "2.8.5",
  "292": "2.9.2",
  "2102": "2.10.2",
  "324": "3.2.4",
]

hadoopVersions.each { kv -> configurations.create("hadoopVersion$kv.key") }

def elastic_search_version = "7.12.0"

// Ban dependencies from the test runtime classpath
configurations.testRuntimeClasspath {
  // Prevent a StackOverflow because of wiring LOG4J -> SLF4J -> LOG4J
  exclude group: "org.slf4j", module: "log4j-over-slf4j"
}

// Force use of the old version of JAMM that Cassandra relies on
configurations.all {
  resolutionStrategy {
    exclude group: "io.github.stephankoelle", module: "jamm"
  }
}

dependencies {
  implementation project(path: ":sdks:java:core", configuration: "shadow")
  implementation library.java.vendored_guava_26_0_jre
  implementation library.java.slf4j_api
  implementation project(":sdks:java:io:hadoop-common")
  implementation library.java.joda_time
  provided library.java.hadoop_common
  provided library.java.hadoop_hdfs
  permitUnusedDeclared library.java.hadoop_hdfs
  provided library.java.hadoop_hdfs_client
  provided library.java.hadoop_mapreduce_client_core
  // Ensure that the older version of JAMM that Cassandra relies on appears
  // on the classpath before the one provided by :sdks:java:core shadowTest.
  testImplementation "com.github.jbellis:jamm:0.3.0"
  testImplementation project(path: ":sdks:java:core", configuration: "shadowTest")
  testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration")
  testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration")
  testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration")
  testImplementation project(":sdks:java:io:jdbc")
  testImplementation "org.elasticsearch.plugin:transport-netty4-client:$elastic_search_version"
  testImplementation library.java.testcontainers_elasticsearch
  testImplementation "org.elasticsearch.client:elasticsearch-rest-high-level-client:$elastic_search_version"
  testImplementation "org.elasticsearch:elasticsearch:$elastic_search_version"
  testImplementation("org.elasticsearch:elasticsearch-hadoop:$elastic_search_version") {
    // TODO(https://issues.apache.org/jira/browse/BEAM-3715)
    // These are all optional deps of elasticsearch-hadoop. Why do they have to be excluded?
    exclude group: "cascading", module: "cascading-local"
    exclude group: "cascading", module: "cascading-hadoop"
    exclude group: "org.apache.hive", module: "hive-service"
    exclude group: "org.apache.pig", module: "pig"
    exclude group: "org.apache.spark", module: "spark-core_2.10"
    exclude group: "org.apache.spark", module: "spark-streaming_2.10"
    exclude group: "org.apache.spark", module: "spark-sql_2.10"
    exclude group: "org.apache.storm", module: "storm-core"
  }
  testImplementation "org.apache.httpcomponents:httpclient:4.5.13"
  testImplementation library.java.commons_lang3
  testImplementation library.java.commons_io
  testImplementation library.java.cassandra_driver_core
  testImplementation library.java.cassandra_driver_mapping
  testImplementation "org.apache.cassandra:cassandra-all:3.11.8"
  testImplementation library.java.hadoop_common
  testImplementation library.java.hadoop_hdfs
  testImplementation library.java.hadoop_mapreduce_client_core
  testImplementation library.java.postgres
  testImplementation library.java.junit
  testImplementation library.java.hamcrest
  testImplementation library.java.testcontainers_postgresql
  testImplementation library.java.netty_all
  testRuntimeOnly library.java.slf4j_jdk14
  testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")

  hadoopVersions.each { kv ->
    "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-common:$kv.value"
    "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
    "hadoopVersion$kv.key" "org.apache.hadoop:hadoop-hdfs:$kv.value"
    "hadoopVersion$kv.key" "org.apache.httpcomponents:httpclient:4.5.13"
  }
}

hadoopVersions.each { kv ->
  configurations."hadoopVersion$kv.key" {
    resolutionStrategy {
      force "org.apache.hadoop:hadoop-common:$kv.value"
      force "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
      force "org.apache.hadoop:hadoop-hdfs:$kv.value"
      // Without forcing httpclient, httpcore resolves below 4.4.9, which has an incompatible API
      force "org.apache.httpcomponents:httpclient:4.5.13"
    }
  }
}

// Hadoop dependencies require an old version of Guava (BEAM-11626)
configurations.all (Configuration it) -> {
  // error-prone requires newer guava, don't override for annotation processing
  // https://github.com/google/error-prone/issues/2745
  if (it.name == "annotationProcessor" || it.name == "testAnnotationProcessor") {
    return
  }
  resolutionStrategy {
    force 'com.google.guava:guava:25.1-jre'
  }
}

// The cassandra.yaml file currently assumes "target/..." exists.
// TODO: Update cassandra.yaml to inject new properties representing
// the root path. Also migrate cassandra.yaml to use any open ports
// instead of a static port.
task createTargetDirectoryForCassandra() {
  doLast {
    if (!project.file("target").exists()) {
      project.file("target").mkdirs()
    }
  }
}
test.dependsOn createTargetDirectoryForCassandra

task hadoopVersionsTest(group: "Verification") {
  description = "Runs Hadoop format tests with different Hadoop versions"
  dependsOn createTaskNames(hadoopVersions, "Test")
  dependsOn createTaskNames(hadoopVersions, "IT")
  dependsOn createTaskNames(hadoopVersions, "ElasticIT")
}

hadoopVersions.each { kv ->
  task "runHadoopFormatIO${kv.key}ElasticIT"(type: Test, group: "Verification") {
    description = "Runs HadoopFormatIOElasticIT with Hadoop '${kv.value}'"
    outputs.upToDateWhen { false }
    testClassesDirs = sourceSets.test.output.classesDirs
    classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
    def pipelineOptions = JsonOutput.toJson(["--withTestcontainers=true"])
    systemProperty "beamTestPipelineOptions", pipelineOptions
    include '**/HadoopFormatIOElasticIT.class'
  }
}

hadoopVersions.each { kv ->
  task "runHadoopFormatIO${kv.key}IT"(type: Test, group: "Verification") {
    description = "Runs HadoopFormatIOIT with Hadoop '${kv.value}'"
    outputs.upToDateWhen { false }
    testClassesDirs = sourceSets.test.output.classesDirs
    classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath

    def pipelineOptions = JsonOutput.toJson([
      "--postgresServerName=dummy_value",
      "--postgresUsername=postgres",
      "--postgresDatabaseName=postgres",
      "--postgresPassword=postgres",
      "--numberOfRecords=1000",
      "--withTestcontainers=true",
      "--postgresSsl=false",
    ])
    systemProperty "beamTestPipelineOptions", pipelineOptions
    include '**/HadoopFormatIOIT.class'
  }
}

hadoopVersions.each { kv ->
  task "runHadoopFormatIO${kv.key}Test"(type: Test, group: "Verification") {
    description = "Runs HadoopFormatIO tests with Hadoop '${kv.value}'"
    classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
    outputs.upToDateWhen { false }
    include '**/*Test.class'
    // Cassandra tests cannot run in parallel
    exclude '**/HadoopFormatIOCassandraTest.class'
  }
}

static def createTaskNames(Map<String, String> hadoopVersions, String suffix) {
  return hadoopVersions.keySet().stream()
      .map { num -> "runHadoopFormatIO$num$suffix" }
      .collect(Collectors.toList())
}