[SPARK-35483][INFRA] Add docker-integration-tests to run-tests.py and GA

### What changes were proposed in this pull request?

This PR proposes to add `docker-integration-tests` to `run-tests.py` and GA (GitHub Actions).
`docker-integration-tests` can't run when Docker is not installed, so the module runs only when `docker-integration-tests` is explicitly specified via `--modules`.

### Why are the changes needed?

There is currently no CI coverage for `docker-integration-tests`.

### Does this PR introduce _any_ user-facing change?

No. This only affects CI (GA) and the test infrastructure.

### How was this patch tested?

Closes #32631 from sarutak/docker-integration-test-ga.

Authored-by: Kousuke Saruta <sarutak@oss.nttdata.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
Kousuke Saruta authored on 2021-05-28 07:56:37 +09:00; committed by Hyukjin Kwon
parent 29ed1a2de4
commit 0a74ad66b3
22 changed files with 183 additions and 27 deletions

@@ -625,3 +625,83 @@ jobs:
with:
name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"
docker-integration-tests:
name: Run docker integration tests
runs-on: ubuntu-20.04
env:
HADOOP_PROFILE: hadoop3.2
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
with:
fetch-depth: 0
repository: apache/spark
ref: master
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
id: sync-branch
run: |
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
docker-integration-coursier-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
- name: Cache Oracle docker-images repository
id: cache-oracle-docker-images
uses: actions/cache@v2
with:
path: ./oracle/docker-images
# The key should contain the commit hash of the Oracle docker-images repository to be checked out.
key: oracle-docker-images-3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1
- name: Checkout Oracle docker-images repository
uses: actions/checkout@v2
with:
fetch-depth: 0
repository: oracle/docker-images
ref: 3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1
path: ./oracle/docker-images
- name: Install Oracle Docker image
run: |
cd oracle/docker-images/OracleDatabase/SingleInstance/dockerfiles
./buildContainerImage.sh -v 18.4.0 -x
- name: Run tests
run: |
export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
./dev/run-tests --parallelism 2 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-docker-integration--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-docker-integration--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"

@@ -122,19 +122,21 @@ def determine_modules_to_test(changed_modules, deduplicated=True):
['graphx', 'examples']
>>> [x.name for x in determine_modules_to_test([modules.sql])]
... # doctest: +NORMALIZE_WHITESPACE
['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-ml']
['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples',
'hive-thriftserver', 'pyspark-sql', 'repl', 'sparkr',
'pyspark-mllib', 'pyspark-pandas', 'pyspark-ml']
>>> sorted([x.name for x in determine_modules_to_test(
... [modules.sparkr, modules.sql], deduplicated=False)])
... # doctest: +NORMALIZE_WHITESPACE
['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'pyspark-ml',
'pyspark-mllib', 'pyspark-pandas', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib',
'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-sql',
'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
>>> sorted([x.name for x in determine_modules_to_test(
... [modules.sql, modules.core], deduplicated=False)])
... # doctest: +NORMALIZE_WHITESPACE
['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive', 'hive-thriftserver',
'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas',
'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl', 'root',
['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive',
'hive-thriftserver', 'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib',
'pyspark-pandas', 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl', 'root',
'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10',
'streaming-kinesis-asl']
"""

@@ -17,6 +17,7 @@
from functools import total_ordering
import itertools
import os
import re
all_modules = []
@@ -745,6 +746,20 @@ spark_ganglia_lgpl = Module(
]
)
docker_integration_tests = Module(
name="docker-integration-tests",
dependencies=[],
build_profile_flags=["-Pdocker-integration-tests"],
source_file_regexes=["external/docker-integration-tests"],
sbt_test_goals=["docker-integration-tests/test"],
environ=None if "GITHUB_ACTIONS" not in os.environ else {
"ENABLE_DOCKER_INTEGRATION_TESTS": "1"
},
test_tags=[
"org.apache.spark.tags.DockerTest"
]
)
# The root module is a dummy module which is used to run all of the tests.
# No other modules should directly depend on this module.
root = Module(

@@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0):
* {{{
* DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite"
* }}}

@@ -33,7 +33,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0):
* {{{
* DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ./build/sbt -Pdocker-integration-tests "testOnly *DB2KrbIntegrationSuite"
* }}}
*/

@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.jdbc
import org.scalactic.source.Position
import org.scalatest.Tag
import org.apache.spark.SparkFunSuite
/**
* Helper trait for running Docker integration tests.
* Ignores them unless the enabling environment variable is set.
*/
trait DockerIntegrationFunSuite extends SparkFunSuite {
private val envVarNameForEnablingTests = "ENABLE_DOCKER_INTEGRATION_TESTS"
private val shouldRunTests = sys.env.getOrElse(envVarNameForEnablingTests, "0") match {
case "1" => true
case _ => false
}
/** Run the test if the enabling environment variable is set, otherwise ignore it */
override def test(testName: String, testTags: Tag*)(testBody: => Any)
(implicit pos: Position): Unit = {
if (shouldRunTests) {
super.test(testName, testTags: _*)(testBody)
} else {
ignore(s"$testName [enable by setting env var $envVarNameForEnablingTests=1]")(testBody)
}
}
/** Run the given body of code only if Docker integration tests are enabled */
def runIfTestsEnabled(message: String)(body: => Unit): Unit = {
if (shouldRunTests) {
body
} else {
ignore(s"$message [enable by setting env var $envVarNameForEnablingTests=1]")(())
}
}
}
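
Below is a minimal sketch of how a suite is expected to consume this trait (the suite name and body are hypothetical; the real suites in this patch mix it in via `DockerJDBCIntegrationSuite`): tests registered through `test(...)` are reported as ignored rather than failed unless `ENABLE_DOCKER_INTEGRATION_TESTS=1`, and expensive setup is skipped by wrapping it in `runIfTestsEnabled`.

```scala
package org.apache.spark.sql.jdbc

import org.apache.spark.SparkFunSuite

class ExampleDockerBackedSuite extends SparkFunSuite with DockerIntegrationFunSuite {

  // Skipped when ENABLE_DOCKER_INTEGRATION_TESTS is unset.
  override def beforeAll(): Unit = runIfTestsEnabled("Prepare Docker fixtures") {
    super.beforeAll()
    // Start containers, open JDBC connections, etc.
  }

  // Registered as ignored unless the environment variable is set to "1".
  test("reads rows from the containerized database") {
    assert(1 + 1 == 2)
  }
}
```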

@@ -92,7 +92,8 @@ abstract class DatabaseOnDocker {
containerConfigBuilder: ContainerConfig.Builder): Unit = {}
}
abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventually {
abstract class DockerJDBCIntegrationSuite
extends SharedSparkSession with Eventually with DockerIntegrationFunSuite {
protected val dockerIp = DockerUtils.getDockerIp()
val db: DatabaseOnDocker
@@ -114,7 +115,7 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu
private var pulled: Boolean = false
protected var jdbcUrl: String = _
override def beforeAll(): Unit = {
override def beforeAll(): Unit = runIfTestsEnabled(s"Prepare for ${this.getClass.getName}") {
super.beforeAll()
try {
docker = DefaultDockerClient.fromEnv.build()

@@ -41,7 +41,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite
protected var keytabFullPath: String = _
protected def setAuthentication(keytabFile: String, principal: String): Unit
override def beforeAll(): Unit = {
override def beforeAll(): Unit = runIfTestsEnabled(s"Prepare for ${this.getClass.getName}") {
SecurityUtils.setGlobalKrbDebug(true)
val kdcDir = Utils.createTempDir()

@@ -27,7 +27,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., mariadb:10.5.8):
* {{{
* MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.8
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.8
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite"
* }}}

@@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04):
* {{{
* MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MsSqlServerIntegrationSuite"
* }}}

@@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., mysql:5.7.31):
* {{{
* MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MySQLIntegrationSuite"
* }}}

@@ -50,6 +50,7 @@ import org.apache.spark.tags.DockerTest
* $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles
* $ ./buildContainerImage.sh -v 18.4.0 -x
* $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe
* $ export ENABLE_DOCKER_INTEGRATION_TESTS=1
* $ cd $SPARK_HOME
* $ ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite"
@@ -61,7 +62,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark
import testImplicits._
override val db = new DatabaseOnDocker {
override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME")
lazy override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME")
override val env = Map(
"ORACLE_PWD" -> "oracle"
)

@@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., postgres:13.0):
* {{{
* POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite"
* }}}

@@ -27,7 +27,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., postgres:13.0):
* {{{
* POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ./build/sbt -Pdocker-integration-tests "testOnly *PostgresKrbIntegrationSuite"
* }}}
*/

@@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0):
* {{{
* DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ./build/sbt -Pdocker-integration-tests "testOnly *v2.DB2IntegrationSuite"
* }}}
*/

@@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04):
* {{{
* MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04
* ./build/sbt -Pdocker-integration-tests "testOnly *v2*MsSqlServerIntegrationSuite"
* }}}
*/

@@ -32,8 +32,8 @@ import org.apache.spark.tags.DockerTest
*
* To run this test suite for a specific version (e.g., mysql:5.7.31):
* {{{
* MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31
* ./build/sbt -Pdocker-integration-tests "testOnly *v2*MySQLIntegrationSuite"
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31
* ./build/sbt -Pdocker-integration-tests "testOnly *v2*MySQLIntegrationSuite"
*
* }}}
*

@@ -45,6 +45,7 @@ import org.apache.spark.tags.DockerTest
* $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles
* $ ./buildContainerImage.sh -v 18.4.0 -x
* $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe
* $ export ENABLE_DOCKER_INTEGRATION_TESTS=1
* $ cd $SPARK_HOME
* $ ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite"
@@ -55,7 +56,7 @@ import org.apache.spark.tags.DockerTest
class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest {
override val catalogName: String = "oracle"
override val db = new DatabaseOnDocker {
override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME")
lazy override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME")
override val env = Map(
"ORACLE_PWD" -> "oracle"
)

@@ -29,7 +29,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., postgres:13.0):
* {{{
* POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresIntegrationSuite"
* }}}
*/

@@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., postgres:13.0):
* {{{
* POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresNamespaceSuite"
* }}}
*/

@@ -24,11 +24,12 @@ import org.apache.log4j.Level
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.connector.catalog.NamespaceChange
import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
import org.apache.spark.sql.jdbc.DockerIntegrationFunSuite
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.tags.DockerTest
@DockerTest
private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession {
private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerIntegrationFunSuite {
val catalog = new JDBCTableCatalog()
def builtinNamespaces: Array[Array[String]]

@@ -20,12 +20,13 @@ package org.apache.spark.sql.jdbc.v2
import org.apache.log4j.Level
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.jdbc.DockerIntegrationFunSuite
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types._
import org.apache.spark.tags.DockerTest
@DockerTest
private[v2] trait V2JDBCTest extends SharedSparkSession {
private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFunSuite {
val catalogName: String
// dialect specific update column type test
def testUpdateColumnType(tbl: String): Unit