spark-instrumented-optimizer/dev/run-tests

#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Go to the Spark project root directory (the parent of the directory that
# contains this script). Abort if it cannot be resolved or entered: the
# commands that follow use paths relative to the project root, including
# `rm -rf ./work`, and must never run from some other directory.
FWDIR="$(cd "$(dirname "$0")"/.. && pwd)"
if [ -z "$FWDIR" ] || ! cd "$FWDIR"; then
  echo "[error] Could not change to the Spark project root directory." >&2
  exit 1
fi

# Start from a clean slate: delete ./work together with the org.apache.spark
# entries under ~/.ivy2/local and ~/.ivy2/cache.
rm -rf \
  ./work \
  ~/.ivy2/local/org.apache.spark \
  ~/.ivy2/cache/org.apache.spark

# Pull in dev/run-tests-codes.sh (presumably the file that defines the BLOCK_*
# values used throughout this script -- confirm against that file).
. "${FWDIR}/dev/run-tests-codes.sh"

# CURRENT_BLOCK tracks the stage being executed; handle_error exits with it.
CURRENT_BLOCK="${BLOCK_GENERAL}"

#######################################
# Report a failed command and abort the script.
# Globals:   CURRENT_BLOCK (read) - exit code of the stage currently running
# Arguments: $1 - line number of the command that failed
# Outputs:   one "[error]" line on stdout with the failed command's status
# Exits:     with $CURRENT_BLOCK, or with 1 when CURRENT_BLOCK is empty or
#            unset, so that a failure is never reported as exit status 0
#######################################
function handle_error () {
  # Capture $? before anything else runs; every later command overwrites it.
  local failed_status=$?
  echo "[error] Got a return code of $failed_status on line $1 of the run-tests script."
  exit "${CURRENT_BLOCK:-1}"
}


# Choose the build profile flags for the requested Hadoop version.
# AMPLAB_JENKINS_BUILD_PROFILE selects one of the known profiles; any other
# value (including empty) leaves SBT_MAVEN_PROFILES_ARGS as it was.
case "$AMPLAB_JENKINS_BUILD_PROFILE" in
  hadoop1.0)
    SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4"
    ;;
  hadoop2.0)
    SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
    ;;
  hadoop2.2)
    SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0"
    ;;
  hadoop2.3)
    SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
    ;;
esac

# Nothing selected and nothing inherited from the environment: use Hadoop 2.3.
if [[ -z "$SBT_MAVEN_PROFILES_ARGS" ]]; then
  SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
fi

# The Kinesis ASL profile is always appended; export the result for child processes.
export SBT_MAVEN_PROFILES_ARGS="${SBT_MAVEN_PROFILES_ARGS} -Pkinesis-asl"

# Determine Java path and version.

#######################################
# Reduce `java -version` output to "<major><minor>" (e.g. 1.8.0_45 -> 18).
# Accepts both the "java version" and the "openjdk version" banner and ignores
# anything that follows the quoted version string.
# Inputs:    the output of `java -version` on stdin
# Outputs:   the digits on stdout, or nothing when no version can be parsed
#######################################
function extract_java_version () {
  # We can't use sed -r -e due to OS X / BSD compatibility; hence the basic
  # regular expression with escaped parentheses. `-n` plus the `p` flag prints
  # a line only when the substitution matched, so the result is never garbage.
  grep -e "^java version" -e "^openjdk version" --max-count=1 \
    | sed -n 's/^[a-z]* version "\([0-9][0-9]*\)\.\([0-9][0-9]*\)\..*/\1\2/p'
}

{
  # Prefer the JVM under JAVA_HOME; otherwise fall back to `java` on PATH.
  if test -x "$JAVA_HOME/bin/java"; then
    declare java_cmd="$JAVA_HOME/bin/java"
  else
    declare java_cmd=java
  fi

  # Quoted so that a JAVA_HOME containing whitespace still works.
  # `java -version` writes its banner to stderr, hence 2>&1.
  JAVA_VERSION=$("$java_cmd" -version 2>&1 | extract_java_version)

  # Compare numerically only when a version was actually parsed; an empty
  # value would make `[ ... -lt ... ]` fail with a syntax error.
  if [ -z "$JAVA_VERSION" ]; then
    echo "[warn] Could not determine the JDK version; Java 8 tests may not run."
  elif [ "$JAVA_VERSION" -lt 18 ]; then
    echo "[warn] Java 8 tests will not run because JDK version is < 1.8."
  fi
}

# On Jenkins (AMPLAB_JENKINS set), compare the working tree against master to
# decide whether the Hive/SQL test suites are needed.
# Partial solution for SPARK-1455.
#   _RUN_SQL_TESTS  - set to "true" when any SQL-related path changed
#   _SQL_TESTS_ONLY - set to "true" when only SQL-related paths changed
if [[ -n "$AMPLAB_JENKINS" ]]; then
  git fetch origin master:master

  # grep arguments naming the paths that count as "SQL changes".
  sql_path_patterns=(-e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh")

  sql_diffs=$(git diff --name-only master | grep "${sql_path_patterns[@]}")
  non_sql_diffs=$(git diff --name-only master | grep -v "${sql_path_patterns[@]}")

  if [[ -n "$sql_diffs" ]]; then
    echo "[info] Detected changes in SQL. Will run Hive test suite."
    _RUN_SQL_TESTS=true
  fi

  if [[ -n "$sql_diffs" && -z "$non_sql_diffs" ]]; then
    echo "[info] Detected no changes except in SQL. Will only run SQL tests."
    _SQL_TESTS_ONLY=true
  fi
fi

# From this point on a pipeline fails when any of its stages fails, and a
# failing command fires the ERR trap, which calls handle_error with the line
# number of that command.
set -o pipefail
trap 'handle_error $LINENO' ERR

printf '%s\n' \
  "" \
  "=========================================================================" \
  "Running Apache RAT checks" \
  "========================================================================="

# Record the stage before running it so a failure exits with this stage's code.
CURRENT_BLOCK=$BLOCK_RAT

./dev/check-license

printf '%s\n' \
  "" \
  "=========================================================================" \
  "Running Scala style checks" \
  "========================================================================="

# Record the stage before running it so a failure exits with this stage's code.
CURRENT_BLOCK=$BLOCK_SCALA_STYLE

./dev/lint-scala

printf '%s\n' \
  "" \
  "=========================================================================" \
  "Running Python style checks" \
  "========================================================================="

# Record the stage before running it so a failure exits with this stage's code.
CURRENT_BLOCK=$BLOCK_PYTHON_STYLE

./dev/lint-python

printf '%s\n' \
  "" \
  "=========================================================================" \
  "Building Spark" \
  "========================================================================="

# Record the stage before running it so a failure exits with this stage's code.
CURRENT_BLOCK=$BLOCK_BUILD

{
  HIVE_BUILD_ARGS="${SBT_MAVEN_PROFILES_ARGS} -Phive -Phive-thriftserver"
  HIVE_12_BUILD_ARGS="${HIVE_BUILD_ARGS} -Phive-0.12.0"

  # Pass 1: build against Hive 0.12.0 to make sure patches do not break that build.
  echo "[info] Compile with Hive 0.12.0"
  if [[ -d "lib_managed" ]]; then
    rm -rf lib_managed
  fi
  echo "[info] Building Spark with these arguments: $HIVE_12_BUILD_ARGS"

  if [[ "${AMPLAB_JENKINS_BUILD_TOOL}" == "maven" ]]; then
    build/mvn $HIVE_12_BUILD_ARGS clean package -DskipTests
  else
    # NOTE: "q" is fed to sbt on stdin because sbt, on a build failure (either
    # resolution or compilation), prompts for q, r, etc. to quit or retry;
    # the piped input keeps it from blocking.
    # NOTE: $HIVE_12_BUILD_ARGS is deliberately left unquoted: quoting it would
    # pass all the flags to sbt as one single argument.
    # QUESTION: Why doesn't 'yes "q"' work?
    # QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
    printf 'q\n\n' \
      | build/sbt $HIVE_12_BUILD_ARGS clean hive/compile hive-thriftserver/compile \
      | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
  fi

  # Pass 2: build with the default Hive version (0.13.1), which the tests are based on.
  echo "[info] Compile with Hive 0.13.1"
  if [[ -d "lib_managed" ]]; then
    rm -rf lib_managed
  fi
  echo "[info] Building Spark with these arguments: $HIVE_BUILD_ARGS"

  if [[ "${AMPLAB_JENKINS_BUILD_TOOL}" == "maven" ]]; then
    build/mvn $HIVE_BUILD_ARGS clean package -DskipTests
  else
    # Same stdin trick and intentional word splitting as in pass 1.
    printf 'q\n\n' \
      | build/sbt $HIVE_BUILD_ARGS package assembly/assembly streaming-kafka-assembly/assembly \
      | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
  fi
}

printf '%s\n' \
  "" \
  "=========================================================================" \
  "Detecting binary incompatibilities with MiMa" \
  "========================================================================="

# Record the stage before running it so a failure exits with this stage's code.
CURRENT_BLOCK=$BLOCK_MIMA

./dev/mima

printf '%s\n' \
  "" \
  "=========================================================================" \
  "Running Spark unit tests" \
  "========================================================================="

# Record the stage before running it so a failure exits with this stage's code.
CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS

{
  # When the Spark SQL tests are enabled, add the Hive profiles. The profile
  # flags stay in one string that is word-split at the point of use.
  if [[ -n "$_RUN_SQL_TESTS" ]]; then
    SBT_MAVEN_PROFILES_ARGS+=" -Phive -Phive-thriftserver"
  fi

  # The test targets are kept in an array of individual arguments; one long
  # string would be interpreted as a single test, which doesn't work.
  if [[ -z "$_SQL_TESTS_ONLY" ]]; then
    SBT_MAVEN_TEST_ARGS=("test")
  else
    SBT_MAVEN_TEST_ARGS=(
      "catalyst/test"
      "sql/test"
      "hive/test"
      "hive-thriftserver/test"
      "mllib/test"
    )
  fi

  echo "[info] Running Spark tests with these arguments: $SBT_MAVEN_PROFILES_ARGS ${SBT_MAVEN_TEST_ARGS[*]}"

  if [[ "${AMPLAB_JENKINS_BUILD_TOOL}" == "maven" ]]; then
    # The Maven run does not use SBT_MAVEN_TEST_ARGS; it always runs `test`.
    build/mvn test $SBT_MAVEN_PROFILES_ARGS --fail-at-end
  else
    # NOTE: "q" is fed to sbt on stdin because sbt, on a build failure (either
    # resolution or compilation), prompts for q, r, etc. to quit or retry;
    # the piped input keeps it from blocking.
    # NOTE: $SBT_MAVEN_PROFILES_ARGS is deliberately left unquoted: quoting it
    # would pass all the flags as one single argument. The array expansion
    # "${SBT_MAVEN_TEST_ARGS[@]}" is quoted and yields one argument per element.
    # QUESTION: Why doesn't 'yes "q"' work?
    # QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
    printf 'q\n\n' \
      | build/sbt $SBT_MAVEN_PROFILES_ARGS "${SBT_MAVEN_TEST_ARGS[@]}" \
      | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
  fi
}

echo ""
echo "========================================================================="
echo "Running PySpark tests"
echo "========================================================================="

# Record the stage before running it so a failure exits with this stage's code.
CURRENT_BLOCK=$BLOCK_PYSPARK_UNIT_TESTS

# Add the path for Python 3 in Jenkins. It is appended, so entries already on
# PATH keep precedence. The directory was previously misspelled as "anaonda",
# which meant the Python 3 environment was never picked up.
# NOTE(review): confirm the exact location on the Jenkins workers.
export PATH="${PATH}:/home/anaconda/envs/py3k/bin"
./python/run-tests

echo ""
echo "========================================================================="
echo "Running SparkR tests"
echo "========================================================================="

# Record the stage before running it so a failure exits with this stage's code.
CURRENT_BLOCK=$BLOCK_SPARKR_UNIT_TESTS

# Run the SparkR tests only when an R executable can be found on PATH.
# The exit status of `command -v` is tested directly: placing its unquoted
# output inside `[ ... ]` breaks when the resolved path contains whitespace,
# which would silently skip the tests.
if command -v R > /dev/null 2>&1; then
  ./R/install-dev.sh
  ./R/run-tests.sh
else
  echo "Ignoring SparkR tests as R was not found in PATH"
fi