[SPARK-21648][SQL] Fix confusing assert failure in JDBC source when parallel fetching parameters are not properly provided.
### What changes were proposed in this pull request?

```sql
CREATE TABLE mytesttable1
USING org.apache.spark.sql.jdbc
OPTIONS (
  url 'jdbc:mysql://${jdbcHostname}:${jdbcPort}/${jdbcDatabase}?user=${jdbcUsername}&password=${jdbcPassword}',
  dbtable 'mytesttable1',
  paritionColumn 'state_id',
  lowerBound '0',
  upperBound '52',
  numPartitions '53',
  fetchSize '10000'
)
```

The option name `paritionColumn` above is misspelled, which means no value was actually provided for `partitionColumn`. In that case, users hit a confusing error:

```
AssertionError: assertion failed
java.lang.AssertionError: assertion failed
	at scala.Predef$.assert(Predef.scala:156)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:39)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:312)
```

This patch validates the combination of partitioning options up front in `JDBCOptions` using `require`, so users now get an `IllegalArgumentException` carrying a descriptive message instead of a bare `AssertionError`.

### How was this patch tested?

Added a test case.

Author: gatorsmile <gatorsmile@gmail.com>

Closes #18864 from gatorsmile/jdbcPartCol.
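For illustration, a hedged reproduction of the same failure through the public `DataFrameReader` API (a local `SparkSession` and an H2 driver on the classpath are assumed; the URL and table are placeholders). Unknown option keys are passed through rather than rejected, so the typo simply leaves `partitionColumn` unset:

```scala
import org.apache.spark.sql.SparkSession

object ReproSpark21648 {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("repro").getOrCreate()
    try {
      spark.read
        .format("jdbc")
        .option("url", "jdbc:h2:mem:testdb0;user=testUser;password=testPass") // placeholder
        .option("dbtable", "mytesttable1")
        .option("paritionColumn", "state_id") // the typo: this key is silently ignored
        .option("lowerBound", "0")
        .option("upperBound", "52")
        .option("numPartitions", "53")
        .load()
    } catch {
      // Before this patch: java.lang.AssertionError: assertion failed
      // After this patch: IllegalArgumentException naming the required options
      case e: Exception => println(e)
    } finally {
      spark.stop()
    }
  }
}
```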
**sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala**

```diff
@@ -97,10 +97,13 @@ class JDBCOptions(
   val lowerBound = parameters.get(JDBC_LOWER_BOUND).map(_.toLong)
   // the upper bound of the partition column
   val upperBound = parameters.get(JDBC_UPPER_BOUND).map(_.toLong)
-  require(partitionColumn.isEmpty ||
-    (lowerBound.isDefined && upperBound.isDefined && numPartitions.isDefined),
-    s"If '$JDBC_PARTITION_COLUMN' is specified then '$JDBC_LOWER_BOUND', '$JDBC_UPPER_BOUND'," +
-      s" and '$JDBC_NUM_PARTITIONS' are required.")
+  // numPartitions is also used for data source writing
+  require((partitionColumn.isEmpty && lowerBound.isEmpty && upperBound.isEmpty) ||
+    (partitionColumn.isDefined && lowerBound.isDefined && upperBound.isDefined &&
+      numPartitions.isDefined),
+    s"When reading JDBC data sources, users need to specify all or none for the following " +
+      s"options: '$JDBC_PARTITION_COLUMN', '$JDBC_LOWER_BOUND', '$JDBC_UPPER_BOUND', " +
+      s"and '$JDBC_NUM_PARTITIONS'")
   val fetchSize = {
     val size = parameters.getOrElse(JDBC_BATCH_FETCH_SIZE, "0").toInt
     require(size >= 0,
```
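The new `require` encodes an all-or-none rule across the four partitioning options. A standalone, script-style sketch of that predicate (the `allOrNone` helper is invented for illustration and is not part of the patch):

```scala
// All-or-none over a set of optional settings: valid only when every option
// is absent or every option is present.
def allOrNone(opts: Option[_]*): Boolean =
  opts.forall(_.isEmpty) || opts.forall(_.isDefined)

// The failing DDL from the report: bounds given, partitionColumn lost to a typo.
assert(!allOrNone(None, Some(0L), Some(52L), Some(53)))
// Unpartitioned read with none of the options: fine.
assert(allOrNone(None, None, None, None))
// Fully specified partitioned read: fine.
assert(allOrNone(Some("state_id"), Some(0L), Some(52L), Some(53)))
```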
**sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala**

```diff
@@ -29,6 +29,8 @@ class JdbcRelationProvider extends CreatableRelationProvider
   override def createRelation(
       sqlContext: SQLContext,
       parameters: Map[String, String]): BaseRelation = {
+    import JDBCOptions._
+
     val jdbcOptions = new JDBCOptions(parameters)
     val partitionColumn = jdbcOptions.partitionColumn
     val lowerBound = jdbcOptions.lowerBound
@@ -36,10 +38,13 @@ class JdbcRelationProvider extends CreatableRelationProvider
     val numPartitions = jdbcOptions.numPartitions

     val partitionInfo = if (partitionColumn.isEmpty) {
-      assert(lowerBound.isEmpty && upperBound.isEmpty)
+      assert(lowerBound.isEmpty && upperBound.isEmpty, "When 'partitionColumn' is not specified, " +
+        s"'$JDBC_LOWER_BOUND' and '$JDBC_UPPER_BOUND' are expected to be empty")
       null
     } else {
-      assert(lowerBound.nonEmpty && upperBound.nonEmpty && numPartitions.nonEmpty)
+      assert(lowerBound.nonEmpty && upperBound.nonEmpty && numPartitions.nonEmpty,
+        s"When 'partitionColumn' is specified, '$JDBC_LOWER_BOUND', '$JDBC_UPPER_BOUND', and " +
+        s"'$JDBC_NUM_PARTITIONS' are also required")
       JDBCPartitioningInfo(
         partitionColumn.get, lowerBound.get, upperBound.get, numPartitions.get)
     }
```
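Keeping `require` for user-facing validation in `JDBCOptions` while these `assert` calls remain internal invariants is a meaningful split in Scala: `assert` is `@elidable` and can be compiled away, whereas `require` always runs and throws `IllegalArgumentException`. A minimal sketch of the distinction (names are illustrative):

```scala
object CheckKinds {
  def main(args: Array[String]): Unit = {
    // Removed entirely when compiled with `scalac -Xdisable-assertions`;
    // otherwise throws java.lang.AssertionError on failure.
    assert(args.length <= 2, "internal invariant")

    // Never elided; throws java.lang.IllegalArgumentException with a
    // "requirement failed: ..." message -- the right tool for user input.
    require(args.nonEmpty, "at least one argument is required")
  }
}
```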
**sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala**

```diff
@@ -421,6 +421,28 @@ class JDBCSuite extends SparkFunSuite
     assert(e.contains("Invalid value `-1` for parameter `fetchsize`"))
   }

+  test("Missing partition columns") {
+    withView("tempPeople") {
+      val e = intercept[IllegalArgumentException] {
+        sql(
+          s"""
+             |CREATE OR REPLACE TEMPORARY VIEW tempPeople
+             |USING org.apache.spark.sql.jdbc
+             |OPTIONS (
+             |  url 'jdbc:h2:mem:testdb0;user=testUser;password=testPass',
+             |  dbtable 'TEST.PEOPLE',
+             |  lowerBound '0',
+             |  upperBound '52',
+             |  numPartitions '53',
+             |  fetchSize '10000' )
+           """.stripMargin.replaceAll("\n", " "))
+      }.getMessage
+      assert(e.contains("When reading JDBC data sources, users need to specify all or none " +
+        "for the following options: 'partitionColumn', 'lowerBound', 'upperBound', and " +
+        "'numPartitions'"))
+    }
+  }
+
   test("Basic API with FetchSize") {
     (0 to 4).foreach { size =>
       val properties = new Properties()
```
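For contrast with the new negative test, the corrected form of the same view definition, run through `spark.sql` (a `SparkSession` named `spark` is assumed, and the partition column name `THEID` is assumed from the suite's `TEST.PEOPLE` fixture):

```scala
// With `partitionColumn` spelled correctly, all four partitioning options are
// present and the relation resolves instead of failing validation.
spark.sql(
  """
    |CREATE OR REPLACE TEMPORARY VIEW tempPeople
    |USING org.apache.spark.sql.jdbc
    |OPTIONS (
    |  url 'jdbc:h2:mem:testdb0;user=testUser;password=testPass',
    |  dbtable 'TEST.PEOPLE',
    |  partitionColumn 'THEID',
    |  lowerBound '0',
    |  upperBound '52',
    |  numPartitions '53',
    |  fetchSize '10000' )
  """.stripMargin.replaceAll("\n", " "))
```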
**sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala**

```diff
@@ -324,8 +324,9 @@ class JDBCWriteSuite extends SharedSQLContext with BeforeAndAfter {
       .option("partitionColumn", "foo")
       .save()
     }.getMessage
-    assert(e.contains("If 'partitionColumn' is specified then 'lowerBound', 'upperBound'," +
-      " and 'numPartitions' are required."))
+    assert(e.contains("When reading JDBC data sources, users need to specify all or none " +
+      "for the following options: 'partitionColumn', 'lowerBound', 'upperBound', and " +
+      "'numPartitions'"))
   }

   test("SPARK-18433: Improve DataSource option keys to be more case-insensitive") {
```
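The write path constructs the same `JDBCOptions` and so hits the same validation, which is why this suite's expected message changes as well. A hedged sketch of the trigger (a `SparkSession` named `spark` is assumed; the URL and table name are placeholders):

```scala
// partitionColumn is set but lowerBound/upperBound/numPartitions are not,
// so JDBCOptions rejects the combination before any write happens.
val df = spark.range(10).toDF("id")
df.write
  .format("jdbc")
  .option("url", "jdbc:h2:mem:testdb0;user=testUser;password=testPass")
  .option("dbtable", "TEST.SAVETEST")
  .option("partitionColumn", "foo")
  .save() // -> IllegalArgumentException: "... specify all or none ..."
```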