Revert "[SPARK-11753][SQL][TEST-HADOOP2.2] Make allowNonNumericNumbers option work
## What changes were proposed in this pull request?
This reverts commit c24b6b679c. Sent a PR to run Jenkins tests due to the revert conflicts of `dev/deps/spark-deps-hadoop*`.
## How was this patch tested?
Jenkins unit tests, integration tests, manual tests.
Author: Shixiong Zhu <shixiong@databricks.com>
Closes #13417 from zsxwing/revert-SPARK-11753.
Parent: c6de5832bf
Commit: 9a74de18a1
`dev/deps/spark-deps-hadoop*` (five manifests, each reverted to the Jackson 2.6.5 and paranamer 2.3 artifacts). First manifest:

```diff
@@ -72,13 +72,13 @@ hk2-utils-2.4.0-b34.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.6.5.jar
+jackson-core-2.6.5.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.6.5.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-paranamer-2.6.5.jar
+jackson-module-scala_2.11-2.6.5.jar
 janino-2.7.8.jar
 javassist-3.18.1-GA.jar
 javax.annotation-api-1.2.jar
@@ -128,7 +128,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.3.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar
```

The second and third manifests carry the identical change:

```diff
@@ -74,13 +74,13 @@ hk2-utils-2.4.0-b34.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.6.5.jar
+jackson-core-2.6.5.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.6.5.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-paranamer-2.6.5.jar
+jackson-module-scala_2.11-2.6.5.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
 javassist-3.18.1-GA.jar
@@ -135,7 +135,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.3.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar
```

Fourth manifest:

```diff
@@ -80,14 +80,14 @@ htrace-core-3.0.4.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.6.5.jar
+jackson-core-2.6.5.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.6.5.jar
 jackson-jaxrs-1.9.13.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-paranamer-2.6.5.jar
+jackson-module-scala_2.11-2.6.5.jar
 jackson-xc-1.9.13.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
@@ -143,7 +143,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.3.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar
```

Fifth manifest:

```diff
@@ -80,14 +80,14 @@ htrace-core-3.1.0-incubating.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.6.5.jar
+jackson-core-2.6.5.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.6.5.jar
 jackson-jaxrs-1.9.13.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-paranamer-2.6.5.jar
+jackson-module-scala_2.11-2.6.5.jar
 jackson-xc-1.9.13.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
@@ -144,7 +144,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.3.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar
```
`pom.xml` (8 changed lines):

```diff
@@ -160,7 +160,7 @@
     <jline.version>${scala.version}</jline.version>
     <jline.groupid>org.scala-lang</jline.groupid>
     <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
-    <fasterxml.jackson.version>2.7.3</fasterxml.jackson.version>
+    <fasterxml.jackson.version>2.6.5</fasterxml.jackson.version>
     <snappy.version>1.1.2.4</snappy.version>
     <netlib.java.version>1.1.2</netlib.java.version>
     <calcite.version>1.2.0-incubating</calcite.version>
@@ -180,7 +180,6 @@
     <antlr4.version>4.5.3</antlr4.version>
     <jpam.version>1.1</jpam.version>
     <selenium.version>2.52.0</selenium.version>
-    <paranamer.version>2.8</paranamer.version>

     <test.java.home>${java.home}</test.java.home>
     <test.exclude.tags></test.exclude.tags>
@@ -1826,11 +1825,6 @@
       <artifactId>antlr4-runtime</artifactId>
       <version>${antlr4.version}</version>
     </dependency>
-    <dependency>
-      <groupId>com.thoughtworks.paranamer</groupId>
-      <artifactId>paranamer</artifactId>
-      <version>${paranamer.version}</version>
-    </dependency>
   </dependencies>
 </dependencyManagement>
```
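The pom change pins `fasterxml.jackson.version` back to 2.6.5 and drops the managed paranamer 2.8 entry. As a quick sanity check after a dependency change of this kind, a sketch like the following (using Jackson's standard `PackageVersion` API; the object name is made up for illustration) prints the jackson-core version that actually resolved onto the classpath:

```scala
import com.fasterxml.jackson.core.json.PackageVersion

object JacksonVersionCheck {
  def main(args: Array[String]): Unit = {
    // PackageVersion.VERSION reports the version of the jackson-core jar
    // on the classpath; after this revert it should print 2.6.5, not 2.7.3.
    println(PackageVersion.VERSION)
  }
}
```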
Python `DataFrameReader` docstring (the `allowNonNumericNumbers` parameter doc is removed):

```diff
@@ -193,9 +193,6 @@ class DataFrameReader(object):
             set, it uses the default value, ``true``.
         :param allowNumericLeadingZero: allows leading zeros in numbers (e.g. 00012). If None is
             set, it uses the default value, ``false``.
-        :param allowNonNumericNumbers: allows using non-numeric numbers such as "NaN", "Infinity",
-            "-Infinity", "INF", "-INF", which are convertd to floating
-            point numbers, ``true``.
         :param allowBackslashEscapingAnyCharacter: allows accepting quoting of all character
             using backslash quoting mechanism. If None is
             set, it uses the default value, ``false``.
```
Scala `DataFrameReader` scaladoc (the corresponding option doc is removed):

```diff
@@ -293,8 +293,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * </li>
    * <li>`allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
    * (e.g. 00012)</li>
-   * <li>`allowNonNumericNumbers` (default `true`): allows using non-numeric numbers such as "NaN",
-   * "Infinity", "-Infinity", "INF", "-INF", which are convertd to floating point numbers.</li>
    * <li>`allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
    * character using backslash quoting mechanism</li>
    * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
```
`JacksonParser` (restores the original case-insensitive special-value handling for `FloatType` and `DoubleType`):

```diff
@@ -129,15 +129,13 @@ object JacksonParser extends Logging {
       case (VALUE_STRING, FloatType) =>
         // Special case handling for NaN and Infinity.
         val value = parser.getText
-        if (value.equals("NaN") ||
-          value.equals("Infinity") ||
-          value.equals("+Infinity") ||
-          value.equals("-Infinity")) {
+        val lowerCaseValue = value.toLowerCase()
+        if (lowerCaseValue.equals("nan") ||
+          lowerCaseValue.equals("infinity") ||
+          lowerCaseValue.equals("-infinity") ||
+          lowerCaseValue.equals("inf") ||
+          lowerCaseValue.equals("-inf")) {
           value.toFloat
-        } else if (value.equals("+INF") || value.equals("INF")) {
-          Float.PositiveInfinity
-        } else if (value.equals("-INF")) {
-          Float.NegativeInfinity
         } else {
           throw new SparkSQLJsonProcessingException(s"Cannot parse $value as FloatType.")
         }
@@ -148,15 +146,13 @@ object JacksonParser extends Logging {
       case (VALUE_STRING, DoubleType) =>
         // Special case handling for NaN and Infinity.
         val value = parser.getText
-        if (value.equals("NaN") ||
-          value.equals("Infinity") ||
-          value.equals("+Infinity") ||
-          value.equals("-Infinity")) {
+        val lowerCaseValue = value.toLowerCase()
+        if (lowerCaseValue.equals("nan") ||
+          lowerCaseValue.equals("infinity") ||
+          lowerCaseValue.equals("-infinity") ||
+          lowerCaseValue.equals("inf") ||
+          lowerCaseValue.equals("-inf")) {
           value.toDouble
-        } else if (value.equals("+INF") || value.equals("INF")) {
-          Double.PositiveInfinity
-        } else if (value.equals("-INF")) {
-          Double.NegativeInfinity
         } else {
           throw new SparkSQLJsonProcessingException(s"Cannot parse $value as DoubleType.")
         }
```
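The restored logic lower-cases the token and funnels every accepted form through `value.toFloat`/`value.toDouble`. A minimal standalone sketch of that rule follows; `parseSpecialDouble` is a hypothetical helper, not Spark's API. Note that `toDouble` is backed by `java.lang.Double.parseDouble`, which accepts "NaN" and "Infinity" but not "INF", which is part of why the ignored tests below describe this path as not really working:

```scala
object SpecialValueSketch {
  // Hypothetical helper mirroring the restored guard: accept the special
  // tokens case-insensitively, reject everything else.
  def parseSpecialDouble(value: String): Double = {
    val lowerCaseValue = value.toLowerCase()
    if (lowerCaseValue.equals("nan") ||
        lowerCaseValue.equals("infinity") ||
        lowerCaseValue.equals("-infinity") ||
        lowerCaseValue.equals("inf") ||
        lowerCaseValue.equals("-inf")) {
      // Delegates to java.lang.Double.parseDouble, which understands
      // "NaN"/"Infinity"/"-Infinity" but throws NumberFormatException on "INF",
      // even though "inf" passes the guard above.
      value.toDouble
    } else {
      throw new IllegalArgumentException(s"Cannot parse $value as DoubleType.")
    }
  }

  def main(args: Array[String]): Unit = {
    println(parseSpecialDouble("NaN").isNaN)                // true
    println(parseSpecialDouble("-Infinity").isNegInfinity)  // true
  }
}
```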
`JsonParsingOptionsSuite` (the expanded tests are reverted to the original ignored versions):

```diff
@@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.datasources.json

 import org.apache.spark.sql.QueryTest
 import org.apache.spark.sql.test.SharedSQLContext
-import org.apache.spark.sql.types.{DoubleType, StructField, StructType}

 /**
  * Test cases for various [[JSONOptions]].
@@ -94,51 +93,23 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
     assert(df.first().getLong(0) == 18)
   }

-  test("allowNonNumericNumbers off") {
-    // non-quoted non-numeric numbers don't work if allowNonNumericNumbers is off.
-    var testCases: Seq[String] = Seq("""{"age": NaN}""", """{"age": Infinity}""",
-      """{"age": +Infinity}""", """{"age": -Infinity}""", """{"age": INF}""",
-      """{"age": +INF}""", """{"age": -INF}""")
-    testCases.foreach { str =>
-      val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "false").json(rdd)
-
-      assert(df.schema.head.name == "_corrupt_record")
-    }
-
-    // quoted non-numeric numbers should still work even allowNonNumericNumbers is off.
-    testCases = Seq("""{"age": "NaN"}""", """{"age": "Infinity"}""", """{"age": "+Infinity"}""",
-      """{"age": "-Infinity"}""", """{"age": "INF"}""", """{"age": "+INF"}""",
-      """{"age": "-INF"}""")
-    val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isPosInfinity,
-      _.isNegInfinity, _.isPosInfinity, _.isPosInfinity, _.isNegInfinity)
-    val schema = StructType(StructField("age", DoubleType, true) :: Nil)
-
-    testCases.zipWithIndex.foreach { case (str, idx) =>
-      val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "false").schema(schema).json(rdd)
-
-      assert(df.schema.head.name == "age")
-      assert(tests(idx)(df.first().getDouble(0)))
-    }
-  }
-
-  test("allowNonNumericNumbers on") {
-    val testCases: Seq[String] = Seq("""{"age": NaN}""", """{"age": Infinity}""",
-      """{"age": +Infinity}""", """{"age": -Infinity}""", """{"age": +INF}""",
-      """{"age": -INF}""", """{"age": "NaN"}""", """{"age": "Infinity"}""",
-      """{"age": "-Infinity"}""")
-    val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isPosInfinity,
-      _.isNegInfinity, _.isPosInfinity, _.isNegInfinity, _.isNaN, _.isPosInfinity,
-      _.isNegInfinity, _.isPosInfinity, _.isNegInfinity)
-    val schema = StructType(StructField("age", DoubleType, true) :: Nil)
-    testCases.zipWithIndex.foreach { case (str, idx) =>
-      val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "true").schema(schema).json(rdd)
-
-      assert(df.schema.head.name == "age")
-      assert(tests(idx)(df.first().getDouble(0)))
-    }
-  }
+  // The following two tests are not really working - need to look into Jackson's
+  // JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS.
+  ignore("allowNonNumericNumbers off") {
+    val str = """{"age": NaN}"""
+    val rdd = spark.sparkContext.parallelize(Seq(str))
+    val df = spark.read.json(rdd)
+
+    assert(df.schema.head.name == "_corrupt_record")
+  }
+
+  ignore("allowNonNumericNumbers on") {
+    val str = """{"age": NaN}"""
+    val rdd = spark.sparkContext.parallelize(Seq(str))
+    val df = spark.read.option("allowNonNumericNumbers", "true").json(rdd)
+
+    assert(df.schema.head.name == "age")
+    assert(df.first().getDouble(0).isNaN)
+  }

   test("allowBackslashEscapingAnyCharacter off") {
```