Revert "[SPARK-26248][SQL] Infer date type from CSV"
This reverts commit 5217f7b226.
This commit is contained in:
parent
e408e05322
commit
db1c5b1839
|
@ -22,20 +22,16 @@ import scala.util.control.Exception.allCatch
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
import org.apache.spark.sql.catalyst.analysis.TypeCoercion
|
import org.apache.spark.sql.catalyst.analysis.TypeCoercion
|
||||||
import org.apache.spark.sql.catalyst.expressions.ExprUtils
|
import org.apache.spark.sql.catalyst.expressions.ExprUtils
|
||||||
import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter}
|
import org.apache.spark.sql.catalyst.util.TimestampFormatter
|
||||||
import org.apache.spark.sql.types._
|
import org.apache.spark.sql.types._
|
||||||
|
|
||||||
class CSVInferSchema(val options: CSVOptions) extends Serializable {
|
class CSVInferSchema(val options: CSVOptions) extends Serializable {
|
||||||
|
|
||||||
@transient
|
@transient
|
||||||
private lazy val timestampFormatter = TimestampFormatter(
|
private lazy val timestampParser = TimestampFormatter(
|
||||||
options.timestampFormat,
|
options.timestampFormat,
|
||||||
options.timeZone,
|
options.timeZone,
|
||||||
options.locale)
|
options.locale)
|
||||||
@transient
|
|
||||||
private lazy val dateFormatter = DateFormatter(
|
|
||||||
options.dateFormat,
|
|
||||||
options.locale)
|
|
||||||
|
|
||||||
private val decimalParser = {
|
private val decimalParser = {
|
||||||
ExprUtils.getDecimalParser(options.locale)
|
ExprUtils.getDecimalParser(options.locale)
|
||||||
|
@ -108,7 +104,6 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
|
||||||
compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType)
|
compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType)
|
||||||
case DoubleType => tryParseDouble(field)
|
case DoubleType => tryParseDouble(field)
|
||||||
case TimestampType => tryParseTimestamp(field)
|
case TimestampType => tryParseTimestamp(field)
|
||||||
case DateType => tryParseDate(field)
|
|
||||||
case BooleanType => tryParseBoolean(field)
|
case BooleanType => tryParseBoolean(field)
|
||||||
case StringType => StringType
|
case StringType => StringType
|
||||||
case other: DataType =>
|
case other: DataType =>
|
||||||
|
@ -164,16 +159,9 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private def tryParseTimestamp(field: String): DataType = {
|
private def tryParseTimestamp(field: String): DataType = {
|
||||||
if ((allCatch opt timestampFormatter.parse(field)).isDefined) {
|
// This case covers fields that parse with the configured `timestampFormat` (which may be custom).
|
||||||
|
if ((allCatch opt timestampParser.parse(field)).isDefined) {
|
||||||
TimestampType
|
TimestampType
|
||||||
} else {
|
|
||||||
tryParseDate(field)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private def tryParseDate(field: String): DataType = {
|
|
||||||
if ((allCatch opt dateFormatter.parse(field)).isDefined) {
|
|
||||||
DateType
|
|
||||||
} else {
|
} else {
|
||||||
tryParseBoolean(field)
|
tryParseBoolean(field)
|
||||||
}
|
}
|
||||||
|
|
|
@ -187,22 +187,4 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
|
||||||
|
|
||||||
Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalInfer(_, DecimalType(7, 0)))
|
Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalInfer(_, DecimalType(7, 0)))
|
||||||
}
|
}
|
||||||
|
|
||||||
test("inferring date type") {
|
|
||||||
var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd"), false, "GMT")
|
|
||||||
var inferSchema = new CSVInferSchema(options)
|
|
||||||
assert(inferSchema.inferField(NullType, "2018/12/02") == DateType)
|
|
||||||
|
|
||||||
options = new CSVOptions(Map("dateFormat" -> "MMM yyyy"), false, "GMT")
|
|
||||||
inferSchema = new CSVInferSchema(options)
|
|
||||||
assert(inferSchema.inferField(NullType, "Dec 2018") == DateType)
|
|
||||||
|
|
||||||
options = new CSVOptions(
|
|
||||||
Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss"),
|
|
||||||
columnPruning = false,
|
|
||||||
defaultTimeZoneId = "GMT")
|
|
||||||
inferSchema = new CSVInferSchema(options)
|
|
||||||
assert(inferSchema.inferField(NullType, "2018-12-03T11:00:00") == TimestampType)
|
|
||||||
assert(inferSchema.inferField(NullType, "2018-12-03") == DateType)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue