Revert "[SPARK-26248][SQL] Infer date type from CSV"

This reverts commit 5217f7b226.
This commit is contained in:
Hyukjin Kwon 2018-12-17 11:53:14 +08:00
parent e408e05322
commit db1c5b1839
2 changed files with 4 additions and 34 deletions

View file

@ -22,20 +22,16 @@ import scala.util.control.Exception.allCatch
import org.apache.spark.rdd.RDD import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.analysis.TypeCoercion import org.apache.spark.sql.catalyst.analysis.TypeCoercion
import org.apache.spark.sql.catalyst.expressions.ExprUtils import org.apache.spark.sql.catalyst.expressions.ExprUtils
import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter} import org.apache.spark.sql.catalyst.util.TimestampFormatter
import org.apache.spark.sql.types._ import org.apache.spark.sql.types._
class CSVInferSchema(val options: CSVOptions) extends Serializable { class CSVInferSchema(val options: CSVOptions) extends Serializable {
@transient @transient
private lazy val timestampFormatter = TimestampFormatter( private lazy val timestampParser = TimestampFormatter(
options.timestampFormat, options.timestampFormat,
options.timeZone, options.timeZone,
options.locale) options.locale)
@transient
private lazy val dateFormatter = DateFormatter(
options.dateFormat,
options.locale)
private val decimalParser = { private val decimalParser = {
ExprUtils.getDecimalParser(options.locale) ExprUtils.getDecimalParser(options.locale)
@ -108,7 +104,6 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType) compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType)
case DoubleType => tryParseDouble(field) case DoubleType => tryParseDouble(field)
case TimestampType => tryParseTimestamp(field) case TimestampType => tryParseTimestamp(field)
case DateType => tryParseDate(field)
case BooleanType => tryParseBoolean(field) case BooleanType => tryParseBoolean(field)
case StringType => StringType case StringType => StringType
case other: DataType => case other: DataType =>
@ -164,16 +159,9 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
} }
private def tryParseTimestamp(field: String): DataType = { private def tryParseTimestamp(field: String): DataType = {
if ((allCatch opt timestampFormatter.parse(field)).isDefined) { // This case infers a custom `dataFormat` is set.
if ((allCatch opt timestampParser.parse(field)).isDefined) {
TimestampType TimestampType
} else {
tryParseDate(field)
}
}
private def tryParseDate(field: String): DataType = {
if ((allCatch opt dateFormatter.parse(field)).isDefined) {
DateType
} else { } else {
tryParseBoolean(field) tryParseBoolean(field)
} }

View file

@ -187,22 +187,4 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalInfer(_, DecimalType(7, 0))) Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalInfer(_, DecimalType(7, 0)))
} }
test("inferring date type") {
var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd"), false, "GMT")
var inferSchema = new CSVInferSchema(options)
assert(inferSchema.inferField(NullType, "2018/12/02") == DateType)
options = new CSVOptions(Map("dateFormat" -> "MMM yyyy"), false, "GMT")
inferSchema = new CSVInferSchema(options)
assert(inferSchema.inferField(NullType, "Dec 2018") == DateType)
options = new CSVOptions(
Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss"),
columnPruning = false,
defaultTimeZoneId = "GMT")
inferSchema = new CSVInferSchema(options)
assert(inferSchema.inferField(NullType, "2018-12-03T11:00:00") == TimestampType)
assert(inferSchema.inferField(NullType, "2018-12-03") == DateType)
}
} }