diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index db52f77481..1fd59213a0 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -562,10 +562,10 @@ public final class UTF8String implements Comparable, Externalizable, } /** - * Trims whitespaces ({@literal <=} ASCII 32) from both ends of this string. + * Trims whitespace ASCII characters from both ends of this string. * - * Note that, this method is the same as java's {@link String#trim}, and different from - * {@link UTF8String#trim()} which remove only spaces(= ASCII 32) from both ends. + * Note that, this method is different from {@link UTF8String#trim()} which removes + * only spaces(= ASCII 32) from both ends. * * @return A UTF8String whose value is this UTF8String, with any leading and trailing white * space removed, or this UTF8String if it has no leading or trailing whitespace. 
@@ -573,13 +573,13 @@ public final class UTF8String implements Comparable, Externalizable, */ public UTF8String trimAll() { int s = 0; - // skip all of the whitespaces (<=0x20) in the left side + // skip all of the whitespaces in the left side while (s < this.numBytes && Character.isWhitespace(getByte(s))) s++; if (s == this.numBytes) { // Everything trimmed return EMPTY_UTF8; } - // skip all of the whitespaces (<=0x20) in the right side + // skip all of the whitespaces in the right side int e = this.numBytes - 1; while (e > s && Character.isWhitespace(getByte(e))) e--; if (s == 0 && e == numBytes - 1) { diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 3464f26f36..dc9a49e69a 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -159,6 +159,8 @@ license: | - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference. +- In Spark 3.0, when casting string to integral types (tinyint, smallint, int and bigint), datetime types (date, timestamp and interval) and boolean type, the leading and trailing characters (<= ASCII 32) will be trimmed. For example, `cast('\b1\b' as int)` results in `1`. Since Spark 3.0.1, only the leading and trailing whitespace ASCII characters will be trimmed. For example, `cast('\t1\t' as int)` results in `1` but `cast('\b1\b' as int)` results in `NULL`. + ## Upgrading from Spark SQL 2.4 to 3.0 ### Dataset/DataFrame APIs