[SPARK-9155][SQL] codegen StringSpace
Jira https://issues.apache.org/jira/browse/SPARK-9155 Author: Tarek Auel <tarek.auel@googlemail.com> Closes #7531 from tarekauel/SPARK-9155 and squashes the following commits: 423c426 [Tarek Auel] [SPARK-9155] language typo fix e34bd1b [Tarek Auel] [SPARK-9155] moved creation of blank string to UTF8String 4bc33e6 [Tarek Auel] [SPARK-9155] codegen StringSpace
This commit is contained in:
parent
dde0e12f32
commit
4863c11ea9
|
@ -593,17 +593,19 @@ case class StringReverse(child: Expression) extends UnaryExpression with String2
|
|||
* Returns a string consisting of n spaces.
|
||||
*/
|
||||
case class StringSpace(child: Expression)
|
||||
extends UnaryExpression with ImplicitCastInputTypes with CodegenFallback {
|
||||
extends UnaryExpression with ImplicitCastInputTypes {
|
||||
|
||||
// The value produced by this expression is always a string.
override def dataType: DataType = StringType
|
||||
// Declares a single input, typed as an integer (the number of spaces requested).
override def inputTypes: Seq[DataType] = Seq(IntegerType)
|
||||
|
||||
// Evaluates the expression for an input `s`: the result is a string made up of
// `s` spaces, with a negative count treated as 0.
// NOTE(review): two `val length` lines appear below because this view seems to interleave
// the removed and the added line of the diff — confirm against the real source file.
override def nullSafeEval(s: Any): Any = {
|
||||
val length = s.asInstanceOf[Integer]
|
||||
val length = s.asInstanceOf[Int]
|
||||
UTF8String.blankString(if (length < 0) 0 else length)
|
||||
}
|
||||
|
||||
val spaces = new Array[Byte](if (length < 0) 0 else length)
|
||||
java.util.Arrays.fill(spaces, ' '.asInstanceOf[Byte])
|
||||
UTF8String.fromBytes(spaces)
|
||||
/**
 * Emits the Java statement that assigns a blank string to this expression's result
 * variable. The emitted ternary treats a negative count as 0, matching `nullSafeEval`.
 */
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String =
  nullSafeCodeGen(ctx, ev, count => {
    // `count` is the text spliced into the generated Java for the child's value.
    val target = ev.primitive
    s"$target = UTF8String.blankString(($count < 0) ? 0 : $count);"
  })
|
||||
|
||||
// Short name for this expression: "space".
override def prettyName: String = "space"
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.spark.unsafe.types;
|
|||
import javax.annotation.Nonnull;
|
||||
import java.io.Serializable;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.spark.unsafe.PlatformDependent;
|
||||
import org.apache.spark.unsafe.array.ByteArrayMethods;
|
||||
|
@ -77,6 +78,15 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an UTF8String that contains `length` spaces.
|
||||
*/
|
||||
public static UTF8String blankString(int length) {
|
||||
byte[] spaces = new byte[length];
|
||||
Arrays.fill(spaces, (byte) ' ');
|
||||
return fromBytes(spaces);
|
||||
}
|
||||
|
||||
protected UTF8String(Object base, long offset, int size) {
|
||||
this.base = base;
|
||||
this.offset = offset;
|
||||
|
|
|
@ -286,4 +286,12 @@ public class UTF8StringSuite {
|
|||
assertEquals(
|
||||
UTF8String.fromString("世界千世").levenshteinDistance(UTF8String.fromString("千a世b")),4);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void createBlankString() {
|
||||
assertEquals(fromString(" "), blankString(1));
|
||||
assertEquals(fromString(" "), blankString(2));
|
||||
assertEquals(fromString(" "), blankString(3));
|
||||
assertEquals(fromString(""), blankString(0));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue