From 15ef3740dea3d82f64a030d4523ad542485e1453 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 24 Jan 2017 23:35:23 +0100 Subject: [PATCH] [SPARK-19334][SQL] Fix the code injection vulnerability related to Generator functions. ## What changes were proposed in this pull request? Similar to SPARK-15165, codegen is in danger of arbitrary code injection. The root cause is how variable names are created by codegen. In GenerateExec#codeGenAccessor, a variable name is created like as follows. ``` val value = ctx.freshName(name) ``` The variable `value` is named based on the value of the variable `name` and the value of `name` is from schema given by users so an attacker can attack with queries like as follows. ``` SELECT inline(array(cast(struct(1) AS struct<`=new Object() { {f();} public void f() {throw new RuntimeException("This exception is injected.");} public int x;}.x`:int>))) ``` In the example above, a RuntimeException is thrown but an attacker can replace it with arbitrary code. ## How was this patch tested? Added a new test case. Author: Kousuke Saruta Closes #16681 from sarutak/SPARK-19334. --- .../spark/sql/execution/GenerateExec.scala | 11 +++++++++-- .../org/apache/spark/sql/SQLQuerySuite.scala | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala index b52f5c4d4a..69be7094d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala @@ -181,7 +181,14 @@ case class GenerateExec( val row = codeGenAccessor(ctx, data.value, "col", index, st, nullable, checks) val fieldChecks = checks ++ optionalCode(nullable, row.isNull) val columns = st.fields.toSeq.zipWithIndex.map { case (f, i) => - codeGenAccessor(ctx, row.value, f.name, i.toString, f.dataType, f.nullable, fieldChecks) + codeGenAccessor( + ctx, + row.value, + s"st_col${i}", + i.toString, + f.dataType, + f.nullable, + fieldChecks) } ("", row.code, columns) @@ -247,7 +254,7 @@ case class GenerateExec( val values = e.dataType match { case ArrayType(st: StructType, nullable) => st.fields.toSeq.zipWithIndex.map { case (f, i) => - codeGenAccessor(ctx, current, f.name, s"$i", f.dataType, f.nullable, checks) + codeGenAccessor(ctx, current, s"st_col${i}", s"$i", f.dataType, f.nullable, checks) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 07b787a191..a77f920598 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2548,4 +2548,20 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { checkAnswer( sql("SELECT * FROM `_tbl`"), Row(1) :: Row(2) :: Row(3) :: Nil) } } + + test("SPARK-19334: check code injection is prevented") { + // The end of comment (*/) should be escaped. + val badQuery = + """|SELECT inline(array(cast(struct(1) AS + | struct<`= + | new Object() { + | {f();} + | public void f() {throw new RuntimeException("This exception is injected.");} + | public int x; + | }.x + | `:int>)))""".stripMargin.replaceAll("\n", "") + + checkAnswer(sql(badQuery), Row(1) :: Nil) + } + }