[SPARK-31993][SQL] Build arrays for passing variables generated from children for 'concat_ws' with columns having at least one of array type

### What changes were proposed in this pull request?

Please refer to the next section, `Why are the changes needed?`, for details on how the current implementation of `concat_ws` breaks under certain conditions.

This patch fixes the code generation logic of `concat_ws` for the case where at least one of the input columns is of array type. It now builds two arrays that store the `isNull` and `value` results of the children's generated code, and passes these arrays to both `varargCounts` and `varargBuilds`. This guarantees that both `varargCounts` and `varargBuilds` can access, as array parameters, the values that the children's generated code produces in local variables, which is required for both of them to compile.

Below is the generated code for the newly added UT, `SPARK-31993: concat_ws in agg function with plenty of string/array types columns`.

> before the patch

```
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */   return new SpecificUnsafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificUnsafeProjection extends org.apache.spark.sql.catalyst.expressions.UnsafeProjection {
/* 006 */
/* 007 */   private Object[] references;
/* 008 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
/* 009 */
/* 010 */   public SpecificUnsafeProjection(Object[] references) {
/* 011 */     this.references = references;
/* 012 */     mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 32);
/* 013 */
/* 014 */   }
/* 015 */
/* 016 */   public void initialize(int partitionIndex) {
/* 017 */
/* 018 */   }
/* 019 */
/* 020 */   // Scala.Function1 need this
/* 021 */   public java.lang.Object apply(java.lang.Object row) {
/* 022 */     return apply((InternalRow) row);
/* 023 */   }
/* 024 */
/* 025 */   public UnsafeRow apply(InternalRow i) {
/* 026 */     mutableStateArray_0[0].reset();
/* 027 */
/* 028 */
/* 029 */     mutableStateArray_0[0].zeroOutNullBytes();
/* 030 */
/* 031 */     apply_0_0(i);
/* 032 */     apply_0_1(i);
/* 033 */     int varargNum_0 = 30;
/* 034 */     int idxInVararg_0 = 0;
/* 035 */
/* 036 */     if (!isNull_2) {
/* 037 */       varargNum_0 += value_2.numElements();
/* 038 */     }
/* 039 */
/* 040 */     if (!isNull_3) {
/* 041 */       varargNum_0 += value_3.numElements();
/* 042 */     }
/* 043 */
/* 044 */     UTF8String[] array_0 = new UTF8String[varargNum_0];
/* 045 */     idxInVararg_0 = varargBuildsConcatWs_0_0(i, array_0, idxInVararg_0);
/* 046 */     idxInVararg_0 = varargBuildsConcatWs_0_1(i, array_0, idxInVararg_0);
/* 047 */     idxInVararg_0 = varargBuildsConcatWs_0_2(i, array_0, idxInVararg_0);
/* 048 */     UTF8String value_0 = UTF8String.concatWs(((UTF8String) references[0] /* literal */), array_0);
/* 049 */     boolean isNull_0 = value_0 == null;
/* 050 */     mutableStateArray_0[0].write(0, value_0);
/* 051 */     return (mutableStateArray_0[0].getRow());
/* 052 */   }
/* 053 */
/* 054 */
/* 055 */   private void apply_0_1(InternalRow i) {
/* 056 */     UTF8String value_25 = i.getUTF8String(22);UTF8String value_26 = i.getUTF8String(23);UTF8String value_27 = i.getUTF8String(24);UTF8String value_28 = i.getUTF8String(25);UTF8String value_29 = i.getUTF8String(26);UTF8String value_30 = i.getUTF8String(27);UTF8String value_31 = i.getUTF8String(28);UTF8String value_32 = i.getUTF8String(29);UTF8String value_33 = i.getUTF8String(30);
/* 057 */   }
/* 058 */
/* 059 */
/* 060 */   private int varargBuildsConcatWs_0_0(InternalRow i, UTF8String [] array_0, int idxInVararg_0) {
/* 061 */
/* 062 */
/* 063 */     if (!isNull_2) {
/* 064 */       final int n_0 = value_2.numElements();
/* 065 */       for (int j = 0; j < n_0; j ++) {
/* 066 */         array_0[idxInVararg_0 ++] = value_2.getUTF8String(j);
/* 067 */       }
/* 068 */     }
/* 069 */
/* 070 */     if (!isNull_3) {
/* 071 */       final int n_1 = value_3.numElements();
/* 072 */       for (int j = 0; j < n_1; j ++) {
/* 073 */         array_0[idxInVararg_0 ++] = value_3.getUTF8String(j);
/* 074 */       }
/* 075 */     }
/* 076 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_4;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_5;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_6;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_7;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_8;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_9;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_10;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_11;
/* 077 */     return idxInVararg_0;
/* 078 */
/* 079 */   }
/* 080 */
/* 081 */
/* 082 */   private int varargBuildsConcatWs_0_2(InternalRow i, UTF8String [] array_0, int idxInVararg_0) {
/* 083 */
/* 084 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_28;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_29;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_30;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_31;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_32;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_33;
/* 085 */     return idxInVararg_0;
/* 086 */
/* 087 */   }
/* 088 */
/* 089 */
/* 090 */   private void apply_0_0(InternalRow i) {
/* 091 */     boolean isNull_2 = i.isNullAt(31);
/* 092 */     ArrayData value_2 = isNull_2 ?
/* 093 */     null : (i.getArray(31));boolean isNull_3 = i.isNullAt(32);
/* 094 */     ArrayData value_3 = isNull_3 ?
/* 095 */     null : (i.getArray(32));UTF8String value_4 = i.getUTF8String(1);UTF8String value_5 = i.getUTF8String(2);UTF8String value_6 = i.getUTF8String(3);UTF8String value_7 = i.getUTF8String(4);UTF8String value_8 = i.getUTF8String(5);UTF8String value_9 = i.getUTF8String(6);UTF8String value_10 = i.getUTF8String(7);UTF8String value_11 = i.getUTF8String(8);UTF8String value_12 = i.getUTF8String(9);UTF8String value_13 = i.getUTF8String(10);UTF8String value_14 = i.getUTF8String(11);UTF8String value_15 = i.getUTF8String(12);UTF8String value_16 = i.getUTF8String(13);UTF8String value_17 = i.getUTF8String(14);UTF8String value_18 = i.getUTF8String(15);UTF8String value_19 = i.getUTF8String(16);UTF8String value_20 = i.getUTF8String(17);UTF8String value_21 = i.getUTF8String(18);UTF8String value_22 = i.getUTF8String(19);UTF8String value_23 = i.getUTF8String(20);UTF8String value_24 = i.getUTF8String(21);
/* 096 */   }
/* 097 */
/* 098 */
/* 099 */   private int varargBuildsConcatWs_0_1(InternalRow i, UTF8String [] array_0, int idxInVararg_0) {
/* 100 */
/* 101 */     array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_12;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_13;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_14;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_15;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_16;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_17;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_18;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_19;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_20;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_21;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_22;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_23;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_24;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_25;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_26;array_0[idxInVararg_0 ++] = false ? (UTF8String) null : value_27;
/* 102 */     return idxInVararg_0;
/* 103 */
/* 104 */   }
/* 105 */
/* 106 */ }
```

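Compilation of the generated code fails with the error message: `org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 36, Column 6: Expression "isNull_2" is not an rvalue`. The reason is that `isNull_2` is declared as a local variable inside the split method `apply_0_0`, so it is not in scope in `apply`, where the counting code refers to it.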

> after the patch

```
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */   return new SpecificUnsafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificUnsafeProjection extends org.apache.spark.sql.catalyst.expressions.UnsafeProjection {
/* 006 */
/* 007 */   private Object[] references;
/* 008 */   private boolean globalIsNull_0;
/* 009 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
/* 010 */
/* 011 */   public SpecificUnsafeProjection(Object[] references) {
/* 012 */     this.references = references;
/* 013 */
/* 014 */     mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 32);
/* 015 */
/* 016 */   }
/* 017 */
/* 018 */   public void initialize(int partitionIndex) {
/* 019 */
/* 020 */   }
/* 021 */
/* 022 */   // Scala.Function1 need this
/* 023 */   public java.lang.Object apply(java.lang.Object row) {
/* 024 */     return apply((InternalRow) row);
/* 025 */   }
/* 026 */
/* 027 */   public UnsafeRow apply(InternalRow i) {
/* 028 */     mutableStateArray_0[0].reset();
/* 029 */
/* 030 */
/* 031 */     mutableStateArray_0[0].zeroOutNullBytes();
/* 032 */
/* 033 */     UTF8String value_34 = ConcatWs_0(i);
/* 034 */     mutableStateArray_0[0].write(0, value_34);
/* 035 */     return (mutableStateArray_0[0].getRow());
/* 036 */   }
/* 037 */
/* 038 */
/* 039 */   private void initializeArgsArrays_0_0(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
/* 040 */
/* 041 */     boolean isNull_2 = i.isNullAt(31);
/* 042 */     ArrayData value_2 = isNull_2 ?
/* 043 */     null : (i.getArray(31));
/* 044 */     isNullArgs_0[0] = isNull_2;
/* 045 */     valueArgs_0[0] = value_2;
/* 046 */
/* 047 */     boolean isNull_3 = i.isNullAt(32);
/* 048 */     ArrayData value_3 = isNull_3 ?
/* 049 */     null : (i.getArray(32));
/* 050 */     isNullArgs_0[1] = isNull_3;
/* 051 */     valueArgs_0[1] = value_3;
/* 052 */
/* 053 */     UTF8String value_4 = i.getUTF8String(1);
/* 054 */     isNullArgs_0[2] = false;
/* 055 */     valueArgs_0[2] = value_4;
/* 056 */
/* 057 */     UTF8String value_5 = i.getUTF8String(2);
/* 058 */     isNullArgs_0[3] = false;
/* 059 */     valueArgs_0[3] = value_5;
/* 060 */
/* 061 */     UTF8String value_6 = i.getUTF8String(3);
/* 062 */     isNullArgs_0[4] = false;
/* 063 */     valueArgs_0[4] = value_6;
/* 064 */
/* 065 */     UTF8String value_7 = i.getUTF8String(4);
/* 066 */     isNullArgs_0[5] = false;
/* 067 */     valueArgs_0[5] = value_7;
/* 068 */
/* 069 */     UTF8String value_8 = i.getUTF8String(5);
/* 070 */     isNullArgs_0[6] = false;
/* 071 */     valueArgs_0[6] = value_8;
/* 072 */
/* 073 */   }
/* 074 */
/* 075 */
/* 076 */   private void initializeArgsArrays_0_3(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
/* 077 */
/* 078 */     UTF8String value_25 = i.getUTF8String(22);
/* 079 */     isNullArgs_0[23] = false;
/* 080 */     valueArgs_0[23] = value_25;
/* 081 */
/* 082 */     UTF8String value_26 = i.getUTF8String(23);
/* 083 */     isNullArgs_0[24] = false;
/* 084 */     valueArgs_0[24] = value_26;
/* 085 */
/* 086 */     UTF8String value_27 = i.getUTF8String(24);
/* 087 */     isNullArgs_0[25] = false;
/* 088 */     valueArgs_0[25] = value_27;
/* 089 */
/* 090 */     UTF8String value_28 = i.getUTF8String(25);
/* 091 */     isNullArgs_0[26] = false;
/* 092 */     valueArgs_0[26] = value_28;
/* 093 */
/* 094 */     UTF8String value_29 = i.getUTF8String(26);
/* 095 */     isNullArgs_0[27] = false;
/* 096 */     valueArgs_0[27] = value_29;
/* 097 */
/* 098 */     UTF8String value_30 = i.getUTF8String(27);
/* 099 */     isNullArgs_0[28] = false;
/* 100 */     valueArgs_0[28] = value_30;
/* 101 */
/* 102 */     UTF8String value_31 = i.getUTF8String(28);
/* 103 */     isNullArgs_0[29] = false;
/* 104 */     valueArgs_0[29] = value_31;
/* 105 */
/* 106 */     UTF8String value_32 = i.getUTF8String(29);
/* 107 */     isNullArgs_0[30] = false;
/* 108 */     valueArgs_0[30] = value_32;
/* 109 */
/* 110 */   }
/* 111 */
/* 112 */
/* 113 */   private int varargBuildsConcatWs_0_3(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
/* 114 */
/* 115 */     array_0[idxInVararg_0 ++] = isNullArgs_0[29] ? (UTF8String) null : ((UTF8String) valueArgs_0[29]);array_0[idxInVararg_0 ++] = isNullArgs_0[30] ? (UTF8String) null : ((UTF8String) valueArgs_0[30]);array_0[idxInVararg_0 ++] = isNullArgs_0[31] ? (UTF8String) null : ((UTF8String) valueArgs_0[31]);
/* 116 */     return idxInVararg_0;
/* 117 */
/* 118 */   }
/* 119 */
/* 120 */
/* 121 */   private int varargBuildsConcatWs_0_0(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
/* 122 */
/* 123 */
/* 124 */     if (!isNullArgs_0[0]) {
/* 125 */       final int n_0 = ((ArrayData) valueArgs_0[0]).numElements();
/* 126 */       for (int j = 0; j < n_0; j ++) {
/* 127 */         array_0[idxInVararg_0 ++] = ((ArrayData) valueArgs_0[0]).getUTF8String(j);
/* 128 */       }
/* 129 */     }
/* 130 */
/* 131 */     if (!isNullArgs_0[1]) {
/* 132 */       final int n_1 = ((ArrayData) valueArgs_0[1]).numElements();
/* 133 */       for (int j = 0; j < n_1; j ++) {
/* 134 */         array_0[idxInVararg_0 ++] = ((ArrayData) valueArgs_0[1]).getUTF8String(j);
/* 135 */       }
/* 136 */     }
/* 137 */     array_0[idxInVararg_0 ++] = isNullArgs_0[2] ? (UTF8String) null : ((UTF8String) valueArgs_0[2]);array_0[idxInVararg_0 ++] = isNullArgs_0[3] ? (UTF8String) null : ((UTF8String) valueArgs_0[3]);array_0[idxInVararg_0 ++] = isNullArgs_0[4] ? (UTF8String) null : ((UTF8String) valueArgs_0[4]);array_0[idxInVararg_0 ++] = isNullArgs_0[5] ? (UTF8String) null : ((UTF8String) valueArgs_0[5]);array_0[idxInVararg_0 ++] = isNullArgs_0[6] ? (UTF8String) null : ((UTF8String) valueArgs_0[6]);
/* 138 */     return idxInVararg_0;
/* 139 */
/* 140 */   }
/* 141 */
/* 142 */
/* 143 */   private UTF8String ConcatWs_0(InternalRow i) {
/* 144 */     boolean[] isNullArgs_0 = new boolean[32];
/* 145 */     Object[] valueArgs_0 = new Object[32];
/* 146 */     initializeArgsArrays_0_0(i, isNullArgs_0, valueArgs_0);
/* 147 */     initializeArgsArrays_0_1(i, isNullArgs_0, valueArgs_0);
/* 148 */     initializeArgsArrays_0_2(i, isNullArgs_0, valueArgs_0);
/* 149 */     initializeArgsArrays_0_3(i, isNullArgs_0, valueArgs_0);
/* 150 */     initializeArgsArrays_0_4(i, isNullArgs_0, valueArgs_0);
/* 151 */     int varargNum_0 = 30;
/* 152 */     int idxInVararg_0 = 0;
/* 153 */
/* 154 */     if (!isNullArgs_0[0]) {
/* 155 */       varargNum_0 += ((ArrayData) valueArgs_0[0]).numElements();
/* 156 */     }
/* 157 */
/* 158 */     if (!isNullArgs_0[1]) {
/* 159 */       varargNum_0 += ((ArrayData) valueArgs_0[1]).numElements();
/* 160 */     }
/* 161 */
/* 162 */     UTF8String[] array_0 = new UTF8String[varargNum_0];
/* 163 */     idxInVararg_0 = varargBuildsConcatWs_0_0(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
/* 164 */     idxInVararg_0 = varargBuildsConcatWs_0_1(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
/* 165 */     idxInVararg_0 = varargBuildsConcatWs_0_2(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
/* 166 */     idxInVararg_0 = varargBuildsConcatWs_0_3(i, array_0, idxInVararg_0, isNullArgs_0, valueArgs_0);
/* 167 */     UTF8String value_0 = UTF8String.concatWs(((UTF8String) references[0] /* literal */), array_0);
/* 168 */     boolean isNull_0 = value_0 == null;
/* 169 */     globalIsNull_0 = isNull_0;
/* 170 */     return value_0;
/* 171 */   }
/* 172 */
/* 173 */
/* 174 */   private void initializeArgsArrays_0_2(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
/* 175 */
/* 176 */     UTF8String value_17 = i.getUTF8String(14);
/* 177 */     isNullArgs_0[15] = false;
/* 178 */     valueArgs_0[15] = value_17;
/* 179 */
/* 180 */     UTF8String value_18 = i.getUTF8String(15);
/* 181 */     isNullArgs_0[16] = false;
/* 182 */     valueArgs_0[16] = value_18;
/* 183 */
/* 184 */     UTF8String value_19 = i.getUTF8String(16);
/* 185 */     isNullArgs_0[17] = false;
/* 186 */     valueArgs_0[17] = value_19;
/* 187 */
/* 188 */     UTF8String value_20 = i.getUTF8String(17);
/* 189 */     isNullArgs_0[18] = false;
/* 190 */     valueArgs_0[18] = value_20;
/* 191 */
/* 192 */     UTF8String value_21 = i.getUTF8String(18);
/* 193 */     isNullArgs_0[19] = false;
/* 194 */     valueArgs_0[19] = value_21;
/* 195 */
/* 196 */     UTF8String value_22 = i.getUTF8String(19);
/* 197 */     isNullArgs_0[20] = false;
/* 198 */     valueArgs_0[20] = value_22;
/* 199 */
/* 200 */     UTF8String value_23 = i.getUTF8String(20);
/* 201 */     isNullArgs_0[21] = false;
/* 202 */     valueArgs_0[21] = value_23;
/* 203 */
/* 204 */     UTF8String value_24 = i.getUTF8String(21);
/* 205 */     isNullArgs_0[22] = false;
/* 206 */     valueArgs_0[22] = value_24;
/* 207 */
/* 208 */   }
/* 209 */
/* 210 */
/* 211 */   private int varargBuildsConcatWs_0_2(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
/* 212 */
/* 213 */     array_0[idxInVararg_0 ++] = isNullArgs_0[18] ? (UTF8String) null : ((UTF8String) valueArgs_0[18]);array_0[idxInVararg_0 ++] = isNullArgs_0[19] ? (UTF8String) null : ((UTF8String) valueArgs_0[19]);array_0[idxInVararg_0 ++] = isNullArgs_0[20] ? (UTF8String) null : ((UTF8String) valueArgs_0[20]);array_0[idxInVararg_0 ++] = isNullArgs_0[21] ? (UTF8String) null : ((UTF8String) valueArgs_0[21]);array_0[idxInVararg_0 ++] = isNullArgs_0[22] ? (UTF8String) null : ((UTF8String) valueArgs_0[22]);array_0[idxInVararg_0 ++] = isNullArgs_0[23] ? (UTF8String) null : ((UTF8String) valueArgs_0[23]);array_0[idxInVararg_0 ++] = isNullArgs_0[24] ? (UTF8String) null : ((UTF8String) valueArgs_0[24]);array_0[idxInVararg_0 ++] = isNullArgs_0[25] ? (UTF8String) null : ((UTF8String) valueArgs_0[25]);array_0[idxInVararg_0 ++] = isNullArgs_0[26] ? (UTF8String) null : ((UTF8String) valueArgs_0[26]);array_0[idxInVararg_0 ++] = isNullArgs_0[27] ? (UTF8String) null : ((UTF8String) valueArgs_0[27]);array_0[idxInVararg_0 ++] = isNullArgs_0[28] ? (UTF8String) null : ((UTF8String) valueArgs_0[28]);
/* 214 */     return idxInVararg_0;
/* 215 */
/* 216 */   }
/* 217 */
/* 218 */
/* 219 */   private void initializeArgsArrays_0_4(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
/* 220 */
/* 221 */     UTF8String value_33 = i.getUTF8String(30);
/* 222 */     isNullArgs_0[31] = false;
/* 223 */     valueArgs_0[31] = value_33;
/* 224 */
/* 225 */   }
/* 226 */
/* 227 */
/* 228 */   private void initializeArgsArrays_0_1(InternalRow i, boolean [] isNullArgs_0, Object [] valueArgs_0) {
/* 229 */
/* 230 */     UTF8String value_9 = i.getUTF8String(6);
/* 231 */     isNullArgs_0[7] = false;
/* 232 */     valueArgs_0[7] = value_9;
/* 233 */
/* 234 */     UTF8String value_10 = i.getUTF8String(7);
/* 235 */     isNullArgs_0[8] = false;
/* 236 */     valueArgs_0[8] = value_10;
/* 237 */
/* 238 */     UTF8String value_11 = i.getUTF8String(8);
/* 239 */     isNullArgs_0[9] = false;
/* 240 */     valueArgs_0[9] = value_11;
/* 241 */
/* 242 */     UTF8String value_12 = i.getUTF8String(9);
/* 243 */     isNullArgs_0[10] = false;
/* 244 */     valueArgs_0[10] = value_12;
/* 245 */
/* 246 */     UTF8String value_13 = i.getUTF8String(10);
/* 247 */     isNullArgs_0[11] = false;
/* 248 */     valueArgs_0[11] = value_13;
/* 249 */
/* 250 */     UTF8String value_14 = i.getUTF8String(11);
/* 251 */     isNullArgs_0[12] = false;
/* 252 */     valueArgs_0[12] = value_14;
/* 253 */
/* 254 */     UTF8String value_15 = i.getUTF8String(12);
/* 255 */     isNullArgs_0[13] = false;
/* 256 */     valueArgs_0[13] = value_15;
/* 257 */
/* 258 */     UTF8String value_16 = i.getUTF8String(13);
/* 259 */     isNullArgs_0[14] = false;
/* 260 */     valueArgs_0[14] = value_16;
/* 261 */
/* 262 */   }
/* 263 */
/* 264 */
/* 265 */   private int varargBuildsConcatWs_0_1(InternalRow i, UTF8String [] array_0, int idxInVararg_0, boolean [] isNullArgs_0, Object [] valueArgs_0) {
/* 266 */
/* 267 */     array_0[idxInVararg_0 ++] = isNullArgs_0[7] ? (UTF8String) null : ((UTF8String) valueArgs_0[7]);array_0[idxInVararg_0 ++] = isNullArgs_0[8] ? (UTF8String) null : ((UTF8String) valueArgs_0[8]);array_0[idxInVararg_0 ++] = isNullArgs_0[9] ? (UTF8String) null : ((UTF8String) valueArgs_0[9]);array_0[idxInVararg_0 ++] = isNullArgs_0[10] ? (UTF8String) null : ((UTF8String) valueArgs_0[10]);array_0[idxInVararg_0 ++] = isNullArgs_0[11] ? (UTF8String) null : ((UTF8String) valueArgs_0[11]);array_0[idxInVararg_0 ++] = isNullArgs_0[12] ? (UTF8String) null : ((UTF8String) valueArgs_0[12]);array_0[idxInVararg_0 ++] = isNullArgs_0[13] ? (UTF8String) null : ((UTF8String) valueArgs_0[13]);array_0[idxInVararg_0 ++] = isNullArgs_0[14] ? (UTF8String) null : ((UTF8String) valueArgs_0[14]);array_0[idxInVararg_0 ++] = isNullArgs_0[15] ? (UTF8String) null : ((UTF8String) valueArgs_0[15]);array_0[idxInVararg_0 ++] = isNullArgs_0[16] ? (UTF8String) null : ((UTF8String) valueArgs_0[16]);array_0[idxInVararg_0 ++] = isNullArgs_0[17] ? (UTF8String) null : ((UTF8String) valueArgs_0[17]);
/* 268 */     return idxInVararg_0;
/* 269 */
/* 270 */   }
/* 271 */
/* 272 */ }
```

### Why are the changes needed?

The generated code in `concat_ws` fails to compile when all of the following conditions are met:

* Plenty of columns are provided as input of `concat_ws`.
* There's at least one column with array[string] type. (In other words, not all columns are string type.)
* Splitting methods is triggered in `splitExpressionsWithCurrentInputs`.
  * This is a bit tricky, because the method does not split methods under whole-stage codegen, and it is simply a no-op (the code is inlined) if the number of blocks to convert into methods is 1.

a0187cd6b5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala (L88-L195)

There are three parts of generated code in `concat_ws` (`codes`, `varargCounts`, `varargBuilds`), and each part tries to split itself into methods independently. However, `varargCounts` and `varargBuilds` refer to local variables declared by the generated code in `codes`, so the overall generated code fails to compile if any of the parts is actually split.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

New UTs added. (One for verification of the patch, another one for regression test)

Closes #28831 from HeartSaVioR/SPARK-31993.

Authored-by: Jungtaek Lim (HeartSaVioR) <kabhwan.opensource@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Jungtaek Lim (HeartSaVioR) 2020-06-19 06:01:06 +00:00 committed by Wenchen Fan
parent abc8ccc37b
commit 6fe3bf66eb
2 changed files with 106 additions and 17 deletions

View file

@ -118,47 +118,72 @@ case class ConcatWs(children: Seq[Expression])
boolean ${ev.isNull} = ${ev.value} == null;
""")
} else {
val isNullArgs = ctx.freshName("isNullArgs")
val valueArgs = ctx.freshName("valueArgs")
val array = ctx.freshName("array")
val varargNum = ctx.freshName("varargNum")
val idxVararg = ctx.freshName("idxInVararg")
val evals = children.map(_.genCode(ctx))
val (varargCount, varargBuild) = children.tail.zip(evals.tail).map { case (child, eval) =>
child.dataType match {
val (argBuild, varargCount, varargBuild) = children.tail.zip(evals.tail)
.zipWithIndex.map { case ((child, eval), idx) =>
val reprForIsNull = s"$isNullArgs[$idx]"
val reprForValue = s"$valueArgs[$idx]"
val arg =
s"""
${eval.code}
$reprForIsNull = ${eval.isNull};
$reprForValue = ${eval.value};
"""
val (varCount, varBuild) = child.dataType match {
case StringType =>
val reprForValueCast = s"((UTF8String) $reprForValue)"
("", // we count all the StringType arguments num at once below.
if (eval.isNull == TrueLiteral) {
""
} else {
s"$array[$idxVararg ++] = ${eval.isNull} ? (UTF8String) null : ${eval.value};"
})
if (eval.isNull == TrueLiteral) {
""
} else {
s"$array[$idxVararg ++] = $reprForIsNull ? (UTF8String) null : $reprForValueCast;"
})
case _: ArrayType =>
val reprForValueCast = s"((ArrayData) $reprForValue)"
val size = ctx.freshName("n")
if (eval.isNull == TrueLiteral) {
("", "")
} else {
// scalastyle:off line.size.limit
(s"""
if (!${eval.isNull}) {
$varargNum += ${eval.value}.numElements();
if (!$reprForIsNull) {
$varargNum += $reprForValueCast.numElements();
}
""",
s"""
if (!${eval.isNull}) {
final int $size = ${eval.value}.numElements();
s"""
if (!$reprForIsNull) {
final int $size = $reprForValueCast.numElements();
for (int j = 0; j < $size; j ++) {
$array[$idxVararg ++] = ${CodeGenerator.getValue(eval.value, StringType, "j")};
$array[$idxVararg ++] = ${CodeGenerator.getValue(reprForValueCast, StringType, "j")};
}
}
""")
// scalastyle:on line.size.limit
}
}
}.unzip
val codes = ctx.splitExpressionsWithCurrentInputs(evals.map(_.code.toString))
(arg, varCount, varBuild)
}.unzip3
val argBuilds = ctx.splitExpressionsWithCurrentInputs(
expressions = argBuild,
funcName = "initializeArgsArrays",
extraArguments = ("boolean []", isNullArgs) :: ("Object []", valueArgs) :: Nil
)
val varargCounts = ctx.splitExpressionsWithCurrentInputs(
expressions = varargCount,
funcName = "varargCountsConcatWs",
extraArguments = ("boolean []", isNullArgs) :: ("Object []", valueArgs) :: Nil,
returnType = "int",
makeSplitFunction = body =>
s"""
@ -171,7 +196,8 @@ case class ConcatWs(children: Seq[Expression])
val varargBuilds = ctx.splitExpressionsWithCurrentInputs(
expressions = varargBuild,
funcName = "varargBuildsConcatWs",
extraArguments = ("UTF8String []", array) :: ("int", idxVararg) :: Nil,
extraArguments = ("UTF8String []", array) :: ("int", idxVararg) ::
("boolean []", isNullArgs) :: ("Object []", valueArgs) :: Nil,
returnType = "int",
makeSplitFunction = body =>
s"""
@ -182,12 +208,15 @@ case class ConcatWs(children: Seq[Expression])
ev.copy(
code"""
$codes
boolean[] $isNullArgs = new boolean[${children.length - 1}];
Object[] $valueArgs = new Object[${children.length - 1}];
$argBuilds
int $varargNum = ${children.count(_.dataType == StringType) - 1};
int $idxVararg = 0;
$varargCounts
UTF8String[] $array = new UTF8String[$varargNum];
$varargBuilds
${evals.head.code}
UTF8String ${ev.value} = UTF8String.concatWs(${evals.head.value}, $array);
boolean ${ev.isNull} = ${ev.value} == null;
""")

View file

@ -18,6 +18,7 @@
package org.apache.spark.sql
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
@ -48,6 +49,65 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
Row("a||b"))
}
test("SPARK-31993: concat_ws in agg function with plenty of string/array types columns") {
withSQLConf(SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> "1024",
SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY") {
val (df, genColNames, genColValues) = prepareTestConcatWsColumns()
val groupedCols = Seq($"a") ++ genColNames.map(col)
val concatCols = Seq(collect_list($"b"), collect_list($"c")) ++ genColNames.map(col)
val df2 = df
.groupBy(groupedCols: _*)
.agg(concat_ws(",", concatCols: _*).as("con"))
.select("con")
val expected = Seq(
Row((Seq("b1", "b2") ++ genColValues).mkString(",")),
Row((Seq("b3", "b4") ++ genColValues).mkString(","))
)
checkAnswer(df2, expected)
}
}
// This test doesn't fail without SPARK-31993, but still be useful for regression test.
test("SPARK-31993: concat_ws in agg function with plenty of string types columns") {
withSQLConf(SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> "1024",
SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY") {
val (df, genColNames, genColValues) = prepareTestConcatWsColumns()
val groupedCols = Seq($"a") ++ genColNames.map(col)
val concatCols = groupedCols
val df2 = df
.groupBy(groupedCols: _*)
.agg(concat_ws(",", concatCols: _*).as("con"))
.select("con")
val expected = Seq(
Row((Seq("a") ++ genColValues).mkString(",")),
Row((Seq("b") ++ genColValues).mkString(","))
)
checkAnswer(df2, expected)
}
}
private def prepareTestConcatWsColumns(): (DataFrame, Seq[String], Seq[String]) = {
val genColNames = (1 to 30).map { idx => s"col_$idx" }
val genColValues = (1 to 30).map { _.toString }
val genCols = genColValues.map(lit)
val df = Seq[(String, String, String)](
("a", "b1", null),
("a", "b2", null),
("b", "b3", null),
("b", "b4", null))
.toDF("a", "b", "c")
.withColumns(genColNames, genCols)
(df, genColNames, genColValues)
}
test("string elt") {
val df = Seq[(String, String, String, Int)](("hello", "world", null, 15))
.toDF("a", "b", "c", "d")