spark-instrumented-optimizer

History

Marek Novotny a6e883feb3 [SPARK-23935][SQL] Adding map_entries function ## What changes were proposed in this pull request? This PR adds `map_entries` function that returns an unordered array of all entries in the given map. ## How was this patch tested? New tests added into: - `CollectionExpressionSuite` - `DataFrameFunctionsSuite` ## CodeGen examples ### Primitive types ``` val df = Seq(Map(1 -> 5, 2 -> 6)).toDF("m") df.filter('m.isNotNull).select(map_entries('m)).debugCodegen ``` Result: ``` /* 042 */ boolean project_isNull_0 = false; /* 043 */ /* 044 */ ArrayData project_value_0 = null; /* 045 */ /* 046 */ final int project_numElements_0 = inputadapter_value_0.numElements(); /* 047 */ final ArrayData project_keys_0 = inputadapter_value_0.keyArray(); /* 048 */ final ArrayData project_values_0 = inputadapter_value_0.valueArray(); /* 049 */ /* 050 */ final long project_size_0 = UnsafeArrayData.calculateSizeOfUnderlyingByteArray( /* 051 */ project_numElements_0, /* 052 */ 32); /* 053 */ if (project_size_0 > 2147483632) { /* 054 */ final Object[] project_internalRowArray_0 = new Object[project_numElements_0]; /* 055 */ for (int z = 0; z < project_numElements_0; z++) { /* 056 */ project_internalRowArray_0[z] = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(new Object[]{project_keys_0.getInt(z), project_values_0.getInt(z)}); /* 057 */ } /* 058 */ project_value_0 = new org.apache.spark.sql.catalyst.util.GenericArrayData(project_internalRowArray_0); /* 059 */ /* 060 */ } else { /* 061 */ final byte[] project_arrayBytes_0 = new byte[(int)project_size_0]; /* 062 */ UnsafeArrayData project_unsafeArrayData_0 = new UnsafeArrayData(); /* 063 */ Platform.putLong(project_arrayBytes_0, 16, project_numElements_0); /* 064 */ project_unsafeArrayData_0.pointTo(project_arrayBytes_0, 16, (int)project_size_0); /* 065 */ /* 066 */ final int project_structsOffset_0 = UnsafeArrayData.calculateHeaderPortionInBytes(project_numElements_0) + project_numElements_0 * 8; /* 067 */ UnsafeRow project_unsafeRow_0 = new 
UnsafeRow(2); /* 068 */ for (int z = 0; z < project_numElements_0; z++) { /* 069 */ long offset = project_structsOffset_0 + z * 24L; /* 070 */ project_unsafeArrayData_0.setLong(z, (offset << 32) + 24L); /* 071 */ project_unsafeRow_0.pointTo(project_arrayBytes_0, 16 + offset, 24); /* 072 */ project_unsafeRow_0.setInt(0, project_keys_0.getInt(z)); /* 073 */ project_unsafeRow_0.setInt(1, project_values_0.getInt(z)); /* 074 */ } /* 075 */ project_value_0 = project_unsafeArrayData_0; /* 076 */ /* 077 */ } ``` ### Non-primitive types ``` val df = Seq(Map("a" -> "foo", "b" -> null)).toDF("m") df.filter('m.isNotNull).select(map_entries('m)).debugCodegen ``` Result: ``` /* 042 */ boolean project_isNull_0 = false; /* 043 */ /* 044 */ ArrayData project_value_0 = null; /* 045 */ /* 046 */ final int project_numElements_0 = inputadapter_value_0.numElements(); /* 047 */ final ArrayData project_keys_0 = inputadapter_value_0.keyArray(); /* 048 */ final ArrayData project_values_0 = inputadapter_value_0.valueArray(); /* 049 */ /* 050 */ final Object[] project_internalRowArray_0 = new Object[project_numElements_0]; /* 051 */ for (int z = 0; z < project_numElements_0; z++) { /* 052 */ project_internalRowArray_0[z] = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(new Object[]{project_keys_0.getUTF8String(z), project_values_0.getUTF8String(z)}); /* 053 */ } /* 054 */ project_value_0 = new org.apache.spark.sql.catalyst.util.GenericArrayData(project_internalRowArray_0); ``` Author: Marek Novotny <mn.mikke@gmail.com> Closes #21236 from mn-mikke/feature/array-api-map_entries-to-master.	2018-05-21 23:14:03 +09:00
..
src	[SPARK-23935][SQL] Adding map_entries function	2018-05-21 23:14:03 +09:00
pom.xml	[SPARK-19550][BUILD][FOLLOW-UP] Remove MaxPermSize for sql module	2018-01-15 07:49:34 -06:00

Marek Novotny a6e883feb3 [SPARK-23935][SQL] Adding map_entries function

## What changes were proposed in this pull request?

This PR adds the `map_entries` function, which returns an unordered array of all entries in the given map.

## How was this patch tested?

New tests were added to:
- `CollectionExpressionSuite`
- `DataFrameFunctionsSuite`

## CodeGen examples
### Primitive types
```
val df = Seq(Map(1 -> 5, 2 -> 6)).toDF("m")
df.filter('m.isNotNull).select(map_entries('m)).debugCodegen
```
Result:
```
/* 042 */         boolean project_isNull_0 = false;
/* 043 */
/* 044 */         ArrayData project_value_0 = null;
/* 045 */
/* 046 */         final int project_numElements_0 = inputadapter_value_0.numElements();
/* 047 */         final ArrayData project_keys_0 = inputadapter_value_0.keyArray();
/* 048 */         final ArrayData project_values_0 = inputadapter_value_0.valueArray();
/* 049 */
/* 050 */         final long project_size_0 = UnsafeArrayData.calculateSizeOfUnderlyingByteArray(
/* 051 */           project_numElements_0,
/* 052 */           32);
/* 053 */         if (project_size_0 > 2147483632) {
/* 054 */           final Object[] project_internalRowArray_0 = new Object[project_numElements_0];
/* 055 */           for (int z = 0; z < project_numElements_0; z++) {
/* 056 */             project_internalRowArray_0[z] = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(new Object[]{project_keys_0.getInt(z), project_values_0.getInt(z)});
/* 057 */           }
/* 058 */           project_value_0 = new org.apache.spark.sql.catalyst.util.GenericArrayData(project_internalRowArray_0);
/* 059 */
/* 060 */         } else {
/* 061 */           final byte[] project_arrayBytes_0 = new byte[(int)project_size_0];
/* 062 */           UnsafeArrayData project_unsafeArrayData_0 = new UnsafeArrayData();
/* 063 */           Platform.putLong(project_arrayBytes_0, 16, project_numElements_0);
/* 064 */           project_unsafeArrayData_0.pointTo(project_arrayBytes_0, 16, (int)project_size_0);
/* 065 */
/* 066 */           final int project_structsOffset_0 = UnsafeArrayData.calculateHeaderPortionInBytes(project_numElements_0) + project_numElements_0 * 8;
/* 067 */           UnsafeRow project_unsafeRow_0 = new UnsafeRow(2);
/* 068 */           for (int z = 0; z < project_numElements_0; z++) {
/* 069 */             long offset = project_structsOffset_0 + z * 24L;
/* 070 */             project_unsafeArrayData_0.setLong(z, (offset << 32) + 24L);
/* 071 */             project_unsafeRow_0.pointTo(project_arrayBytes_0, 16 + offset, 24);
/* 072 */             project_unsafeRow_0.setInt(0, project_keys_0.getInt(z));
/* 073 */             project_unsafeRow_0.setInt(1, project_values_0.getInt(z));
/* 074 */           }
/* 075 */           project_value_0 = project_unsafeArrayData_0;
/* 076 */
/* 077 */         }
```
### Non-primitive types
```
val df = Seq(Map("a" -> "foo", "b" -> null)).toDF("m")
df.filter('m.isNotNull).select(map_entries('m)).debugCodegen
```
Result:
```
/* 042 */         boolean project_isNull_0 = false;
/* 043 */
/* 044 */         ArrayData project_value_0 = null;
/* 045 */
/* 046 */         final int project_numElements_0 = inputadapter_value_0.numElements();
/* 047 */         final ArrayData project_keys_0 = inputadapter_value_0.keyArray();
/* 048 */         final ArrayData project_values_0 = inputadapter_value_0.valueArray();
/* 049 */
/* 050 */         final Object[] project_internalRowArray_0 = new Object[project_numElements_0];
/* 051 */         for (int z = 0; z < project_numElements_0; z++) {
/* 052 */           project_internalRowArray_0[z] = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(new Object[]{project_keys_0.getUTF8String(z), project_values_0.getUTF8String(z)});
/* 053 */         }
/* 054 */         project_value_0 = new org.apache.spark.sql.catalyst.util.GenericArrayData(project_internalRowArray_0);
```

Author: Marek Novotny <mn.mikke@gmail.com>

Closes #21236 from mn-mikke/feature/array-api-map_entries-to-master.

2018-05-21 23:14:03 +09:00

src

[SPARK-23935][SQL] Adding map_entries function

2018-05-21 23:14:03 +09:00

pom.xml

[SPARK-19550][BUILD][FOLLOW-UP] Remove MaxPermSize for sql module

2018-01-15 07:49:34 -06:00