b32b2123dd
## What changes were proposed in this pull request? This pull-request exclusively includes the class splitting feature described in #16648. When code for a given class would grow beyond 1600k bytes, a private, nested sub-class is generated into which subsequent functions are inlined. Additional sub-classes are generated as the code threshold is met subsequent times. This code includes 3 changes: 1. Includes helper maps, lists, and functions for keeping track of sub-classes during code generation (included in the `CodeGenerator` class). These helper functions allow nested classes and split functions to be initialized/declared/inlined to the appropriate locations in the various projection classes. 2. Changes `addNewFunction` to return a string to support instances where a split function is inlined to a nested class and not the outer class (and so must be invoked using the class-qualified name). Uses of `addNewFunction` throughout the codebase are modified so that the returned name is properly used. 3. Removes instances of the `this` keyword when used on data inside generated classes. All state declared in the outer class is by default global and accessible to the nested classes. However, if a reference to global state in a nested class is prepended with the `this` keyword, it would attempt to reference state belonging to the nested class (which would not exist), rather than the correct variable belonging to the outer class. ## How was this patch tested? Added a test case to the `GeneratedProjectionSuite` that increases the number of columns tested in various projections to a threshold that would previously have triggered a `JaninoRuntimeException` for the Constant Pool. Note: This PR does not address the second Constant Pool issue with code generation (also mentioned in #16648): excess global mutable state. A second PR may be opened to resolve that issue. Author: ALeksander Eskilson <alek.eskilson@cerner.com> Closes #18075 from bdrillard/class_splitting_only.
159 lines
5.6 KiB
XML
159 lines
5.6 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
~ Licensed to the Apache Software Foundation (ASF) under one or more
|
|
~ contributor license agreements. See the NOTICE file distributed with
|
|
~ this work for additional information regarding copyright ownership.
|
|
~ The ASF licenses this file to You under the Apache License, Version 2.0
|
|
~ (the "License"); you may not use this file except in compliance with
|
|
~ the License. You may obtain a copy of the License at
|
|
~
|
|
~ http://www.apache.org/licenses/LICENSE-2.0
|
|
~
|
|
~ Unless required by applicable law or agreed to in writing, software
|
|
~ distributed under the License is distributed on an "AS IS" BASIS,
|
|
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
~ See the License for the specific language governing permissions and
|
|
~ limitations under the License.
|
|
-->
|
|
|
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
<modelVersion>4.0.0</modelVersion>
|
|
<parent>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-parent_2.11</artifactId>
|
|
<version>2.3.0-SNAPSHOT</version>
|
|
<relativePath>../../pom.xml</relativePath>
|
|
</parent>
|
|
|
|
<artifactId>spark-catalyst_2.11</artifactId>
|
|
<packaging>jar</packaging>
|
|
<name>Spark Project Catalyst</name>
|
|
<url>http://spark.apache.org/</url>
|
|
<properties>
|
|
<sbt.project.name>catalyst</sbt.project.name>
|
|
</properties>
|
|
|
|
<dependencies>
|
|
<dependency>
|
|
<groupId>org.scala-lang</groupId>
|
|
<artifactId>scala-reflect</artifactId>
|
|
</dependency>
|
|
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
<type>test-jar</type>
|
|
<scope>test</scope>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
|
</dependency>
|
|
|
|
<!--
|
|
This spark-tags test-dep is needed even though it isn't used in this module, otherwise testing-cmds that exclude
|
|
them will yield errors.
|
|
-->
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-tags_${scala.binary.version}</artifactId>
|
|
<type>test-jar</type>
|
|
<scope>test</scope>
|
|
</dependency>
|
|
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-unsafe_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.spark</groupId>
|
|
<artifactId>spark-sketch_${scala.binary.version}</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.scalacheck</groupId>
|
|
<artifactId>scalacheck_${scala.binary.version}</artifactId>
|
|
<scope>test</scope>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.codehaus.janino</groupId>
|
|
<artifactId>janino</artifactId>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.codehaus.janino</groupId>
|
|
<artifactId>commons-compiler</artifactId>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.antlr</groupId>
|
|
<artifactId>antlr4-runtime</artifactId>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>commons-codec</groupId>
|
|
<artifactId>commons-codec</artifactId>
|
|
</dependency>
|
|
</dependencies>
|
|
<build>
|
|
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
|
|
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
|
|
<plugins>
|
|
<!--
|
|
This plugin forces the generation of jar containing catalyst test classes,
|
|
so that the tests classes of external modules can use them. The two execution profiles
|
|
are necessary - first one for 'mvn package', second one for 'mvn test-compile'. Ideally,
|
|
'mvn compile' should not compile test classes and therefore should not need this.
|
|
However, an open Maven bug (http://jira.codehaus.org/browse/MNG-3559)
|
|
causes the compilation to fail if catalyst test-jar is not generated. Hence, the
|
|
second execution profile for 'mvn test-compile'.
|
|
-->
|
|
<plugin>
|
|
<groupId>org.apache.maven.plugins</groupId>
|
|
<artifactId>maven-jar-plugin</artifactId>
|
|
<executions>
|
|
<execution>
|
|
<goals>
|
|
<goal>test-jar</goal>
|
|
</goals>
|
|
</execution>
|
|
<execution>
|
|
<id>test-jar-on-test-compile</id>
|
|
<phase>test-compile</phase>
|
|
<goals>
|
|
<goal>test-jar</goal>
|
|
</goals>
|
|
</execution>
|
|
</executions>
|
|
</plugin>
|
|
<plugin>
|
|
<groupId>org.scalatest</groupId>
|
|
<artifactId>scalatest-maven-plugin</artifactId>
|
|
<configuration>
|
|
<argLine>-Xmx4g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
|
|
</configuration>
|
|
</plugin>
|
|
<plugin>
|
|
<groupId>org.antlr</groupId>
|
|
<artifactId>antlr4-maven-plugin</artifactId>
|
|
<executions>
|
|
<execution>
|
|
<goals>
|
|
<goal>antlr4</goal>
|
|
</goals>
|
|
</execution>
|
|
</executions>
|
|
<configuration>
|
|
<visitor>true</visitor>
|
|
<sourceDirectory>../catalyst/src/main/antlr4</sourceDirectory>
|
|
</configuration>
|
|
</plugin>
|
|
</plugins>
|
|
</build>
|
|
</project>
|