[SPARK-15109][SQL] Accept Dataset[_] in joins
## What changes were proposed in this pull request? This patch changes the join API in Dataset so they can accept any Dataset, rather than just DataFrames. ## How was this patch tested? N/A. Author: Reynold Xin <rxin@databricks.com> Closes #12886 from rxin/SPARK-15109.
This commit is contained in:
parent
e597ec6f1c
commit
d864c55cf8
|
@ -564,7 +564,7 @@ class Dataset[T] private[sql](
|
|||
* @group untypedrel
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def join(right: DataFrame): DataFrame = withPlan {
|
||||
def join(right: Dataset[_]): DataFrame = withPlan {
|
||||
Join(logicalPlan, right.logicalPlan, joinType = Inner, None)
|
||||
}
|
||||
|
||||
|
@ -589,7 +589,7 @@ class Dataset[T] private[sql](
|
|||
* @group untypedrel
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def join(right: DataFrame, usingColumn: String): DataFrame = {
|
||||
def join(right: Dataset[_], usingColumn: String): DataFrame = {
|
||||
join(right, Seq(usingColumn))
|
||||
}
|
||||
|
||||
|
@ -614,7 +614,7 @@ class Dataset[T] private[sql](
|
|||
* @group untypedrel
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def join(right: DataFrame, usingColumns: Seq[String]): DataFrame = {
|
||||
def join(right: Dataset[_], usingColumns: Seq[String]): DataFrame = {
|
||||
join(right, usingColumns, "inner")
|
||||
}
|
||||
|
||||
|
@ -635,7 +635,7 @@ class Dataset[T] private[sql](
|
|||
* @group untypedrel
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def join(right: DataFrame, usingColumns: Seq[String], joinType: String): DataFrame = {
|
||||
def join(right: Dataset[_], usingColumns: Seq[String], joinType: String): DataFrame = {
|
||||
// Analyze the self join. The assumption is that the analyzer will disambiguate left vs right
|
||||
// by creating a new instance for one of the branch.
|
||||
val joined = sparkSession.executePlan(
|
||||
|
@ -663,7 +663,7 @@ class Dataset[T] private[sql](
|
|||
* @group untypedrel
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def join(right: DataFrame, joinExprs: Column): DataFrame = join(right, joinExprs, "inner")
|
||||
def join(right: Dataset[_], joinExprs: Column): DataFrame = join(right, joinExprs, "inner")
|
||||
|
||||
/**
|
||||
* Join with another [[DataFrame]], using the given join expression. The following performs
|
||||
|
@ -686,7 +686,7 @@ class Dataset[T] private[sql](
|
|||
* @group untypedrel
|
||||
* @since 2.0.0
|
||||
*/
|
||||
def join(right: DataFrame, joinExprs: Column, joinType: String): DataFrame = {
|
||||
def join(right: Dataset[_], joinExprs: Column, joinType: String): DataFrame = {
|
||||
// Note that in this function, we introduce a hack in the case of self-join to automatically
|
||||
// resolve ambiguous join conditions into ones that might make sense [SPARK-6231].
|
||||
// Consider this case: df.join(df, df("key") === df("key"))
|
||||
|
|
|
@ -931,8 +931,8 @@ object functions {
|
|||
* @group normal_funcs
|
||||
* @since 1.5.0
|
||||
*/
|
||||
def broadcast(df: DataFrame): DataFrame = {
|
||||
Dataset.ofRows(df.sparkSession, BroadcastHint(df.logicalPlan))
|
||||
def broadcast[T](df: Dataset[T]): Dataset[T] = {
|
||||
Dataset[T](df.sparkSession, BroadcastHint(df.logicalPlan))(df.unresolvedTEncoder)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in a new issue