[SPARK-10836] [SPARKR] Added sort(x, decreasing, col, ... ) method to DataFrame

The sort function can be used as an alternative to arrange(...).
It accepts the following arguments: x, the DataFrame to sort; decreasing, either a single TRUE/FALSE applied to every column or a vector of orderings with one entry per column; and the columns to sort on, given as string names.

for example:
sort(df, TRUE, "col1","col2","col3","col5") # for example, if we want to sort some of the columns in the same order

sort(df, decreasing=TRUE, "col1")
sort(df, decreasing=c(TRUE,FALSE), "col1","col2")

Author: Narine Kokhlikyan <narine.kokhlikyan@gmail.com>

Closes #8920 from NarineK/sparkrsort.
This commit is contained in:
Narine Kokhlikyan 2015-10-08 09:53:44 -07:00 committed by Shivaram Venkataraman
parent 56a9692fc0
commit e8f90d9dda
2 changed files with 49 additions and 9 deletions

View file

@ -1298,8 +1298,10 @@ setClassUnion("characterOrColumn", c("character", "Column"))
#' Sort a DataFrame by the specified column(s).
#'
#' @param x A DataFrame to be sorted.
#' @param col Either a Column object or character vector indicating the field to sort on
#' @param col A character or Column object vector indicating the fields to sort on
#' @param ... Additional sorting fields
#' @param decreasing A logical argument indicating sorting order for columns when
#' a character vector is specified for col
#' @return A DataFrame where all elements are sorted.
#' @rdname arrange
#' @name arrange
@ -1312,23 +1314,52 @@ setClassUnion("characterOrColumn", c("character", "Column"))
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlContext, path)
#' arrange(df, df$col1)
#' arrange(df, "col1")
#' arrange(df, asc(df$col1), desc(abs(df$col2)))
#' arrange(df, "col1", decreasing = TRUE)
#' arrange(df, "col1", "col2", decreasing = c(TRUE, FALSE))
#' }
# Sorts a DataFrame by Column arguments: the jc slot of every Column passed in
# (col plus anything in ...) is collected into a list, that list is handed to
# the "sort" method of x@sdf via callJMethod, and the result is wrapped with
# dataFrame().
# NOTE(review): this span appears to interleave removed and added diff lines
# (two signature lines, an if/else on class(col), and a repeated "sort" call);
# confirm against the actual file before changing any code here.
setMethod("arrange",
signature(x = "DataFrame", col = "characterOrColumn"),
signature(x = "DataFrame", col = "Column"),
function(x, col, ...) {
if (class(col) == "character") {
sdf <- callJMethod(x@sdf, "sort", col, list(...))
} else if (class(col) == "Column") {
# Extract the jc slot from each Column argument.
jcols <- lapply(list(col, ...), function(c) {
c@jc
})
sdf <- callJMethod(x@sdf, "sort", jcols)
}
sdf <- callJMethod(x@sdf, "sort", jcols)
# Wrap the value returned by the "sort" call.
dataFrame(sdf)
})
#' @rdname arrange
#' @export
setMethod("arrange",
          signature(x = "DataFrame", col = "character"),
          function(x, col, ..., decreasing = FALSE) {
            # Sorts `x` by columns given as names. `col` and every extra
            # argument in `...` name one sorting column each; `decreasing`
            # is either a single value applied to all of them or one value
            # per column. Returns whatever the re-dispatched arrange() call
            # on the generated column expressions returns.

            # all sorting columns, in the order they were supplied
            by <- list(col, ...)

            # Normalise the ordering flags once, up front, so that an
            # unusable value is reported with a clear message here instead
            # of failing inside the loop below with R's generic
            # "missing value where TRUE/FALSE needed" error.
            decreasing <- as.logical(decreasing)
            if (any(is.na(decreasing))) {
              stop("Argument 'decreasing' must contain only TRUE or FALSE values")
            }

            if (length(decreasing) == 1) {
              # a single flag is used for all columns
              decreasing <- rep(decreasing, length(by))
            } else if (length(decreasing) != length(by)) {
              stop("Arguments 'col' and 'decreasing' must have the same length")
            }

            # Build one ordering expression per column name, e.g.
            #   [[1]] Column Species ASC
            #   [[2]] Column Petal_Length DESC
            # Iterating over indices (rather than over `by` directly) keeps
            # the result unnamed, so do.call() passes it positionally.
            jcols <- lapply(seq_along(by), function(i) {
              if (decreasing[[i]]) {
                desc(getColumn(x, by[[i]]))
              } else {
                asc(getColumn(x, by[[i]]))
              }
            })

            # Wrap `x` in a list explicitly instead of relying on c()
            # coercing an S4 object, then re-dispatch arrange() on the
            # generated column expressions.
            do.call("arrange", c(list(x), jcols))
          })
#' @rdname arrange
#' @name orderby
setMethod("orderBy",

View file

@ -989,7 +989,7 @@ test_that("arrange() and orderBy() on a DataFrame", {
sorted <- arrange(df, df$age)
expect_equal(collect(sorted)[1,2], "Michael")
sorted2 <- arrange(df, "name")
sorted2 <- arrange(df, "name", decreasing = FALSE)
expect_equal(collect(sorted2)[2,"age"], 19)
sorted3 <- orderBy(df, asc(df$age))
@ -999,6 +999,15 @@ test_that("arrange() and orderBy() on a DataFrame", {
sorted4 <- orderBy(df, desc(df$name))
expect_equal(first(sorted4)$name, "Michael")
expect_equal(collect(sorted4)[3,"name"], "Andy")
sorted5 <- arrange(df, "age", "name", decreasing = TRUE)
expect_equal(collect(sorted5)[1,2], "Andy")
sorted6 <- arrange(df, "age","name", decreasing = c(T, F))
expect_equal(collect(sorted6)[1,2], "Andy")
sorted7 <- arrange(df, "name", decreasing = FALSE)
expect_equal(collect(sorted7)[2,"age"], 19)
})
test_that("filter() on a DataFrame", {