[SPARK-10836] [SPARKR] Added sort(x, decreasing, col, ... ) method to DataFrame
The sort function can be used as an alternative to arrange(...).
As arguments it accepts x (a DataFrame), decreasing (TRUE/FALSE, or a list of orderings for the columns), and the list of columns, represented as string names. For example:
sort(df, TRUE, "col1", "col2", "col3", "col5")
# for example, if we want to sort some of the columns in the same order
sort(df, decreasing=TRUE, "col1")
sort(df, decreasing=c(TRUE,FALSE), "col1","col2")
Author: Narine Kokhlikyan <narine.kokhlikyan@gmail.com>
Closes #8920 from NarineK/sparkrsort.
This commit is contained in:
parent
56a9692fc0
commit
e8f90d9dda
|
@ -1298,8 +1298,10 @@ setClassUnion("characterOrColumn", c("character", "Column"))
|
|||
#' Sort a DataFrame by the specified column(s).
|
||||
#'
|
||||
#' @param x A DataFrame to be sorted.
|
||||
#' @param col Either a Column object or character vector indicating the field to sort on
|
||||
#' @param col A character or Column object vector indicating the fields to sort on
|
||||
#' @param ... Additional sorting fields
|
||||
#' @param decreasing A logical argument indicating sorting order for columns when
|
||||
#' a character vector is specified for col
|
||||
#' @return A DataFrame where all elements are sorted.
|
||||
#' @rdname arrange
|
||||
#' @name arrange
|
||||
|
@ -1312,23 +1314,52 @@ setClassUnion("characterOrColumn", c("character", "Column"))
|
|||
#' path <- "path/to/file.json"
|
||||
#' df <- jsonFile(sqlContext, path)
|
||||
#' arrange(df, df$col1)
|
||||
#' arrange(df, "col1")
|
||||
#' arrange(df, asc(df$col1), desc(abs(df$col2)))
|
||||
#' arrange(df, "col1", decreasing = TRUE)
|
||||
#' arrange(df, "col1", "col2", decreasing = c(TRUE, FALSE))
|
||||
#' }
|
||||
setMethod("arrange",
|
||||
signature(x = "DataFrame", col = "characterOrColumn"),
|
||||
signature(x = "DataFrame", col = "Column"),
|
||||
function(x, col, ...) {
|
||||
if (class(col) == "character") {
|
||||
sdf <- callJMethod(x@sdf, "sort", col, list(...))
|
||||
} else if (class(col) == "Column") {
|
||||
jcols <- lapply(list(col, ...), function(c) {
|
||||
c@jc
|
||||
})
|
||||
sdf <- callJMethod(x@sdf, "sort", jcols)
|
||||
}
|
||||
|
||||
sdf <- callJMethod(x@sdf, "sort", jcols)
|
||||
dataFrame(sdf)
|
||||
})
|
||||
|
||||
#' @rdname arrange
|
||||
#' @export
|
||||
setMethod("arrange",
|
||||
signature(x = "DataFrame", col = "character"),
|
||||
function(x, col, ..., decreasing = FALSE) {
|
||||
|
||||
# all sorting columns
|
||||
by <- list(col, ...)
|
||||
|
||||
if (length(decreasing) == 1) {
|
||||
# in case only 1 boolean argument - decreasing value is specified,
|
||||
# it will be used for all columns
|
||||
decreasing <- rep(decreasing, length(by))
|
||||
} else if (length(decreasing) != length(by)) {
|
||||
stop("Arguments 'col' and 'decreasing' must have the same length")
|
||||
}
|
||||
|
||||
# builds a list of columns of type Column
|
||||
# example: [[1]] Column Species ASC
|
||||
# [[2]] Column Petal_Length DESC
|
||||
jcols <- lapply(seq_len(length(decreasing)), function(i){
|
||||
if (decreasing[[i]]) {
|
||||
desc(getColumn(x, by[[i]]))
|
||||
} else {
|
||||
asc(getColumn(x, by[[i]]))
|
||||
}
|
||||
})
|
||||
|
||||
do.call("arrange", c(x, jcols))
|
||||
})
|
||||
|
||||
#' @rdname arrange
|
||||
#' @name orderBy
|
||||
setMethod("orderBy",
|
||||
|
|
|
@ -989,7 +989,7 @@ test_that("arrange() and orderBy() on a DataFrame", {
|
|||
sorted <- arrange(df, df$age)
|
||||
expect_equal(collect(sorted)[1,2], "Michael")
|
||||
|
||||
sorted2 <- arrange(df, "name")
|
||||
sorted2 <- arrange(df, "name", decreasing = FALSE)
|
||||
expect_equal(collect(sorted2)[2,"age"], 19)
|
||||
|
||||
sorted3 <- orderBy(df, asc(df$age))
|
||||
|
@ -999,6 +999,15 @@ test_that("arrange() and orderBy() on a DataFrame", {
|
|||
sorted4 <- orderBy(df, desc(df$name))
|
||||
expect_equal(first(sorted4)$name, "Michael")
|
||||
expect_equal(collect(sorted4)[3,"name"], "Andy")
|
||||
|
||||
sorted5 <- arrange(df, "age", "name", decreasing = TRUE)
|
||||
expect_equal(collect(sorted5)[1,2], "Andy")
|
||||
|
||||
sorted6 <- arrange(df, "age","name", decreasing = c(T, F))
|
||||
expect_equal(collect(sorted6)[1,2], "Andy")
|
||||
|
||||
sorted7 <- arrange(df, "name", decreasing = FALSE)
|
||||
expect_equal(collect(sorted7)[2,"age"], 19)
|
||||
})
|
||||
|
||||
test_that("filter() on a DataFrame", {
|
||||
|
|
Loading…
Reference in a new issue