[SPARK-10380][SQL] Fix confusing documentation examples for astype/drop_duplicates.
## What changes were proposed in this pull request? We have seen users getting confused by the documentation for astype and drop_duplicates, because the examples in them do not use these functions (but do use their aliases). This patch simply removes all examples for these functions and states that they are aliases. ## How was this patch tested? Existing PySpark unit tests. Closes #11543. Author: Reynold Xin <rxin@databricks.com> Closes #11698 from rxin/SPARK-10380.
This commit is contained in:
parent
4bf4609795
commit
8e0b030606
|
@ -37,6 +37,8 @@ Public classes:
|
|||
|
||||
"""
|
||||
|
||||
import types
|
||||
|
||||
from pyspark.conf import SparkConf
|
||||
from pyspark.context import SparkContext
|
||||
from pyspark.rdd import RDD
|
||||
|
@ -64,6 +66,24 @@ def since(version):
|
|||
return deco
|
||||
|
||||
|
||||
def copy_func(f, name=None, sinceversion=None, doc=None):
    """
    Returns a function with same code, globals, defaults, closure, and
    name (or provide a new name).
    """
    # Recipe adapted from
    # http://stackoverflow.com/questions/6527633/how-can-i-make-a-deepcopy-of-a-function-in-python
    duplicate = types.FunctionType(
        f.__code__, f.__globals__, name or f.__name__, f.__defaults__,
        f.__closure__)
    # Carry over any attributes that were attached to the original function
    # (note this copies the attribute dict shallowly).
    duplicate.__dict__.update(f.__dict__)
    if doc is not None:
        duplicate.__doc__ = doc
    if sinceversion is not None:
        duplicate = since(sinceversion)(duplicate)
    return duplicate
|
||||
|
||||
|
||||
# for back compatibility
|
||||
from pyspark.sql import SQLContext, HiveContext, Row
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ if sys.version >= '3':
|
|||
basestring = str
|
||||
long = int
|
||||
|
||||
from pyspark import since
|
||||
from pyspark import copy_func, since
|
||||
from pyspark.context import SparkContext
|
||||
from pyspark.rdd import ignore_unicode_prefix
|
||||
from pyspark.sql.types import *
|
||||
|
@ -337,7 +337,7 @@ class Column(object):
|
|||
raise TypeError("unexpected type: %s" % type(dataType))
|
||||
return Column(jc)
|
||||
|
||||
astype = cast
|
||||
astype = copy_func(cast, sinceversion=1.4, doc=":func:`astype` is an alias for :func:`cast`.")
|
||||
|
||||
@since(1.3)
|
||||
def between(self, lowerBound, upperBound):
|
||||
|
|
|
@ -26,7 +26,7 @@ if sys.version >= '3':
|
|||
else:
|
||||
from itertools import imap as map
|
||||
|
||||
from pyspark import since
|
||||
from pyspark import copy_func, since
|
||||
from pyspark.rdd import RDD, _load_from_socket, ignore_unicode_prefix
|
||||
from pyspark.serializers import BatchedSerializer, PickleSerializer, UTF8Deserializer
|
||||
from pyspark.storagelevel import StorageLevel
|
||||
|
@ -829,8 +829,6 @@ class DataFrame(object):
|
|||
raise TypeError("condition should be string or Column")
|
||||
return DataFrame(jdf, self.sql_ctx)
|
||||
|
||||
where = filter
|
||||
|
||||
@ignore_unicode_prefix
|
||||
@since(1.3)
|
||||
def groupBy(self, *cols):
|
||||
|
@ -1361,8 +1359,20 @@ class DataFrame(object):
|
|||
# Pandas compatibility
|
||||
##########################################################################################
|
||||
|
||||
groupby = groupBy
|
||||
drop_duplicates = dropDuplicates
|
||||
groupby = copy_func(
|
||||
groupBy,
|
||||
sinceversion=1.4,
|
||||
doc=":func:`groupby` is an alias for :func:`groupBy`.")
|
||||
|
||||
drop_duplicates = copy_func(
|
||||
dropDuplicates,
|
||||
sinceversion=1.4,
|
||||
doc=":func:`drop_duplicates` is an alias for :func:`dropDuplicates`.")
|
||||
|
||||
where = copy_func(
|
||||
filter,
|
||||
sinceversion=1.3,
|
||||
doc=":func:`where` is an alias for :func:`filter`.")
|
||||
|
||||
|
||||
def _to_scala_map(sc, jm):
|
||||
|
|
Loading…
Reference in a new issue