[SPARK-7060][SQL] Add alias function to python dataframe

This pr tries to provide a way to let python users workaround https://issues.apache.org/jira/browse/SPARK-6231.

Author: Yin Huai <yhuai@databricks.com>

Closes #5634 from yhuai/pythonDFAlias and squashes the following commits:

8465acd [Yin Huai] Add an alias to a Python DF.
This commit is contained in:
Yin Huai 2015-04-23 18:52:55 -07:00 committed by Reynold Xin
parent 336f7f5373
commit 2d010f7afe

View file

@ -452,6 +452,20 @@ class DataFrame(object):
"""
return [f.name for f in self.schema.fields]
@ignore_unicode_prefix
def alias(self, alias):
"""Returns a new :class:`DataFrame` with an alias set.
>>> from pyspark.sql.functions import *
>>> df_as1 = df.alias("df_as1")
>>> df_as2 = df.alias("df_as2")
>>> joined_df = df_as1.join(df_as2, col("df_as1.name") == col("df_as2.name"), 'inner')
>>> joined_df.select(col("df_as1.name"), col("df_as2.name"), col("df_as2.age")).collect()
[Row(name=u'Alice', name=u'Alice', age=2), Row(name=u'Bob', name=u'Bob', age=5)]
"""
assert isinstance(alias, basestring), "alias should be a string"
return DataFrame(getattr(self._jdf, "as")(alias), self.sql_ctx)
@ignore_unicode_prefix
def join(self, other, joinExprs=None, joinType=None):
"""Joins with another :class:`DataFrame`, using the given join expression.