From c6e574213d47357aefc82347b73d925de47140b5 Mon Sep 17 00:00:00 2001
From: Davies Liu
Date: Sat, 23 May 2015 09:07:14 -0700
Subject: [PATCH] [SPARK-7840] add insertInto() to Writer

Add tests later.

Author: Davies Liu

Closes #6375 from davies/insertInto and squashes the following commits:

826423e [Davies Liu] add insertInto() to Writer

(cherry picked from commit be47af1bdba469f84775c2b5936f8cb956c7c02b)
Signed-off-by: Davies Liu
---
 python/pyspark/sql/dataframe.py  |  2 +-
 python/pyspark/sql/readwriter.py | 22 +++++++++++++++-------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 55cad8238e..936487519a 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -163,7 +163,7 @@ class DataFrame(object):
 
         Optionally overwriting any existing data.
         """
-        self._jdf.insertInto(tableName, overwrite)
+        self.write.insertInto(tableName, overwrite)
 
     @since(1.3)
     def saveAsTable(self, tableName, source=None, mode="error", **options):
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 02b3aab2b1..b6fd413bec 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -226,17 +226,25 @@ class DataFrameWriter(object):
         else:
             jwrite.save(path)
 
+    def insertInto(self, tableName, overwrite=False):
+        """
+        Inserts the content of the :class:`DataFrame` into the specified table.
+        It requires that the schema of the :class:`DataFrame` is the same as the
+        schema of the table.
+
+        Optionally overwriting any existing data.
+        """
+        self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName)
+
     @since(1.4)
     def saveAsTable(self, name, format=None, mode="error", **options):
         """
-        Saves the contents of this :class:`DataFrame` to a data source as a table.
+        Saves the content of the :class:`DataFrame` as the specified table.
 
-        The data source is specified by the ``source`` and a set of ``options``.
-        If ``source`` is not specified, the default data source configured by
-        ``spark.sql.sources.default`` will be used.
-
-        Additionally, mode is used to specify the behavior of the saveAsTable operation when
-        table already exists in the data source. There are four modes:
+        In the case the table already exists, the behavior of this function depends on the
+        save mode, specified by the `mode` function (defaults to throwing an exception).
+        When `mode` is `Overwrite`, the schema of the :class:`DataFrame` does not need to be
+        the same as that of the existing table.
 
         * `append`: Append contents of this :class:`DataFrame` to existing data.
         * `overwrite`: Overwrite existing data.
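
For reference, a minimal usage sketch of the new DataFrameWriter.insertInto() added by this patch (not part of the diff; the HiveContext setup, the sample rows, and the table name "people" are illustrative assumptions, not values from the patch):

    from pyspark import SparkContext
    from pyspark.sql import HiveContext

    sc = SparkContext(appName="insertInto-example")
    sqlContext = HiveContext(sc)  # insertInto targets tables known to the metastore

    # Hypothetical DataFrame; the target table "people" is assumed to already
    # exist with a schema matching the DataFrame's columns.
    df = sqlContext.createDataFrame([(1, "Alice"), (2, "Bob")], ["id", "name"])

    # Appends the rows of df to the existing table.
    df.write.insertInto("people")

    # overwrite=True maps to mode("overwrite") under the hood and replaces the
    # existing data instead of appending to it.
    df.write.insertInto("people", overwrite=True)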