diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index bd3411d8d7..1e85551990 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2491,6 +2491,16 @@ class Dataset[T] private[sql](
     }
   }

+  /**
+   * Returns a new Dataset by updating an existing column with metadata.
+   *
+   * @group untypedrel
+   * @since 3.3.0
+   */
+  def withMetadata(columnName: String, metadata: Metadata): DataFrame = {
+    withColumn(columnName, col(columnName), metadata)
+  }
+
   /**
    * Returns a new Dataset with a column dropped. This is a no-op if schema doesn't contain
    * column name.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index bf161a1080..54fb90aa76 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -702,6 +702,18 @@ class DataFrameSuite extends QueryTest
       "The size of column names: 2 isn't equal to the size of metadata elements: 1"))
   }

+  test("SPARK-36642: withMetadata: replace metadata of a column") {
+    val metadata = new MetadataBuilder().putLong("key", 1L).build()
+    val df1 = sparkContext.parallelize(Array(1, 2, 3)).toDF("x")
+    val df2 = df1.withMetadata("x", metadata)
+    assert(df2.schema(0).metadata === metadata)
+
+    val err = intercept[AnalysisException] {
+      df1.withMetadata("x1", metadata)
+    }
+    assert(err.getMessage.contains("Cannot resolve column name"))
+  }
+
   test("replace column using withColumn") {
     val df2 = sparkContext.parallelize(Array(1, 2, 3)).toDF("x")
     val df3 = df2.withColumn("x", df2("x") + 1)
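
For context, a minimal usage sketch of the new API is below. It assumes a build that includes this patch (i.e. Spark 3.3.0+); the object name, app name, and column metadata key are illustrative only.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.MetadataBuilder

object WithMetadataExample {
  def main(args: Array[String]): Unit = {
    // Local session purely for demonstration.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("withMetadata-example")
      .getOrCreate()
    import spark.implicits._

    val df = Seq(1, 2, 3).toDF("x")

    // Build metadata and attach it to the existing column "x".
    // The column's values are unchanged; only its schema metadata is replaced.
    val meta = new MetadataBuilder().putString("comment", "row id").build()
    val annotated = df.withMetadata("x", meta)

    // The metadata is visible on the column's StructField.
    println(annotated.schema("x").metadata)  // {"comment":"row id"}

    spark.stop()
  }
}
```

Calling `withMetadata` on a column name that does not exist raises an `AnalysisException`, as exercised by the new test above.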