spark-instrumented-optimizer/python/pyspark/sql/avro/functions.py

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
A collection of built-in Avro functions.
"""


from pyspark import since, SparkContext
from pyspark.rdd import ignore_unicode_prefix
from pyspark.sql.column import Column, _to_java_column
from pyspark.util import _print_missing_jar


@ignore_unicode_prefix
@since(3.0)
def from_avro(data, jsonFormatSchema, options=None):
    """
    Converts a binary column of Avro format into its corresponding catalyst value.
    The specified schema must match the read data, otherwise the behavior is undefined:
    it may fail or return arbitrary result.
    To deserialize the data with a compatible and evolved schema, the expected Avro schema can be
    set via the option avroSchema.

    Note: Avro is built-in but external data source module since Spark 2.4. Please deploy the
    application as per the deployment section of "Apache Avro Data Source Guide".

    :param data: the binary column.
    :param jsonFormatSchema: the avro schema in JSON string format.
    :param options: options to control how the Avro record is parsed. When omitted or None,
        an empty set of options is passed to the JVM function.
    :return: a :class:`Column` wrapping the result of the JVM-side ``from_avro`` call.

    >>> from pyspark.sql import Row
    >>> from pyspark.sql.avro.functions import from_avro, to_avro
    >>> data = [(1, Row(name='Alice', age=2))]
    >>> df = spark.createDataFrame(data, ("key", "value"))
    >>> avroDf = df.select(to_avro(df.value).alias("avro"))
    >>> avroDf.collect()
    [Row(avro=bytearray(b'\\x00\\x00\\x04\\x00\\nAlice'))]
    >>> jsonFormatSchema = '''{"type":"record","name":"topLevelRecord","fields":
    ...     [{"name":"avro","type":[{"type":"record","name":"value","namespace":"topLevelRecord",
    ...     "fields":[{"name":"age","type":["long","null"]},
    ...     {"name":"name","type":["string","null"]}]},"null"]}]}'''
    >>> avroDf.select(from_avro(avroDf.avro, jsonFormatSchema).alias("value")).collect()
    [Row(value=Row(avro=Row(age=2, name=u'Alice')))]
    """

    # A dict literal used as a default value is created once and shared by every call;
    # build a fresh empty dict per call instead so no state can leak between calls.
    if options is None:
        options = {}

    sc = SparkContext._active_spark_context
    try:
        jc = sc._jvm.org.apache.spark.sql.avro.functions.from_avro(
            _to_java_column(data), jsonFormatSchema, options)
    except TypeError as e:
        # This exact message is treated as the sign that the Avro classes could not be
        # resolved on the JVM side (presumably the spark-avro jar is missing), so a hint
        # is printed before the original error is re-raised unchanged.
        if str(e) == "'JavaPackage' object is not callable":
            _print_missing_jar("Avro", "avro", "avro", sc.version)
        raise
    return Column(jc)


@ignore_unicode_prefix
@since(3.0)
def to_avro(data, jsonFormatSchema=""):
    """
    Serializes a column into Avro binary format.

    Note: Avro is built-in but external data source module since Spark 2.4. Please deploy the
    application as per the deployment section of "Apache Avro Data Source Guide".

    :param data: the data column.
    :param jsonFormatSchema: user-specified output avro schema in JSON string format.
        When left as the empty string, the JVM function is called without a schema argument.
    :return: a :class:`Column` wrapping the result of the JVM-side ``to_avro`` call.

    >>> from pyspark.sql import Row
    >>> from pyspark.sql.avro.functions import to_avro
    >>> data = ['SPADES']
    >>> df = spark.createDataFrame(data, "string")
    >>> df.select(to_avro(df.value).alias("suite")).collect()
    [Row(suite=bytearray(b'\\x00\\x0cSPADES'))]
    >>> jsonFormatSchema = '''["null", {"type": "enum", "name": "value",
    ...     "symbols": ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]}]'''
    >>> df.select(to_avro(df.value, jsonFormatSchema).alias("suite")).collect()
    [Row(suite=bytearray(b'\\x02\\x00'))]
    """

    sc = SparkContext._active_spark_context
    try:
        # The schema is forwarded only when the caller supplied one; otherwise the
        # single-argument form of the JVM function is invoked.
        schema_args = () if jsonFormatSchema == "" else (jsonFormatSchema,)
        jvm_to_avro = sc._jvm.org.apache.spark.sql.avro.functions.to_avro
        jc = jvm_to_avro(_to_java_column(data), *schema_args)
    except TypeError as err:
        # This exact message is treated as the sign that the Avro classes could not be
        # resolved on the JVM side (presumably the spark-avro jar is missing); print a
        # hint, then let the original error propagate.
        jar_looks_missing = str(err) == "'JavaPackage' object is not callable"
        if jar_looks_missing:
            _print_missing_jar("Avro", "avro", "avro", sc.version)
        raise
    return Column(jc)


def _test():
    """
    Run the doctests of this module against a local Spark session.

    The Avro jar is looked up with ``search_jar``; when it is not found the process exits
    with status 0 after printing a notice. Otherwise the jar is prepended to
    ``PYSPARK_SUBMIT_ARGS`` via ``--jars`` before the session is created. The process exits
    with status -1 if any doctest fails.
    """
    import os
    import sys
    from pyspark.testing.utils import search_jar

    avro_jar = search_jar("external/avro", "spark-avro", "spark-avro")
    if avro_jar is None:
        # Nothing to test without the optional Avro module; report and exit successfully.
        print(
            "Skipping all Avro Python tests as the optional Avro project was "
            "not compiled into a JAR. To run these tests, "
            "you need to build Spark with 'build/sbt -Pavro package' or "
            "'build/mvn -Pavro package' before running this test.")
        sys.exit(0)

    # Put the jar in front of whatever submit arguments are already configured.
    current_submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
    jar_option = "--jars %s" % avro_jar
    os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join((jar_option, current_submit_args))

    import doctest
    from pyspark.sql import Row, SparkSession
    import pyspark.sql.avro.functions

    module_under_test = pyspark.sql.avro.functions
    session_builder = SparkSession.builder.master("local[4]")
    session_builder = session_builder.appName("sql.avro.functions tests")
    spark = session_builder.getOrCreate()

    # The doctests refer to a global named ``spark``; expose the session under that name
    # in a copy of the module namespace.
    globs = module_under_test.__dict__.copy()
    globs['spark'] = spark

    flags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
    outcome = doctest.testmod(module_under_test, globs=globs, optionflags=flags)
    spark.stop()
    if outcome.failed:
        sys.exit(-1)


# Run this module's doctests when the file is executed directly as a script.
if __name__ == "__main__":
    _test()
[SPARK-26856][PYSPARK] Python support for from_avro and to_avro APIs ## What changes were proposed in this pull request? Avro is built-in but external data source module since Spark 2.4 but `from_avro` and `to_avro` APIs not yet supported in pyspark. In this PR I've made them available from pyspark. ## How was this patch tested? Please see the python API examples what I've added. cd docs/ SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 jekyll build Manual webpage check. Closes #23797 from gaborgsomogyi/SPARK-26856. Authored-by: Gabor Somogyi <gabor.g.somogyi@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org> 2019-03-10 21:15:07 -04:00			`#`
			`# Licensed to the Apache Software Foundation (ASF) under one or more`
			`# contributor license agreements. See the NOTICE file distributed with`
			`# this work for additional information regarding copyright ownership.`
			`# The ASF licenses this file to You under the Apache License, Version 2.0`
			`# (the "License"); you may not use this file except in compliance with`
			`# the License. You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`#`

			`"""`
			`A collections of builtin avro functions`
			`"""`


			`from pyspark import since, SparkContext`
			`from pyspark.rdd import ignore_unicode_prefix`
			`from pyspark.sql.column import Column, _to_java_column`
			`from pyspark.util import _print_missing_jar`


			`@ignore_unicode_prefix`
			`@since(3.0)`
			`def from_avro(data, jsonFormatSchema, options={}):`
			`"""`
[SPARK-27506][SQL][FOLLOWUP] Use option `avroSchema` to specify an evolved schema in `from_avro` ### What changes were proposed in this pull request? This is a follow-up of https://github.com/apache/spark/pull/26780 In https://github.com/apache/spark/pull/26780, a new Avro data source option `actualSchema` is introduced for setting the original Avro schema in function `from_avro`, while the expected schema is supposed to be set in the parameter `jsonFormatSchema` of `from_avro`. However, there is another Avro data source option `avroSchema`. It is used for setting the expected schema in readiong and writing. This PR is to use the option `avroSchema` option for reading Avro data with an evolved schema and remove the new one `actualSchema` ### Why are the changes needed? Unify and simplify the Avro data source options. ### Does this PR introduce any user-facing change? Yes. To deserialize Avro data with an evolved schema, before changes: ``` from_avro('col, expectedSchema, ("actualSchema" -> actualSchema)) ``` After changes: ``` from_avro('col, actualSchema, ("avroSchema" -> expectedSchema)) ``` The second parameter is always the actual Avro schema after changes. ### How was this patch tested? Update the existing tests in https://github.com/apache/spark/pull/26780 Closes #27045 from gengliangwang/renameAvroOption. Authored-by: Gengliang Wang <gengliang.wang@databricks.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org> 2019-12-30 04:14:21 -05:00			`Converts a binary column of Avro format into its corresponding catalyst value.`
			`The specified schema must match the read data, otherwise the behavior is undefined:`
			`it may fail or return arbitrary result.`
			`To deserialize the data with a compatible and evolved schema, the expected Avro schema can be`
			`set via the option avroSchema.`
[SPARK-26856][PYSPARK] Python support for from_avro and to_avro APIs ## What changes were proposed in this pull request? Avro is built-in but external data source module since Spark 2.4 but `from_avro` and `to_avro` APIs not yet supported in pyspark. In this PR I've made them available from pyspark. ## How was this patch tested? Please see the python API examples what I've added. cd docs/ SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 jekyll build Manual webpage check. Closes #23797 from gaborgsomogyi/SPARK-26856. Authored-by: Gabor Somogyi <gabor.g.somogyi@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org> 2019-03-10 21:15:07 -04:00
			`Note: Avro is built-in but external data source module since Spark 2.4. Please deploy the`
			`application as per the deployment section of "Apache Avro Data Source Guide".`

			`:param data: the binary column.`
			`:param jsonFormatSchema: the avro schema in JSON string format.`
			`:param options: options to control how the Avro record is parsed.`

			`>>> from pyspark.sql import Row`
			`>>> from pyspark.sql.avro.functions import from_avro, to_avro`
			`>>> data = [(1, Row(name='Alice', age=2))]`
			`>>> df = spark.createDataFrame(data, ("key", "value"))`
			`>>> avroDf = df.select(to_avro(df.value).alias("avro"))`
			`>>> avroDf.collect()`
			`[Row(avro=bytearray(b'\\x00\\x00\\x04\\x00\\nAlice'))]`
			`>>> jsonFormatSchema = '''{"type":"record","name":"topLevelRecord","fields":`
			`... [{"name":"avro","type":[{"type":"record","name":"value","namespace":"topLevelRecord",`
			`... "fields":[{"name":"age","type":["long","null"]},`
			`... {"name":"name","type":["string","null"]}]},"null"]}]}'''`
			`>>> avroDf.select(from_avro(avroDf.avro, jsonFormatSchema).alias("value")).collect()`
			`[Row(value=Row(avro=Row(age=2, name=u'Alice')))]`
			`"""`

			`sc = SparkContext._active_spark_context`
			`try:`
			`jc = sc._jvm.org.apache.spark.sql.avro.functions.from_avro(`
			`_to_java_column(data), jsonFormatSchema, options)`
			`except TypeError as e:`
			`if str(e) == "'JavaPackage' object is not callable":`
			`_print_missing_jar("Avro", "avro", "avro", sc.version)`
			`raise`
			`return Column(jc)`


			`@ignore_unicode_prefix`
			`@since(3.0)`
[SPARK-28698][SQL] Support user-specified output schema in `to_avro` ## What changes were proposed in this pull request? The mapping of Spark schema to Avro schema is many-to-many. (See https://spark.apache.org/docs/latest/sql-data-sources-avro.html#supported-types-for-spark-sql---avro-conversion) The default schema mapping might not be exactly what users want. For example, by default, a "string" column is always written as "string" Avro type, but users might want to output the column as "enum" Avro type. With PR https://github.com/apache/spark/pull/21847, Spark supports user-specified schema in the batch writer. For the function `to_avro`, we should support user-specified output schema as well. ## How was this patch tested? Unit test. Closes #25419 from gengliangwang/to_avro. Authored-by: Gengliang Wang <gengliang.wang@databricks.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com> 2019-08-13 08:52:16 -04:00			`def to_avro(data, jsonFormatSchema=""):`
[SPARK-26856][PYSPARK] Python support for from_avro and to_avro APIs ## What changes were proposed in this pull request? Avro is built-in but external data source module since Spark 2.4 but `from_avro` and `to_avro` APIs not yet supported in pyspark. In this PR I've made them available from pyspark. ## How was this patch tested? Please see the python API examples what I've added. cd docs/ SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 jekyll build Manual webpage check. Closes #23797 from gaborgsomogyi/SPARK-26856. Authored-by: Gabor Somogyi <gabor.g.somogyi@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org> 2019-03-10 21:15:07 -04:00			`"""`
			`Converts a column into binary of avro format.`

			`Note: Avro is built-in but external data source module since Spark 2.4. Please deploy the`
			`application as per the deployment section of "Apache Avro Data Source Guide".`

			`:param data: the data column.`
[SPARK-28698][SQL] Support user-specified output schema in `to_avro` ## What changes were proposed in this pull request? The mapping of Spark schema to Avro schema is many-to-many. (See https://spark.apache.org/docs/latest/sql-data-sources-avro.html#supported-types-for-spark-sql---avro-conversion) The default schema mapping might not be exactly what users want. For example, by default, a "string" column is always written as "string" Avro type, but users might want to output the column as "enum" Avro type. With PR https://github.com/apache/spark/pull/21847, Spark supports user-specified schema in the batch writer. For the function `to_avro`, we should support user-specified output schema as well. ## How was this patch tested? Unit test. Closes #25419 from gengliangwang/to_avro. Authored-by: Gengliang Wang <gengliang.wang@databricks.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com> 2019-08-13 08:52:16 -04:00			`:param jsonFormatSchema: user-specified output avro schema in JSON string format.`
[SPARK-26856][PYSPARK] Python support for from_avro and to_avro APIs ## What changes were proposed in this pull request? Avro is built-in but external data source module since Spark 2.4 but `from_avro` and `to_avro` APIs not yet supported in pyspark. In this PR I've made them available from pyspark. ## How was this patch tested? Please see the python API examples what I've added. cd docs/ SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 jekyll build Manual webpage check. Closes #23797 from gaborgsomogyi/SPARK-26856. Authored-by: Gabor Somogyi <gabor.g.somogyi@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org> 2019-03-10 21:15:07 -04:00
			`>>> from pyspark.sql import Row`
			`>>> from pyspark.sql.avro.functions import to_avro`
[SPARK-28698][SQL] Support user-specified output schema in `to_avro` ## What changes were proposed in this pull request? The mapping of Spark schema to Avro schema is many-to-many. (See https://spark.apache.org/docs/latest/sql-data-sources-avro.html#supported-types-for-spark-sql---avro-conversion) The default schema mapping might not be exactly what users want. For example, by default, a "string" column is always written as "string" Avro type, but users might want to output the column as "enum" Avro type. With PR https://github.com/apache/spark/pull/21847, Spark supports user-specified schema in the batch writer. For the function `to_avro`, we should support user-specified output schema as well. ## How was this patch tested? Unit test. Closes #25419 from gengliangwang/to_avro. Authored-by: Gengliang Wang <gengliang.wang@databricks.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com> 2019-08-13 08:52:16 -04:00			`>>> data = ['SPADES']`
			`>>> df = spark.createDataFrame(data, "string")`
			`>>> df.select(to_avro(df.value).alias("suite")).collect()`
			`[Row(suite=bytearray(b'\\x00\\x0cSPADES'))]`
			`>>> jsonFormatSchema = '''["null", {"type": "enum", "name": "value",`
			`... "symbols": ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]}]'''`
			`>>> df.select(to_avro(df.value, jsonFormatSchema).alias("suite")).collect()`
			`[Row(suite=bytearray(b'\\x02\\x00'))]`
[SPARK-26856][PYSPARK] Python support for from_avro and to_avro APIs ## What changes were proposed in this pull request? Avro is built-in but external data source module since Spark 2.4 but `from_avro` and `to_avro` APIs not yet supported in pyspark. In this PR I've made them available from pyspark. ## How was this patch tested? Please see the python API examples what I've added. cd docs/ SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 jekyll build Manual webpage check. Closes #23797 from gaborgsomogyi/SPARK-26856. Authored-by: Gabor Somogyi <gabor.g.somogyi@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org> 2019-03-10 21:15:07 -04:00			`"""`

			`sc = SparkContext._active_spark_context`
			`try:`
[SPARK-28698][SQL] Support user-specified output schema in `to_avro` ## What changes were proposed in this pull request? The mapping of Spark schema to Avro schema is many-to-many. (See https://spark.apache.org/docs/latest/sql-data-sources-avro.html#supported-types-for-spark-sql---avro-conversion) The default schema mapping might not be exactly what users want. For example, by default, a "string" column is always written as "string" Avro type, but users might want to output the column as "enum" Avro type. With PR https://github.com/apache/spark/pull/21847, Spark supports user-specified schema in the batch writer. For the function `to_avro`, we should support user-specified output schema as well. ## How was this patch tested? Unit test. Closes #25419 from gengliangwang/to_avro. Authored-by: Gengliang Wang <gengliang.wang@databricks.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com> 2019-08-13 08:52:16 -04:00			`if jsonFormatSchema == "":`
			`jc = sc._jvm.org.apache.spark.sql.avro.functions.to_avro(_to_java_column(data))`
			`else:`
			`jc = sc._jvm.org.apache.spark.sql.avro.functions.to_avro(`
			`_to_java_column(data), jsonFormatSchema)`
[SPARK-26856][PYSPARK] Python support for from_avro and to_avro APIs ## What changes were proposed in this pull request? Avro is built-in but external data source module since Spark 2.4 but `from_avro` and `to_avro` APIs not yet supported in pyspark. In this PR I've made them available from pyspark. ## How was this patch tested? Please see the python API examples what I've added. cd docs/ SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 jekyll build Manual webpage check. Closes #23797 from gaborgsomogyi/SPARK-26856. Authored-by: Gabor Somogyi <gabor.g.somogyi@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org> 2019-03-10 21:15:07 -04:00			`except TypeError as e:`
			`if str(e) == "'JavaPackage' object is not callable":`
			`_print_missing_jar("Avro", "avro", "avro", sc.version)`
			`raise`
			`return Column(jc)`


			`def _test():`
			`import os`
			`import sys`
			`from pyspark.testing.utils import search_jar`
[SPARK-26856][PYSPARK][FOLLOWUP] Fix UT failure due to wrong patterns for Kinesis assembly ## What changes were proposed in this pull request? After [SPARK-26856](https://github.com/apache/spark/pull/23797), `Kinesis` Python UT fails with `Found multiple JARs` exception due to a wrong pattern. - https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/104171/console ``` Exception: Found multiple JARs: .../spark-streaming-kinesis-asl-assembly-3.0.0-SNAPSHOT.jar, .../spark-streaming-kinesis-asl-assembly_2.12-3.0.0-SNAPSHOT.jar; please remove all but one ``` It's because the pattern was changed in a wrong way. Original ```python kinesis_asl_assembly_dir, "target/scala-/%s-.jar" % name_prefix)) kinesis_asl_assembly_dir, "target/%s_.jar" % name_prefix)) ``` After SPARK-26856* ```python project_full_path, "target/scala-/%s.jar" % jar_name_prefix)) project_full_path, "target/%s.jar" % jar_name_prefix)) ``` The actual kinesis assembly jar files look like the followings. SBT Build* ``` -rw-r--r-- 1 dongjoon staff 87459461 Apr 1 19:01 spark-streaming-kinesis-asl-assembly-3.0.0-SNAPSHOT.jar -rw-r--r-- 1 dongjoon staff 309 Apr 1 18:58 spark-streaming-kinesis-asl-assembly_2.12-3.0.0-SNAPSHOT-tests.jar -rw-r--r-- 1 dongjoon staff 309 Apr 1 18:58 spark-streaming-kinesis-asl-assembly_2.12-3.0.0-SNAPSHOT.jar ``` MAVEN Build ``` -rw-r--r-- 1 dongjoon staff 8.6K Apr 1 18:55 spark-streaming-kinesis-asl-assembly_2.12-3.0.0-SNAPSHOT-sources.jar -rw-r--r-- 1 dongjoon staff 8.6K Apr 1 18:55 spark-streaming-kinesis-asl-assembly_2.12-3.0.0-SNAPSHOT-test-sources.jar -rw-r--r-- 1 dongjoon staff 8.7K Apr 1 18:55 spark-streaming-kinesis-asl-assembly_2.12-3.0.0-SNAPSHOT-tests.jar -rw-r--r-- 1 dongjoon staff 21M Apr 1 18:55 spark-streaming-kinesis-asl-assembly_2.12-3.0.0-SNAPSHOT.jar ``` In addition, after SPARK-26856, the utility function `search_jar` is shared to find `avro` jar files which are identical for both `sbt` and `mvn`. 
To sum up, The current jar pattern parameter cannot handle both `kinesis` and `avro` jars. This PR splits the single pattern into two patterns. ## How was this patch tested? Manual. Please note that this will remove only `Found multiple JARs` exception. Kinesis tests need more configurations to run locally. ``` $ build/sbt -Pkinesis-asl test:package streaming-kinesis-asl-assembly/assembly $ export ENABLE_KINESIS_TESTS=1 $ python/run-tests.py --python-executables python2.7 --module pyspark-streaming ``` Closes #24268 from dongjoon-hyun/SPARK-26856. Authored-by: Dongjoon Hyun <dhyun@apple.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org> 2019-04-02 01:52:56 -04:00			`avro_jar = search_jar("external/avro", "spark-avro", "spark-avro")`
[SPARK-26856][PYSPARK] Python support for from_avro and to_avro APIs ## What changes were proposed in this pull request? Avro is built-in but external data source module since Spark 2.4 but `from_avro` and `to_avro` APIs not yet supported in pyspark. In this PR I've made them available from pyspark. ## How was this patch tested? Please see the python API examples what I've added. cd docs/ SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 jekyll build Manual webpage check. Closes #23797 from gaborgsomogyi/SPARK-26856. Authored-by: Gabor Somogyi <gabor.g.somogyi@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org> 2019-03-10 21:15:07 -04:00			`if avro_jar is None:`
			`print(`
			`"Skipping all Avro Python tests as the optional Avro project was "`
			`"not compiled into a JAR. To run these tests, "`
			`"you need to build Spark with 'build/sbt -Pavro package' or "`
			`"'build/mvn -Pavro package' before running this test.")`
			`sys.exit(0)`
			`else:`
			`existing_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")`
			`jars_args = "--jars %s" % avro_jar`
			`os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join([jars_args, existing_args])`

			`import doctest`
			`from pyspark.sql import Row, SparkSession`
			`import pyspark.sql.avro.functions`
			`globs = pyspark.sql.avro.functions.__dict__.copy()`
			`spark = SparkSession.builder\`
			`.master("local[4]")\`
			`.appName("sql.avro.functions tests")\`
			`.getOrCreate()`
			`globs['spark'] = spark`
			`(failure_count, test_count) = doctest.testmod(`
			`pyspark.sql.avro.functions, globs=globs,`
			`optionflags=doctest.ELLIPSIS \| doctest.NORMALIZE_WHITESPACE)`
			`spark.stop()`
			`if failure_count:`
			`sys.exit(-1)`


			`if __name__ == "__main__":`
			`_test()`