From adc8d687cee33316a4ec1e006efaea8e823491f6 Mon Sep 17 00:00:00 2001
From: Max Gekk
Date: Wed, 9 Sep 2020 10:29:58 +0900
Subject: [PATCH] [SPARK-32810][SQL][TESTS][FOLLOWUP] Check path globbing in
 JSON/CSV datasources v1 and v2

### What changes were proposed in this pull request?
In the PR, I propose to move the test `SPARK-32810: CSV and JSON data sources should be able to read files with escaped glob metacharacter in the paths` from `DataFrameReaderWriterSuite` to `CSVSuite` and to `JsonSuite`. This allows running the same test in `CSVv1Suite`/`CSVv2Suite` and in `JsonV1Suite`/`JsonV2Suite`.

### Why are the changes needed?
To improve test coverage by checking both the v1 and v2 implementations of the JSON/CSV datasources.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
By running the affected test suites:
```
$ build/sbt "sql/test:testOnly org.apache.spark.sql.execution.datasources.csv.*"
$ build/sbt "sql/test:testOnly org.apache.spark.sql.execution.datasources.json.*"
```

Closes #29684 from MaxGekk/globbing-paths-when-inferring-schema-dsv2.

Authored-by: Max Gekk
Signed-off-by: HyukjinKwon
---
 .../execution/datasources/csv/CSVSuite.scala  | 13 +++++++++++++
 .../datasources/json/JsonSuite.scala          | 13 +++++++++++++
 .../sql/test/DataFrameReaderWriterSuite.scala | 23 -----------------------
 3 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 378695d154..066259075d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -2407,6 +2407,19 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvDa
       }
     }
   }
+
+  test("SPARK-32810: CSV data source should be able to read files with " +
+    "escaped glob metacharacter in the paths") {
+    withTempDir { dir =>
+      val basePath = dir.getCanonicalPath
+      // test CSV writer / reader without specifying schema
+      val csvTableName = "[abc]"
+      spark.range(3).coalesce(1).write.csv(s"$basePath/$csvTableName")
+      val readback = spark.read
+        .csv(s"$basePath/${"""(\[|\]|\{|\})""".r.replaceAllIn(csvTableName, """\\$1""")}")
+      assert(readback.collect sameElements Array(Row("0"), Row("1"), Row("2")))
+    }
+  }
 }
 
 class CSVv1Suite extends CSVSuite {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 43b9967a32..d9270024d5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -2824,6 +2824,19 @@ abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJson
       }
     }
   }
+
+  test("SPARK-32810: JSON data source should be able to read files with " +
+    "escaped glob metacharacter in the paths") {
+    withTempDir { dir =>
+      val basePath = dir.getCanonicalPath
+      // test JSON writer / reader without specifying schema
+      val jsonTableName = "{def}"
+      spark.range(3).coalesce(1).write.json(s"$basePath/$jsonTableName")
+      val readback = spark.read
+        .json(s"$basePath/${"""(\[|\]|\{|\})""".r.replaceAllIn(jsonTableName, """\\$1""")}")
+      assert(readback.collect sameElements Array(Row(0), Row(1), Row(2)))
+    }
+  }
 }
 
 class JsonV1Suite extends JsonSuite {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
index 580a4ff8a6..c84d361024 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
@@ -1184,27 +1184,4 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with
     verifyLoadFails(df.write.option("path", path).parquet(path))
     verifyLoadFails(df.write.option("path", path).format("parquet").save(path))
   }
-
-  test("SPARK-32810: CSV and JSON data sources should be able to read files with " +
-    "escaped glob metacharacter in the paths") {
-    def escape(str: String): String = {
-      """(\[|\]|\{|\})""".r.replaceAllIn(str, """\\$1""")
-    }
-
-    withTempDir { dir =>
-      val basePath = dir.getCanonicalPath
-
-      // test CSV writer / reader without specifying schema
-      val csvTableName = "[abc]"
-      spark.range(3).coalesce(1).write.csv(s"$basePath/$csvTableName")
-      val csvDf = spark.read.csv(s"$basePath/${escape(csvTableName)}")
-      assert(csvDf.collect sameElements Array(Row("0"), Row("1"), Row("2")))
-
-      // test JSON writer / reader without specifying schema
-      val jsonTableName = "{def}"
-      spark.range(3).coalesce(1).write.json(s"$basePath/$jsonTableName")
-      val jsonDf = spark.read.json(s"$basePath/${escape(jsonTableName)}")
-      assert(jsonDf.collect sameElements Array(Row(0), Row(1), Row(2)))
-    }
-  }
 }
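
The escaping trick that both relocated tests inline is worth seeing in isolation. Spark resolves read paths as Hadoop-style glob patterns, in which `[...]` and `{...}` are metacharacters, so a literal directory name such as `[abc]` must have those characters backslash-escaped before it reaches `spark.read`. Below is a minimal, self-contained sketch of that escaping, using only the Scala standard library; the object and method names are illustrative and not part of Spark (the removed suite called its helper `escape`):

```scala
import scala.util.matching.Regex

object GlobEscapeSketch {
  // Match any single glob metacharacter the tests care about ([, ], {, })
  // and capture it in group 1.
  private val globMeta: Regex = """(\[|\]|\{|\})""".r

  // Prefix each captured metacharacter with a backslash. In the replacement
  // string, `\\` is a literal backslash and `$1` refers to capture group 1.
  def escapeGlobMeta(str: String): String =
    globMeta.replaceAllIn(str, """\\$1""")

  def main(args: Array[String]): Unit = {
    assert(escapeGlobMeta("[abc]") == """\[abc\]""")
    assert(escapeGlobMeta("{def}") == """\{def\}""")
    assert(escapeGlobMeta("plain") == "plain") // nothing to escape
    println(escapeGlobMeta("[abc]")) // prints \[abc\]
  }
}
```

Without the escaping, a path ending in `[abc]` would be treated as a character class matching a single-character directory named `a`, `b`, or `c`; since no such directory exists, the read would fail with a path-not-found error instead of picking up the files written under the literal `[abc]` directory. Note the sketch only escapes the four bracket metacharacters exercised by the tests, not `*` or `?`.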