[SPARK-32121][SHUFFLE] Support Windows OS in ExecutorDiskUtils

### What changes were proposed in this pull request?
Correct file seprate use in `ExecutorDiskUtils.createNormalizedInternedPathname` on Windows

### Why are the changes needed?
`ExternalShuffleBlockResolverSuite` failed on Windows, see detail at:
https://issues.apache.org/jira/browse/SPARK-32121

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
The existed test suite.

Closes #28940 from pan3793/SPARK-32121.

Lead-authored-by: pancheng <379377944@qq.com>
Co-authored-by: chengpan <cheng.pan@idiaoyan.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
pancheng 2020-07-02 19:21:11 +09:00 committed by HyukjinKwon
parent 3f7780d30d
commit 7fda184f0f
2 changed files with 30 additions and 10 deletions

View file

@ -23,11 +23,19 @@ import java.util.regex.Pattern;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang3.SystemUtils;
import org.apache.spark.network.util.JavaUtils;
public class ExecutorDiskUtils {
private static final Pattern MULTIPLE_SEPARATORS = Pattern.compile(File.separator + "{2,}");
private static final Pattern MULTIPLE_SEPARATORS;
static {
if (SystemUtils.IS_OS_WINDOWS) {
MULTIPLE_SEPARATORS = Pattern.compile("[/\\\\]+");
} else {
MULTIPLE_SEPARATORS = Pattern.compile("/{2,}");
}
}
/**
* Hashes a filename into the corresponding local directory, in a manner consistent with
@ -50,14 +58,18 @@ public class ExecutorDiskUtils {
* the internal code in java.io.File would normalize it later, creating a new "foo/bar"
* String copy. Unfortunately, we cannot just reuse the normalization code that java.io.File
* uses, since it is in the package-private class java.io.FileSystem.
*
* On Windows, separator "\" is used instead of "/".
*
* "\\" is a legal character in path name on Unix-like OS, but illegal on Windows.
*/
@VisibleForTesting
static String createNormalizedInternedPathname(String dir1, String dir2, String fname) {
String pathname = dir1 + File.separator + dir2 + File.separator + fname;
Matcher m = MULTIPLE_SEPARATORS.matcher(pathname);
pathname = m.replaceAll("/");
pathname = m.replaceAll(Matcher.quoteReplacement(File.separator));
// A single trailing slash needs to be taken care of separately
if (pathname.length() > 1 && pathname.endsWith("/")) {
if (pathname.length() > 1 && pathname.charAt(pathname.length() - 1) == File.separatorChar) {
pathname = pathname.substring(0, pathname.length() - 1);
}
return pathname.intern();

View file

@ -25,6 +25,7 @@ import java.nio.charset.StandardCharsets;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.io.CharStreams;
import org.apache.commons.lang3.SystemUtils;
import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo;
import org.apache.spark.network.util.MapConfigProvider;
import org.apache.spark.network.util.TransportConf;
@ -146,12 +147,19 @@ public class ExternalShuffleBlockResolverSuite {
@Test
public void testNormalizeAndInternPathname() {
assertPathsMatch("/foo", "bar", "baz", "/foo/bar/baz");
assertPathsMatch("//foo/", "bar/", "//baz", "/foo/bar/baz");
assertPathsMatch("foo", "bar", "baz///", "foo/bar/baz");
assertPathsMatch("/foo/", "/bar//", "/baz", "/foo/bar/baz");
assertPathsMatch("/", "", "", "/");
assertPathsMatch("/", "/", "/", "/");
String sep = File.separator;
String expectedPathname = sep + "foo" + sep + "bar" + sep + "baz";
assertPathsMatch("/foo", "bar", "baz", expectedPathname);
assertPathsMatch("//foo/", "bar/", "//baz", expectedPathname);
assertPathsMatch("/foo/", "/bar//", "/baz", expectedPathname);
assertPathsMatch("foo", "bar", "baz///", "foo" + sep + "bar" + sep + "baz");
assertPathsMatch("/", "", "", sep);
assertPathsMatch("/", "/", "/", sep);
if (SystemUtils.IS_OS_WINDOWS) {
assertPathsMatch("/foo\\/", "bar", "baz", expectedPathname);
} else {
assertPathsMatch("/foo\\/", "bar", "baz", sep + "foo\\" + sep + "bar" + sep + "baz");
}
}
private void assertPathsMatch(String p1, String p2, String p3, String expectedPathname) {
@ -160,6 +168,6 @@ public class ExternalShuffleBlockResolverSuite {
assertEquals(expectedPathname, normPathname);
File file = new File(normPathname);
String returnedPath = file.getPath();
assertTrue(normPathname == returnedPath);
assertEquals(normPathname, returnedPath);
}
}