From d8cbef1abfc8cd88503757e235cc278004cb9a2d Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Mon, 18 Jan 2021 11:36:10 -0600 Subject: [PATCH] [SPARK-34093][ML] param maxDepth should check upper bound ### What changes were proposed in this pull request? Update the `ParamValidators` of `maxDepth` to also check the upper bound (valid range is [0, 30]). ### Why are the changes needed? The current implementation of tree models only supports `maxDepth` <= 30. ### Does this PR introduce _any_ user-facing change? Yes. If `maxDepth` > 30, it now fails quickly at parameter validation. ### How was this patch tested? Existing test suites. Closes #31163 from zhengruifeng/param_maxDepth_upbound. Authored-by: Ruifeng Zheng Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/ml/tree/treeParams.scala | 5 +++-- python/pyspark/ml/tree.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala index 19ea8ae477..768e14f4b7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala @@ -60,8 +60,9 @@ private[ml] trait DecisionTreeParams extends PredictorParams */ final val maxDepth: IntParam = new IntParam(this, "maxDepth", "Maximum depth of the tree. (Nonnegative)" + - " E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.", - ParamValidators.gtEq(0)) + " E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes."
+ + " Must be in range [0, 30].", + ParamValidators.inRange(0, 30)) /** * Maximum number of bins used for discretizing continuous features and for choosing how to split diff --git a/python/pyspark/ml/tree.py b/python/pyspark/ml/tree.py index dfb24a2295..7ddeb097c4 100644 --- a/python/pyspark/ml/tree.py +++ b/python/pyspark/ml/tree.py @@ -67,7 +67,8 @@ class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): typeConverter=TypeConverters.toString) maxDepth = Param(Params._dummy(), "maxDepth", "Maximum depth of the tree. (>= 0) E.g., " + - "depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.", + "depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. " + + "Must be in range [0, 30].", typeConverter=TypeConverters.toInt) maxBins = Param(Params._dummy(), "maxBins", "Max number of bins for discretizing continuous " +