[SPARK-9130][SQL] throw exception when check equality between external and internal row

instead of return false, throw exception when check equality between external and internal row is better.

Author: Wenchen Fan <cloud0fan@outlook.com>

Closes #7460 from cloud-fan/row-compare and squashes the following commits:

8a20911 [Wenchen Fan] improve equals
402daa8 [Wenchen Fan] throw exception when check equality between external and internal row
This commit is contained in:
Wenchen Fan 2015-07-17 09:31:13 -07:00 committed by Reynold Xin
parent 441e072a22
commit 59d24c226a
3 changed files with 53 additions and 7 deletions

View file

@ -364,18 +364,33 @@ trait Row extends Serializable {
false false
} }
protected def canEqual(other: Any) = { /**
// Note that InternalRow overrides canEqual. These two canEqual's together makes sure that * Returns true if we can check equality for these 2 rows.
// comparing the external Row and InternalRow will always yield false. * Equality check between external row and internal row is not allowed.
* Here we do this check to prevent call `equals` on external row with internal row.
*/
protected def canEqual(other: Row) = {
// Note that `Row` is not only the interface of external row but also the parent
// of `InternalRow`, so we have to ensure `other` is not a internal row here to prevent
// call `equals` on external row with internal row.
// `InternalRow` overrides canEqual, and these two canEquals together makes sure that
// equality check between external Row and InternalRow will always fail.
// In the future, InternalRow should not extend Row. In that case, we can remove these // In the future, InternalRow should not extend Row. In that case, we can remove these
// canEqual methods. // canEqual methods.
other.isInstanceOf[Row] && !other.isInstanceOf[InternalRow] !other.isInstanceOf[InternalRow]
} }
override def equals(o: Any): Boolean = { override def equals(o: Any): Boolean = {
if (o == null || !canEqual(o)) return false if (!o.isInstanceOf[Row]) return false
val other = o.asInstanceOf[Row] val other = o.asInstanceOf[Row]
if (!canEqual(other)) {
throw new UnsupportedOperationException(
"cannot check equality between external and internal rows")
}
if (other eq null) return false
if (length != other.length) { if (length != other.length) {
return false return false
} }

View file

@ -54,7 +54,12 @@ abstract class InternalRow extends Row {
// A default implementation to change the return type // A default implementation to change the return type
override def copy(): InternalRow = this override def copy(): InternalRow = this
protected override def canEqual(other: Any) = other.isInstanceOf[InternalRow] /**
* Returns true if we can check equality for these 2 rows.
* Equality check between external row and internal row is not allowed.
* Here we do this check to prevent call `equals` on internal row with external row.
*/
protected override def canEqual(other: Row) = other.isInstanceOf[InternalRow]
// Custom hashCode function that matches the efficient code generated version. // Custom hashCode function that matches the efficient code generated version.
override def hashCode: Int = { override def hashCode: Int = {

View file

@ -17,6 +17,7 @@
package org.apache.spark.sql package org.apache.spark.sql
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{GenericRow, GenericRowWithSchema} import org.apache.spark.sql.catalyst.expressions.{GenericRow, GenericRowWithSchema}
import org.apache.spark.sql.types._ import org.apache.spark.sql.types._
import org.scalatest.{Matchers, FunSpec} import org.scalatest.{Matchers, FunSpec}
@ -68,4 +69,29 @@ class RowTest extends FunSpec with Matchers {
sampleRow.getValuesMap(List("col1", "col2")) shouldBe expected sampleRow.getValuesMap(List("col1", "col2")) shouldBe expected
} }
} }
describe("row equals") {
val externalRow = Row(1, 2)
val externalRow2 = Row(1, 2)
val internalRow = InternalRow(1, 2)
val internalRow2 = InternalRow(1, 2)
it("equality check for external rows") {
externalRow shouldEqual externalRow2
}
it("equality check for internal rows") {
internalRow shouldEqual internalRow2
}
it("throws an exception when check equality between external and internal rows") {
def assertError(f: => Unit): Unit = {
val e = intercept[UnsupportedOperationException](f)
e.getMessage.contains("cannot check equality between external and internal rows")
}
assertError(internalRow.equals(externalRow))
assertError(externalRow.equals(internalRow))
}
}
} }