[SPARK-21052][SQL][FOLLOW-UP] Add hash map metrics to join
## What changes were proposed in this pull request?

Remove `numHashCollisions` from `BytesToBytesMap`, and change `getAverageProbesPerLookup()` to `getAverageProbesPerLookup` as suggested.

## How was this patch tested?

Existing tests.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #18480 from viirya/SPARK-21052-followup.
This commit is contained in:
parent
eed9c4ef85
commit
fd13255225
|
@ -160,14 +160,10 @@ public final class BytesToBytesMap extends MemoryConsumer {
|
|||
|
||||
private final boolean enablePerfMetrics;
|
||||
|
||||
private long timeSpentResizingNs = 0;
|
||||
|
||||
private long numProbes = 0;
|
||||
|
||||
private long numKeyLookups = 0;
|
||||
|
||||
private long numHashCollisions = 0;
|
||||
|
||||
private long peakMemoryUsedBytes = 0L;
|
||||
|
||||
private final int initialCapacity;
|
||||
|
@ -489,10 +485,6 @@ public final class BytesToBytesMap extends MemoryConsumer {
|
|||
);
|
||||
if (areEqual) {
|
||||
return;
|
||||
} else {
|
||||
if (enablePerfMetrics) {
|
||||
numHashCollisions++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -859,16 +851,6 @@ public final class BytesToBytesMap extends MemoryConsumer {
|
|||
return peakMemoryUsedBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the total amount of time spent resizing this map (in nanoseconds).
|
||||
*/
|
||||
public long getTimeSpentResizingNs() {
|
||||
if (!enablePerfMetrics) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
return timeSpentResizingNs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the average number of probes per key lookup.
|
||||
*/
|
||||
|
@ -879,13 +861,6 @@ public final class BytesToBytesMap extends MemoryConsumer {
|
|||
return (1.0 * numProbes) / numKeyLookups;
|
||||
}
|
||||
|
||||
public long getNumHashCollisions() {
|
||||
if (!enablePerfMetrics) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
return numHashCollisions;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public int getNumDataPages() {
|
||||
return dataPages.size();
|
||||
|
@ -923,10 +898,6 @@ public final class BytesToBytesMap extends MemoryConsumer {
|
|||
void growAndRehash() {
|
||||
assert(longArray != null);
|
||||
|
||||
long resizeStartTime = -1;
|
||||
if (enablePerfMetrics) {
|
||||
resizeStartTime = System.nanoTime();
|
||||
}
|
||||
// Store references to the old data structures to be used when we re-hash
|
||||
final LongArray oldLongArray = longArray;
|
||||
final int oldCapacity = (int) oldLongArray.size() / 2;
|
||||
|
@ -951,9 +922,5 @@ public final class BytesToBytesMap extends MemoryConsumer {
|
|||
longArray.set(newPos * 2 + 1, hashcode);
|
||||
}
|
||||
freeArray(oldLongArray);
|
||||
|
||||
if (enablePerfMetrics) {
|
||||
timeSpentResizingNs += System.nanoTime() - resizeStartTime;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -215,7 +215,7 @@ trait HashJoin {
|
|||
|
||||
// At the end of the task, we update the avg hash probe.
|
||||
TaskContext.get().addTaskCompletionListener(_ =>
|
||||
avgHashProbe.set(hashed.getAverageProbesPerLookup()))
|
||||
avgHashProbe.set(hashed.getAverageProbesPerLookup))
|
||||
|
||||
val resultProj = createResultProjection
|
||||
joinedIter.map { r =>
|
||||
|
|
|
@ -83,7 +83,7 @@ private[execution] sealed trait HashedRelation extends KnownSizeEstimation {
|
|||
/**
|
||||
* Returns the average number of probes per key lookup.
|
||||
*/
|
||||
def getAverageProbesPerLookup(): Double
|
||||
def getAverageProbesPerLookup: Double
|
||||
}
|
||||
|
||||
private[execution] object HashedRelation {
|
||||
|
@ -280,7 +280,7 @@ private[joins] class UnsafeHashedRelation(
|
|||
read(in.readInt, in.readLong, in.readBytes)
|
||||
}
|
||||
|
||||
override def getAverageProbesPerLookup(): Double = binaryMap.getAverageProbesPerLookup()
|
||||
override def getAverageProbesPerLookup: Double = binaryMap.getAverageProbesPerLookup
|
||||
}
|
||||
|
||||
private[joins] object UnsafeHashedRelation {
|
||||
|
@ -776,7 +776,7 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap
|
|||
/**
|
||||
* Returns the average number of probes per key lookup.
|
||||
*/
|
||||
def getAverageProbesPerLookup(): Double = numProbes.toDouble / numKeyLookups
|
||||
def getAverageProbesPerLookup: Double = numProbes.toDouble / numKeyLookups
|
||||
}
|
||||
|
||||
private[joins] class LongHashedRelation(
|
||||
|
@ -829,7 +829,7 @@ private[joins] class LongHashedRelation(
|
|||
map = in.readObject().asInstanceOf[LongToUnsafeRowMap]
|
||||
}
|
||||
|
||||
override def getAverageProbesPerLookup(): Double = map.getAverageProbesPerLookup()
|
||||
override def getAverageProbesPerLookup: Double = map.getAverageProbesPerLookup
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in a new issue