[SPARK-31013][CORE][WEBUI] InMemoryStore: improve removeAllByIndexValues over natural key index

### What changes were proposed in this pull request?

The method `removeAllByIndexValues` in KVStore is to delete all the objects which have certain values in the given index.
However, in the current implementation of `InMemoryStore`, when the given index is the natural key index, there is no special handling for it and a linear search over all the task data is performed.

We can improve it by deleting the natural keys directly from the internal hashmap.

### Why are the changes needed?

Better performance if the given index for `removeAllByIndexValues` is the natural key index in
`InMemoryStore`
### Does this PR introduce any user-facing change?

No

### How was this patch tested?

Enhance the existing test.

Closes #27763 from gengliangwang/useNaturalIndex.

Authored-by: Gengliang Wang <gengliang.wang@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Gengliang Wang 2020-03-03 19:34:19 +08:00 committed by Wenchen Fan
parent 313e62c376
commit b5166aac1f
2 changed files with 24 additions and 12 deletions

View file

@ -231,11 +231,16 @@ public class InMemoryStore implements KVStore {
}
int countingRemoveAllByIndexValues(String index, Collection<?> indexValues) {
if (hasNaturalParentIndex && naturalParentIndexName.equals(index)) {
int count = 0;
if (KVIndex.NATURAL_INDEX_NAME.equals(index)) {
for (Object naturalKey : indexValues) {
count += delete(asKey(naturalKey)) ? 1 : 0;
}
return count;
} else if (hasNaturalParentIndex && naturalParentIndexName.equals(index)) {
// If there is a parent index for the natural index and `index` happens to be it,
// Spark can use the `parentToChildrenMap` to get the related natural keys, and then
// delete them from `data`.
int count = 0;
for (Object indexValue : indexValues) {
Comparable<Object> parentKey = asKey(indexValue);
NaturalKeys children = parentToChildrenMap.getOrDefault(parentKey, new NaturalKeys());
@ -271,9 +276,9 @@ public class InMemoryStore implements KVStore {
}
}
public void delete(Object key) {
data.remove(asKey(key));
if (hasNaturalParentIndex) {
public boolean delete(Object key) {
boolean entryExists = data.remove(asKey(key)) != null;
if (entryExists && hasNaturalParentIndex) {
for (NaturalKeys v : parentToChildrenMap.values()) {
if (v.remove(asKey(key))) {
// `v` can be empty after removing the natural key and we can remove it from
@ -284,6 +289,7 @@ public class InMemoryStore implements KVStore {
}
}
}
return entryExists;
}
public int size() {

View file

@ -158,23 +158,29 @@ public class InMemoryStoreSuite {
assertEquals(9, store.count(ArrayKeyIndexType.class));
store.removeAllByIndexValues(
// Try removing non-existing keys
assert(!store.removeAllByIndexValues(
ArrayKeyIndexType.class,
KVIndex.NATURAL_INDEX_NAME,
ImmutableSet.of(new int[] {0, 0, 0}, new int[] { 2, 2, 2 }));
ImmutableSet.of(new int[] {10, 10, 10}, new int[] { 3, 3, 3 })));
assertEquals(9, store.count(ArrayKeyIndexType.class));
assert(store.removeAllByIndexValues(
ArrayKeyIndexType.class,
KVIndex.NATURAL_INDEX_NAME,
ImmutableSet.of(new int[] {0, 0, 0}, new int[] { 2, 2, 2 })));
assertEquals(7, store.count(ArrayKeyIndexType.class));
store.removeAllByIndexValues(
assert(store.removeAllByIndexValues(
ArrayKeyIndexType.class,
"id",
ImmutableSet.of(new String [] { "things" }));
ImmutableSet.of(new String [] { "things" })));
assertEquals(4, store.count(ArrayKeyIndexType.class));
store.removeAllByIndexValues(
assert(store.removeAllByIndexValues(
ArrayKeyIndexType.class,
"id",
ImmutableSet.of(new String [] { "more things" }));
ImmutableSet.of(new String [] { "more things" })));
assertEquals(0, store.count(ArrayKeyIndexType.class));
}