Skip to content
Draft
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,8 @@ static NodeArray merge(NodeArray a1, NodeArray a2) {
NodeArray merged = new NodeArray(a1.size() + a2.size());
int i = 0, j = 0;

// since nodes are only guaranteed to be sorted by score -- ties can appear in any node order --
// we need to remember all the nodes with the current score to avoid adding duplicates
var nodesWithLastScore = new IntHashSet();
// To avoid duplicates, we need to remember all the nodes added so far
var mergedNodes = new IntHashSet();
float lastAddedScore = Float.NaN;

// loop through both source arrays, adding the highest score element to the merged array,
Expand All @@ -75,33 +74,30 @@ static NodeArray merge(NodeArray a1, NodeArray a2) {
if (a1.scores[i] < a2.scores[j]) {
// add from a2
if (a2.scores[j] != lastAddedScore) {
nodesWithLastScore.clear();
lastAddedScore = a2.scores[j];
}
if (nodesWithLastScore.add(a2.nodes[j])) {
if (mergedNodes.add(a2.nodes[j])) {
merged.addInOrder(a2.nodes[j], a2.scores[j]);
}
j++;
} else if (a1.scores[i] > a2.scores[j]) {
// add from a1
if (a1.scores[i] != lastAddedScore) {
nodesWithLastScore.clear();
lastAddedScore = a1.scores[i];
}
if (nodesWithLastScore.add(a1.nodes[i])) {
if (mergedNodes.add(a1.nodes[i])) {
merged.addInOrder(a1.nodes[i], a1.scores[i]);
}
i++;
} else {
// same score -- add both
if (a1.scores[i] != lastAddedScore) {
nodesWithLastScore.clear();
lastAddedScore = a1.scores[i];
}
if (nodesWithLastScore.add(a1.nodes[i])) {
if (mergedNodes.add(a1.nodes[i])) {
merged.addInOrder(a1.nodes[i], a1.scores[i]);
}
if (nodesWithLastScore.add(a2.nodes[j])) {
if (mergedNodes.add(a2.nodes[j])) {
merged.addInOrder(a2.nodes[j], a2.scores[j]);
}
i++;
Expand All @@ -112,30 +108,22 @@ static NodeArray merge(NodeArray a1, NodeArray a2) {
// If elements remain in a1, add them
if (i < a1.size()) {
// avoid duplicates while adding nodes with the same score
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stale comment describing old strategy -- can clean up on commit

while (i < a1.size && a1.scores[i] == lastAddedScore) {
if (!nodesWithLastScore.contains(a1.nodes[i])) {
for (; i < a1.size; i++) {
if (mergedNodes.add(a1.nodes[i])) {
merged.addInOrder(a1.nodes[i], a1.scores[i]);
}
i++;
}
// the remaining nodes have a different score, so we can bulk-add them
System.arraycopy(a1.nodes, i, merged.nodes, merged.size, a1.size - i);
System.arraycopy(a1.scores, i, merged.scores, merged.size, a1.size - i);
merged.size += a1.size - i;
}

// If elements remain in a2, add them
if (j < a2.size()) {
// avoid duplicates while adding nodes with the same score
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stale comment describing old strategy -- can clean up on commit to make this clearer

while (j < a2.size && a2.scores[j] == lastAddedScore) {
if (!nodesWithLastScore.contains(a2.nodes[j])) {
for (; j < a2.size; j++) {
if (mergedNodes.add(a2.nodes[j])) {
merged.addInOrder(a2.nodes[j], a2.scores[j]);
}
j++;
}
// the remaining nodes have a different score, so we can bulk-add them
System.arraycopy(a2.nodes, j, merged.nodes, merged.size, a2.size - j);
System.arraycopy(a2.scores, j, merged.scores, merged.size, a2.size - j);
merged.size += a2.size - j;
}

Expand Down Expand Up @@ -169,7 +157,7 @@ public void addInOrder(int newNode, float newScore) {
*/
int insertionPoint(int newNode, float newScore) {
int insertionPoint = descSortFindRightMostInsertionPoint(newScore);
return duplicateExistsNear(insertionPoint, newNode, newScore) ? -1 : insertionPoint;
return duplicateExists(insertionPoint, newNode) ? -1 : insertionPoint;
}

/**
Expand Down Expand Up @@ -209,21 +197,20 @@ private int insertInternal(int insertionPoint, int newNode, float newScore) {
return insertionPoint;
}

private boolean duplicateExistsNear(int insertionPoint, int newNode, float newScore) {
// Check to the left
for (int i = insertionPoint - 1; i >= 0 && scores[i] == newScore; i--) {
if (nodes[i] == newNode) {
return true;
private boolean duplicateExists(int insertionPoint, int newNode) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this could be marginally cleaner -- at i = 0, we check nodes[insertion] point at each conditional, and the loop bounds are unnecessarily pessimistic (i.e., if the insertion point is right in the middle, we'll waste a bunch of loop iterations in both directions, when we could just go to the max radius around the insertion point to hit one end of the array). No idea if this will matter in practice but might be worth measuring.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

e.g., something like

    int n = this.size;
    int[] a = this.nodes;

    int left  = insertionPoint - 1;
    int right = insertionPoint;

    // Exact hit fast path
    if (right < n && a[right] == newNode) return true;

    // Expand outward
    while (left >= 0 || right < n) {
        if (left >= 0 && a[left] == newNode) return true;
        if (right < n && a[right] == newNode) return true;
        left--;
        right++;
    }
    return false;

// Checking close to the insertion point first should be better that doing a scan from 0 to size
for (int i = 0; i < size + 1; i++) {
if (insertionPoint >= i && insertionPoint - i < size) {
if (nodes[insertionPoint - i] == newNode) {
return true;
}
}
}

// Check to the right
for (int i = insertionPoint; i < size && scores[i] == newScore; i++) {
if (nodes[i] == newNode) {
if (insertionPoint + i < size) {
if(nodes[insertionPoint + i] == newNode) {
return true;
}
}
}

return false;
}

Expand Down
Loading