The Fork/Join task ForEachOps.ForEachOrderedTask can be ...: 1) Instantiate the ConcurrentHashMap with a size of Math.max(16, AbstractTask.LEAF_TARGET << 1), to avoid resizing that can result in slower concurrent operation on the map 2) Evaluate whether the reporting of elements needs to be within a synchronized block to guarantee a happens-before relationship, if so synchronization should be applied consistently and perhaps the code should be cleaned up as follows: if (task.getPendingCount() > 0) { Node.Builder<T> nb = task.helper.makeNodeBuilder( task.helper.exactOutputSizeIfKnown(rightSplit), size -> (T[]) new Object[size]); task.node = task.helper.wrapAndCopyInto(nb, rightSplit).build(); task.spliterator = null; } task.tryComplete(); } @Override public void onCompletion(CountedCompleter<?> caller) { if (node != null) { Node<T> _node = node; node = null; // Dump any data from this leaf into the sink synchronized (lock) { _node.forEach(action); } } else if (spliterator != null) { Spliterator<S> _spliterator = spliterator; spliterator = null; // Dump any data output from this leaf's pipeline into the sink synchronized (lock) { helper.wrapAndCopyInto(action, _spliterator); } } ForEachOrderedTask<S, T> victim = completionMap.remove(this); if (victim != null) victim.tryComplete(); }