Name: jl125535 Date: 07/19/2002
FULL PRODUCT VERSION :
java version "1.4.0"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.0-b92)
Java HotSpot(TM) Client VM (build 1.4.0-b92, mixed mode)
FULL OPERATING SYSTEM VERSION :Microsoft Windows 2000
[Version 5.00.2195]
A DESCRIPTION OF THE PROBLEM :
This report is similar to 4072712, except that this one
suggests that it is possible to maintain bounds checking
while accelerating array indexing.
The Client VM is particularly slow at dealing with multi-
dimensional arrays, e.g. matrices (2D arrays) that are
typical of many science and engineering programs. This has
lead people to do micro optimizations, e.g.
http://tilde-hoschek.home.cern.ch/~hoschek/colt/
and
http://www.alphaworks.ibm.com/tech/ninja
the latter is the basis for JSR 83 (Multiarray package)
http://jcp.org/jsr/detail/83.jsp
I am currently trying to get changes to these particular
packages, but I suspect that many people do micro
optimisations to suite the Client VM because they test
primarily using this VM. (This is the one there IDE will
probably be using.)
The problem is demonstrated by the attached program which
solves the same problem 4 different ways. Most of the
program is concerned with timing and verifying results. At
the start of the program are 4 inner classes, each class
contains a method called body. It is these 4 body methods
that are of interest.
The first body is the baseline and is the obvious way of
doing 2D arrays in Java, i.e. an array of arrays accessed
using a[row][col].
The second body simulates a 2D array using a 1D array and
calculates the location inside the 1D array by multiplying
by the column length, i.e. a[row * cols + col].
The third method is like the second method except that the
multiplication is eliminated by using an extra index
variable.
Finally, the forth method uses an array of arrays but uses
pointers to rows so that instead of a[row][col] you use aRow
[col] where aRow = a[row] and is a temporary variable
assigned outside the inner loop.
All the methods (2-4) show a speed up on the client VM,
which is presumably why package writers use these micro
optimisations and other optimisations. The IBM VM and the
server VM don't show such a marked speed up (in fact the
micro optimisations can be slower on these VMs).
Is it possible to improve the handling of multi-dimensional
arrays in the client VM so that package writers aren't
tempted into micro optimisations?
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Run program below
EXPECTED VERSUS ACTUAL BEHAVIOR :
Expected Behavior:
Ideally the 4 timings given by the program below would be
identical
Actual Behavior: (trial run with JDK 1.4 on Solaris 8)
% java MockFD
7.8 s - Test 1: 2D array using array[row][col]
6.6 s - Test 2: 1D array using array[row * colSize + col]
5.3 s - Test 3: 1D array using array[i]
5.3 s - Test 4: 2D array using arrayRow[col]
Spread = 1.5 times
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
/* Mock up of a finite difference calculation to show array access speed for
different methods of implementing a1 2D matrix in Java */
package benchmarks.arrays;
import java.util.*;
public abstract class MockFD {
private static final class ArrayOfArrays extends MockFD {
public String toString() { return "2D array using array[row][col]"; }
private static final double[][] a1 = new double[rowSize][colSize];
private static final double[][] a2 = new double[rowSize][colSize];
private static final double[][] a3 = new double[rowSize][colSize];
private static final double[][] a4 = new double[rowSize][colSize];
private static void
body(final double[][] d1, final double[][] s1,
final double[][] d2, final double[][] s2)
{
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
d1[r][c] = alpha * s1[r][c]
+ beta * (s1[r-1][c] + s1[r][c-1]
+ s1[r][c+1] + s1[r+1][c]);
d2[r][c] = alpha * s2[r][c]
+ beta * (s2[r-1][c] + s2[r][c-1]
+ s2[r][c+1] + s2[r+1][c]);
}
}
}
protected void calc() {
body(a2, a1, a4, a3);
body(a1, a2, a3, a4);
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
a1[r][c] += a3[r][c]; }
}
}
protected double sum() {
double sum = 0;
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
sum += a1[r][c]; }
}
return sum;
}
protected void fill() {
for (int r = border; r < rows + border; r++) {
System.arraycopy(row, 0, a1[r], border, cols); }
}
}
private static final class ArrayWithMult extends MockFD {
public String toString() {
return "1D array using array[row * colSize + col]"; }
private static final double[] a1 = new double[rowSize * colSize];
private static final double[] a2 = new double[rowSize * colSize];
private static final double[] a3 = new double[rowSize * colSize];
private static final double[] a4 = new double[rowSize * colSize];
private static void
body(final double[] d1, final double[] s1, final
double[] d2, final double[] s2)
{
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
d1[r * colSize + c] =
alpha * s1[r * colSize + c]
+ beta * (s1[(r-1) * colSize + c]
+ s1[r * colSize + (c-1)]
+ s1[r * colSize + (c+1)]
+ s1[(r+1) * colSize + c]);
d2[r * colSize + c] =
alpha * s2[r * colSize + c]
+ beta * (s2[(r-1) * colSize + c]
+ s2[r * colSize + (c-1)]
+ s2[r * colSize + (c+1)]
+ s2[(r+1) * colSize + c]);
}
}
}
protected void calc() {
body(a2, a1, a4, a3);
body(a1, a2, a3, a4);
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
a1[r * colSize + c] += a3[r * colSize + c]; }
}
}
protected double sum() {
double sum = 0;
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
sum += a1[r * colSize + c];
}
}
return sum;
}
protected void fill() {
for (int r = border; r < rows + border; r++) {
System.arraycopy(row, 0, a1, r * colSize + border, cols);
}
}
}
private static final class ArrayWithExtraIndex extends MockFD {
public String toString() { return "1D array using array[i]"; }
private static final double[] a1 = new double[rowSize * colSize];
private static final double[] a2 = new double[rowSize * colSize];
private static final double[] a3 = new double[rowSize * colSize];
private static final double[] a4 = new double[rowSize * colSize];
private static void
body(final double[] d1, final double[] s1,
final double[] d2, final double[] s2)
{
for (int r = border * colSize; r < (rows + border) * colSize;
r += colSize)
{
for (int c = 0, i = r + border; c < cols; c++, i++) {
d1[i] = alpha * s1[i]
+ beta * (s1[i-colSize] + s1[i-1]
+ s1[i+1] + s1[i+colSize]);
d2[i] = alpha * s2[i]
+ beta * (s2[i-colSize] + s2[i-1]
+ s2[i+1] + s2[i+colSize]);
}
}
}
protected void calc() {
body(a2, a1, a4, a3);
body(a1, a2, a3, a4);
for (int r = border * colSize; r < (rows + border) * colSize;
r += colSize)
{
for (int c = 0, i = r + border; c < cols; c++, i++) {
a1[i] += a3[i];
}
}
}
protected double sum() {
double sum = 0;
for (int r = border * colSize; r < (rows + border) * colSize;
r += colSize)
{
for (int c = 0, i = r + border; c < cols; c++, i++) {
sum += a1[i];
}
}
return sum;
}
protected void fill() {
for (int r = border; r < rows + border; r++) { System.arraycopy
(row, 0, a1, r * colSize + border, cols); }
}
}
private static final class ArrayOfArraysWithTempRows extends MockFD {
public String toString() { return "2D array using arrayRow[col]"; }
private static final double[][] a1 = new double[rowSize][colSize];
private static final double[][] a2 = new double[rowSize][colSize];
private static final double[][] a3 = new double[rowSize][colSize];
private static final double[][] a4 = new double[rowSize][colSize];
private static void
body(final double[][] d1, final double[][] s1,
final double[][] d2, final double[][] s2) {
for (int r = border; r < rows + border; r++) {
double[] d1R = d1[r];
double[] s1Rm1 = s1[r-1];
double[] s1R = s1[r];
double[] s1Rp1 = s1[r+1];
double[] d2R = d2[r];
double[] s2Rm1 = s2[r-1];
double[] s2R = s2[r];
double[] s2Rp1 = s2[r+1];
for (int c = border; c < cols + border; c++) {
d1R[c] = alpha * s1R[c]
+ beta * (s1Rm1[c] + s1R[c-1]
+ s1R[c+1] + s1Rp1[c]);
d2R[c] = alpha * s2R[c]
+ beta * (s2Rm1[c] + s2R[c-1]
+ s2R[c+1] + s2Rp1[c]);
}
}
}
protected void calc() {
body(a2, a1, a4, a3);
body(a1, a2, a3, a4);
for (int r = border; r < rows + border; r++) {
double[] a1R = a1[r];
double[] a3R = a3[r];
for (int c = border; c < cols + border; c++) {
a1R[c] += a3R[c];
}
}
}
protected double sum() {
double sum = 0;
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
sum += a1[r][c];
}
}
return sum;
}
protected void fill() {
for (int r = border; r < rows + border; r++) {
System.arraycopy(row, 0, a1[r], border, cols); }
}
}
private static final int loops = 100;
private static final int rows = 256;
private static final int cols = 256;
private static final int border = 1; // matrix borders are typically
// used in finite difference
// calculations for boundaries
private static final int rowSize = rows + 2 * border;
private static final int colSize = cols + 2 * border;
private static final double alpha = 2d / 3d;
private static final double beta = 1d / 3d;
private static final double[] row = new double[cols];
private static double result;
private static int testNumber;
private static double fastest = Double.MAX_VALUE;
private static double slowest;
private MockFD() {}
protected abstract void fill();
protected abstract void calc();
protected abstract double sum();
private static void initRow() {
for (int c = 0; c < cols; c++) { row[c] = Math.random(); }
}
private static double run(final MockFD t, final int loops) {
t.fill();
final long start = System.currentTimeMillis();
for (int loop = 0; loop < loops; loop++) { t.calc(); }
return Math.rint( (System.currentTimeMillis() - start) / 100d ) / 10;
}
private static void test(final MockFD t) {
run(t, loops / 10); // disgard first run, compilation cause first run
// to be slow, which in turn means more loops to
// get acurate assesmenmt of a big job, which in
// turn means longer run time. To speed things up
// ignore compilation.
final double time = run(t, loops);
if (testNumber == 0) { result = t.sum(); }
if ( result == t.sum() ) {
System.out.println( time + " s - Test " + (++testNumber) + ": " + t );
slowest = Math.max(slowest, time);
fastest = Math.min(fastest, time);
}
else { System.out.println(t + " gave incorrect result of "
+ t.sum()+ ", should be " + result); }
}
public static void main(final String[] args) {
initRow();
test( new ArrayOfArrays() );
test( new ArrayWithMult() );
test( new ArrayWithExtraIndex() );
test( new ArrayOfArraysWithTempRows() );
final double spread = Math.rint(10 * slowest / fastest) / 10;
System.out.println("Spread = " + spread + " times");
}
}
---------- END SOURCE ----------
CUSTOMER WORKAROUND :
Avoid using packages with mirco optimizations that are only
suitable for the Client VM.
(Review ID: 159392)
======================================================================