JDK-6206844 : few fixes in i486.ad for SSE2
  • Type: Enhancement
  • Component: hotspot
  • Sub-Component: compiler
  • Affected Version: 6
  • Priority: P4
  • Status: Resolved
  • Resolution: Fixed
  • OS: solaris_9
  • CPU: sparc
  • Submitted: 2004-12-10
  • Updated: 2010-04-03
  • Resolved: 2005-01-12
The Version table provides details related to the release that this issue/RFE will be addressed.

Unresolved : Release in which this issue/RFE will be addressed.
Resolved: Release in which this issue/RFE has been resolved.
Fixed : Release in which this issue/RFE has been fixed. The release containing this fix may be available for download as an Early Access Release or a General Availability Release.

To download the current JDK release, click here.
JDK 6
6 b19 Fixed
Description
Add MOVD SSE2 instructions for long volatile moves, d2l, l2d conversions
and long raw moves. 
Use 'fst STn' in place of the sequence 'fld ST0; fstp STn' in the FPU unit.
And a few fixes:
  MachSpillCopy - use 'lea' instead of 'sub/add' to preserve flags.
  MODX and MODXD - move popFPU to the end of the instruction.

The following Java methods were used to verify the generated code:

 /**
  * Stand-alone driver that repeatedly calls three small methods
  * (check0, check1, check2) so their compiled code can be inspected and
  * a rough "loops per second" figure printed for each.
  *
  * Each testN method runs its loop with the counter held in a static field.
  * main() first calls every testN with small limits as warmup, then runs
  * it with LIMIT while a helper thread sleeps 10 seconds, prints the
  * counter divided by the elapsed seconds, and stores LIMIT into the counter
  * so that the loop condition becomes false.
  */
 public class mtest {
  // Loop counters for test0/test1/test2. They are static fields (not
  // locals) because the timer threads in main() both read them to report
  // progress and overwrite them with LIMIT to end the loop.
  // NOTE(review): these fields are not volatile, so the Java memory model
  // does not guarantee the looping thread observes the timer thread's
  // write - confirm this is acceptable for the measurement.
  static int ops0;
  static int ops1;
  static int ops2;

  // Divisor/operand constants used by check1. Non-final static fields, so
  // they are read from memory rather than being compile-time constants.
  static double DVAL = 13.d;
  static float  FVAL = 13.f;
  // Iteration limit for the timed runs; also the value the timer threads
  // store into the counters to stop the loops.
  static final int LIMIT = 2000000000;

  // Volatile 64-bit field read and written by check0.
  static volatile long L = 0L;

  /**
   * Adds 32 to the volatile long L and returns a fresh read of L.
   * The statement performs a volatile load, an add and a volatile store;
   * the return performs a second volatile load.
   */
  static long check0() {
    L += 32L;
    return L; // L still live here
  }

  /**
   * Runs dval through a fixed chain of floating-point operations
   * (remainder, double/float conversions, sin, cos, tan, log, log10) and
   * returns the sum of two float/double-to-long casts truncated to int.
   *
   * @param dval starting double value
   * @return (int) of ((long) log10-result + (long) f1)
   */
  static int check1(double dval) {
      //modD
      double d1 = dval % DVAL;
      //ConvD2F
      float  f1 = (float)d1;
      //modF
      float  f2 = f1 % FVAL;
      //ConvF2D
      double d2 = (double)f2;
      //sinD
      d1 = Math.sin(d2);
      d2 = d1 * DVAL;
      //cosD
      d1 = Math.cos(d2);
      d2 = d1 / DVAL;
      //tanD
      d1 = Math.tan(d2);
      d2 = DVAL - d1;
      //logD
      d1 = Math.log(d2);
      d2 = d1 + DVAL;
      //log10D
      d1 = Math.log10(d2);
      //ConvD2L (double to long cast)
      long l = (long)d1;
      //ConvF2L (float to long cast)
      l += (long)f1;
      return (int)l;
  }

  /**
   * Round-trips d through its raw 64-bit representation and the float
   * narrowing of d through its raw 32-bit representation, checking that
   * each round trip reproduces the same value and the same bits.
   *
   * @param d value to round-trip
   * @return raw int bits of (float) d after the round trip
   * @throws InternalError if any round trip yields a different value or
   *         different bits
   */
  static int check2(double d) {
      // double -> raw long bits -> double
      long longbits = Double.doubleToRawLongBits(d);
      double d2 = Double.longBitsToDouble(longbits);
      if (d != d2) {
        throw new InternalError("value mismatch");
      }
      // Re-extract the bits from the reconstructed double and compare.
      long longbits2 = Double.doubleToRawLongBits(d2);
      if (longbits != longbits2) {
        throw new InternalError("value mismatch");
      }

      // Same round trip for the float narrowing of d: float -> raw int
      // bits -> float.
      float f = (float) d;
      int intbits = Float.floatToRawIntBits(f);
      float f2 = Float.intBitsToFloat(intbits);
      if (f != f2) {
        throw new InternalError("value mismatch");
      }
      int intbits2 = Float.floatToRawIntBits(f2);
      if (intbits != intbits2) {
        throw new InternalError("value mismatch");
      }
      return intbits2;
  }

  /**
   * Resets L to 0 and calls check0() while ops0 is below limit,
   * accumulating the int-truncated results.
   *
   * @param limit loop bound compared against the static counter ops0
   * @return int sum of the truncated check0() results
   */
  static int test0(int limit) {
    int i = 0;
    L = 0;
    // The counter is the static field ops0, so a store to it from
    // elsewhere can change when the loop exits.
    for (ops0 = 0; ops0 < limit; ops0++) {
      i += (int)check0();
    }
    return i;
  }
  /**
   * Calls check1(d) while ops1 is below limit, accumulating the results.
   *
   * @param d     argument passed to every check1 call
   * @param limit loop bound compared against the static counter ops1
   * @return int sum of the check1 results
   */
  static int test1(double d, int limit) {
    int i = 0;
    for (ops1 = 0; ops1 < limit; ops1++) {
      i += check1(d);
    }
    return i;
  }

  /**
   * Calls check2(d) while ops2 is below limit, accumulating the results.
   *
   * @param d     argument passed to every check2 call
   * @param limit loop bound compared against the static counter ops2
   * @return int sum of the check2 results
   */
  static int test2(double d, int limit) {
    int i = 0;
    for (ops2 = 0; ops2 < limit; ops2++) {
      i += check2(d);
    }
    return i;
  }

  /**
   * Warms up each test with two short runs, then performs three timed
   * runs. For each timed run an anonymous thread sleeps 10 seconds, prints
   * the counter divided by 10 as "loops per second", and then sets the
   * counter to LIMIT so the test loop's condition fails.
   *
   * The local i only receives the test results; it is never read.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    double d = 0.0123456789d;
    int i = test0(11000);     // warmup
    i = test0(10000);         // warmup
    i = test1(d, 11000);      // warmup
    i = test1(d, 10000);      // warmup
    i = test2(d, 11000);      // warmup
    i = test2(d, 10000);      // warmup
    System.out.println("Start check0!");
    // Timer thread for test0: report after 10 s, then stop the loop.
    new Thread() {
      public void run() {
        int seconds = 10;
        try {
          Thread.sleep(seconds * 1000);
        } catch (Exception e) { } // any exception from sleep is ignored
        System.out.println("check0 (volatile long): " + (ops0 / seconds) + " loops per second");
        // Make "ops0 < limit" false in test0 (limit is LIMIT for this run).
        ops0 = LIMIT;
      }
    }.start();
    i = test0(LIMIT);  // run

    System.out.println("Start check1!");
    // Timer thread for test1: report after 10 s, then stop the loop.
    new Thread() {
      public void run() {
        int seconds = 10;
        try {
          Thread.sleep(seconds * 1000);
        } catch (Exception e) { } // any exception from sleep is ignored
        System.out.println("check1 (math on FPU): " + (ops1 / seconds) + " loops per second");
        // Make "ops1 < limit" false in test1.
        ops1 = LIMIT;
      }
    }.start();
    i = test1(d, LIMIT);  // run

    System.out.println("Start check2!");
    // Timer thread for test2: report after 10 s, then stop the loop.
    new Thread() {
      public void run() {
        int seconds = 10;
        try {
          Thread.sleep(seconds * 1000);
        } catch (Exception e) { } // any exception from sleep is ignored
        System.out.println("check2 (raw 64 bits): " + (ops2 / seconds) + " loops per second");
        // Make "ops2 < limit" false in test2.
        ops2 = LIMIT;
      }
    }.start();
    i = test2(d, LIMIT);  // run
  }
}

###@###.### 2004-12-10 01:01:42 GMT
###@###.### 2004-12-14 00:30:33 GMT

Comments
SUGGESTED FIX http://analemma.sfbay.sun.com/net/prt-archiver.sfbay/data/archived_workspaces/main/c2_baseline/2004/20041220124309.kvn.6206844/workspace/webrevs/webrev-2004.12.20/index.html ###@###.### 2005-1-19 03:28:58 GMT
19-01-2005

EVALUATION xmm registers are faster than the FPU stack for SSE2. ###@###.### 2004-12-14 00:30:33 GMT ###@###.### 2005-1-19 03:28:58 GMT
14-12-2004