The C2 lookup switch implementation (using a binary search) seems to have unnecessary compare instructions. Below is a switch case with 16 cases, looking like this in Java code:
public int switch_calculation() {
int choice = ThreadLocalRandom.current().nextInt(16) + 1;
switch (choice) {
case 1: return 5;
case 2: return 1;
case 3: return 2;
case 4: return 16;
case 5: return 11;
case 6: return 13;
case 7: return 10;
case 8: return 14;
case 9: return 3;
case 10: return 4;
case 11: return 6;
case 12: return 15;
case 13: return 10;
case 14: return 8;
case 15: return 9;
case 16: return 7;
default: return 0;
}
}
And the x64 assembly, I've pointed out some of the places for improvement by comment ";HERE".
[Verified Entry Point]
0x00007f34fd071700: mov %eax,-0x14000(%rsp)
0x00007f34fd071707: push %rbp
0x00007f34fd071708: sub $0x10,%rsp
0x00007f34fd07170c: mov 0x1b8(%r15),%rcx ;*invokestatic currentThread
; - java.util.concurrent.ThreadLocalRandom::current@3 (line 177)
; - org.sample.MyBenchmark::switch_calculation@0 (line 37)
0x00007f34fd071713: mov 0xf0(%rcx), %r10d
0x00007f34fd07171a: test %r10d,%r10d
0x00007f34fd07171d: je 0x00007f34fd071912 ;*getstatic instance
; - java.util.concurrent.ThreadLocalRandom::current@18 (line 179)
; - org.sample.MyBenchmark::switch_calculation@0 (line 37)
0x00007f34fd071723: mov $0x5deece66d,%r10
0x00007f34fd07172d: imul 0xe8(%rcx),%r10
0x00007f34fd071735: add $0xb,%r10 ;*ladd
; - java.util.concurrent.ThreadLocalRandom::next@28 (line 198)
; - java.util.Random::nextInt@40 (line 311)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd071739: mov %r10,%r11
0x00007f34fd07173c: shr $0x11,%r11
0x00007f34fd071740: and $0x7fffffff,%r11
0x00007f34fd071747: mov %r11d,%ebx ;*l2i ; - java.util.concurrent.ThreadLocalRandom::next@44 (line 199)
; - java.util.Random::nextInt@40 (line 311)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd07174a: movslq %ebx,%r11
0x00007f34fd07174d: mov %ebx,%r8d
0x00007f34fd071750: sar $0x1f,%r8d
0x00007f34fd071754: imul $0x78787879,%r11,%r11
0x00007f34fd07175b: sar $0x23,%r11
0x00007f34fd07175f: mov %r11d,%r11d
0x00007f34fd071762: sub %r8d,%r11d
0x00007f34fd071765: mov %r11d,%r8d
0x00007f34fd071768: shl $0x4,%r8d
0x00007f34fd07176c: add %r11d,%r8d
0x00007f34fd07176f: mov %ebx,%r9d
0x00007f34fd071772: sub %r8d,%r9d ;*irem
; - java.util.Random::nextInt@46 (line 312)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd071775: sub %r9d,%ebx
0x00007f34fd071778: add $0x10,%ebx
0x00007f34fd07177b: mov $0xffffffffffff,%rdi
0x00007f34fd071785: and %rdi,%r10
0x00007f34fd071788: mov %r10,0xe8(%rcx) ;*invokevirtual putLong
; - java.util.concurrent.ThreadLocalRandom::next@35 (line 197)
; - java.util.Random::nextInt@40 (line 311)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd07178f: test %ebx,%ebx
0x00007f34fd071791: jl 0x00007f34fd0718a0 ;*iload_3
; - java.util.Random::nextInt@58 (line 314)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd071797: inc %r9d ;*iadd
; - org.sample.MyBenchmark::switch_calculation@9 (line 37)
0x00007f34fd07179a: cmp $0x9,%r9d
0x00007f34fd07179e: je 0x00007f34fd071888
0x00007f34fd0717a4: mov $0xa,%eax
0x00007f34fd0717a9: cmp $0x9,%r9d
0x00007f34fd0717ad: jg 0x00007f34fd071827
0x00007f34fd0717af: cmp $0x4,%r9d
0x00007f34fd0717b3: je 0x00007f34fd071820
; HERE
0x00007f34fd0717b5: cmp $0x4,%r9d
0x00007f34fd0717b9: jg 0x00007f34fd0717f2
0x00007f34fd0717bb: cmp $0x2,%r9d
0x00007f34fd0717bf: je 0x00007f34fd0717e8
; HERE
0x00007f34fd0717c1: cmp $0x2,%r9d
0x00007f34fd0717c5: jg 0x00007f34fd0717de
0x00007f34fd0717c7: cmp $0x1,%r9d
0x00007f34fd0717cb: jne 0x00007f34fd0717d7 ;*tableswitch
; - org.sample.MyBenchmark::switch_calculation@12 (line 39)
0x00007f34fd0717cd: mov $0x5,%eax
0x00007f34fd0717d2: jmpq 0x00007f34fd07188d ;*iconst_0
; - org.sample.MyBenchmark::switch_calculation@142 (line 58)
0x00007f34fd0717d7: xor %eax,%eax
0x00007f34fd0717d9: jmpq 0x00007f34fd07188d
0x00007f34fd0717de: mov $0x2,%eax
0x00007f34fd0717e3: jmpq 0x00007f34fd07188d
0x00007f34fd0717e8: mov $0x1,%eax
0x00007f34fd0717ed: jmpq 0x00007f34fd07188d
0x00007f34fd0717f2: cmp $0x7,%r9d
0x00007f34fd0717f6: je 0x00007f34fd07188d
; HERE
0x00007f34fd0717fc: cmp $0x7,%r9d
0x00007f34fd071800: jle 0x00007f34fd07180c
0x00007f34fd071802: mov $0xe,%eax
0x00007f34fd071807: jmpq 0x00007f34fd07188d
0x00007f34fd07180c: cmp $0x6,%r9d
0x00007f34fd071810: jne 0x00007f34fd071819
0x00007f34fd071812: mov $0x11,%eax
0x00007f34fd071817: jmp 0x00007f34fd07188d
0x00007f34fd071819: mov $0xb,%eax
0x00007f34fd07181e: jmp 0x00007f34fd07188d
0x00007f34fd071820: mov $0x10,%eax
0x00007f34fd071825: jmp 0x00007f34fd07188d
0x00007f34fd071827: cmp $0xe,%r9d
0x00007f34fd07182b: je 0x00007f34fd071881
; HERE
0x00007f34fd07182d: cmp $0xe,%r9d
0x00007f34fd071831: jle 0x00007f34fd07185a
0x00007f34fd071833: cmp $0x11,%r9d
0x00007f34fd071837: je 0x00007f34fd071853
; HERE
0x00007f34fd071839: cmp $0x11,%r9d
0x00007f34fd07183d: jg 0x00007f34fd0717d7
0x00007f34fd07183f: cmp $0x10,%r9d
0x00007f34fd071843: jne 0x00007f34fd07184c
0x00007f34fd071845: mov $0x7,%eax
0x00007f34fd07184a: jmp 0x00007f34fd07188d
0x00007f34fd07184c: mov $0x9,%eax
0x00007f34fd071851: jmp 0x00007f34fd07188d
0x00007f34fd071853: mov $0xd,%eax
0x00007f34fd071858: jmp 0x00007f34fd07188d
0x00007f34fd07185a: cmp $0xc,%r9d
0x00007f34fd07185e: je 0x00007f34fd07187a
; HERE
0x00007f34fd071860: cmp $0xc,%r9d
0x00007f34fd071864: jg 0x00007f34fd07188d
0x00007f34fd071866: cmp $0xb,%r9d
0x00007f34fd07186a: jne 0x00007f34fd071873 ;*tableswitch
; - org.sample.MyBenchmark::switch_calculation@12 (line 39)
0x00007f34fd07186c: mov $0x6,%eax
0x00007f34fd071871: jmp 0x00007f34fd07188d
0x00007f34fd071873: mov $0x4,%eax
0x00007f34fd071878: jmp 0x00007f34fd07188d
0x00007f34fd07187a: mov $0xf,%eax
0x00007f34fd07187f: jmp 0x00007f34fd07188d
0x00007f34fd071881: mov $0x8,%eax
0x00007f34fd071886: jmp 0x00007f34fd07188d
0x00007f34fd071888: mov $0x3,%eax ;*invokestatic currentThread
; - java.util.concurrent.ThreadLocalRandom::current@3 (line 177)
; - org.sample.MyBenchmark::switch_calculation@0 (line 37)
0x00007f34fd07188d: add $0x10,%rsp
0x00007f34fd071891: pop %rbp
0x00007f34fd071892: test %eax,0xbfb0768(%rip) # 0x00007f3509022000 ; {poll_return}
0x00007f34fd071898: retq
0x00007f34fd071899: nopl 0x0(%rax) ;*aload_0
; - java.util.Random::nextInt@37 (line 311)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd0718a0: mov $0x5deece66d,%r10
0x00007f34fd0718aa: imul 0xe8(%rcx),%r10
0x00007f34fd0718b2: add $0xb,%r10 ;*ladd
; - java.util.concurrent.ThreadLocalRandom::next@28 (line 198)
; - java.util.Random::nextInt@40 (line 311)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd0718b6: mov %r10,%r11
0x00007f34fd0718b9: and %rdi,%r11
0x00007f34fd0718bc: mov %r11,0xe8(%rcx) ;*iflt
; - java.util.Random::nextInt@55 (line 313)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd0718c3: shr $0x11,%r10
0x00007f34fd0718c7: and $0x7fffffff,%r10
0x00007f34fd0718ce: mov %r10d,%r11d ;*l2i ; - java.util.concurrent.ThreadLocalRandom::next@44 (line 199)
; - java.util.Random::nextInt@40 (line 311)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd0718d1: movslq %r11d,%r10
0x00007f34fd0718d4: mov %r11d,%r8d
0x00007f34fd0718d7: sar $0x1f,%r8d
0x00007f34fd0718db: imul $0x78787879,%r10,%r10
0x00007f34fd0718e2: sar $0x23,%r10
0x00007f34fd0718e6: mov %r10d,%r10d
0x00007f34fd0718e9: sub %r8d,%r10d
0x00007f34fd0718ec: mov %r10d,%ebx
0x00007f34fd0718ef: shl $0x4,%ebx
0x00007f34fd0718f2: add %r10d,%ebx
0x00007f34fd0718f5: mov %r11d,%r9d
0x00007f34fd0718f8: sub %ebx,%r9d ;*irem
; - java.util.Random::nextInt@46 (line 312)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd0718fb: sub %r9d,%r11d
0x00007f34fd0718fe: add $0x10,%r11d ; OopMap{rcx=Oop off=546}
;*iflt
; - java.util.Random::nextInt@55 (line 313)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd071902: test %eax,0xbfb06f8(%rip) # 0x00007f3509022000
; {poll}
0x00007f34fd071908: test %r11d,%r11d
0x00007f34fd07190b: jl 0x00007f34fd0718a0 ;*iflt
; - java.util.Random::nextInt@55 (line 313)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd07190d: jmpq 0x00007f34fd071797
0x00007f34fd071912: nop
0x00007f34fd071913: callq 0x00007f34fd037f60 ; OopMap{off=568}
;*invokestatic localInit
; - java.util.concurrent.ThreadLocalRandom::current@15 (line 178)
; - org.sample.MyBenchmark::switch_calculation@0 (line 37)
; {static_call}
0x00007f34fd071918: mov 0x1b8(%r15),%rcx ;*invokestatic currentThread
; - java.util.concurrent.ThreadLocalRandom::next@3 (line 197)
; - java.util.Random::nextInt@40 (line 311)
; - org.sample.MyBenchmark::switch_calculation@5 (line 37)
0x00007f34fd07191f: jmpq 0x00007f34fd071723 ;*invokestatic localInit
; - java.util.concurrent.ThreadLocalRandom::current@15 (line 178)
; - org.sample.MyBenchmark::switch_calculation@0 (line 37)
0x00007f34fd071924: mov %rax,%rsi
0x00007f34fd071927: add $0x10,%rsp
0x00007f34fd07192b: pop %rbp
0x00007f34fd07192c: jmpq 0x00007f34fd0638a0 ; {runtime_call}
This has only been tested for x86, SPARC might also suffer from this.