For JDK 8 & JDK 9, there are two redundant lsr instructions in the jbyte_arraycopy and jbyte_disjoint_arraycopy stubs, as shown in the following code snippet:

StubRoutines::jbyte_disjoint_arraycopy [0x0000007f7c0bef80, 0x0000007f7c0bf020[ (160 bytes)
  0x0000007f7c0bef80: stp x29, x30, [sp,#-16]!
  0x0000007f7c0bef84: mov x29, sp
  0x0000007f7c0bef88: cmp x2, #0x10
  0x0000007f7c0bef8c: b.cc Stub::jbyte_disjoint_arraycopy+100 0x0x7f7c0befe4
  0x0000007f7c0bef90: neg x9, x0
  0x0000007f7c0bef94: and x9, x9, #0xf
  0x0000007f7c0bef98: cbz x9, Stub::jbyte_disjoint_arraycopy+92 0x0x7f7c0befdc
  0x0000007f7c0bef9c: lsr x9, x9, #0
  0x0000007f7c0befa0: sub x2, x2, x9
  0x0000007f7c0befa4: tbz w9, #3, Stub::jbyte_disjoint_arraycopy+48 0x0x7f7c0befb0
  0x0000007f7c0befa8: ldr x8, [x0],#8
  0x0000007f7c0befac: str x8, [x1],#8
  0x0000007f7c0befb0: tbz w9, #2, Stub::jbyte_disjoint_arraycopy+60 0x0x7f7c0befbc
  0x0000007f7c0befb4: ldr w8, [x0],#4
  0x0000007f7c0befb8: str w8, [x1],#4
  0x0000007f7c0befbc: tbz w9, #1, Stub::jbyte_disjoint_arraycopy+72 0x0x7f7c0befc8
  0x0000007f7c0befc0: ldrh w8, [x0],#2
  0x0000007f7c0befc4: strh w8, [x1],#2
  0x0000007f7c0befc8: tbz w9, #0, Stub::jbyte_disjoint_arraycopy+84 0x0x7f7c0befd4
  0x0000007f7c0befcc: ldrb w8, [x0],#1
  0x0000007f7c0befd0: strb w8, [x1],#1
  ........

A proposed patch to fix the issue:

diff -r 119702fc4dea src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp  Tue Jan 26 17:13:18 2016 +0100
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp  Wed Jan 27 12:11:24 2016 +0800
@@ -954,7 +954,7 @@
                                       Register count, Register tmp, int step) {
     copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
     bool is_backwards = step < 0;
-    int granularity = uabs(step);
+    int shift, granularity = uabs(step);
     const Register t0 = r3, t1 = r4;

     if (is_backwards) {
@@ -962,7 +962,7 @@
       __ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
     }

-    Label done, tail;
+    Label tail;

     __ cmp(count, 16/granularity);
     __ br(Assembler::LO, tail);
@@ -985,9 +985,12 @@
       __ neg(rscratch2, s);
       __ andr(rscratch2, rscratch2, 2 * wordSize - 1);
     }
+    shift = exact_log2(granularity);
     // rscratch2 is the byte adjustment needed to align s.
     __ cbz(rscratch2, aligned);
-    __ lsr(rscratch2, rscratch2, exact_log2(granularity));
+    if (shift > 0) {
+      __ lsr(rscratch2, rscratch2, shift);
+    }
     __ sub(count, count, rscratch2);

 #if 0
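
To illustrate why the byte-copy stubs end up with the useless shift, here is a minimal standalone sketch (illustrative only, not HotSpot code; exact_log2 is re-implemented locally with the same contract for the power-of-two sizes used here). For jbyte the copy granularity is 1, so exact_log2(granularity) is 0 and the stub generator emits "lsr rscratch2, rscratch2, #0", a no-op; the proposed patch skips emitting the instruction when the shift amount is zero.

// Standalone sketch: shows which element sizes actually need the lsr.
#include <cstdio>

// Same result as HotSpot's exact_log2() for the exact powers of two used below.
static int exact_log2(int x) {
  int log = 0;
  while ((1 << log) < x) log++;
  return log;
}

int main() {
  const int granularities[] = {1, 2, 4, 8};   // jbyte, jshort, jint, jlong
  for (int g : granularities) {
    int shift = exact_log2(g);
    if (shift > 0)
      printf("granularity %d: emit lsr rscratch2, rscratch2, #%d\n", g, shift);
    else
      printf("granularity %d: shift is 0, lsr would be redundant\n", g);
  }
  return 0;
}

Under the patched logic only the jshort/jint/jlong stubs emit the shift; the jbyte stubs lose the redundant instruction.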