```
// opto/vectornode.hpp
//------------------------------LShiftCntVNode---------------------------------
// Vector left shift count.
// The constructor forwards the count input `cnt` and the vector type `vt`
// unchanged to the VectorNode base class; this class adds no state of its own.
// NOTE(review): presumably `cnt` is the scalar shift amount that gets
// replicated into every lane -- confirm against the matcher rules.
class LShiftCntVNode : public VectorNode {
 public:
  LShiftCntVNode(Node* cnt, const TypeVect* vt)
    : VectorNode(cnt, vt) {
  }

  // Only declared here; the definition is not part of this excerpt.
  virtual int Opcode() const;
};
//------------------------------RShiftCntVNode---------------------------------
// Vector right shift count.
// The constructor forwards the count input `cnt` and the vector type `vt`
// unchanged to the VectorNode base class; this class adds no state of its own.
// NOTE(review): presumably `cnt` is the scalar shift amount that gets
// replicated into every lane -- confirm against the matcher rules.
class RShiftCntVNode : public VectorNode {
 public:
  RShiftCntVNode(Node* cnt, const TypeVect* vt)
    : VectorNode(cnt, vt) {
  }

  // Only declared here; the definition is not part of this excerpt.
  virtual int Opcode() const;
};
```
The vector shift count is currently defined by two separate nodes, even though they behave identically.
// Test case: a single loop that shifts by the same variable count in both
// directions. For every index i in [0, N) it stores a1[i] << sh into a0[i]
// and b1[i] >> sh (signed right shift) into b0[i]. N and the four arrays are
// declared outside this excerpt.
public static void test_shiftv(int sh) {
  for (int i = 0; i < N; i++) {
    a0[i] = a1[i] << sh;
    b0[i] = b1[i] >> sh;
  }
}
// c2 generated assembly
0x0000ffffa9105f88: dup v16.16b, w12
0x0000ffffa9105f8c: add w18, w20, #0xf
0x0000ffffa9105f90: cmp w20, w10
...
0x0000ffffa9105ff0: dup v17.16b, w12 // duplicate of "dup v16.16b, w12" above
...
0x0000ffffa910606c: ldr q18, [x18, #16]
0x0000ffffa9106070: add x12, x14, x12
0x0000ffffa9106074: neg v19.16b, v17.16b
0x0000ffffa9106078: sshl v18.4s, v18.4s, v19.4s // b0[i] = b1[i] >> sh;
0x0000ffffa910607c: str q18, [x12, #16]
0x0000ffffa9106080: ldr q18, [x13, #32]
0x0000ffffa9106084: sshl v18.4s, v18.4s, v16.4s
0x0000ffffa9106088: str q18, [x4, #32] // a0[i] = a1[i] << sh;
0x0000ffffa910608c: ldr q18, [x18, #32]
0x0000ffffa9106090: neg v19.16b, v17.16b
0x0000ffffa9106094: sshl v18.4s, v18.4s, v19.4s
0x0000ffffa9106098: str q18, [x12, #32] // b0[i] = b1[i] >> sh;
0x0000ffffa910609c: ldr q18, [x13, #48]
0x0000ffffa91060a0: sshl v18.4s, v18.4s, v16.4s
0x0000ffffa91060a4: str q18, [x4, #48] // a0[i] = a1[i] << sh;
By merging them into a single node, the generated code can drop the redundant "dup", which also saves one register.