Function: _Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1D ... | Module: exec | Source: pack_kernel.cpp:156-160 [...] | Coverage: 0.01% |
---|
Function: _Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1D ... | Module: exec | Source: pack_kernel.cpp:156-160 [...] | Coverage: 0.01% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/pack_kernel.cpp: 156 - 160 |
-------------------------------------------------------------------------------- |
156: #pragma omp parallel for simd |
157: for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { |
158: for (int j = 0; j < depth; ++j) { |
159: int index = buffer_offset + j + k * depth; |
160: field(x_max + x_inc + j + 2, k) = right_rcv[index]; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x4389c0 PUSH %RBP |
0x4389c1 MOV %RSP,%RBP |
0x4389c4 PUSH %R15 |
0x4389c6 PUSH %R14 |
0x4389c8 PUSH %R13 |
0x4389ca PUSH %R12 |
0x4389cc MOV %RDI,%R12 |
0x4389cf PUSH %RBX |
0x4389d0 AND $-0x40,%RSP |
0x4389d4 ADD $-0x80,%RSP |
0x4389d8 MOV 0x1c(%RDI),%EBX |
0x4389db MOV 0x14(%RDI),%R14D |
0x4389df CALL 4046c0 <omp_get_num_threads@plt> |
0x4389e4 MOV %EAX,%R13D |
0x4389e7 SUB %EBX,%R14D |
0x4389ea CALL 4045b0 <omp_get_thread_num@plt> |
0x4389ef INC %R14D |
0x4389f2 MOV %EAX,%ECX |
0x4389f4 MOV 0x18(%R12),%EAX |
0x4389f9 ADD 0x28(%R12),%EAX |
0x4389fe LEA 0x2(%RBX,%RAX,1),%EAX |
0x438a02 SUB %R14D,%EAX |
0x438a05 CLTD |
0x438a06 IDIV %R13D |
0x438a09 CMP %EDX,%ECX |
0x438a0b JL 438e93 |
0x438a11 IMUL %EAX,%ECX |
0x438a14 ADD %ECX,%EDX |
0x438a16 ADD %EDX,%EAX |
0x438a18 CMP %EAX,%EDX |
0x438a1a JGE 438c5a |
0x438a20 MOVSXD 0x20(%R12),%R10 |
0x438a25 ADD %R14D,%EAX |
0x438a28 ADD %R14D,%EDX |
0x438a2b MOVSXD 0x24(%R12),%R15 |
0x438a30 MOVSXD 0x10(%R12),%RSI |
0x438a35 MOV 0x8(%R12),%RDI |
0x438a3a MOV %EAX,0x7c(%RSP) |
0x438a3e MOV %R10D,0x40(%RSP) |
0x438a43 MOV (%R12),%R14 |
0x438a47 TEST %EBX,%EBX |
0x438a49 JLE 438c5a |
0x438a4f LEA -0x1(%RBX),%EAX |
0x438a52 MOV %EBX,%R12D |
0x438a55 MOVSXD %EBX,%R11 |
0x438a58 MOV 0x8(%RDI),%R9 |
0x438a5c IMUL %EDX,%R12D |
0x438a60 MOV %EAX,0x78(%RSP) |
0x438a64 MOV %EBX,%EDI |
0x438a66 LEA (%R15,%RSI,1),%EAX |
0x438a6a LEA (%RSI,%R15,1),%R15 |
0x438a6e MOV %EBX,%ESI |
0x438a70 MOV (%R14),%RCX |
0x438a73 MOV %R11,0x68(%RSP) |
0x438a78 AND $0x7,%ESI |
0x438a7b LEA (,%R11,8),%R13 |
0x438a83 SHR $0x3,%EDI |
0x438a86 MOV 0x10(%R14),%R11 |
0x438a8a MOV %EBX,%R14D |
0x438a8d MOV %EAX,0x3c(%RSP) |
0x438a91 SAL $0x6,%RDI |
0x438a95 CLTQ |
0x438a97 MOV %ESI,0x50(%RSP) |
0x438a9b AND $-0x8,%R14D |
0x438a9f DEC %ESI |
0x438aa1 MOVSXD %R12D,%R8 |
0x438aa4 MOV %RCX,0x70(%RSP) |
0x438aa9 ADD %R8,%R10 |
0x438aac MOVSXD %EDX,%RDX |
0x438aaf MOV %RDI,0x60(%RSP) |
0x438ab4 LEA (,%R10,8),%R8 |
0x438abc MOV %R14D,0x54(%RSP) |
0x438ac1 MOV %RAX,0x48(%RSP) |
0x438ac6 MOV %R15,0x58(%RSP) |
0x438acb MOV %ESI,0x44(%RSP) |
0x438acf NOP |
(200) 0x438ad0 MOV 0x70(%RSP),%RDI |
(200) 0x438ad5 MOV 0x78(%RSP),%R15D |
(200) 0x438ada IMUL %RDX,%RDI |
(200) 0x438ade CMP $0x2,%R15D |
(200) 0x438ae2 JBE 438b07 |
(200) 0x438ae4 MOV 0x58(%RSP),%RCX |
(200) 0x438ae9 LEA 0x8(%R9,%R8,1),%RSI |
(200) 0x438aee LEA 0x2(%RDI,%RCX,1),%R14 |
(200) 0x438af3 LEA (%R11,%R14,8),%RCX |
(200) 0x438af7 MOV %RCX,%RAX |
(200) 0x438afa SUB %RSI,%RAX |
(200) 0x438afd CMP $0x30,%RAX |
(200) 0x438b01 JA 438c70 |
(200) 0x438b07 MOV 0x48(%RSP),%R15 |
(200) 0x438b0c XOR %ESI,%ESI |
(200) 0x438b0e LEA (%R9,%R8,1),%R14 |
(200) 0x438b12 ADD %R15,%RDI |
(200) 0x438b15 LEA (%R11,%RDI,8),%RCX |
(200) 0x438b19 LEA -0x8(%R13),%RDI |
(200) 0x438b1d SHR $0x3,%RDI |
(200) 0x438b21 INC %RDI |
(200) 0x438b24 AND $0x7,%EDI |
(200) 0x438b27 JE 438bc5 |
(200) 0x438b2d CMP $0x1,%RDI |
(200) 0x438b31 JE 438bb0 |
(200) 0x438b33 CMP $0x2,%RDI |
(200) 0x438b37 JE 438ba0 |
(200) 0x438b39 CMP $0x3,%RDI |
(200) 0x438b3d JE 438b90 |
(200) 0x438b3f CMP $0x4,%RDI |
(200) 0x438b43 JE 438b80 |
(200) 0x438b45 CMP $0x5,%RDI |
(200) 0x438b49 JE 438b70 |
(200) 0x438b4b CMP $0x6,%RDI |
(200) 0x438b4f JE 438b60 |
(200) 0x438b51 VMOVSD (%R14),%XMM7 |
(200) 0x438b56 MOV $0x8,%ESI |
(200) 0x438b5b VMOVSD %XMM7,0x10(%RCX) |
(200) 0x438b60 VMOVSD (%R14,%RSI,1),%XMM6 |
(200) 0x438b66 VMOVSD %XMM6,0x10(%RCX,%RSI,1) |
(200) 0x438b6c ADD $0x8,%RSI |
(200) 0x438b70 VMOVSD (%R14,%RSI,1),%XMM3 |
(200) 0x438b76 VMOVSD %XMM3,0x10(%RCX,%RSI,1) |
(200) 0x438b7c ADD $0x8,%RSI |
(200) 0x438b80 VMOVSD (%R14,%RSI,1),%XMM4 |
(200) 0x438b86 VMOVSD %XMM4,0x10(%RCX,%RSI,1) |
(200) 0x438b8c ADD $0x8,%RSI |
(200) 0x438b90 VMOVSD (%R14,%RSI,1),%XMM5 |
(200) 0x438b96 VMOVSD %XMM5,0x10(%RCX,%RSI,1) |
(200) 0x438b9c ADD $0x8,%RSI |
(200) 0x438ba0 VMOVSD (%R14,%RSI,1),%XMM8 |
(200) 0x438ba6 VMOVSD %XMM8,0x10(%RCX,%RSI,1) |
(200) 0x438bac ADD $0x8,%RSI |
(200) 0x438bb0 VMOVSD (%R14,%RSI,1),%XMM9 |
(200) 0x438bb6 VMOVSD %XMM9,0x10(%RCX,%RSI,1) |
(200) 0x438bbc ADD $0x8,%RSI |
(200) 0x438bc0 CMP %R13,%RSI |
(200) 0x438bc3 JE 438c38 |
(199) 0x438bc5 VMOVSD (%R14,%RSI,1),%XMM10 |
(199) 0x438bcb VMOVSD %XMM10,0x10(%RCX,%RSI,1) |
(199) 0x438bd1 VMOVSD 0x8(%RSI,%R14,1),%XMM11 |
(199) 0x438bd8 VMOVSD %XMM11,0x18(%RCX,%RSI,1) |
(199) 0x438bde VMOVSD 0x10(%RSI,%R14,1),%XMM12 |
(199) 0x438be5 VMOVSD %XMM12,0x20(%RCX,%RSI,1) |
(199) 0x438beb VMOVSD 0x18(%RSI,%R14,1),%XMM13 |
(199) 0x438bf2 VMOVSD %XMM13,0x28(%RCX,%RSI,1) |
(199) 0x438bf8 VMOVSD 0x20(%RSI,%R14,1),%XMM14 |
(199) 0x438bff VMOVSD %XMM14,0x30(%RCX,%RSI,1) |
(199) 0x438c05 VMOVSD 0x28(%RSI,%R14,1),%XMM15 |
(199) 0x438c0c VMOVSD %XMM15,0x38(%RCX,%RSI,1) |
(199) 0x438c12 VMOVSD 0x30(%RSI,%R14,1),%XMM1 |
(199) 0x438c19 VMOVSD %XMM1,0x40(%RCX,%RSI,1) |
(199) 0x438c1f VMOVSD 0x38(%RSI,%R14,1),%XMM0 |
(199) 0x438c26 VMOVSD %XMM0,0x48(%RCX,%RSI,1) |
(199) 0x438c2c ADD $0x40,%RSI |
(199) 0x438c30 CMP %R13,%RSI |
(199) 0x438c33 JNE 438bc5 |
(200) 0x438c35 NOPL (%RAX) |
(200) 0x438c38 MOV 0x68(%RSP),%RAX |
(200) 0x438c3d INC %RDX |
(200) 0x438c40 ADD %EBX,%R12D |
(200) 0x438c43 ADD %R13,%R8 |
(200) 0x438c46 LEA (%RDX),%R15D |
(200) 0x438c49 ADD %RAX,%R10 |
(200) 0x438c4c CMP %R15D,0x7c(%RSP) |
(200) 0x438c51 JG 438ad0 |
0x438c57 VZEROUPPER |
0x438c5a LEA -0x28(%RBP),%RSP |
0x438c5e POP %RBX |
0x438c5f POP %R12 |
0x438c61 POP %R13 |
0x438c63 POP %R14 |
0x438c65 POP %R15 |
0x438c67 POP %RBP |
0x438c68 RET |
0x438c69 NOPL (%RAX) |
(200) 0x438c70 CMP $0x6,%R15D |
(200) 0x438c74 JBE 438eb2 |
(200) 0x438c7a MOV 0x60(%RSP),%RSI |
(200) 0x438c7f LEA (%R9,%R8,1),%R15 |
(200) 0x438c83 XOR %EAX,%EAX |
(200) 0x438c85 SUB $0x40,%RSI |
(200) 0x438c89 SHR $0x6,%RSI |
(200) 0x438c8d INC %RSI |
(200) 0x438c90 AND $0x7,%ESI |
(200) 0x438c93 JE 438d38 |
(200) 0x438c99 CMP $0x1,%RSI |
(200) 0x438c9d JE 438d1b |
(200) 0x438c9f CMP $0x2,%RSI |
(200) 0x438ca3 JE 438d09 |
(200) 0x438ca5 CMP $0x3,%RSI |
(200) 0x438ca9 JE 438cf7 |
(200) 0x438cab CMP $0x4,%RSI |
(200) 0x438caf JE 438ce5 |
(200) 0x438cb1 CMP $0x5,%RSI |
(200) 0x438cb5 JE 438cd3 |
(200) 0x438cb7 CMP $0x6,%RSI |
(200) 0x438cbb JNE 438e9c |
(200) 0x438cc1 VMOVUPD (%R15,%RAX,1),%ZMM0 |
(200) 0x438cc8 VMOVUPD %ZMM0,(%RCX,%RAX,1) |
(200) 0x438ccf ADD $0x40,%RAX |
(200) 0x438cd3 VMOVUPD (%R15,%RAX,1),%ZMM2 |
(200) 0x438cda VMOVUPD %ZMM2,(%RCX,%RAX,1) |
(200) 0x438ce1 ADD $0x40,%RAX |
(200) 0x438ce5 VMOVUPD (%R15,%RAX,1),%ZMM7 |
(200) 0x438cec VMOVUPD %ZMM7,(%RCX,%RAX,1) |
(200) 0x438cf3 ADD $0x40,%RAX |
(200) 0x438cf7 VMOVUPD (%R15,%RAX,1),%ZMM6 |
(200) 0x438cfe VMOVUPD %ZMM6,(%RCX,%RAX,1) |
(200) 0x438d05 ADD $0x40,%RAX |
(200) 0x438d09 VMOVUPD (%R15,%RAX,1),%ZMM3 |
(200) 0x438d10 VMOVUPD %ZMM3,(%RCX,%RAX,1) |
(200) 0x438d17 ADD $0x40,%RAX |
(200) 0x438d1b VMOVUPD (%R15,%RAX,1),%ZMM4 |
(200) 0x438d22 VMOVUPD %ZMM4,(%RCX,%RAX,1) |
(200) 0x438d29 ADD $0x40,%RAX |
(200) 0x438d2d CMP %RAX,0x60(%RSP) |
(200) 0x438d32 JE 438dc7 |
(201) 0x438d38 VMOVUPD (%R15,%RAX,1),%ZMM5 |
(201) 0x438d3f VMOVUPD %ZMM5,(%RCX,%RAX,1) |
(201) 0x438d46 VMOVUPD 0x40(%R15,%RAX,1),%ZMM8 |
(201) 0x438d4e VMOVUPD %ZMM8,0x40(%RCX,%RAX,1) |
(201) 0x438d56 VMOVUPD 0x80(%R15,%RAX,1),%ZMM9 |
(201) 0x438d5e VMOVUPD %ZMM9,0x80(%RCX,%RAX,1) |
(201) 0x438d66 VMOVUPD 0xc0(%R15,%RAX,1),%ZMM10 |
(201) 0x438d6e VMOVUPD %ZMM10,0xc0(%RCX,%RAX,1) |
(201) 0x438d76 VMOVUPD 0x100(%R15,%RAX,1),%ZMM11 |
(201) 0x438d7e VMOVUPD %ZMM11,0x100(%RCX,%RAX,1) |
(201) 0x438d86 VMOVUPD 0x140(%R15,%RAX,1),%ZMM12 |
(201) 0x438d8e VMOVUPD %ZMM12,0x140(%RCX,%RAX,1) |
(201) 0x438d96 VMOVUPD 0x180(%R15,%RAX,1),%ZMM13 |
(201) 0x438d9e VMOVUPD %ZMM13,0x180(%RCX,%RAX,1) |
(201) 0x438da6 VMOVUPD 0x1c0(%R15,%RAX,1),%ZMM14 |
(201) 0x438dae VMOVUPD %ZMM14,0x1c0(%RCX,%RAX,1) |
(201) 0x438db6 ADD $0x200,%RAX |
(201) 0x438dbc CMP %RAX,0x60(%RSP) |
(201) 0x438dc1 JNE 438d38 |
(200) 0x438dc7 CMP %EBX,0x54(%RSP) |
(200) 0x438dcb JE 438c38 |
(200) 0x438dd1 CMPL $0x2,0x44(%RSP) |
(200) 0x438dd6 MOV 0x50(%RSP),%ECX |
(200) 0x438dda JBE 438ebe |
(200) 0x438de0 MOV 0x54(%RSP),%R15D |
(200) 0x438de5 MOV %R15D,%EAX |
(200) 0x438de8 LEA (%R10,%RAX,1),%RSI |
(200) 0x438dec ADD %R14,%RAX |
(200) 0x438def VMOVUPD (%R9,%RSI,8),%YMM15 |
(200) 0x438df5 VMOVUPD %YMM15,(%R11,%RAX,8) |
(200) 0x438dfb MOV %ECX,%EAX |
(200) 0x438dfd AND $-0x4,%EAX |
(200) 0x438e00 ADD %R15D,%EAX |
(200) 0x438e03 AND $0x3,%ECX |
(200) 0x438e06 JE 438c38 |
(200) 0x438e0c MOV 0x40(%RSP),%R14D |
(200) 0x438e11 LEA (%R14,%RAX,1),%ECX |
(200) 0x438e15 ADD %R12D,%ECX |
(200) 0x438e18 MOVSXD %ECX,%R15 |
(200) 0x438e1b VMOVSD (%R9,%R15,8),%XMM1 |
(200) 0x438e21 MOV 0x3c(%RSP),%R15D |
(200) 0x438e26 LEA 0x2(%RAX,%R15,1),%ESI |
(200) 0x438e2b MOVSXD %ESI,%RCX |
(200) 0x438e2e LEA 0x1(%RAX),%ESI |
(200) 0x438e31 ADD %RDI,%RCX |
(200) 0x438e34 VMOVSD %XMM1,(%R11,%RCX,8) |
(200) 0x438e3a CMP %ESI,%EBX |
(200) 0x438e3c JLE 438c38 |
(200) 0x438e42 LEA (%R14,%RSI,1),%ECX |
(200) 0x438e46 LEA 0x2(%RSI,%R15,1),%ESI |
(200) 0x438e4b ADD $0x2,%EAX |
(200) 0x438e4e ADD %R12D,%ECX |
(200) 0x438e51 MOVSXD %ECX,%RCX |
(200) 0x438e54 VMOVSD (%R9,%RCX,8),%XMM0 |
(200) 0x438e5a MOVSXD %ESI,%RCX |
(200) 0x438e5d ADD %RDI,%RCX |
(200) 0x438e60 VMOVSD %XMM0,(%R11,%RCX,8) |
(200) 0x438e66 CMP %EAX,%EBX |
(200) 0x438e68 JLE 438c38 |
(200) 0x438e6e LEA (%R14,%RAX,1),%R14D |
(200) 0x438e72 LEA 0x2(%RAX,%R15,1),%EAX |
(200) 0x438e77 ADD %R12D,%R14D |
(200) 0x438e7a CLTQ |
(200) 0x438e7c MOVSXD %R14D,%RSI |
(200) 0x438e7f ADD %RDI,%RAX |
(200) 0x438e82 VMOVSD (%R9,%RSI,8),%XMM2 |
(200) 0x438e88 VMOVSD %XMM2,(%R11,%RAX,8) |
(200) 0x438e8e JMP 438c38 |
0x438e93 INC %EAX |
0x438e95 XOR %EDX,%EDX |
0x438e97 JMP 438a11 |
(200) 0x438e9c VMOVUPD (%R15),%ZMM1 |
(200) 0x438ea2 MOV $0x40,%EAX |
(200) 0x438ea7 VMOVUPD %ZMM1,(%RCX) |
(200) 0x438ead JMP 438cc1 |
(200) 0x438eb2 MOV %EBX,%ECX |
(200) 0x438eb4 XOR %EAX,%EAX |
(200) 0x438eb6 XOR %R15D,%R15D |
(200) 0x438eb9 JMP 438de8 |
(200) 0x438ebe MOV 0x54(%RSP),%EAX |
(200) 0x438ec2 JMP 438e0c |
0x438ec7 NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | pack_kernel.cpp:156-160 |
Module | exec |
nb instructions | 90 |
nb uops | 96 |
loop length | 315 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 16.00 cycles |
front end | 16.00 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.30 | 7.20 | 6.33 | 6.33 | 10.00 | 7.20 | 7.10 | 10.00 | 10.00 | 10.00 | 7.20 | 6.33 |
cycles | 7.30 | 11.30 | 6.33 | 6.33 | 10.00 | 7.20 | 7.10 | 10.00 | 10.00 | 10.00 | 7.20 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.50-15.51 |
Stall cycles | 0.00 |
Front-end | 16.00 |
Dispatch | 11.30 |
DIV/SQRT | 6.00 |
Overall L1 | 16.00 |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 9% |
load | 12% |
store | 8% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x28(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 438e93 <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x4d3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 438c5a <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x29a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x20(%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x24(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 438c5a <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x29a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RBX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %EBX,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x8(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EDX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R15,%RSI,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RSI,%R15,1),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $0x7,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (,%R11,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x10(%R14),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EBX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EAX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %ESI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R12D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R8,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R10,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 438a11 <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | pack_kernel.cpp:156-160 |
Module | exec |
nb instructions | 90 |
nb uops | 96 |
loop length | 315 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 16.00 cycles |
front end | 16.00 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.30 | 7.20 | 6.33 | 6.33 | 10.00 | 7.20 | 7.10 | 10.00 | 10.00 | 10.00 | 7.20 | 6.33 |
cycles | 7.30 | 11.30 | 6.33 | 6.33 | 10.00 | 7.20 | 7.10 | 10.00 | 10.00 | 10.00 | 7.20 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.50-15.51 |
Stall cycles | 0.00 |
Front-end | 16.00 |
Dispatch | 11.30 |
DIV/SQRT | 6.00 |
Overall L1 | 16.00 |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 9% |
load | 12% |
store | 8% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x28(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 438e93 <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x4d3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 438c5a <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x29a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x20(%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x24(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 438c5a <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x29a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RBX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %EBX,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x8(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EDX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R15,%RSI,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RSI,%R15,1),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $0x7,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (,%R11,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x3,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x10(%R14),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EBX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EAX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %ESI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R12D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R8,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R10,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14D,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 438a11 <_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z27clover_unpack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0– | 0.01 | 0.01 |
▼Loop 200 - pack_kernel.cpp:156-160 - exec– | 0.01 | 0.02 |
○Loop 201 - pack_kernel.cpp:158-160 - exec | 0 | 0 |
○Loop 199 - pack_kernel.cpp:158-160 - exec | 0 | 0 |