Function: _Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdE ... | Module: exec | Source: pack_kernel.cpp:55-59 [...] | Coverage: 0.02% |
---|
Function: _Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdE ... | Module: exec | Source: pack_kernel.cpp:55-59 [...] | Coverage: 0.02% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/pack_kernel.cpp: 55 - 59 |
-------------------------------------------------------------------------------- |
55: #pragma omp parallel for simd |
56: for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { |
57: for (int j = 0; j < depth; ++j) { |
58: int index = buffer_offset + j + k * depth; |
59: left_snd[index] = field(x_min + x_inc - 1 + j + 2, k); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x43bde0 PUSH %RBP |
0x43bde1 MOV %RSP,%RBP |
0x43bde4 PUSH %R15 |
0x43bde6 PUSH %R14 |
0x43bde8 PUSH %R13 |
0x43bdea PUSH %R12 |
0x43bdec MOV %RDI,%R12 |
0x43bdef PUSH %RBX |
0x43bdf0 AND $-0x40,%RSP |
0x43bdf4 ADD $-0x80,%RSP |
0x43bdf8 MOV 0x1c(%RDI),%EBX |
0x43bdfb MOV 0x14(%RDI),%R14D |
0x43bdff CALL 4046c0 <omp_get_num_threads@plt> |
0x43be04 MOV %EAX,%R13D |
0x43be07 SUB %EBX,%R14D |
0x43be0a CALL 4045b0 <omp_get_thread_num@plt> |
0x43be0f INC %R14D |
0x43be12 MOV %EAX,%ECX |
0x43be14 MOV 0x18(%R12),%EAX |
0x43be19 ADD 0x28(%R12),%EAX |
0x43be1e LEA 0x2(%RBX,%RAX,1),%EAX |
0x43be22 SUB %R14D,%EAX |
0x43be25 CLTD |
0x43be26 IDIV %R13D |
0x43be29 CMP %EDX,%ECX |
0x43be2b JL 43c2b6 |
0x43be31 IMUL %EAX,%ECX |
0x43be34 ADD %EDX,%ECX |
0x43be36 ADD %ECX,%EAX |
0x43be38 CMP %EAX,%ECX |
0x43be3a JGE 43c080 |
0x43be40 MOV 0x20(%R12),%R10D |
0x43be45 ADD %R14D,%EAX |
0x43be48 MOVSXD 0x24(%R12),%R15 |
0x43be4d ADD %R14D,%ECX |
0x43be50 MOVSXD 0x10(%R12),%RSI |
0x43be55 MOV (%R12),%RDI |
0x43be59 MOV %EAX,0x78(%RSP) |
0x43be5d MOV %R10D,0x40(%RSP) |
0x43be62 MOV 0x8(%R12),%R13 |
0x43be67 TEST %EBX,%EBX |
0x43be69 JLE 43c080 |
0x43be6f MOV %EBX,%R12D |
0x43be72 MOV (%RDI),%RAX |
0x43be75 MOVSXD %EBX,%R11 |
0x43be78 MOVSXD %R10D,%R8 |
0x43be7b IMUL %ECX,%R12D |
0x43be7f MOV %R11,0x68(%RSP) |
0x43be84 MOV 0x10(%RDI),%R9 |
0x43be88 LEA (,%R11,8),%R14 |
0x43be90 MOV %RAX,0x70(%RSP) |
0x43be95 MOV %EBX,%EDI |
0x43be97 LEA (%R15,%RSI,1),%EAX |
0x43be9b LEA (%RSI,%R15,1),%R15 |
0x43be9f MOV %EBX,%ESI |
0x43bea1 LEA -0x1(%RBX),%EDX |
0x43bea4 SHR $0x3,%EDI |
0x43bea7 MOV %R15,0x58(%RSP) |
0x43beac MOVSXD %R12D,%R11 |
0x43beaf AND $0x7,%ESI |
0x43beb2 MOV %EDX,0x7c(%RSP) |
0x43beb6 SAL $0x6,%RDI |
0x43beba ADD %R8,%R11 |
0x43bebd MOV %EBX,%R8D |
0x43bec0 MOVSXD %EAX,%RDX |
0x43bec3 MOV %ESI,0x50(%RSP) |
0x43bec7 AND $-0x8,%R8D |
0x43becb DEC %EAX |
0x43becd DEC %ESI |
0x43becf MOV %RDI,0x60(%RSP) |
0x43bed4 MOV %R8D,0x54(%RSP) |
0x43bed9 MOV 0x8(%R13),%R13 |
0x43bedd LEA (,%R11,8),%R10 |
0x43bee5 MOVSXD %ECX,%RCX |
0x43bee8 MOV %RDX,0x48(%RSP) |
0x43beed MOV %EAX,0x3c(%RSP) |
0x43bef1 MOV %ESI,0x44(%RSP) |
0x43bef5 NOPL (%RAX) |
(216) 0x43bef8 MOV 0x70(%RSP),%RDI |
(216) 0x43befd LEA (%R13,%R10,1),%RAX |
(216) 0x43bf02 IMUL %RCX,%RDI |
(216) 0x43bf06 CMPL $0x2,0x7c(%RSP) |
(216) 0x43bf0b JBE 43bf32 |
(216) 0x43bf0d MOV 0x58(%RSP),%R8 |
(216) 0x43bf12 MOV %RAX,%RSI |
(216) 0x43bf15 LEA (%R8,%RDI,1),%R15 |
(216) 0x43bf19 LEA 0x10(,%R15,8),%RDX |
(216) 0x43bf21 LEA (%R9,%RDX,1),%R8 |
(216) 0x43bf25 SUB %R8,%RSI |
(216) 0x43bf28 CMP $0x30,%RSI |
(216) 0x43bf2c JA 43c090 |
(216) 0x43bf32 MOV 0x48(%RSP),%R8 |
(216) 0x43bf37 XOR %EDX,%EDX |
(216) 0x43bf39 ADD %R8,%RDI |
(216) 0x43bf3c LEA (%R9,%RDI,8),%R15 |
(216) 0x43bf40 LEA -0x8(%R14),%RDI |
(216) 0x43bf44 SHR $0x3,%RDI |
(216) 0x43bf48 INC %RDI |
(216) 0x43bf4b AND $0x7,%EDI |
(216) 0x43bf4e JE 43bfec |
(216) 0x43bf54 CMP $0x1,%RDI |
(216) 0x43bf58 JE 43bfd7 |
(216) 0x43bf5a CMP $0x2,%RDI |
(216) 0x43bf5e JE 43bfc7 |
(216) 0x43bf60 CMP $0x3,%RDI |
(216) 0x43bf64 JE 43bfb7 |
(216) 0x43bf66 CMP $0x4,%RDI |
(216) 0x43bf6a JE 43bfa7 |
(216) 0x43bf6c CMP $0x5,%RDI |
(216) 0x43bf70 JE 43bf97 |
(216) 0x43bf72 CMP $0x6,%RDI |
(216) 0x43bf76 JE 43bf87 |
(216) 0x43bf78 VMOVSD 0x8(%R15),%XMM7 |
(216) 0x43bf7e MOV $0x8,%EDX |
(216) 0x43bf83 VMOVSD %XMM7,(%RAX) |
(216) 0x43bf87 VMOVSD 0x8(%R15,%RDX,1),%XMM6 |
(216) 0x43bf8e VMOVSD %XMM6,(%RAX,%RDX,1) |
(216) 0x43bf93 ADD $0x8,%RDX |
(216) 0x43bf97 VMOVSD 0x8(%R15,%RDX,1),%XMM3 |
(216) 0x43bf9e VMOVSD %XMM3,(%RAX,%RDX,1) |
(216) 0x43bfa3 ADD $0x8,%RDX |
(216) 0x43bfa7 VMOVSD 0x8(%R15,%RDX,1),%XMM4 |
(216) 0x43bfae VMOVSD %XMM4,(%RAX,%RDX,1) |
(216) 0x43bfb3 ADD $0x8,%RDX |
(216) 0x43bfb7 VMOVSD 0x8(%R15,%RDX,1),%XMM5 |
(216) 0x43bfbe VMOVSD %XMM5,(%RAX,%RDX,1) |
(216) 0x43bfc3 ADD $0x8,%RDX |
(216) 0x43bfc7 VMOVSD 0x8(%R15,%RDX,1),%XMM8 |
(216) 0x43bfce VMOVSD %XMM8,(%RAX,%RDX,1) |
(216) 0x43bfd3 ADD $0x8,%RDX |
(216) 0x43bfd7 VMOVSD 0x8(%R15,%RDX,1),%XMM9 |
(216) 0x43bfde VMOVSD %XMM9,(%RAX,%RDX,1) |
(216) 0x43bfe3 ADD $0x8,%RDX |
(216) 0x43bfe7 CMP %R14,%RDX |
(216) 0x43bfea JE 43c060 |
(215) 0x43bfec VMOVSD 0x8(%R15,%RDX,1),%XMM10 |
(215) 0x43bff3 VMOVSD %XMM10,(%RAX,%RDX,1) |
(215) 0x43bff8 VMOVSD 0x10(%R15,%RDX,1),%XMM11 |
(215) 0x43bfff VMOVSD %XMM11,0x8(%RDX,%RAX,1) |
(215) 0x43c005 VMOVSD 0x18(%R15,%RDX,1),%XMM12 |
(215) 0x43c00c VMOVSD %XMM12,0x10(%RDX,%RAX,1) |
(215) 0x43c012 VMOVSD 0x20(%R15,%RDX,1),%XMM13 |
(215) 0x43c019 VMOVSD %XMM13,0x18(%RDX,%RAX,1) |
(215) 0x43c01f VMOVSD 0x28(%R15,%RDX,1),%XMM14 |
(215) 0x43c026 VMOVSD %XMM14,0x20(%RDX,%RAX,1) |
(215) 0x43c02c VMOVSD 0x30(%R15,%RDX,1),%XMM15 |
(215) 0x43c033 VMOVSD %XMM15,0x28(%RDX,%RAX,1) |
(215) 0x43c039 VMOVSD 0x38(%R15,%RDX,1),%XMM1 |
(215) 0x43c040 VMOVSD %XMM1,0x30(%RDX,%RAX,1) |
(215) 0x43c046 VMOVSD 0x40(%R15,%RDX,1),%XMM0 |
(215) 0x43c04d ADD $0x40,%RDX |
(215) 0x43c051 VMOVSD %XMM0,-0x8(%RDX,%RAX,1) |
(215) 0x43c057 CMP %R14,%RDX |
(215) 0x43c05a JNE 43bfec |
(216) 0x43c05c NOPL (%RAX) |
(216) 0x43c060 MOV 0x68(%RSP),%RAX |
(216) 0x43c065 INC %RCX |
(216) 0x43c068 ADD %EBX,%R12D |
(216) 0x43c06b ADD %R14,%R10 |
(216) 0x43c06e LEA (%RCX),%ESI |
(216) 0x43c070 ADD %RAX,%R11 |
(216) 0x43c073 CMP %ESI,0x78(%RSP) |
(216) 0x43c077 JG 43bef8 |
0x43c07d VZEROUPPER |
0x43c080 LEA -0x28(%RBP),%RSP |
0x43c084 POP %RBX |
0x43c085 POP %R12 |
0x43c087 POP %R13 |
0x43c089 POP %R14 |
0x43c08b POP %R15 |
0x43c08d POP %RBP |
0x43c08e RET |
0x43c08f NOP |
(216) 0x43c090 CMPL $0x6,0x7c(%RSP) |
(216) 0x43c095 JBE 43c2d5 |
(216) 0x43c09b MOV 0x60(%RSP),%RSI |
(216) 0x43c0a0 LEA -0x8(%R9,%RDX,1),%R8 |
(216) 0x43c0a5 XOR %EDX,%EDX |
(216) 0x43c0a7 SUB $0x40,%RSI |
(216) 0x43c0ab SHR $0x6,%RSI |
(216) 0x43c0af INC %RSI |
(216) 0x43c0b2 AND $0x7,%ESI |
(216) 0x43c0b5 JE 43c15a |
(216) 0x43c0bb CMP $0x1,%RSI |
(216) 0x43c0bf JE 43c13d |
(216) 0x43c0c1 CMP $0x2,%RSI |
(216) 0x43c0c5 JE 43c12b |
(216) 0x43c0c7 CMP $0x3,%RSI |
(216) 0x43c0cb JE 43c119 |
(216) 0x43c0cd CMP $0x4,%RSI |
(216) 0x43c0d1 JE 43c107 |
(216) 0x43c0d3 CMP $0x5,%RSI |
(216) 0x43c0d7 JE 43c0f5 |
(216) 0x43c0d9 CMP $0x6,%RSI |
(216) 0x43c0dd JNE 43c2bf |
(216) 0x43c0e3 VMOVUPD (%R8,%RDX,1),%ZMM0 |
(216) 0x43c0ea VMOVUPD %ZMM0,(%RAX,%RDX,1) |
(216) 0x43c0f1 ADD $0x40,%RDX |
(216) 0x43c0f5 VMOVUPD (%R8,%RDX,1),%ZMM2 |
(216) 0x43c0fc VMOVUPD %ZMM2,(%RAX,%RDX,1) |
(216) 0x43c103 ADD $0x40,%RDX |
(216) 0x43c107 VMOVUPD (%R8,%RDX,1),%ZMM7 |
(216) 0x43c10e VMOVUPD %ZMM7,(%RAX,%RDX,1) |
(216) 0x43c115 ADD $0x40,%RDX |
(216) 0x43c119 VMOVUPD (%R8,%RDX,1),%ZMM6 |
(216) 0x43c120 VMOVUPD %ZMM6,(%RAX,%RDX,1) |
(216) 0x43c127 ADD $0x40,%RDX |
(216) 0x43c12b VMOVUPD (%R8,%RDX,1),%ZMM3 |
(216) 0x43c132 VMOVUPD %ZMM3,(%RAX,%RDX,1) |
(216) 0x43c139 ADD $0x40,%RDX |
(216) 0x43c13d VMOVUPD (%R8,%RDX,1),%ZMM4 |
(216) 0x43c144 VMOVUPD %ZMM4,(%RAX,%RDX,1) |
(216) 0x43c14b ADD $0x40,%RDX |
(216) 0x43c14f CMP %RDX,0x60(%RSP) |
(216) 0x43c154 JE 43c1ea |
(217) 0x43c15a VMOVUPD (%R8,%RDX,1),%ZMM5 |
(217) 0x43c161 VMOVUPD %ZMM5,(%RAX,%RDX,1) |
(217) 0x43c168 VMOVUPD 0x40(%R8,%RDX,1),%ZMM8 |
(217) 0x43c170 VMOVUPD %ZMM8,0x40(%RDX,%RAX,1) |
(217) 0x43c178 VMOVUPD 0x80(%R8,%RDX,1),%ZMM9 |
(217) 0x43c180 VMOVUPD %ZMM9,0x80(%RDX,%RAX,1) |
(217) 0x43c188 VMOVUPD 0xc0(%R8,%RDX,1),%ZMM10 |
(217) 0x43c190 VMOVUPD %ZMM10,0xc0(%RDX,%RAX,1) |
(217) 0x43c198 VMOVUPD 0x100(%R8,%RDX,1),%ZMM11 |
(217) 0x43c1a0 VMOVUPD %ZMM11,0x100(%RDX,%RAX,1) |
(217) 0x43c1a8 VMOVUPD 0x140(%R8,%RDX,1),%ZMM12 |
(217) 0x43c1b0 VMOVUPD %ZMM12,0x140(%RDX,%RAX,1) |
(217) 0x43c1b8 VMOVUPD 0x180(%R8,%RDX,1),%ZMM13 |
(217) 0x43c1c0 VMOVUPD %ZMM13,0x180(%RDX,%RAX,1) |
(217) 0x43c1c8 VMOVUPD 0x1c0(%R8,%RDX,1),%ZMM14 |
(217) 0x43c1d0 ADD $0x200,%RDX |
(217) 0x43c1d7 VMOVUPD %ZMM14,-0x40(%RDX,%RAX,1) |
(217) 0x43c1df CMP %RDX,0x60(%RSP) |
(217) 0x43c1e4 JNE 43c15a |
(216) 0x43c1ea CMP %EBX,0x54(%RSP) |
(216) 0x43c1ee JE 43c060 |
(216) 0x43c1f4 CMPL $0x2,0x44(%RSP) |
(216) 0x43c1f9 MOV 0x50(%RSP),%EDX |
(216) 0x43c1fd JBE 43c2e0 |
(216) 0x43c203 MOV 0x54(%RSP),%ESI |
(216) 0x43c207 MOV %ESI,%EAX |
(216) 0x43c209 LEA 0x1(%R15,%RAX,1),%R15 |
(216) 0x43c20e ADD %R11,%RAX |
(216) 0x43c211 VMOVUPD (%R9,%R15,8),%YMM15 |
(216) 0x43c217 VMOVUPD %YMM15,(%R13,%RAX,8) |
(216) 0x43c21e MOV %EDX,%EAX |
(216) 0x43c220 AND $-0x4,%EAX |
(216) 0x43c223 ADD %ESI,%EAX |
(216) 0x43c225 AND $0x3,%EDX |
(216) 0x43c228 JE 43c060 |
(216) 0x43c22e MOV 0x3c(%RSP),%R15D |
(216) 0x43c233 LEA 0x2(%RAX,%R15,1),%R8D |
(216) 0x43c238 MOVSXD %R8D,%RSI |
(216) 0x43c23b MOV 0x40(%RSP),%R8D |
(216) 0x43c240 ADD %RDI,%RSI |
(216) 0x43c243 LEA (%R8,%RAX,1),%EDX |
(216) 0x43c247 VMOVSD (%R9,%RSI,8),%XMM1 |
(216) 0x43c24d ADD %R12D,%EDX |
(216) 0x43c250 MOVSXD %EDX,%RSI |
(216) 0x43c253 LEA 0x1(%RAX),%EDX |
(216) 0x43c256 VMOVSD %XMM1,(%R13,%RSI,8) |
(216) 0x43c25d CMP %EDX,%EBX |
(216) 0x43c25f JLE 43c060 |
(216) 0x43c265 LEA 0x2(%RDX,%R15,1),%ESI |
(216) 0x43c26a ADD %R8D,%EDX |
(216) 0x43c26d ADD $0x2,%EAX |
(216) 0x43c270 MOVSXD %ESI,%RSI |
(216) 0x43c273 ADD %R12D,%EDX |
(216) 0x43c276 ADD %RDI,%RSI |
(216) 0x43c279 MOVSXD %EDX,%RDX |
(216) 0x43c27c VMOVSD (%R9,%RSI,8),%XMM0 |
(216) 0x43c282 VMOVSD %XMM0,(%R13,%RDX,8) |
(216) 0x43c289 CMP %EAX,%EBX |
(216) 0x43c28b JLE 43c060 |
(216) 0x43c291 LEA 0x2(%RAX,%R15,1),%R15D |
(216) 0x43c296 ADD %R8D,%EAX |
(216) 0x43c299 MOVSXD %R15D,%RSI |
(216) 0x43c29c ADD %R12D,%EAX |
(216) 0x43c29f ADD %RDI,%RSI |
(216) 0x43c2a2 CLTQ |
(216) 0x43c2a4 VMOVSD (%R9,%RSI,8),%XMM2 |
(216) 0x43c2aa VMOVSD %XMM2,(%R13,%RAX,8) |
(216) 0x43c2b1 JMP 43c060 |
0x43c2b6 INC %EAX |
0x43c2b8 XOR %EDX,%EDX |
0x43c2ba JMP 43be31 |
(216) 0x43c2bf VMOVUPD (%R8),%ZMM1 |
(216) 0x43c2c5 MOV $0x40,%EDX |
(216) 0x43c2ca VMOVUPD %ZMM1,(%RAX) |
(216) 0x43c2d0 JMP 43c0e3 |
(216) 0x43c2d5 MOV %EBX,%EDX |
(216) 0x43c2d7 XOR %EAX,%EAX |
(216) 0x43c2d9 XOR %ESI,%ESI |
(216) 0x43c2db JMP 43c209 |
(216) 0x43c2e0 MOV 0x54(%RSP),%EAX |
(216) 0x43c2e4 JMP 43c22e |
0x43c2e9 NOPL (%RAX) |
Path / |
Source file and lines | pack_kernel.cpp:55-59 |
Module | exec |
nb instructions | 92 |
nb uops | 98 |
loop length | 315 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.70 | 7.60 | 6.33 | 6.33 | 10.00 | 7.67 | 7.50 | 10.00 | 10.00 | 10.00 | 7.53 | 6.33 |
cycles | 7.70 | 11.67 | 6.33 | 6.33 | 10.00 | 7.67 | 7.50 | 10.00 | 10.00 | 10.00 | 7.53 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.84-15.83 |
Stall cycles | 0.00 |
Front-end | 16.33 |
Dispatch | 11.67 |
DIV/SQRT | 6.00 |
Overall L1 | 16.33 |
Vectorization ratios |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
Vector efficiency ratios |
all | 8% |
load | 8% |
store | 8% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x28(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 43c2b6 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x4d6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43c080 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R12),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x24(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R12),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c080 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOVSXD %R10D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
IMUL %ECX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R11,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R11,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R15,%RSI,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RSI,%R15,1),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x1(%RBX),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x3,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R15,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R12D,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
AND $0x7,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EBX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %EAX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %ESI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x8,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8D,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R11,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %ECX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43be31 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | pack_kernel.cpp:55-59 |
Module | exec |
nb instructions | 92 |
nb uops | 98 |
loop length | 315 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 13 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.70 | 7.60 | 6.33 | 6.33 | 10.00 | 7.67 | 7.50 | 10.00 | 10.00 | 10.00 | 7.53 | 6.33 |
cycles | 7.70 | 11.67 | 6.33 | 6.33 | 10.00 | 7.67 | 7.50 | 10.00 | 10.00 | 10.00 | 7.53 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.84-15.83 |
Stall cycles | 0.00 |
Front-end | 16.33 |
Dispatch | 11.67 |
DIV/SQRT | 6.00 |
Overall L1 | 16.33 |
Vectorization ratios |
all | 3% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
Vector efficiency ratios |
all | 8% |
load | 8% |
store | 8% |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x28(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 43c2b6 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x4d6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43c080 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R12),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x24(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R12),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c080 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2a0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOVSXD %R10D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
IMUL %ECX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R11,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R11,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R15,%RSI,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RSI,%R15,1),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA -0x1(%RBX),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x3,%EDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R15,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R12D,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
AND $0x7,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R8,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EBX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %EAX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %ESI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x8,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DEC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8D,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R11,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %ECX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43be31 <_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z24clover_pack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0– | 0.02 | 0.01 |
▼Loop 216 - pack_kernel.cpp:55-59 - exec– | 0.01 | 0.02 |
○Loop 215 - pack_kernel.cpp:57-59 - exec | 0 | 0 |
○Loop 217 - pack_kernel.cpp:57-59 - exec | 0 | 0 |