Function: _Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DI ... | Module: exec | Source: pack_kernel.cpp:88-92 [...] | Coverage: 0.01% |
---|
Function: _Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DI ... | Module: exec | Source: pack_kernel.cpp:88-92 [...] | Coverage: 0.01% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/pack_kernel.cpp: 88 - 92 |
-------------------------------------------------------------------------------- |
88: #pragma omp parallel for simd |
89: for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { |
90: for (int j = 0; j < depth; ++j) { |
91: int index = buffer_offset + j + k * depth; |
92: field(x_min - j, k) = left_rcv[index]; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x438ed0 PUSH %RBP |
0x438ed1 MOV %RSP,%RBP |
0x438ed4 PUSH %R15 |
0x438ed6 PUSH %R14 |
0x438ed8 PUSH %R13 |
0x438eda PUSH %R12 |
0x438edc MOV %RDI,%R12 |
0x438edf PUSH %RBX |
0x438ee0 AND $-0x40,%RSP |
0x438ee4 ADD $-0x80,%RSP |
0x438ee8 MOV 0x1c(%RDI),%EBX |
0x438eeb MOV 0x14(%RDI),%R14D |
0x438eef CALL 4046c0 <omp_get_num_threads@plt> |
0x438ef4 MOV %EAX,%R13D |
0x438ef7 SUB %EBX,%R14D |
0x438efa CALL 4045b0 <omp_get_thread_num@plt> |
0x438eff INC %R14D |
0x438f02 MOV %EAX,%ECX |
0x438f04 MOV 0x18(%R12),%EAX |
0x438f09 ADD 0x24(%R12),%EAX |
0x438f0e LEA 0x2(%RBX,%RAX,1),%EAX |
0x438f12 SUB %R14D,%EAX |
0x438f15 CLTD |
0x438f16 IDIV %R13D |
0x438f19 CMP %EDX,%ECX |
0x438f1b JL 439449 |
0x438f21 IMUL %EAX,%ECX |
0x438f24 ADD %EDX,%ECX |
0x438f26 ADD %ECX,%EAX |
0x438f28 CMP %EAX,%ECX |
0x438f2a JGE 43919d |
0x438f30 MOV 0x20(%R12),%EDI |
0x438f35 MOV 0x10(%R12),%R9D |
0x438f3a ADD %R14D,%EAX |
0x438f3d ADD %R14D,%ECX |
0x438f40 MOV 0x8(%R12),%R8 |
0x438f45 MOV (%R12),%R14 |
0x438f49 MOV %EAX,0x78(%RSP) |
0x438f4d MOV %EDI,0x38(%RSP) |
0x438f51 MOV %R9D,0x34(%RSP) |
0x438f56 TEST %EBX,%EBX |
0x438f58 JLE 43919d |
0x438f5e MOV %EBX,%R12D |
0x438f61 MOV (%R14),%R13 |
0x438f64 MOV 0x8(%R8),%R11 |
0x438f68 MOVSXD %EDI,%RSI |
0x438f6b IMUL %ECX,%R12D |
0x438f6f MOV 0x10(%R14),%R8 |
0x438f73 MOVSXD %EBX,%R15 |
0x438f76 MOV %EBX,%EDX |
0x438f78 MOV %R13,0x70(%RSP) |
0x438f7d MOV %EBX,%R13D |
0x438f80 SHR $0x3,%EDX |
0x438f83 MOV %EBX,%EDI |
0x438f85 AND $0x7,%R13D |
0x438f89 MOV %R15,0x68(%RSP) |
0x438f8e MOVSXD %R9D,%R14 |
0x438f91 SAL $0x3,%R15 |
0x438f95 MOVSXD %R12D,%R10 |
0x438f98 MOV %R8,%R9 |
0x438f9b MOV %R13D,0x48(%RSP) |
0x438fa0 LEA -0x1(%RBX),%EAX |
0x438fa3 ADD %RSI,%R10 |
0x438fa6 MOV $0x8,%ESI |
0x438fab SAL $0x6,%RDX |
0x438faf AND $-0x8,%EDI |
0x438fb2 SUB %R15,%R9 |
0x438fb5 SUB %R15,%RSI |
0x438fb8 DEC %R13D |
0x438fbb MOV %EAX,0x7c(%RSP) |
0x438fbf MOV %RDX,0x50(%RSP) |
0x438fc4 VMOVDQA64 0x2a572(%RIP),%ZMM0 |
0x438fce MOVSXD %ECX,%RCX |
0x438fd1 MOV %EDI,0x4c(%RSP) |
0x438fd5 MOV %R9,0x40(%RSP) |
0x438fda MOV %RSI,0x60(%RSP) |
0x438fdf MOV %R13D,0x3c(%RSP) |
0x438fe4 NOPL (%RAX) |
(203) 0x438fe8 MOV 0x70(%RSP),%RAX |
(203) 0x438fed LEA (,%R10,8),%RDX |
(203) 0x438ff5 IMUL %RCX,%RAX |
(203) 0x438ff9 CMPL $0x2,0x7c(%RSP) |
(203) 0x438ffe JBE 43903e |
(203) 0x439000 MOV 0x60(%RSP),%RDI |
(203) 0x439005 LEA (%R14,%RAX,1),%RSI |
(203) 0x439009 LEA (,%R10,8),%RDX |
(203) 0x439011 SAL $0x3,%RSI |
(203) 0x439015 LEA (%R11,%RDX,1),%R13 |
(203) 0x439019 LEA (%RDI,%RSI,1),%R9 |
(203) 0x43901d LEA (%R15,%RDX,1),%RDI |
(203) 0x439021 ADD %R8,%R9 |
(203) 0x439024 ADD %R11,%RDI |
(203) 0x439027 CMP %RDI,%R9 |
(203) 0x43902a JAE 4391b0 |
(203) 0x439030 LEA 0x8(%R8,%RSI,1),%R9 |
(203) 0x439035 CMP %R9,%R13 |
(203) 0x439038 JAE 4391b0 |
(203) 0x43903e LEA (%R14,%RAX,1),%RDI |
(203) 0x439042 MOV 0x40(%RSP),%RAX |
(203) 0x439047 ADD %R11,%RDX |
(203) 0x43904a SAL $0x3,%RDI |
(203) 0x43904e LEA (%R8,%RDI,1),%R13 |
(203) 0x439052 ADD %RAX,%RDI |
(203) 0x439055 MOV %R13,%RSI |
(203) 0x439058 SUB %RDI,%RSI |
(203) 0x43905b SUB $0x8,%RSI |
(203) 0x43905f SHR $0x3,%RSI |
(203) 0x439063 INC %RSI |
(203) 0x439066 AND $0x7,%ESI |
(203) 0x439069 JE 43911a |
(203) 0x43906f CMP $0x1,%RSI |
(203) 0x439073 JE 439103 |
(203) 0x439079 CMP $0x2,%RSI |
(203) 0x43907d JE 4390f1 |
(203) 0x43907f CMP $0x3,%RSI |
(203) 0x439083 JE 4390df |
(203) 0x439085 CMP $0x4,%RSI |
(203) 0x439089 JE 4390cd |
(203) 0x43908b CMP $0x5,%RSI |
(203) 0x43908f JE 4390bb |
(203) 0x439091 CMP $0x6,%RSI |
(203) 0x439095 JE 4390a9 |
(203) 0x439097 VMOVSD (%RDX),%XMM5 |
(203) 0x43909b SUB $0x8,%R13 |
(203) 0x43909f ADD $0x8,%RDX |
(203) 0x4390a3 VMOVSD %XMM5,0x8(%R13) |
(203) 0x4390a9 VMOVSD (%RDX),%XMM6 |
(203) 0x4390ad SUB $0x8,%R13 |
(203) 0x4390b1 ADD $0x8,%RDX |
(203) 0x4390b5 VMOVSD %XMM6,0x8(%R13) |
(203) 0x4390bb VMOVSD (%RDX),%XMM7 |
(203) 0x4390bf SUB $0x8,%R13 |
(203) 0x4390c3 ADD $0x8,%RDX |
(203) 0x4390c7 VMOVSD %XMM7,0x8(%R13) |
(203) 0x4390cd VMOVSD (%RDX),%XMM8 |
(203) 0x4390d1 SUB $0x8,%R13 |
(203) 0x4390d5 ADD $0x8,%RDX |
(203) 0x4390d9 VMOVSD %XMM8,0x8(%R13) |
(203) 0x4390df VMOVSD (%RDX),%XMM9 |
(203) 0x4390e3 SUB $0x8,%R13 |
(203) 0x4390e7 ADD $0x8,%RDX |
(203) 0x4390eb VMOVSD %XMM9,0x8(%R13) |
(203) 0x4390f1 VMOVSD (%RDX),%XMM10 |
(203) 0x4390f5 SUB $0x8,%R13 |
(203) 0x4390f9 ADD $0x8,%RDX |
(203) 0x4390fd VMOVSD %XMM10,0x8(%R13) |
(203) 0x439103 VMOVSD (%RDX),%XMM11 |
(203) 0x439107 SUB $0x8,%R13 |
(203) 0x43910b ADD $0x8,%RDX |
(203) 0x43910f VMOVSD %XMM11,0x8(%R13) |
(203) 0x439115 CMP %R13,%RDI |
(203) 0x439118 JE 439180 |
(202) 0x43911a VMOVSD (%RDX),%XMM12 |
(202) 0x43911e SUB $0x40,%R13 |
(202) 0x439122 ADD $0x40,%RDX |
(202) 0x439126 VMOVSD %XMM12,0x40(%R13) |
(202) 0x43912c VMOVSD -0x38(%RDX),%XMM13 |
(202) 0x439131 VMOVSD %XMM13,0x38(%R13) |
(202) 0x439137 VMOVSD -0x30(%RDX),%XMM14 |
(202) 0x43913c VMOVSD %XMM14,0x30(%R13) |
(202) 0x439142 VMOVSD -0x28(%RDX),%XMM15 |
(202) 0x439147 VMOVSD %XMM15,0x28(%R13) |
(202) 0x43914d VMOVSD -0x20(%RDX),%XMM1 |
(202) 0x439152 VMOVSD %XMM1,0x20(%R13) |
(202) 0x439158 VMOVSD -0x18(%RDX),%XMM2 |
(202) 0x43915d VMOVSD %XMM2,0x18(%R13) |
(202) 0x439163 VMOVSD -0x10(%RDX),%XMM3 |
(202) 0x439168 VMOVSD %XMM3,0x10(%R13) |
(202) 0x43916e VMOVSD -0x8(%RDX),%XMM4 |
(202) 0x439173 VMOVSD %XMM4,0x8(%R13) |
(202) 0x439179 CMP %R13,%RDI |
(202) 0x43917c JNE 43911a |
(203) 0x43917e XCHG %AX,%AX |
(203) 0x439180 MOV 0x68(%RSP),%R9 |
(203) 0x439185 INC %RCX |
(203) 0x439188 ADD %EBX,%R12D |
(203) 0x43918b LEA (%RCX),%EDX |
(203) 0x43918d ADD %R9,%R10 |
(203) 0x439190 CMP %EDX,0x78(%RSP) |
(203) 0x439194 JG 438fe8 |
0x43919a VZEROUPPER |
0x43919d LEA -0x28(%RBP),%RSP |
0x4391a1 POP %RBX |
0x4391a2 POP %R12 |
0x4391a4 POP %R13 |
0x4391a6 POP %R14 |
0x4391a8 POP %R15 |
0x4391aa POP %RBP |
0x4391ab RET |
0x4391ac NOPL (%RAX) |
(203) 0x4391b0 CMPL $0x6,0x7c(%RSP) |
(203) 0x4391b5 JBE 439471 |
(203) 0x4391bb MOV 0x50(%RSP),%R9 |
(203) 0x4391c0 MOV %R13,%RDX |
(203) 0x4391c3 LEA -0x38(%R8,%RSI,1),%RDI |
(203) 0x4391c8 ADD %R13,%R9 |
(203) 0x4391cb MOV %R9,0x58(%RSP) |
(203) 0x4391d0 MOV 0x50(%RSP),%R9 |
(203) 0x4391d5 SUB $0x40,%R9 |
(203) 0x4391d9 SHR $0x6,%R9 |
(203) 0x4391dd INC %R9 |
(203) 0x4391e0 AND $0x7,%R9D |
(203) 0x4391e4 JE 4392be |
(203) 0x4391ea CMP $0x1,%R9 |
(203) 0x4391ee JE 439297 |
(203) 0x4391f4 CMP $0x2,%R9 |
(203) 0x4391f8 JE 43927e |
(203) 0x4391fe CMP $0x3,%R9 |
(203) 0x439202 JE 439265 |
(203) 0x439204 CMP $0x4,%R9 |
(203) 0x439208 JE 43924c |
(203) 0x43920a CMP $0x5,%R9 |
(203) 0x43920e JE 439233 |
(203) 0x439210 CMP $0x6,%R9 |
(203) 0x439214 JNE 439452 |
(203) 0x43921a VXORPS %XMM2,%XMM2,%XMM2 |
(203) 0x43921e VPERMPD (%RDX),%ZMM0,%ZMM2 |
(203) 0x439224 SUB $0x40,%RDI |
(203) 0x439228 ADD $0x40,%RDX |
(203) 0x43922c VMOVUPD %ZMM2,0x40(%RDI) |
(203) 0x439233 VXORPS %XMM3,%XMM3,%XMM3 |
(203) 0x439237 VPERMPD (%RDX),%ZMM0,%ZMM3 |
(203) 0x43923d SUB $0x40,%RDI |
(203) 0x439241 ADD $0x40,%RDX |
(203) 0x439245 VMOVUPD %ZMM3,0x40(%RDI) |
(203) 0x43924c VXORPS %XMM4,%XMM4,%XMM4 |
(203) 0x439250 VPERMPD (%RDX),%ZMM0,%ZMM4 |
(203) 0x439256 SUB $0x40,%RDI |
(203) 0x43925a ADD $0x40,%RDX |
(203) 0x43925e VMOVUPD %ZMM4,0x40(%RDI) |
(203) 0x439265 VXORPS %XMM5,%XMM5,%XMM5 |
(203) 0x439269 VPERMPD (%RDX),%ZMM0,%ZMM5 |
(203) 0x43926f SUB $0x40,%RDI |
(203) 0x439273 ADD $0x40,%RDX |
(203) 0x439277 VMOVUPD %ZMM5,0x40(%RDI) |
(203) 0x43927e VXORPS %XMM6,%XMM6,%XMM6 |
(203) 0x439282 VPERMPD (%RDX),%ZMM0,%ZMM6 |
(203) 0x439288 SUB $0x40,%RDI |
(203) 0x43928c ADD $0x40,%RDX |
(203) 0x439290 VMOVUPD %ZMM6,0x40(%RDI) |
(203) 0x439297 VXORPS %XMM7,%XMM7,%XMM7 |
(203) 0x43929b VPERMPD (%RDX),%ZMM0,%ZMM7 |
(203) 0x4392a1 MOV 0x58(%RSP),%R13 |
(203) 0x4392a6 ADD $0x40,%RDX |
(203) 0x4392aa SUB $0x40,%RDI |
(203) 0x4392ae VMOVUPD %ZMM7,0x40(%RDI) |
(203) 0x4392b5 CMP %R13,%RDX |
(203) 0x4392b8 JE 439371 |
(204) 0x4392be VXORPS %XMM8,%XMM8,%XMM8 |
(204) 0x4392c3 VPERMPD (%RDX),%ZMM0,%ZMM8 |
(204) 0x4392c9 MOV 0x58(%RSP),%R9 |
(204) 0x4392ce ADD $0x200,%RDX |
(204) 0x4392d5 SUB $0x200,%RDI |
(204) 0x4392dc VMOVUPD %ZMM8,0x200(%RDI) |
(204) 0x4392e3 VXORPS %XMM9,%XMM9,%XMM9 |
(204) 0x4392e8 VPERMPD -0x1c0(%RDX),%ZMM0,%ZMM9 |
(204) 0x4392ef VMOVUPD %ZMM9,0x1c0(%RDI) |
(204) 0x4392f6 VXORPS %XMM10,%XMM10,%XMM10 |
(204) 0x4392fb VPERMPD -0x180(%RDX),%ZMM0,%ZMM10 |
(204) 0x439302 VMOVUPD %ZMM10,0x180(%RDI) |
(204) 0x439309 VXORPS %XMM11,%XMM11,%XMM11 |
(204) 0x43930e VPERMPD -0x140(%RDX),%ZMM0,%ZMM11 |
(204) 0x439315 VMOVUPD %ZMM11,0x140(%RDI) |
(204) 0x43931c VXORPS %XMM12,%XMM12,%XMM12 |
(204) 0x439321 VPERMPD -0x100(%RDX),%ZMM0,%ZMM12 |
(204) 0x439328 VMOVUPD %ZMM12,0x100(%RDI) |
(204) 0x43932f VXORPS %XMM13,%XMM13,%XMM13 |
(204) 0x439334 VPERMPD -0xc0(%RDX),%ZMM0,%ZMM13 |
(204) 0x43933b VMOVUPD %ZMM13,0xc0(%RDI) |
(204) 0x439342 VXORPS %XMM14,%XMM14,%XMM14 |
(204) 0x439347 VPERMPD -0x80(%RDX),%ZMM0,%ZMM14 |
(204) 0x43934e VMOVUPD %ZMM14,0x80(%RDI) |
(204) 0x439355 VXORPS %XMM15,%XMM15,%XMM15 |
(204) 0x43935a VPERMPD -0x40(%RDX),%ZMM0,%ZMM15 |
(204) 0x439361 VMOVUPD %ZMM15,0x40(%RDI) |
(204) 0x439368 CMP %R9,%RDX |
(204) 0x43936b JNE 4392be |
(203) 0x439371 MOV 0x4c(%RSP),%EDX |
(203) 0x439375 CMP %EDX,%EBX |
(203) 0x439377 JE 439180 |
(203) 0x43937d CMPL $0x2,0x3c(%RSP) |
(203) 0x439382 MOV 0x48(%RSP),%EDI |
(203) 0x439386 JBE 43947d |
(203) 0x43938c MOV 0x4c(%RSP),%R13D |
(203) 0x439391 MOV %R13D,%EDX |
(203) 0x439394 LEA (%RDX,%R10,1),%R9 |
(203) 0x439398 ADD %R8,%RSI |
(203) 0x43939b NEG %RDX |
(203) 0x43939e VXORPS %XMM1,%XMM1,%XMM1 |
(203) 0x4393a2 VPERMPD $0x1b,(%R11,%R9,8),%YMM1 |
(203) 0x4393a9 VMOVUPD %YMM1,-0x18(%RSI,%RDX,8) |
(203) 0x4393af MOV %EDI,%EDX |
(203) 0x4393b1 AND $-0x4,%EDX |
(203) 0x4393b4 ADD %R13D,%EDX |
(203) 0x4393b7 AND $0x3,%EDI |
(203) 0x4393ba JE 439180 |
(203) 0x4393c0 MOV 0x38(%RSP),%R13D |
(203) 0x4393c5 MOV 0x34(%RSP),%R9D |
(203) 0x4393ca LEA (%R13,%RDX,1),%ESI |
(203) 0x4393cf ADD %R12D,%ESI |
(203) 0x4393d2 MOVSXD %ESI,%RDI |
(203) 0x4393d5 MOV %R9D,%ESI |
(203) 0x4393d8 SUB %EDX,%ESI |
(203) 0x4393da VMOVSD (%R11,%RDI,8),%XMM2 |
(203) 0x4393e0 MOVSXD %ESI,%RDI |
(203) 0x4393e3 ADD %RAX,%RDI |
(203) 0x4393e6 VMOVSD %XMM2,(%R8,%RDI,8) |
(203) 0x4393ec LEA 0x1(%RDX),%EDI |
(203) 0x4393ef CMP %EDI,%EBX |
(203) 0x4393f1 JLE 439180 |
(203) 0x4393f7 LEA (%R13,%RDI,1),%ESI |
(203) 0x4393fc ADD $0x2,%EDX |
(203) 0x4393ff ADD %R12D,%ESI |
(203) 0x439402 MOVSXD %ESI,%RSI |
(203) 0x439405 VMOVSD (%R11,%RSI,8),%XMM3 |
(203) 0x43940b MOV %R9D,%ESI |
(203) 0x43940e SUB %EDI,%ESI |
(203) 0x439410 MOVSXD %ESI,%RDI |
(203) 0x439413 ADD %RAX,%RDI |
(203) 0x439416 VMOVSD %XMM3,(%R8,%RDI,8) |
(203) 0x43941c CMP %EDX,%EBX |
(203) 0x43941e JLE 439180 |
(203) 0x439424 LEA (%R13,%RDX,1),%R13D |
(203) 0x439429 SUB %EDX,%R9D |
(203) 0x43942c ADD %R12D,%R13D |
(203) 0x43942f MOVSXD %R9D,%RDX |
(203) 0x439432 MOVSXD %R13D,%RSI |
(203) 0x439435 ADD %RAX,%RDX |
(203) 0x439438 VMOVSD (%R11,%RSI,8),%XMM4 |
(203) 0x43943e VMOVSD %XMM4,(%R8,%RDX,8) |
(203) 0x439444 JMP 439180 |
0x439449 INC %EAX |
0x43944b XOR %EDX,%EDX |
0x43944d JMP 438f21 |
(203) 0x439452 VXORPS %XMM1,%XMM1,%XMM1 |
(203) 0x439456 VPERMPD (%R13),%ZMM0,%ZMM1 |
(203) 0x43945d ADD $0x40,%RDX |
(203) 0x439461 SUB $0x40,%RDI |
(203) 0x439465 VMOVUPD %ZMM1,0x40(%RDI) |
(203) 0x43946c JMP 43921a |
(203) 0x439471 MOV %EBX,%EDI |
(203) 0x439473 XOR %EDX,%EDX |
(203) 0x439475 XOR %R13D,%R13D |
(203) 0x439478 JMP 439394 |
(203) 0x43947d MOV 0x4c(%RSP),%EDX |
(203) 0x439481 JMP 4393c0 |
0x439486 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | pack_kernel.cpp:88-92 |
Module | exec |
nb instructions | 92 |
nb uops | 98 |
loop length | 321 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 13 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 8.00 | 6.33 | 6.33 | 10.00 | 8.07 | 8.00 | 10.00 | 10.00 | 10.00 | 7.93 | 6.33 |
cycles | 8.00 | 11.87 | 6.33 | 6.33 | 10.00 | 8.07 | 8.00 | 10.00 | 10.00 | 10.00 | 7.93 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.83 |
Stall cycles | 0.00 |
Front-end | 16.33 |
Dispatch | 11.87 |
DIV/SQRT | 6.00 |
Overall L1 | 16.33 |
all | 4% |
load | 25% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 5% |
all | 11% |
load | 32% |
store | 8% |
mul | 6% |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x24(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 439449 <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x579> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43919d <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R12),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43919d <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EDI,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
IMUL %ECX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x10(%R14),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EBX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%EDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R15,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R9D,%R14 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SAL $0x3,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOVSXD %R12D,%R10 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%RBX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RSI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
AND $-0x8,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R15,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0x2a572(%RIP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOVSXD %ECX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 438f21 <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | pack_kernel.cpp:88-92 |
Module | exec |
nb instructions | 92 |
nb uops | 98 |
loop length | 321 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 13 |
micro-operation queue | 16.33 cycles |
front end | 16.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 8.00 | 6.33 | 6.33 | 10.00 | 8.07 | 8.00 | 10.00 | 10.00 | 10.00 | 7.93 | 6.33 |
cycles | 8.00 | 11.87 | 6.33 | 6.33 | 10.00 | 8.07 | 8.00 | 10.00 | 10.00 | 10.00 | 7.93 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 15.83 |
Stall cycles | 0.00 |
Front-end | 16.33 |
Dispatch | 11.87 |
DIV/SQRT | 6.00 |
Overall L1 | 16.33 |
all | 4% |
load | 25% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 5% |
all | 11% |
load | 32% |
store | 8% |
mul | 6% |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x24(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 439449 <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x579> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43919d <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x8(%R12),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43919d <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x2cd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EDI,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
IMUL %ECX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x10(%R14),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EBX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%EDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %EBX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R15,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R9D,%R14 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SAL $0x3,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOVSXD %R12D,%R10 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R8,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%RBX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %RSI,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
AND $-0x8,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R15,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0x2a572(%RIP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOVSXD %ECX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 438f21 <_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z26clover_unpack_message_leftR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0– | 0.01 | 0.01 |
▼Loop 203 - pack_kernel.cpp:88-92 - exec– | 0.01 | 0.02 |
○Loop 204 - pack_kernel.cpp:90-92 - exec | 0 | 0 |
○Loop 202 - pack_kernel.cpp:90-92 - exec | 0 | 0 |