Function: _Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DId ... | Module: exec | Source: pack_kernel.cpp:120-124 [...] | Coverage: 0.02% |
---|
Function: _Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DId ... | Module: exec | Source: pack_kernel.cpp:120-124 [...] | Coverage: 0.02% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/pack_kernel.cpp: 120 - 124 |
-------------------------------------------------------------------------------- |
120: #pragma omp parallel for simd |
121: for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { |
122: for (int j = 0; j < depth; ++j) { |
123: int index = buffer_offset + j + k * depth; |
124: right_snd[index] = field(x_max + 1 - j, k); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x43c2f0 PUSH %RBP |
0x43c2f1 MOV %RSP,%RBP |
0x43c2f4 PUSH %R15 |
0x43c2f6 PUSH %R14 |
0x43c2f8 PUSH %R13 |
0x43c2fa PUSH %R12 |
0x43c2fc MOV %RDI,%R12 |
0x43c2ff PUSH %RBX |
0x43c300 AND $-0x40,%RSP |
0x43c304 ADD $-0x80,%RSP |
0x43c308 MOV 0x1c(%RDI),%EBX |
0x43c30b MOV 0x14(%RDI),%R14D |
0x43c30f CALL 4046c0 <omp_get_num_threads@plt> |
0x43c314 MOV %EAX,%R13D |
0x43c317 SUB %EBX,%R14D |
0x43c31a CALL 4045b0 <omp_get_thread_num@plt> |
0x43c31f INC %R14D |
0x43c322 MOV %EAX,%ECX |
0x43c324 MOV 0x18(%R12),%EAX |
0x43c329 ADD 0x24(%R12),%EAX |
0x43c32e LEA 0x2(%RBX,%RAX,1),%EAX |
0x43c332 SUB %R14D,%EAX |
0x43c335 CLTD |
0x43c336 IDIV %R13D |
0x43c339 CMP %EDX,%ECX |
0x43c33b JL 43c898 |
0x43c341 IMUL %EAX,%ECX |
0x43c344 ADD %ECX,%EDX |
0x43c346 ADD %EDX,%EAX |
0x43c348 CMP %EAX,%EDX |
0x43c34a JGE 43c5e8 |
0x43c350 MOVSXD 0x20(%R12),%RSI |
0x43c355 MOV 0x10(%R12),%EDI |
0x43c35a ADD %R14D,%EAX |
0x43c35d ADD %R14D,%EDX |
0x43c360 MOV (%R12),%R9 |
0x43c364 MOV 0x8(%R12),%R11 |
0x43c369 MOV %EAX,0x70(%RSP) |
0x43c36d MOV %ESI,0x28(%RSP) |
0x43c371 MOV %EDI,0x20(%RSP) |
0x43c375 TEST %EBX,%EBX |
0x43c377 JLE 43c5e8 |
0x43c37d MOV %EBX,%R12D |
0x43c380 MOV %EBX,%EAX |
0x43c382 MOV (%R9),%R14 |
0x43c385 LEA -0x1(%RBX),%R13D |
0x43c389 IMUL %EDX,%R12D |
0x43c38d SHR $0x3,%EAX |
0x43c390 MOV %R13D,0x74(%RSP) |
0x43c395 MOV 0x8(%R11),%R11 |
0x43c399 SAL $0x6,%RAX |
0x43c39d MOVSXD %EBX,%R15 |
0x43c3a0 MOV %EBX,%R13D |
0x43c3a3 MOV %R14,0x68(%RSP) |
0x43c3a8 LEA (,%R15,8),%RCX |
0x43c3b0 AND $0x7,%R13D |
0x43c3b4 MOV %RAX,0x30(%RSP) |
0x43c3b9 MOV $0x8,%R14D |
0x43c3bf MOVSXD %R12D,%R8 |
0x43c3c2 LEA 0x1(%RDI),%EAX |
0x43c3c5 MOV %R13D,0x48(%RSP) |
0x43c3ca MOV 0x10(%R9),%R10 |
0x43c3ce ADD %RSI,%R8 |
0x43c3d1 MOV %EBX,%ESI |
0x43c3d3 MOV %EAX,0x24(%RSP) |
0x43c3d7 LEA (%R11,%RCX,1),%R9 |
0x43c3db AND $-0x8,%ESI |
0x43c3de CLTQ |
0x43c3e0 MOVSXD %EDI,%RDI |
0x43c3e3 SUB %RCX,%R14 |
0x43c3e6 DEC %R13D |
0x43c3e9 MOV %ESI,0x4c(%RSP) |
0x43c3ed VMOVDQA64 0x27149(%RIP),%ZMM0 |
0x43c3f7 MOVSXD %EDX,%RDX |
0x43c3fa MOV %RAX,0x40(%RSP) |
0x43c3ff MOV %R9,0x38(%RSP) |
0x43c404 MOV %RDI,0x60(%RSP) |
0x43c409 MOV %R14,0x58(%RSP) |
0x43c40e MOV %R13D,0x2c(%RSP) |
0x43c413 MOV %RCX,0x78(%RSP) |
0x43c418 NOPL (%RAX,%RAX,1) |
(219) 0x43c420 MOV 0x68(%RSP),%R9 |
(219) 0x43c425 LEA (,%R8,8),%RAX |
(219) 0x43c42d IMUL %RDX,%R9 |
(219) 0x43c431 CMPL $0x2,0x74(%RSP) |
(219) 0x43c436 JBE 43c480 |
(219) 0x43c438 MOV 0x60(%RSP),%RCX |
(219) 0x43c43d MOV 0x58(%RSP),%RSI |
(219) 0x43c442 LEA (,%R8,8),%RAX |
(219) 0x43c44a MOV 0x78(%RSP),%R14 |
(219) 0x43c44f LEA (%R11,%RAX,1),%R13 |
(219) 0x43c453 LEA 0x2(%R9,%RCX,1),%RCX |
(219) 0x43c458 SAL $0x3,%RCX |
(219) 0x43c45c ADD %RAX,%R14 |
(219) 0x43c45f LEA -0x8(%RCX,%RSI,1),%RDI |
(219) 0x43c464 ADD %R11,%R14 |
(219) 0x43c467 ADD %R10,%RDI |
(219) 0x43c46a CMP %R14,%RDI |
(219) 0x43c46d JAE 43c600 |
(219) 0x43c473 LEA (%R10,%RCX,1),%RSI |
(219) 0x43c477 CMP %RSI,%R13 |
(219) 0x43c47a JAE 43c600 |
(219) 0x43c480 MOV 0x38(%RSP),%R13 |
(219) 0x43c485 LEA (%R11,%RAX,1),%RCX |
(219) 0x43c489 MOV 0x40(%RSP),%RSI |
(219) 0x43c48e ADD %R13,%RAX |
(219) 0x43c491 ADD %RSI,%R9 |
(219) 0x43c494 MOV %RAX,%R14 |
(219) 0x43c497 LEA (%R10,%R9,8),%R9 |
(219) 0x43c49b SUB %RCX,%R14 |
(219) 0x43c49e SUB $0x8,%R14 |
(219) 0x43c4a2 SHR $0x3,%R14 |
(219) 0x43c4a6 INC %R14 |
(219) 0x43c4a9 AND $0x7,%R14D |
(219) 0x43c4ad JE 43c55e |
(219) 0x43c4b3 CMP $0x1,%R14 |
(219) 0x43c4b7 JE 43c547 |
(219) 0x43c4bd CMP $0x2,%R14 |
(219) 0x43c4c1 JE 43c535 |
(219) 0x43c4c3 CMP $0x3,%R14 |
(219) 0x43c4c7 JE 43c523 |
(219) 0x43c4c9 CMP $0x4,%R14 |
(219) 0x43c4cd JE 43c511 |
(219) 0x43c4cf CMP $0x5,%R14 |
(219) 0x43c4d3 JE 43c4ff |
(219) 0x43c4d5 CMP $0x6,%R14 |
(219) 0x43c4d9 JE 43c4ed |
(219) 0x43c4db VMOVSD (%R9),%XMM5 |
(219) 0x43c4e0 ADD $0x8,%RCX |
(219) 0x43c4e4 SUB $0x8,%R9 |
(219) 0x43c4e8 VMOVSD %XMM5,-0x8(%RCX) |
(219) 0x43c4ed VMOVSD (%R9),%XMM6 |
(219) 0x43c4f2 ADD $0x8,%RCX |
(219) 0x43c4f6 SUB $0x8,%R9 |
(219) 0x43c4fa VMOVSD %XMM6,-0x8(%RCX) |
(219) 0x43c4ff VMOVSD (%R9),%XMM7 |
(219) 0x43c504 ADD $0x8,%RCX |
(219) 0x43c508 SUB $0x8,%R9 |
(219) 0x43c50c VMOVSD %XMM7,-0x8(%RCX) |
(219) 0x43c511 VMOVSD (%R9),%XMM8 |
(219) 0x43c516 ADD $0x8,%RCX |
(219) 0x43c51a SUB $0x8,%R9 |
(219) 0x43c51e VMOVSD %XMM8,-0x8(%RCX) |
(219) 0x43c523 VMOVSD (%R9),%XMM9 |
(219) 0x43c528 ADD $0x8,%RCX |
(219) 0x43c52c SUB $0x8,%R9 |
(219) 0x43c530 VMOVSD %XMM9,-0x8(%RCX) |
(219) 0x43c535 VMOVSD (%R9),%XMM10 |
(219) 0x43c53a ADD $0x8,%RCX |
(219) 0x43c53e SUB $0x8,%R9 |
(219) 0x43c542 VMOVSD %XMM10,-0x8(%RCX) |
(219) 0x43c547 VMOVSD (%R9),%XMM11 |
(219) 0x43c54c ADD $0x8,%RCX |
(219) 0x43c550 SUB $0x8,%R9 |
(219) 0x43c554 VMOVSD %XMM11,-0x8(%RCX) |
(219) 0x43c559 CMP %RCX,%RAX |
(219) 0x43c55c JE 43c5d0 |
(219) 0x43c55e MOV 0x78(%RSP),%RDI |
(218) 0x43c563 VMOVSD (%R9),%XMM12 |
(218) 0x43c568 ADD $0x40,%RCX |
(218) 0x43c56c SUB $0x40,%R9 |
(218) 0x43c570 VMOVSD %XMM12,-0x40(%RCX) |
(218) 0x43c575 VMOVSD 0x38(%R9),%XMM13 |
(218) 0x43c57b VMOVSD %XMM13,-0x38(%RCX) |
(218) 0x43c580 VMOVSD 0x30(%R9),%XMM14 |
(218) 0x43c586 VMOVSD %XMM14,-0x30(%RCX) |
(218) 0x43c58b VMOVSD 0x28(%R9),%XMM15 |
(218) 0x43c591 VMOVSD %XMM15,-0x28(%RCX) |
(218) 0x43c596 VMOVSD 0x20(%R9),%XMM1 |
(218) 0x43c59c VMOVSD %XMM1,-0x20(%RCX) |
(218) 0x43c5a1 VMOVSD 0x18(%R9),%XMM2 |
(218) 0x43c5a7 VMOVSD %XMM2,-0x18(%RCX) |
(218) 0x43c5ac VMOVSD 0x10(%R9),%XMM3 |
(218) 0x43c5b2 VMOVSD %XMM3,-0x10(%RCX) |
(218) 0x43c5b7 VMOVSD 0x8(%R9),%XMM4 |
(218) 0x43c5bd VMOVSD %XMM4,-0x8(%RCX) |
(218) 0x43c5c2 CMP %RCX,%RAX |
(218) 0x43c5c5 JNE 43c563 |
(219) 0x43c5c7 MOV %RDI,0x78(%RSP) |
(219) 0x43c5cc NOPL (%RAX) |
(219) 0x43c5d0 INC %RDX |
(219) 0x43c5d3 ADD %EBX,%R12D |
(219) 0x43c5d6 ADD %R15,%R8 |
(219) 0x43c5d9 LEA (%RDX),%EAX |
(219) 0x43c5db CMP %EAX,0x70(%RSP) |
(219) 0x43c5df JG 43c420 |
0x43c5e5 VZEROUPPER |
0x43c5e8 LEA -0x28(%RBP),%RSP |
0x43c5ec POP %RBX |
0x43c5ed POP %R12 |
0x43c5ef POP %R13 |
0x43c5f1 POP %R14 |
0x43c5f3 POP %R15 |
0x43c5f5 POP %RBP |
0x43c5f6 RET |
0x43c5f7 NOPW (%RAX,%RAX,1) |
(219) 0x43c600 CMPL $0x6,0x74(%RSP) |
(219) 0x43c605 JBE 43c8bf |
(219) 0x43c60b MOV 0x30(%RSP),%R14 |
(219) 0x43c610 LEA -0x40(%R10,%RCX,1),%RAX |
(219) 0x43c615 MOV %R13,%RSI |
(219) 0x43c618 MOV %RAX,%RDI |
(219) 0x43c61b SUB %R14,%RDI |
(219) 0x43c61e MOV %RDI,0x50(%RSP) |
(219) 0x43c623 LEA -0x40(%R14),%RDI |
(219) 0x43c627 SHR $0x6,%RDI |
(219) 0x43c62b INC %RDI |
(219) 0x43c62e AND $0x7,%EDI |
(219) 0x43c631 JE 43c708 |
(219) 0x43c637 CMP $0x1,%RDI |
(219) 0x43c63b JE 43c6e4 |
(219) 0x43c641 CMP $0x2,%RDI |
(219) 0x43c645 JE 43c6cb |
(219) 0x43c64b CMP $0x3,%RDI |
(219) 0x43c64f JE 43c6b2 |
(219) 0x43c651 CMP $0x4,%RDI |
(219) 0x43c655 JE 43c699 |
(219) 0x43c657 CMP $0x5,%RDI |
(219) 0x43c65b JE 43c680 |
(219) 0x43c65d CMP $0x6,%RDI |
(219) 0x43c661 JNE 43c8a1 |
(219) 0x43c667 VXORPS %XMM2,%XMM2,%XMM2 |
(219) 0x43c66b VPERMPD (%RAX),%ZMM0,%ZMM2 |
(219) 0x43c671 ADD $0x40,%RSI |
(219) 0x43c675 SUB $0x40,%RAX |
(219) 0x43c679 VMOVUPD %ZMM2,-0x40(%RSI) |
(219) 0x43c680 VXORPS %XMM3,%XMM3,%XMM3 |
(219) 0x43c684 VPERMPD (%RAX),%ZMM0,%ZMM3 |
(219) 0x43c68a ADD $0x40,%RSI |
(219) 0x43c68e SUB $0x40,%RAX |
(219) 0x43c692 VMOVUPD %ZMM3,-0x40(%RSI) |
(219) 0x43c699 VXORPS %XMM4,%XMM4,%XMM4 |
(219) 0x43c69d VPERMPD (%RAX),%ZMM0,%ZMM4 |
(219) 0x43c6a3 ADD $0x40,%RSI |
(219) 0x43c6a7 SUB $0x40,%RAX |
(219) 0x43c6ab VMOVUPD %ZMM4,-0x40(%RSI) |
(219) 0x43c6b2 VXORPS %XMM5,%XMM5,%XMM5 |
(219) 0x43c6b6 VPERMPD (%RAX),%ZMM0,%ZMM5 |
(219) 0x43c6bc ADD $0x40,%RSI |
(219) 0x43c6c0 SUB $0x40,%RAX |
(219) 0x43c6c4 VMOVUPD %ZMM5,-0x40(%RSI) |
(219) 0x43c6cb VXORPS %XMM6,%XMM6,%XMM6 |
(219) 0x43c6cf VPERMPD (%RAX),%ZMM0,%ZMM6 |
(219) 0x43c6d5 ADD $0x40,%RSI |
(219) 0x43c6d9 SUB $0x40,%RAX |
(219) 0x43c6dd VMOVUPD %ZMM6,-0x40(%RSI) |
(219) 0x43c6e4 VXORPS %XMM7,%XMM7,%XMM7 |
(219) 0x43c6e8 VPERMPD (%RAX),%ZMM0,%ZMM7 |
(219) 0x43c6ee SUB $0x40,%RAX |
(219) 0x43c6f2 ADD $0x40,%RSI |
(219) 0x43c6f6 VMOVUPD %ZMM7,-0x40(%RSI) |
(219) 0x43c6fd CMP %RAX,0x50(%RSP) |
(219) 0x43c702 JE 43c7c1 |
(219) 0x43c708 MOV 0x78(%RSP),%R13 |
(220) 0x43c70d VXORPS %XMM8,%XMM8,%XMM8 |
(220) 0x43c712 VPERMPD (%RAX),%ZMM0,%ZMM8 |
(220) 0x43c718 SUB $0x200,%RAX |
(220) 0x43c71e ADD $0x200,%RSI |
(220) 0x43c725 VMOVUPD %ZMM8,-0x200(%RSI) |
(220) 0x43c72c VXORPS %XMM9,%XMM9,%XMM9 |
(220) 0x43c731 VPERMPD 0x1c0(%RAX),%ZMM0,%ZMM9 |
(220) 0x43c738 VMOVUPD %ZMM9,-0x1c0(%RSI) |
(220) 0x43c73f VXORPS %XMM10,%XMM10,%XMM10 |
(220) 0x43c744 VPERMPD 0x180(%RAX),%ZMM0,%ZMM10 |
(220) 0x43c74b VMOVUPD %ZMM10,-0x180(%RSI) |
(220) 0x43c752 VXORPS %XMM11,%XMM11,%XMM11 |
(220) 0x43c757 VPERMPD 0x140(%RAX),%ZMM0,%ZMM11 |
(220) 0x43c75e VMOVUPD %ZMM11,-0x140(%RSI) |
(220) 0x43c765 VXORPS %XMM12,%XMM12,%XMM12 |
(220) 0x43c76a VPERMPD 0x100(%RAX),%ZMM0,%ZMM12 |
(220) 0x43c771 VMOVUPD %ZMM12,-0x100(%RSI) |
(220) 0x43c778 VXORPS %XMM13,%XMM13,%XMM13 |
(220) 0x43c77d VPERMPD 0xc0(%RAX),%ZMM0,%ZMM13 |
(220) 0x43c784 VMOVUPD %ZMM13,-0xc0(%RSI) |
(220) 0x43c78b VXORPS %XMM14,%XMM14,%XMM14 |
(220) 0x43c790 VPERMPD 0x80(%RAX),%ZMM0,%ZMM14 |
(220) 0x43c797 VMOVUPD %ZMM14,-0x80(%RSI) |
(220) 0x43c79e VXORPS %XMM15,%XMM15,%XMM15 |
(220) 0x43c7a3 VPERMPD 0x40(%RAX),%ZMM0,%ZMM15 |
(220) 0x43c7aa VMOVUPD %ZMM15,-0x40(%RSI) |
(220) 0x43c7b1 CMP %RAX,0x50(%RSP) |
(220) 0x43c7b6 JNE 43c70d |
(219) 0x43c7bc MOV %R13,0x78(%RSP) |
(219) 0x43c7c1 CMP %EBX,0x4c(%RSP) |
(219) 0x43c7c5 JE 43c5d0 |
(219) 0x43c7cb CMPL $0x2,0x2c(%RSP) |
(219) 0x43c7d0 MOV 0x48(%RSP),%R14D |
(219) 0x43c7d5 JBE 43c8cc |
(219) 0x43c7db MOV 0x4c(%RSP),%R13D |
(219) 0x43c7e0 MOV %R13D,%EAX |
(219) 0x43c7e3 MOV %RAX,%RSI |
(219) 0x43c7e6 ADD %R10,%RCX |
(219) 0x43c7e9 ADD %R8,%RAX |
(219) 0x43c7ec NEG %RSI |
(219) 0x43c7ef VXORPS %XMM1,%XMM1,%XMM1 |
(219) 0x43c7f3 VPERMPD $0x1b,-0x20(%RCX,%RSI,8),%YMM1 |
(219) 0x43c7fb VMOVUPD %YMM1,(%R11,%RAX,8) |
(219) 0x43c801 MOV %R14D,%EAX |
(219) 0x43c804 AND $-0x4,%EAX |
(219) 0x43c807 ADD %R13D,%EAX |
(219) 0x43c80a AND $0x3,%R14D |
(219) 0x43c80e JE 43c5d0 |
(219) 0x43c814 MOV 0x24(%RSP),%EDI |
(219) 0x43c818 MOV 0x28(%RSP),%R13D |
(219) 0x43c81d MOV %EDI,%ECX |
(219) 0x43c81f LEA (%R13,%RAX,1),%ESI |
(219) 0x43c824 SUB %EAX,%ECX |
(219) 0x43c826 ADD %R12D,%ESI |
(219) 0x43c829 MOVSXD %ECX,%R14 |
(219) 0x43c82c MOVSXD %ESI,%RCX |
(219) 0x43c82f ADD %R9,%R14 |
(219) 0x43c832 VMOVSD (%R10,%R14,8),%XMM2 |
(219) 0x43c838 LEA 0x1(%RAX),%R14D |
(219) 0x43c83c VMOVSD %XMM2,(%R11,%RCX,8) |
(219) 0x43c842 CMP %R14D,%EBX |
(219) 0x43c845 JLE 43c5d0 |
(219) 0x43c84b MOV 0x20(%RSP),%ESI |
(219) 0x43c84f ADD %R13D,%R14D |
(219) 0x43c852 ADD %R12D,%R14D |
(219) 0x43c855 SUB %EAX,%ESI |
(219) 0x43c857 MOVSXD %R14D,%R14 |
(219) 0x43c85a ADD $0x2,%EAX |
(219) 0x43c85d MOVSXD %ESI,%RCX |
(219) 0x43c860 ADD %R9,%RCX |
(219) 0x43c863 VMOVSD (%R10,%RCX,8),%XMM3 |
(219) 0x43c869 VMOVSD %XMM3,(%R11,%R14,8) |
(219) 0x43c86f CMP %EAX,%EBX |
(219) 0x43c871 JLE 43c5d0 |
(219) 0x43c877 SUB %EAX,%EDI |
(219) 0x43c879 ADD %R13D,%EAX |
(219) 0x43c87c MOVSXD %EDI,%RDI |
(219) 0x43c87f ADD %R12D,%EAX |
(219) 0x43c882 ADD %R9,%RDI |
(219) 0x43c885 CLTQ |
(219) 0x43c887 VMOVSD (%R10,%RDI,8),%XMM4 |
(219) 0x43c88d VMOVSD %XMM4,(%R11,%RAX,8) |
(219) 0x43c893 JMP 43c5d0 |
0x43c898 INC %EAX |
0x43c89a XOR %EDX,%EDX |
0x43c89c JMP 43c341 |
(219) 0x43c8a1 VXORPS %XMM1,%XMM1,%XMM1 |
(219) 0x43c8a5 VPERMPD (%RAX),%ZMM0,%ZMM1 |
(219) 0x43c8ab ADD $0x40,%RSI |
(219) 0x43c8af SUB $0x40,%RAX |
(219) 0x43c8b3 VMOVUPD %ZMM1,(%R13) |
(219) 0x43c8ba JMP 43c667 |
(219) 0x43c8bf MOV %EBX,%R14D |
(219) 0x43c8c2 XOR %EAX,%EAX |
(219) 0x43c8c4 XOR %R13D,%R13D |
(219) 0x43c8c7 JMP 43c7e3 |
(219) 0x43c8cc MOV 0x4c(%RSP),%EAX |
(219) 0x43c8d0 JMP 43c814 |
0x43c8d5 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | pack_kernel.cpp:120-124 |
Module | exec |
nb instructions | 95 |
nb uops | 101 |
loop length | 351 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 16 |
micro-operation queue | 16.83 cycles |
front end | 16.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.90 | 7.80 | 6.33 | 6.33 | 11.50 | 7.87 | 7.70 | 11.50 | 11.50 | 11.50 | 7.73 | 6.33 |
cycles | 7.90 | 12.03 | 6.33 | 6.33 | 11.50 | 7.87 | 7.70 | 11.50 | 11.50 | 11.50 | 7.73 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 16.35 |
Stall cycles | 0.00 |
Front-end | 16.83 |
Dispatch | 12.03 |
DIV/SQRT | 6.00 |
Overall L1 | 16.83 |
all | 4% |
load | 20% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 6% |
all | 11% |
load | 28% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 8% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x24(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 43c898 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x5a8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43c5e8 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x20(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R12),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c5e8 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R9),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x1(%RBX),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
IMUL %EDX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x3,%EAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R13D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R11),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOVSXD %EBX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R15,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R12D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA 0x1(%RDI),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R13D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EAX,0x24(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R11,%RCX,1),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOVSXD %EDI,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SUB %RCX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0x27149(%RIP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13D,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43c341 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | pack_kernel.cpp:120-124 |
Module | exec |
nb instructions | 95 |
nb uops | 101 |
loop length | 351 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 16 |
micro-operation queue | 16.83 cycles |
front end | 16.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.90 | 7.80 | 6.33 | 6.33 | 11.50 | 7.87 | 7.70 | 11.50 | 11.50 | 11.50 | 7.73 | 6.33 |
cycles | 7.90 | 12.03 | 6.33 | 6.33 | 11.50 | 7.87 | 7.70 | 11.50 | 11.50 | 11.50 | 7.73 | 6.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 16.35 |
Stall cycles | 0.00 |
Front-end | 16.83 |
Dispatch | 12.03 |
DIV/SQRT | 6.00 |
Overall L1 | 16.83 |
all | 4% |
load | 20% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 6% |
all | 11% |
load | 28% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 8% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
INC %R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x24(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 43c898 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x5a8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43c5e8 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x20(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R12),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EBX,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c5e8 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x2f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EBX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R9),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x1(%RBX),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
IMUL %EDX,%R12D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SHR $0x3,%EAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R13D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%R11),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x6,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOVSXD %EBX,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EBX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R15,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $0x7,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R12D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA 0x1(%RDI),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R13D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EAX,0x24(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R11,%RCX,1),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOVSXD %EDI,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SUB %RCX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DEC %R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQA64 0x27149(%RIP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13D,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43c341 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x51> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0– | 0.02 | 0.01 |
▼Loop 219 - pack_kernel.cpp:120-124 - exec– | 0.02 | 0.02 |
○Loop 220 - pack_kernel.cpp:122-124 - exec | 0 | 0 |
○Loop 218 - pack_kernel.cpp:122-124 - exec | 0 | 0 |