Function: hypre_SeqVectorAxpy._omp_fn.0 | Module: exec | Source: vector.c:449-452 | Coverage: 0.64% |
---|
Function: hypre_SeqVectorAxpy._omp_fn.0 | Module: exec | Source: vector.c:449-452 | Coverage: 0.64% |
---|
/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/seq_mv/vector.c: 449 - 452 |
-------------------------------------------------------------------------------- |
449: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE |
450: #endif |
451: for (i = 0; i < size; i++) |
452: y_data[i] += alpha * x_data[i]; |
0x5a8380 PUSH %RBP |
0x5a8381 MOV %RSP,%RBP |
0x5a8384 PUSH %R13 |
0x5a8386 PUSH %R12 |
0x5a8388 MOV %RDI,%R12 |
0x5a838b PUSH %RBX |
0x5a838c AND $-0x40,%RSP |
0x5a8390 CALL 40f0b0 <omp_get_num_threads@plt> |
0x5a8395 MOV %EAX,%EBX |
0x5a8397 CALL 40f1f0 <omp_get_thread_num@plt> |
0x5a839c MOVSXD %EBX,%RSI |
0x5a839f MOVSXD %EAX,%RCX |
0x5a83a2 MOV 0x18(%R12),%RAX |
0x5a83a7 CQTO |
0x5a83a9 IDIV %RSI |
0x5a83ac CMP %RDX,%RCX |
0x5a83af JL 5a8810 |
0x5a83b5 IMUL %RAX,%RCX |
0x5a83b9 ADD %RCX,%RDX |
0x5a83bc LEA (%RAX,%RDX,1),%R10 |
0x5a83c0 CMP %R10,%RDX |
0x5a83c3 JGE 5a8571 |
0x5a83c9 LEA -0x1(%RAX),%RSI |
0x5a83cd MOV 0x10(%R12),%RCX |
0x5a83d2 MOV 0x8(%R12),%RDI |
0x5a83d7 MOV %RAX,%R11 |
0x5a83da VMOVSD (%R12),%XMM0 |
0x5a83e0 MOV %RDX,%R12 |
0x5a83e3 CMP $0x2,%RSI |
0x5a83e7 JBE 5a840a |
0x5a83e9 LEA (,%RDX,8),%R9 |
0x5a83f1 LEA (%RCX,%R9,1),%R8 |
0x5a83f5 LEA 0x8(%RDI,%R9,1),%R13 |
0x5a83fa MOV %R8,%RBX |
0x5a83fd SUB %R13,%RBX |
0x5a8400 CMP $0x30,%RBX |
0x5a8404 JA 5a8580 |
0x5a840a MOV %R10,%RBX |
0x5a840d SUB %RDX,%RBX |
0x5a8410 AND $0x7,%EBX |
0x5a8413 JE 5a84cf |
0x5a8419 CMP $0x1,%RBX |
0x5a841d JE 5a84b3 |
0x5a8423 CMP $0x2,%RBX |
0x5a8427 JE 5a84a0 |
0x5a8429 CMP $0x3,%RBX |
0x5a842d JE 5a848d |
0x5a842f CMP $0x4,%RBX |
0x5a8433 JE 5a847a |
0x5a8435 CMP $0x5,%RBX |
0x5a8439 JE 5a8467 |
0x5a843b CMP $0x6,%RBX |
0x5a843f JE 5a8454 |
0x5a8441 VMOVSD (%RDI,%RDX,8),%XMM7 |
0x5a8446 VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM7 |
0x5a844c VMOVSD %XMM7,(%RCX,%RDX,8) |
0x5a8451 INC %RDX |
0x5a8454 VMOVSD (%RDI,%RDX,8),%XMM8 |
0x5a8459 VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM8 |
0x5a845f VMOVSD %XMM8,(%RCX,%RDX,8) |
0x5a8464 INC %RDX |
0x5a8467 VMOVSD (%RDI,%RDX,8),%XMM9 |
0x5a846c VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM9 |
0x5a8472 VMOVSD %XMM9,(%RCX,%RDX,8) |
0x5a8477 INC %RDX |
0x5a847a VMOVSD (%RDI,%RDX,8),%XMM10 |
0x5a847f VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM10 |
0x5a8485 VMOVSD %XMM10,(%RCX,%RDX,8) |
0x5a848a INC %RDX |
0x5a848d VMOVSD (%RDI,%RDX,8),%XMM11 |
0x5a8492 VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM11 |
0x5a8498 VMOVSD %XMM11,(%RCX,%RDX,8) |
0x5a849d INC %RDX |
0x5a84a0 VMOVSD (%RDI,%RDX,8),%XMM12 |
0x5a84a5 VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM12 |
0x5a84ab VMOVSD %XMM12,(%RCX,%RDX,8) |
0x5a84b0 INC %RDX |
0x5a84b3 VMOVSD (%RDI,%RDX,8),%XMM13 |
0x5a84b8 VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM13 |
0x5a84be VMOVSD %XMM13,(%RCX,%RDX,8) |
0x5a84c3 INC %RDX |
0x5a84c6 CMP %RDX,%R10 |
0x5a84c9 JE 5a8571 |
(3160) 0x5a84cf VMOVSD (%RDI,%RDX,8),%XMM14 |
(3160) 0x5a84d4 VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM14 |
(3160) 0x5a84da VMOVSD %XMM14,(%RCX,%RDX,8) |
(3160) 0x5a84df VMOVSD 0x8(%RDI,%RDX,8),%XMM15 |
(3160) 0x5a84e5 VFMADD213SD 0x8(%RCX,%RDX,8),%XMM0,%XMM15 |
(3160) 0x5a84ec VMOVSD %XMM15,0x8(%RCX,%RDX,8) |
(3160) 0x5a84f2 VMOVSD 0x10(%RDI,%RDX,8),%XMM2 |
(3160) 0x5a84f8 VFMADD213SD 0x10(%RCX,%RDX,8),%XMM0,%XMM2 |
(3160) 0x5a84ff VMOVSD %XMM2,0x10(%RCX,%RDX,8) |
(3160) 0x5a8505 VMOVSD 0x18(%RDI,%RDX,8),%XMM1 |
(3160) 0x5a850b VFMADD213SD 0x18(%RCX,%RDX,8),%XMM0,%XMM1 |
(3160) 0x5a8512 VMOVSD %XMM1,0x18(%RCX,%RDX,8) |
(3160) 0x5a8518 VMOVSD 0x20(%RDI,%RDX,8),%XMM3 |
(3160) 0x5a851e VFMADD213SD 0x20(%RCX,%RDX,8),%XMM0,%XMM3 |
(3160) 0x5a8525 VMOVSD %XMM3,0x20(%RCX,%RDX,8) |
(3160) 0x5a852b VMOVSD 0x28(%RDI,%RDX,8),%XMM4 |
(3160) 0x5a8531 VFMADD213SD 0x28(%RCX,%RDX,8),%XMM0,%XMM4 |
(3160) 0x5a8538 VMOVSD %XMM4,0x28(%RCX,%RDX,8) |
(3160) 0x5a853e VMOVSD 0x30(%RDI,%RDX,8),%XMM5 |
(3160) 0x5a8544 VFMADD213SD 0x30(%RCX,%RDX,8),%XMM0,%XMM5 |
(3160) 0x5a854b VMOVSD %XMM5,0x30(%RCX,%RDX,8) |
(3160) 0x5a8551 VMOVSD 0x38(%RDI,%RDX,8),%XMM6 |
(3160) 0x5a8557 VFMADD213SD 0x38(%RCX,%RDX,8),%XMM0,%XMM6 |
(3160) 0x5a855e VMOVSD %XMM6,0x38(%RCX,%RDX,8) |
(3160) 0x5a8564 ADD $0x8,%RDX |
(3160) 0x5a8568 CMP %RDX,%R10 |
(3160) 0x5a856b JNE 5a84cf |
0x5a8571 LEA -0x18(%RBP),%RSP |
0x5a8575 POP %RBX |
0x5a8576 POP %R12 |
0x5a8578 POP %R13 |
0x5a857a POP %RBP |
0x5a857b RET |
0x5a857c NOPL (%RAX) |
0x5a8580 CMP $0x6,%RSI |
0x5a8584 JBE 5a884a |
0x5a858a MOV %RAX,%R13 |
0x5a858d ADD %RDI,%R9 |
0x5a8590 VBROADCASTSD %XMM0,%ZMM1 |
0x5a8596 XOR %EBX,%EBX |
0x5a8598 SHR $0x3,%R13 |
0x5a859c SAL $0x6,%R13 |
0x5a85a0 LEA -0x40(%R13),%R11 |
0x5a85a4 SHR $0x6,%R11 |
0x5a85a8 INC %R11 |
0x5a85ab AND $0x7,%R11D |
0x5a85af JE 5a8684 |
0x5a85b5 CMP $0x1,%R11 |
0x5a85b9 JE 5a8662 |
0x5a85bf CMP $0x2,%R11 |
0x5a85c3 JE 5a8649 |
0x5a85c9 CMP $0x3,%R11 |
0x5a85cd JE 5a8630 |
0x5a85cf CMP $0x4,%R11 |
0x5a85d3 JE 5a8617 |
0x5a85d5 CMP $0x5,%R11 |
0x5a85d9 JE 5a85fe |
0x5a85db CMP $0x6,%R11 |
0x5a85df JNE 5a882e |
0x5a85e5 VMOVUPD (%R9,%RBX,1),%ZMM3 |
0x5a85ec VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM3 |
0x5a85f3 VMOVUPD %ZMM3,(%R8,%RBX,1) |
0x5a85fa ADD $0x40,%RBX |
0x5a85fe VMOVUPD (%R9,%RBX,1),%ZMM4 |
0x5a8605 VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM4 |
0x5a860c VMOVUPD %ZMM4,(%R8,%RBX,1) |
0x5a8613 ADD $0x40,%RBX |
0x5a8617 VMOVUPD (%R9,%RBX,1),%ZMM5 |
0x5a861e VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM5 |
0x5a8625 VMOVUPD %ZMM5,(%R8,%RBX,1) |
0x5a862c ADD $0x40,%RBX |
0x5a8630 VMOVUPD (%R9,%RBX,1),%ZMM6 |
0x5a8637 VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM6 |
0x5a863e VMOVUPD %ZMM6,(%R8,%RBX,1) |
0x5a8645 ADD $0x40,%RBX |
0x5a8649 VMOVUPD (%R9,%RBX,1),%ZMM7 |
0x5a8650 VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM7 |
0x5a8657 VMOVUPD %ZMM7,(%R8,%RBX,1) |
0x5a865e ADD $0x40,%RBX |
0x5a8662 VMOVUPD (%R9,%RBX,1),%ZMM8 |
0x5a8669 VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM8 |
0x5a8670 VMOVUPD %ZMM8,(%R8,%RBX,1) |
0x5a8677 ADD $0x40,%RBX |
0x5a867b CMP %R13,%RBX |
0x5a867e JE 5a8751 |
(3161) 0x5a8684 VMOVUPD (%R9,%RBX,1),%ZMM9 |
(3161) 0x5a868b VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM9 |
(3161) 0x5a8692 VMOVUPD %ZMM9,(%R8,%RBX,1) |
(3161) 0x5a8699 VMOVUPD 0x40(%R9,%RBX,1),%ZMM10 |
(3161) 0x5a86a1 VFMADD213PD 0x40(%R8,%RBX,1),%ZMM1,%ZMM10 |
(3161) 0x5a86a9 VMOVUPD %ZMM10,0x40(%R8,%RBX,1) |
(3161) 0x5a86b1 VMOVUPD 0x80(%R9,%RBX,1),%ZMM11 |
(3161) 0x5a86b9 VFMADD213PD 0x80(%R8,%RBX,1),%ZMM1,%ZMM11 |
(3161) 0x5a86c1 VMOVUPD %ZMM11,0x80(%R8,%RBX,1) |
(3161) 0x5a86c9 VMOVUPD 0xc0(%R9,%RBX,1),%ZMM12 |
(3161) 0x5a86d1 VFMADD213PD 0xc0(%R8,%RBX,1),%ZMM1,%ZMM12 |
(3161) 0x5a86d9 VMOVUPD %ZMM12,0xc0(%R8,%RBX,1) |
(3161) 0x5a86e1 VMOVUPD 0x100(%R9,%RBX,1),%ZMM13 |
(3161) 0x5a86e9 VFMADD213PD 0x100(%R8,%RBX,1),%ZMM1,%ZMM13 |
(3161) 0x5a86f1 VMOVUPD %ZMM13,0x100(%R8,%RBX,1) |
(3161) 0x5a86f9 VMOVUPD 0x140(%R9,%RBX,1),%ZMM14 |
(3161) 0x5a8701 VFMADD213PD 0x140(%R8,%RBX,1),%ZMM1,%ZMM14 |
(3161) 0x5a8709 VMOVUPD %ZMM14,0x140(%R8,%RBX,1) |
(3161) 0x5a8711 VMOVUPD 0x180(%R9,%RBX,1),%ZMM15 |
(3161) 0x5a8719 VFMADD213PD 0x180(%R8,%RBX,1),%ZMM1,%ZMM15 |
(3161) 0x5a8721 VMOVUPD %ZMM15,0x180(%R8,%RBX,1) |
(3161) 0x5a8729 VMOVUPD 0x1c0(%R9,%RBX,1),%ZMM2 |
(3161) 0x5a8731 VFMADD213PD 0x1c0(%R8,%RBX,1),%ZMM1,%ZMM2 |
(3161) 0x5a8739 VMOVUPD %ZMM2,0x1c0(%R8,%RBX,1) |
(3161) 0x5a8741 ADD $0x200,%RBX |
(3161) 0x5a8748 CMP %R13,%RBX |
(3161) 0x5a874b JNE 5a8684 |
0x5a8751 MOV %RAX,%RSI |
0x5a8754 AND $-0x8,%RSI |
0x5a8758 ADD %RSI,%RDX |
0x5a875b CMP %RSI,%RAX |
0x5a875e JE 5a8820 |
0x5a8764 SUB %RSI,%RAX |
0x5a8767 MOV %RAX,%R11 |
0x5a876a LEA -0x1(%RAX),%RAX |
0x5a876e CMP $0x2,%RAX |
0x5a8772 JBE 5a87a2 |
0x5a8774 ADD %R12,%RSI |
0x5a8777 VBROADCASTSD %XMM0,%YMM1 |
0x5a877c MOV %R11,%R9 |
0x5a877f LEA (%RCX,%RSI,8),%R12 |
0x5a8783 AND $-0x4,%R9 |
0x5a8787 VMOVUPD (%R12),%YMM3 |
0x5a878d ADD %R9,%RDX |
0x5a8790 AND $0x3,%R11D |
0x5a8794 VFMADD132PD (%RDI,%RSI,8),%YMM3,%YMM1 |
0x5a879a VMOVUPD %YMM1,(%R12) |
0x5a87a0 JE 5a8820 |
0x5a87a2 VMOVSD (%RDI,%RDX,8),%XMM4 |
0x5a87a7 LEA (,%RDX,8),%R8 |
0x5a87af LEA 0x1(%RDX),%R13 |
0x5a87b3 LEA (%RCX,%R8,1),%RSI |
0x5a87b7 VFMADD213SD (%RSI),%XMM0,%XMM4 |
0x5a87bc VMOVSD %XMM4,(%RSI) |
0x5a87c0 CMP %R13,%R10 |
0x5a87c3 JLE 5a8820 |
0x5a87c5 VMOVSD 0x8(%RDI,%R8,1),%XMM5 |
0x5a87cc LEA 0x8(%RCX,%R8,1),%R11 |
0x5a87d1 ADD $0x2,%RDX |
0x5a87d5 VFMADD213SD (%R11),%XMM0,%XMM5 |
0x5a87da VMOVSD %XMM5,(%R11) |
0x5a87df CMP %RDX,%R10 |
0x5a87e2 JLE 5a8820 |
0x5a87e4 LEA 0x10(%RCX,%R8,1),%RDX |
0x5a87e9 VMOVSD (%RDX),%XMM6 |
0x5a87ed VFMADD132SD 0x10(%RDI,%R8,1),%XMM6,%XMM0 |
0x5a87f4 VMOVSD %XMM0,(%RDX) |
0x5a87f8 VZEROUPPER |
0x5a87fb LEA -0x18(%RBP),%RSP |
0x5a87ff POP %RBX |
0x5a8800 POP %R12 |
0x5a8802 POP %R13 |
0x5a8804 POP %RBP |
0x5a8805 RET |
0x5a8806 NOPW %CS:(%RAX,%RAX,1) |
0x5a8810 INC %RAX |
0x5a8813 XOR %EDX,%EDX |
0x5a8815 JMP 5a83b5 |
0x5a881a NOPW (%RAX,%RAX,1) |
0x5a8820 VZEROUPPER |
0x5a8823 LEA -0x18(%RBP),%RSP |
0x5a8827 POP %RBX |
0x5a8828 POP %R12 |
0x5a882a POP %R13 |
0x5a882c POP %RBP |
0x5a882d RET |
0x5a882e VMOVUPD (%R9),%ZMM2 |
0x5a8834 MOV $0x40,%EBX |
0x5a8839 VFMADD213PD (%R8),%ZMM1,%ZMM2 |
0x5a883f VMOVUPD %ZMM2,(%R8) |
0x5a8845 JMP 5a85e5 |
0x5a884a XOR %ESI,%ESI |
0x5a884c JMP 5a8774 |
0x5a8851 NOPW %CS:(%RAX,%RAX,1) |
0x5a885c NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | vector.c:449-452 |
Module | exec |
nb instructions | 209 |
nb uops | 273 |
loop length | 881 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 11 |
used ymm registers | 2 |
used zmm registers | 8 |
nb stack references | 1 |
micro-operation queue | 72.00 cycles |
front end | 72.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 47.00 | 47.00 | 26.33 | 26.33 | 24.00 | 47.00 | 47.00 | 26.33 |
cycles | 47.00 | 47.00 | 26.33 | 26.33 | 24.00 | 47.00 | 47.00 | 26.33 |
Cycles executing div or sqrt instructions | 24.00-90.00 |
FE+BE cycles | 56.96-99.56 |
Stall cycles | 0.00-42.51 |
Front-end | 72.00 |
Dispatch | 47.00 |
DIV/SQRT | 24.00-90.00 |
Overall L1 | 72.00-90.00 |
all | 7% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 42% |
load | 43% |
store | 44% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 44% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 30% |
load | 43% |
store | 44% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 44% |
div/sqrt | 0% |
other | 6% |
all | 13% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 46% |
load | 47% |
store | 48% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 48% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 35% |
load | 47% |
store | 48% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 48% |
div/sqrt | 12% |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 40f0b0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 40f1f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOVSXD %EBX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD %EAX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x18(%R12),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
IDIV %RSI | 57 | 14.25 | 14.25 | 0 | 0 | 0 | 14.25 | 14.25 | 0 | 42-95 | 24-90 |
CMP %RDX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JL 5a8810 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RCX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%RAX,%RDX,1),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 5a8571 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x1(%RAX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R12),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%R12),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD (%R12),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x2,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 5a840a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (,%RDX,8),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RCX,%R9,1),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x8(%RDI,%R9,1),%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R13,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $0x30,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JA 5a8580 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R10,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %RDX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a84cf | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a84b3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a84a0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a848d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a847a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8467 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8454 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD (%RDI,%RDX,8),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM7 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM7,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM8 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM8,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM9,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM10 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM10,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM11,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM12 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM12,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM13 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM13,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RDX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8571 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x18(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x6,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 5a884a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD %RDI,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VBROADCASTSD %XMM0,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x3,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
SAL $0x6,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA -0x40(%R13),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x6,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8684 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8662 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8649 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8630 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8617 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a85fe | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 5a882e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVUPD (%R9,%RBX,1),%ZMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM3 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM3,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R9,%RBX,1),%ZMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM4 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM4,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R9,%RBX,1),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM5 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM5,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R9,%RBX,1),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM6 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM6,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R9,%RBX,1),%ZMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM7 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM7,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R9,%RBX,1),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM8 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM8,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R13,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8751 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RSI,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8820 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x1(%RAX),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x2,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 5a87a2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %R12,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VBROADCASTSD %XMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
MOV %R11,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%RCX,%RSI,8),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x4,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R12),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
ADD %R9,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x3,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VFMADD132PD (%RDI,%RSI,8),%YMM3,%YMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM1,(%R12) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 5a8820 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD (%RDI,%RDX,8),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (,%RDX,8),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RDX),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RCX,%R8,1),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VFMADD213SD (%RSI),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM4,(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %R13,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 5a8820 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD 0x8(%RDI,%R8,1),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x8(%RCX,%R8,1),%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD $0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VFMADD213SD (%R11),%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM5,(%R11) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %RDX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 5a8820 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA 0x10(%RCX,%R8,1),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD (%RDX),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD132SD 0x10(%RDI,%R8,1),%XMM6,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,(%RDX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x18(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
INC %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 5a83b5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x18(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
VMOVUPD (%R9),%ZMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
MOV $0x40,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VFMADD213PD (%R8),%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM2,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 5a85e5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 5a8774 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | vector.c:449-452 |
Module | exec |
nb instructions | 209 |
nb uops | 273 |
loop length | 881 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 11 |
used ymm registers | 2 |
used zmm registers | 8 |
nb stack references | 1 |
micro-operation queue | 72.00 cycles |
front end | 72.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 47.00 | 47.00 | 26.33 | 26.33 | 24.00 | 47.00 | 47.00 | 26.33 |
cycles | 47.00 | 47.00 | 26.33 | 26.33 | 24.00 | 47.00 | 47.00 | 26.33 |
Cycles executing div or sqrt instructions | 24.00-90.00 |
FE+BE cycles | 56.96-99.56 |
Stall cycles | 0.00-42.51 |
Front-end | 72.00 |
Dispatch | 47.00 |
DIV/SQRT | 24.00-90.00 |
Overall L1 | 72.00-90.00 |
all | 7% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 42% |
load | 43% |
store | 44% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 44% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 30% |
load | 43% |
store | 44% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 44% |
div/sqrt | 0% |
other | 6% |
all | 13% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 46% |
load | 47% |
store | 48% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 48% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 35% |
load | 47% |
store | 48% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 48% |
div/sqrt | 12% |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 40f0b0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 40f1f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOVSXD %EBX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVSXD %EAX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x18(%R12),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
IDIV %RSI | 57 | 14.25 | 14.25 | 0 | 0 | 0 | 14.25 | 14.25 | 0 | 42-95 | 24-90 |
CMP %RDX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JL 5a8810 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RCX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%RAX,%RDX,1),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 5a8571 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x1(%RAX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R12),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%R12),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD (%R12),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x2,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 5a840a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (,%RDX,8),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RCX,%R9,1),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x8(%RDI,%R9,1),%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R13,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $0x30,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JA 5a8580 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R10,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %RDX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a84cf | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a84b3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a84a0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a848d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a847a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8467 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8454 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD (%RDI,%RDX,8),%XMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM7 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM7,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM8 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM8,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM9 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM9,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM10 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM10,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM11 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM11,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM12 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM12,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVSD (%RDI,%RDX,8),%XMM13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD213SD (%RCX,%RDX,8),%XMM0,%XMM13 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM13,(%RCX,%RDX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RDX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8571 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x18(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x6,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 5a884a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD %RDI,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VBROADCASTSD %XMM0,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x3,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
SAL $0x6,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA -0x40(%R13),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x6,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8684 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8662 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8649 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8630 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8617 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a85fe | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 5a882e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVUPD (%R9,%RBX,1),%ZMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM3 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM3,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R9,%RBX,1),%ZMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM4 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM4,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R9,%RBX,1),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM5 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM5,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R9,%RBX,1),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM6 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM6,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R9,%RBX,1),%ZMM7 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM7 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM7,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R9,%RBX,1),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
VFMADD213PD (%R8,%RBX,1),%ZMM1,%ZMM8 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM8,(%R8,%RBX,1) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD $0x40,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R13,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8751 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RSI,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5a8820 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x1(%RAX),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x2,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 5a87a2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %R12,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VBROADCASTSD %XMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
MOV %R11,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%RCX,%RSI,8),%R12 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x4,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVUPD (%R12),%YMM3 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
ADD %R9,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x3,%R11D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VFMADD132PD (%RDI,%RSI,8),%YMM3,%YMM1 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM1,(%R12) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 5a8820 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD (%RDI,%RDX,8),%XMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (,%RDX,8),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RDX),%R13 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RCX,%R8,1),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VFMADD213SD (%RSI),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM4,(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %R13,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 5a8820 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVSD 0x8(%RDI,%R8,1),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x8(%RCX,%R8,1),%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD $0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VFMADD213SD (%R11),%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM5,(%R11) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %RDX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 5a8820 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA 0x10(%RCX,%R8,1),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD (%RDX),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VFMADD132SD 0x10(%RDI,%R8,1),%XMM6,%XMM0 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM0,(%RDX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x18(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
INC %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 5a83b5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x18(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
VMOVUPD (%R9),%ZMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5-6 | 0.50 |
MOV $0x40,%EBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VFMADD213PD (%R8),%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 4 | 0.50 |
VMOVUPD %ZMM2,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 5a85e5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 5a8774 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_SeqVectorAxpy._omp_fn.0– | 0.64 | 0.24 |
○Loop 3161 - vector.c:452-452 - exec | 0.64 | 0.23 |
○Loop 3160 - vector.c:452-452 - exec | 0 | 0 |