Function: hypre_CSRMatrixSetRownnz | Module: exec | Source: csr_matrix.c:136-169 | Coverage: 0.01% |
---|
Function: hypre_CSRMatrixSetRownnz | Module: exec | Source: csr_matrix.c:136-169 | Coverage: 0.01% |
---|
/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/seq_mv/csr_matrix.c: 136 - 169 |
-------------------------------------------------------------------------------- |
136: { |
137: HYPRE_Int ierr=0; |
138: HYPRE_Int num_rows = hypre_CSRMatrixNumRows(matrix); |
139: HYPRE_Int *A_i = hypre_CSRMatrixI(matrix); |
140: HYPRE_Int *Arownnz; |
141: |
142: HYPRE_Int i, adiag; |
143: HYPRE_Int irownnz=0; |
144: |
145: for (i=0; i < num_rows; i++) |
146: { |
147: adiag = (A_i[i+1] - A_i[i]); |
148: if(adiag > 0) irownnz++; |
149: } |
150: |
151: hypre_CSRMatrixNumRownnz(matrix) = irownnz; |
152: |
153: if ((irownnz == 0) || (irownnz == num_rows)) |
154: { |
155: hypre_CSRMatrixRownnz(matrix) = NULL; |
156: } |
157: else |
158: { |
159: Arownnz = hypre_CTAlloc(HYPRE_Int, irownnz); |
160: irownnz = 0; |
161: for (i=0; i < num_rows; i++) |
162: { |
163: adiag = A_i[i+1]-A_i[i]; |
164: if(adiag > 0) Arownnz[irownnz++] = i; |
165: } |
166: hypre_CSRMatrixRownnz(matrix) = Arownnz; |
167: } |
168: return ierr; |
169: } |
0x59a1b0 PUSH %RBP |
0x59a1b1 MOV %RSP,%RBP |
0x59a1b4 PUSH %R13 |
0x59a1b6 MOV %RDI,%R13 |
0x59a1b9 PUSH %R12 |
0x59a1bb PUSH %RBX |
0x59a1bc MOV 0x10(%RDI),%R12 |
0x59a1c0 MOV (%RDI),%RBX |
0x59a1c3 AND $-0x40,%RSP |
0x59a1c7 TEST %R12,%R12 |
0x59a1ca JLE 59a830 |
0x59a1d0 LEA -0x1(%R12),%RCX |
0x59a1d5 CMP $0x6,%RCX |
0x59a1d9 JBE 59a83d |
0x59a1df MOV %R12,%RDI |
0x59a1e2 VPXOR %XMM0,%XMM0,%XMM0 |
0x59a1e6 VPTERNLOGD $-0x1,%ZMM1,%ZMM1,%ZMM1 |
0x59a1ed MOV %RBX,%R8 |
0x59a1f0 SHR $0x3,%RDI |
0x59a1f4 VMOVDQA64 %ZMM0,%ZMM2 |
0x59a1fa SAL $0x6,%RDI |
0x59a1fe LEA (%RDI,%RBX,1),%RSI |
0x59a202 SUB $0x40,%RDI |
0x59a206 SHR $0x6,%RDI |
0x59a20a INC %RDI |
0x59a20d AND $0x7,%EDI |
0x59a210 JE 59a343 |
0x59a216 CMP $0x1,%RDI |
0x59a21a JE 59a312 |
0x59a220 CMP $0x2,%RDI |
0x59a224 JE 59a2ea |
0x59a22a CMP $0x3,%RDI |
0x59a22e JE 59a2c2 |
0x59a234 CMP $0x4,%RDI |
0x59a238 JE 59a29a |
0x59a23a CMP $0x5,%RDI |
0x59a23e JE 59a272 |
0x59a240 CMP $0x6,%RDI |
0x59a244 JNE 59a800 |
0x59a24a VMOVDQU64 0x8(%R8),%ZMM6 |
0x59a254 ADD $0x40,%R8 |
0x59a258 VPSUBQ -0x40(%R8),%ZMM6,%ZMM7 |
0x59a25f VPCMPNLEQ %ZMM2,%ZMM7,%K2 |
0x59a266 VMOVDQA64 %ZMM1,%ZMM8{%K2}{z} |
0x59a26c VPSUBQ %ZMM8,%ZMM0,%ZMM0 |
0x59a272 VMOVDQU64 0x8(%R8),%ZMM9 |
0x59a27c ADD $0x40,%R8 |
0x59a280 VPSUBQ -0x40(%R8),%ZMM9,%ZMM10 |
0x59a287 VPCMPNLEQ %ZMM2,%ZMM10,%K3 |
0x59a28e VMOVDQA64 %ZMM1,%ZMM11{%K3}{z} |
0x59a294 VPSUBQ %ZMM11,%ZMM0,%ZMM0 |
0x59a29a VMOVDQU64 0x8(%R8),%ZMM12 |
0x59a2a4 ADD $0x40,%R8 |
0x59a2a8 VPSUBQ -0x40(%R8),%ZMM12,%ZMM13 |
0x59a2af VPCMPNLEQ %ZMM2,%ZMM13,%K4 |
0x59a2b6 VMOVDQA64 %ZMM1,%ZMM14{%K4}{z} |
0x59a2bc VPSUBQ %ZMM14,%ZMM0,%ZMM0 |
0x59a2c2 VMOVDQU64 0x8(%R8),%ZMM15 |
0x59a2cc ADD $0x40,%R8 |
0x59a2d0 VPSUBQ -0x40(%R8),%ZMM15,%ZMM5 |
0x59a2d7 VPCMPNLEQ %ZMM2,%ZMM5,%K5 |
0x59a2de VMOVDQA64 %ZMM1,%ZMM3{%K5}{z} |
0x59a2e4 VPSUBQ %ZMM3,%ZMM0,%ZMM0 |
0x59a2ea VMOVDQU64 0x8(%R8),%ZMM4 |
0x59a2f4 ADD $0x40,%R8 |
0x59a2f8 VPSUBQ -0x40(%R8),%ZMM4,%ZMM6 |
0x59a2ff VPCMPNLEQ %ZMM2,%ZMM6,%K6 |
0x59a306 VMOVDQA64 %ZMM1,%ZMM7{%K6}{z} |
0x59a30c VPSUBQ %ZMM7,%ZMM0,%ZMM0 |
0x59a312 VMOVDQU64 0x8(%R8),%ZMM8 |
0x59a31c ADD $0x40,%R8 |
0x59a320 VPSUBQ -0x40(%R8),%ZMM8,%ZMM9 |
0x59a327 VPCMPNLEQ %ZMM2,%ZMM9,%K7 |
0x59a32e VMOVDQA64 %ZMM1,%ZMM10{%K7}{z} |
0x59a334 VPSUBQ %ZMM10,%ZMM0,%ZMM0 |
0x59a33a CMP %R8,%RSI |
0x59a33d JE 59a473 |
(3031) 0x59a343 VMOVDQU64 0x8(%R8),%ZMM11 |
(3031) 0x59a34d VMOVDQU64 0x48(%R8),%ZMM15 |
(3031) 0x59a357 ADD $0x200,%R8 |
(3031) 0x59a35e VMOVDQU64 -0x178(%R8),%ZMM6 |
(3031) 0x59a368 VMOVDQU64 -0x138(%R8),%ZMM10 |
(3031) 0x59a372 VPSUBQ -0x200(%R8),%ZMM11,%ZMM12 |
(3031) 0x59a379 VPSUBQ -0x1c0(%R8),%ZMM15,%ZMM5 |
(3031) 0x59a380 VPSUBQ -0x180(%R8),%ZMM6,%ZMM7 |
(3031) 0x59a387 VPCMPNLEQ %ZMM2,%ZMM12,%K1 |
(3031) 0x59a38e VPCMPNLEQ %ZMM2,%ZMM5,%K2 |
(3031) 0x59a395 VPCMPNLEQ %ZMM2,%ZMM7,%K3 |
(3031) 0x59a39c VMOVDQA64 %ZMM1,%ZMM13{%K1}{z} |
(3031) 0x59a3a2 VMOVDQA64 %ZMM1,%ZMM3{%K2}{z} |
(3031) 0x59a3a8 VPSUBQ %ZMM13,%ZMM0,%ZMM14 |
(3031) 0x59a3ae VPSUBQ -0x140(%R8),%ZMM10,%ZMM0 |
(3031) 0x59a3b5 VMOVDQU64 -0xf8(%R8),%ZMM13 |
(3031) 0x59a3bf VMOVDQA64 %ZMM1,%ZMM8{%K3}{z} |
(3031) 0x59a3c5 VPSUBQ %ZMM3,%ZMM14,%ZMM4 |
(3031) 0x59a3cb VMOVDQU64 -0xb8(%R8),%ZMM3 |
(3031) 0x59a3d5 VPSUBQ -0x100(%R8),%ZMM13,%ZMM14 |
(3031) 0x59a3dc VPCMPNLEQ %ZMM2,%ZMM0,%K4 |
(3031) 0x59a3e3 VPSUBQ %ZMM8,%ZMM4,%ZMM9 |
(3031) 0x59a3e9 VMOVDQU64 -0x78(%R8),%ZMM8 |
(3031) 0x59a3f3 VPSUBQ -0xc0(%R8),%ZMM3,%ZMM4 |
(3031) 0x59a3fa VPCMPNLEQ %ZMM2,%ZMM14,%K5 |
(3031) 0x59a401 VMOVDQA64 %ZMM1,%ZMM11{%K4}{z} |
(3031) 0x59a407 VPCMPNLEQ %ZMM2,%ZMM4,%K6 |
(3031) 0x59a40e VPSUBQ %ZMM11,%ZMM9,%ZMM12 |
(3031) 0x59a414 VMOVDQU64 -0x38(%R8),%ZMM11 |
(3031) 0x59a41e VPSUBQ -0x80(%R8),%ZMM8,%ZMM9 |
(3031) 0x59a425 VMOVDQA64 %ZMM1,%ZMM15{%K5}{z} |
(3031) 0x59a42b VPSUBQ %ZMM15,%ZMM12,%ZMM5 |
(3031) 0x59a431 VPSUBQ -0x40(%R8),%ZMM11,%ZMM12 |
(3031) 0x59a438 VPCMPNLEQ %ZMM2,%ZMM9,%K7 |
(3031) 0x59a43f VMOVDQA64 %ZMM1,%ZMM6{%K6}{z} |
(3031) 0x59a445 VPCMPNLEQ %ZMM2,%ZMM12,%K1 |
(3031) 0x59a44c VPSUBQ %ZMM6,%ZMM5,%ZMM7 |
(3031) 0x59a452 VMOVDQA64 %ZMM1,%ZMM10{%K7}{z} |
(3031) 0x59a458 VPSUBQ %ZMM10,%ZMM7,%ZMM0 |
(3031) 0x59a45e VMOVDQA64 %ZMM1,%ZMM13{%K1}{z} |
(3031) 0x59a464 VPSUBQ %ZMM13,%ZMM0,%ZMM0 |
(3031) 0x59a46a CMP %R8,%RSI |
(3031) 0x59a46d JNE 59a343 |
0x59a473 VEXTRACTI64X4 $0x1,%ZMM0,%YMM1 |
0x59a47a MOV %R12,%RAX |
0x59a47d VPADDQ %YMM1,%YMM0,%YMM0 |
0x59a481 AND $-0x8,%RAX |
0x59a485 VEXTRACTI64X2 $0x1,%YMM0,%XMM15 |
0x59a48c MOV %RAX,%RDX |
0x59a48f VPADDQ %XMM15,%XMM0,%XMM5 |
0x59a494 VPSRLDQ $0x8,%XMM5,%XMM3 |
0x59a499 VPADDQ %XMM3,%XMM5,%XMM4 |
0x59a49d VMOVQ %XMM4,%RDI |
0x59a4a2 CMP %RAX,%R12 |
0x59a4a5 JE 59a5a1 |
0x59a4ab MOV %R12,%R9 |
0x59a4ae SUB %RDX,%R9 |
0x59a4b1 LEA -0x1(%R9),%R10 |
0x59a4b5 CMP $0x2,%R10 |
0x59a4b9 JBE 59a504 |
0x59a4bb VMOVDQU 0x8(%RBX,%RDX,8),%YMM6 |
0x59a4c1 VPXOR %XMM8,%XMM8,%XMM8 |
0x59a4c6 MOV %R9,%R11 |
0x59a4c9 AND $-0x4,%R11 |
0x59a4cd VPSUBQ (%RBX,%RDX,8),%YMM6,%YMM7 |
0x59a4d2 ADD %R11,%RAX |
0x59a4d5 AND $0x3,%R9D |
0x59a4d9 VPCMPGTQ %YMM8,%YMM7,%YMM9 |
0x59a4de VPSUBQ %YMM9,%YMM0,%YMM10 |
0x59a4e3 VEXTRACTI64X2 $0x1,%YMM10,%XMM0 |
0x59a4ea VPADDQ %XMM0,%XMM10,%XMM12 |
0x59a4ee VPSRLDQ $0x8,%XMM12,%XMM13 |
0x59a4f4 VPADDQ %XMM13,%XMM12,%XMM2 |
0x59a4f9 VMOVQ %XMM2,%RDI |
0x59a4fe JE 59a5a1 |
0x59a504 LEA (,%RAX,8),%RDX |
0x59a50c LEA 0x1(%RAX),%RSI |
0x59a510 LEA (%RBX,%RDX,1),%R9 |
0x59a514 CMP %RCX,%RSI |
0x59a517 JGE 59a555 |
0x59a519 CMP %RAX,%R12 |
0x59a51c JLE 59a555 |
0x59a51e MOV 0x8(%RBX,%RDX,1),%R8 |
0x59a523 XOR %ESI,%ESI |
0x59a525 MOV 0x10(%RBX,%RDX,1),%R10 |
0x59a52a LEA 0x3(%RAX),%R11 |
0x59a52e CMP (%R9),%R8 |
0x59a531 SETG %SIL |
0x59a535 ADD %RDI,%RSI |
0x59a538 XOR %EDI,%EDI |
0x59a53a CMP %R8,%R10 |
0x59a53d SETG %DIL |
0x59a541 ADD %RSI,%RDI |
0x59a544 CMP %R11,%RCX |
0x59a547 JG 59a790 |
0x59a54d ADD $0x2,%RAX |
0x59a551 LEA 0x1(%RAX),%RSI |
0x59a555 MOV (%RBX,%RSI,8),%R9 |
0x59a559 XOR %R8D,%R8D |
0x59a55c LEA (,%RSI,8),%RDX |
0x59a564 CMP -0x8(%RBX,%RSI,8),%R9 |
0x59a569 SETG %R8B |
0x59a56d ADD %R8,%RDI |
0x59a570 CMP %R12,%RSI |
0x59a573 JGE 59a5a1 |
0x59a575 CMP %R9,0x8(%RBX,%RDX,1) |
0x59a57a SETG %R10B |
0x59a57e ADD $0x2,%RAX |
0x59a582 MOVZX %R10B,%R11D |
0x59a586 ADD %R11,%RDI |
0x59a589 CMP %R12,%RAX |
0x59a58c JGE 59a5a1 |
0x59a58e MOV 0x8(%RBX,%RDX,1),%RAX |
0x59a593 CMP %RAX,0x10(%RBX,%RDX,1) |
0x59a598 SETG %CL |
0x59a59b MOVZX %CL,%ESI |
0x59a59e ADD %RSI,%RDI |
0x59a5a1 MOV %RDI,0x40(%R13) |
0x59a5a5 TEST %RDI,%RDI |
0x59a5a8 JE 59a778 |
0x59a5ae CMP %RDI,%R12 |
0x59a5b1 JE 59a778 |
0x59a5b7 MOV $0x8,%ESI |
0x59a5bc VZEROUPPER |
0x59a5bf CALL 5b1df0 <hypre_CAlloc> |
0x59a5c4 MOV %R12,%RDI |
0x59a5c7 XOR %R8D,%R8D |
0x59a5ca XOR %EDX,%EDX |
0x59a5cc AND $0x7,%EDI |
0x59a5cf JE 59a69f |
0x59a5d5 CMP $0x1,%RDI |
0x59a5d9 JE 59a681 |
0x59a5df CMP $0x2,%RDI |
0x59a5e3 JE 59a66c |
0x59a5e9 CMP $0x3,%RDI |
0x59a5ed JE 59a657 |
0x59a5ef CMP $0x4,%RDI |
0x59a5f3 JE 59a642 |
0x59a5f5 CMP $0x5,%RDI |
0x59a5f9 JE 59a62d |
0x59a5fb CMP $0x6,%RDI |
0x59a5ff JE 59a618 |
0x59a601 MOV (%RBX),%R9 |
0x59a604 CMP %R9,0x8(%RBX) |
0x59a608 JLE 59a613 |
0x59a60a MOV %RDX,(%RAX) |
0x59a60d MOV $0x1,%R8D |
0x59a613 MOV $0x1,%EDX |
0x59a618 MOV (%RBX,%RDX,8),%R10 |
0x59a61c CMP %R10,0x8(%RBX,%RDX,8) |
0x59a621 JLE 59a62a |
0x59a623 MOV %RDX,(%RAX,%R8,8) |
0x59a627 INC %R8 |
0x59a62a INC %RDX |
0x59a62d MOV (%RBX,%RDX,8),%R11 |
0x59a631 CMP %R11,0x8(%RBX,%RDX,8) |
0x59a636 JLE 59a63f |
0x59a638 MOV %RDX,(%RAX,%R8,8) |
0x59a63c INC %R8 |
0x59a63f INC %RDX |
0x59a642 MOV (%RBX,%RDX,8),%RCX |
0x59a646 CMP %RCX,0x8(%RBX,%RDX,8) |
0x59a64b JLE 59a654 |
0x59a64d MOV %RDX,(%RAX,%R8,8) |
0x59a651 INC %R8 |
0x59a654 INC %RDX |
0x59a657 MOV (%RBX,%RDX,8),%RSI |
0x59a65b CMP %RSI,0x8(%RBX,%RDX,8) |
0x59a660 JLE 59a669 |
0x59a662 MOV %RDX,(%RAX,%R8,8) |
0x59a666 INC %R8 |
0x59a669 INC %RDX |
0x59a66c MOV (%RBX,%RDX,8),%RDI |
0x59a670 CMP %RDI,0x8(%RBX,%RDX,8) |
0x59a675 JLE 59a67e |
0x59a677 MOV %RDX,(%RAX,%R8,8) |
0x59a67b INC %R8 |
0x59a67e INC %RDX |
0x59a681 MOV (%RBX,%RDX,8),%R9 |
0x59a685 CMP %R9,0x8(%RBX,%RDX,8) |
0x59a68a JLE 59a693 |
0x59a68c MOV %RDX,(%RAX,%R8,8) |
0x59a690 INC %R8 |
0x59a693 INC %RDX |
0x59a696 CMP %RDX,%R12 |
0x59a699 JE 59a77d |
0x59a69f MOV (%RBX,%RDX,8),%R10 |
0x59a6a3 CMP %R10,0x8(%RBX,%RDX,8) |
0x59a6a8 JLE 59a6b7 |
0x59a6aa NOPW (%RAX,%RAX,1) |
(3030) 0x59a6b0 MOV %RDX,(%RAX,%R8,8) |
(3030) 0x59a6b4 INC %R8 |
(3030) 0x59a6b7 MOV 0x8(%RBX,%RDX,8),%RCX |
(3030) 0x59a6bc LEA 0x1(%RDX),%R11 |
(3030) 0x59a6c0 CMP %RCX,0x10(%RBX,%RDX,8) |
(3030) 0x59a6c5 JLE 59a6ce |
(3030) 0x59a6c7 MOV %R11,(%RAX,%R8,8) |
(3030) 0x59a6cb INC %R8 |
(3030) 0x59a6ce MOV 0x8(%RBX,%R11,8),%RSI |
(3030) 0x59a6d3 LEA 0x1(%R11),%RDX |
(3030) 0x59a6d7 CMP %RSI,0x10(%RBX,%R11,8) |
(3030) 0x59a6dc JLE 59a6e5 |
(3030) 0x59a6de MOV %RDX,(%RAX,%R8,8) |
(3030) 0x59a6e2 INC %R8 |
(3030) 0x59a6e5 MOV 0x10(%RBX,%R11,8),%RDI |
(3030) 0x59a6ea LEA 0x2(%R11),%R9 |
(3030) 0x59a6ee CMP %RDI,0x18(%RBX,%R11,8) |
(3030) 0x59a6f3 JLE 59a6fc |
(3030) 0x59a6f5 MOV %R9,(%RAX,%R8,8) |
(3030) 0x59a6f9 INC %R8 |
(3030) 0x59a6fc MOV 0x18(%RBX,%R11,8),%RCX |
(3030) 0x59a701 LEA 0x3(%R11),%R10 |
(3030) 0x59a705 CMP %RCX,0x20(%RBX,%R11,8) |
(3030) 0x59a70a JLE 59a713 |
(3030) 0x59a70c MOV %R10,(%RAX,%R8,8) |
(3030) 0x59a710 INC %R8 |
(3030) 0x59a713 MOV 0x20(%RBX,%R11,8),%RSI |
(3030) 0x59a718 LEA 0x4(%R11),%RDX |
(3030) 0x59a71c CMP %RSI,0x28(%RBX,%R11,8) |
(3030) 0x59a721 JLE 59a72a |
(3030) 0x59a723 MOV %RDX,(%RAX,%R8,8) |
(3030) 0x59a727 INC %R8 |
(3030) 0x59a72a MOV 0x28(%RBX,%R11,8),%RDI |
(3030) 0x59a72f LEA 0x5(%R11),%R9 |
(3030) 0x59a733 CMP %RDI,0x30(%RBX,%R11,8) |
(3030) 0x59a738 JLE 59a741 |
(3030) 0x59a73a MOV %R9,(%RAX,%R8,8) |
(3030) 0x59a73e INC %R8 |
(3030) 0x59a741 MOV 0x30(%RBX,%R11,8),%RCX |
(3030) 0x59a746 LEA 0x6(%R11),%R10 |
(3030) 0x59a74a CMP %RCX,0x38(%RBX,%R11,8) |
(3030) 0x59a74f JLE 59a758 |
(3030) 0x59a751 MOV %R10,(%RAX,%R8,8) |
(3030) 0x59a755 INC %R8 |
(3030) 0x59a758 LEA 0x7(%R11),%RDX |
(3030) 0x59a75c CMP %RDX,%R12 |
(3030) 0x59a75f JE 59a77d |
(3030) 0x59a761 MOV (%RBX,%RDX,8),%R10 |
(3030) 0x59a765 CMP %R10,0x8(%RBX,%RDX,8) |
(3030) 0x59a76a JG 59a6b0 |
(3030) 0x59a770 JMP 59a6b7 |
0x59a775 NOPL (%RAX) |
0x59a778 VZEROUPPER |
0x59a77b XOR %EAX,%EAX |
0x59a77d MOV %RAX,0x38(%R13) |
0x59a781 LEA -0x18(%RBP),%RSP |
0x59a785 XOR %EAX,%EAX |
0x59a787 POP %RBX |
0x59a788 POP %R12 |
0x59a78a POP %R13 |
0x59a78c POP %RBP |
0x59a78d RET |
0x59a78e XCHG %AX,%AX |
0x59a790 MOV 0x18(%RBX,%RDX,1),%R8 |
0x59a795 XOR %ESI,%ESI |
0x59a797 MOV 0x20(%RBX,%RDX,1),%R9 |
0x59a79c CMP %R10,%R8 |
0x59a79f LEA 0x5(%RAX),%R10 |
0x59a7a3 SETG %SIL |
0x59a7a7 ADD %RDI,%RSI |
0x59a7aa XOR %EDI,%EDI |
0x59a7ac CMP %R8,%R9 |
0x59a7af SETG %DIL |
0x59a7b3 ADD %RSI,%RDI |
0x59a7b6 CMP %R10,%RCX |
0x59a7b9 JLE 59a7f0 |
0x59a7bb MOV 0x28(%RBX,%RDX,1),%R11 |
0x59a7c0 XOR %ECX,%ECX |
0x59a7c2 LEA 0x7(%RAX),%RSI |
0x59a7c6 CMP %R9,%R11 |
0x59a7c9 SETG %CL |
0x59a7cc ADD %RDI,%RCX |
0x59a7cf XOR %EDI,%EDI |
0x59a7d1 CMP %R11,0x30(%RBX,%RDX,1) |
0x59a7d6 SETG %DIL |
0x59a7da ADD $0x6,%RAX |
0x59a7de ADD %RCX,%RDI |
0x59a7e1 JMP 59a555 |
0x59a7e6 NOPW %CS:(%RAX,%RAX,1) |
0x59a7f0 ADD $0x4,%RAX |
0x59a7f4 LEA 0x1(%RAX),%RSI |
0x59a7f8 JMP 59a555 |
0x59a7fd NOPL (%RAX) |
0x59a800 VMOVDQU64 0x8(%RBX),%ZMM5 |
0x59a80a LEA 0x40(%RBX),%R8 |
0x59a80e VPSUBQ (%RBX),%ZMM5,%ZMM3 |
0x59a814 VPCMPNLEQ %ZMM0,%ZMM3,%K1 |
0x59a81b VMOVDQA64 %ZMM1,%ZMM4{%K1}{z} |
0x59a821 VPSUBQ %ZMM4,%ZMM0,%ZMM0 |
0x59a827 JMP 59a24a |
0x59a82c NOPL (%RAX) |
0x59a830 MOVQ $0,0x40(%RDI) |
0x59a838 JMP 59a77b |
0x59a83d VPXOR %XMM0,%XMM0,%XMM0 |
0x59a841 XOR %EDX,%EDX |
0x59a843 XOR %EDI,%EDI |
0x59a845 XOR %EAX,%EAX |
0x59a847 JMP 59a4ab |
0x59a84c NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | hypre_IJMatrixAssembleParCSR | IJMatrix_parcsr.c:2826 | exec |
○ | BuildIJLaplacian27pt | amg.c:2272 | exec |
○ | main | amg.c:274 | exec |
○ | __libc_init_first | | libc.so.6 |
Path / |
Source file and lines | csr_matrix.c:136-169 |
Module | exec |
nb instructions | 282 |
nb uops | 282 |
loop length | 1195 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 7 |
used zmm registers | 16 |
nb stack references | 1 |
micro-operation queue | 73.50 cycles |
front end | 73.50 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 46.75 | 46.67 | 25.00 | 25.00 | 15.00 | 46.83 | 46.75 | 16.00 |
cycles | 46.75 | 46.67 | 25.00 | 25.00 | 15.00 | 46.83 | 46.75 | 16.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 71.96 |
Stall cycles | 0.00 |
Front-end | 73.50 |
Dispatch | 46.83 |
Overall L1 | 73.50 |
all | 45% |
load | 45% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 95% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 35% |
all | 42% |
load | 49% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 75% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RDI),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
TEST %R12,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 59a830 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x1(%R12),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x6,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 59a83d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPTERNLOGD $-0x1,%ZMM1,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
VMOVDQA64 %ZMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RDI,%RBX,1),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x40,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a343 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a312 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a2ea | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a2c2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a29a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a272 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 59a800 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVDQU64 0x8(%R8),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM6,%ZMM7 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM7,%K2 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM8{%K2}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM8,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQU64 0x8(%R8),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM9,%ZMM10 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM10,%K3 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM11{%K3}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM11,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQU64 0x8(%R8),%ZMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM12,%ZMM13 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM13,%K4 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM14{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM14,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQU64 0x8(%R8),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM5,%K5 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM3{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM3,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQU64 0x8(%R8),%ZMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM4,%ZMM6 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM6,%K6 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM7{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM7,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQU64 0x8(%R8),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM8,%ZMM9 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM9,%K7 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM10{%K7}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM10,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a473 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VEXTRACTI64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPADDQ %YMM1,%YMM0,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
AND $-0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VEXTRACTI64X2 $0x1,%YMM0,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPADDQ %XMM15,%XMM0,%XMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPSRLDQ $0x8,%XMM5,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ %XMM3,%XMM5,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM4,%RDI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP %RAX,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a5a1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R12,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %RDX,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%R9),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x2,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 59a504 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVDQU 0x8(%RBX,%RDX,8),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R9,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ (%RBX,%RDX,8),%YMM6,%YMM7 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
ADD %R11,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x3,%R9D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPCMPGTQ %YMM8,%YMM7,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPSUBQ %YMM9,%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VEXTRACTI64X2 $0x1,%YMM10,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDQ %XMM0,%XMM10,%XMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPSRLDQ $0x8,%XMM12,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ %XMM13,%XMM12,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM2,%RDI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
JE 59a5a1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RAX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RBX,%RDX,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RCX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 59a555 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %RAX,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 59a555 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x8(%RBX,%RDX,1),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x10(%RBX,%RDX,1),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x3(%RAX),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP (%R9),%R8 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SETG %SIL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %RDI,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %R8,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SETG %DIL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R11,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JG 59a790 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x2,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX,%RSI,8),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (,%RSI,8),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP -0x8(%RBX,%RSI,8),%R9 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SETG %R8B | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %R8,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R12,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 59a5a1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %R9,0x8(%RBX,%RDX,1) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SETG %R10B | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD $0x2,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVZX %R10B,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R11,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R12,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 59a5a1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x8(%RBX,%RDX,1),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RAX,0x10(%RBX,%RDX,1) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SETG %CL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOVZX %CL,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDI,0x40(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a778 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %RDI,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a778 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 5b1df0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $0x7,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a69f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a681 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a66c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a657 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a642 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a62d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a618 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV (%RBX),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R9,0x8(%RBX) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a613 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV $0x1,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R10,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a62a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R11,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a63f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RCX,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a654 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RSI,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a669 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RDI,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a67e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R9,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a693 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RDX,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a77d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV (%RBX,%RDX,8),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R10,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a6b7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,0x38(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA -0x18(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x18(%RBX,%RDX,1),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x20(%RBX,%RDX,1),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R10,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x5(%RAX),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SETG %SIL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %RDI,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %R8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SETG %DIL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R10,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 59a7f0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x28(%RBX,%RDX,1),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x7(%RAX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SETG %CL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %RDI,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %R11,0x30(%RBX,%RDX,1) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SETG %DIL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD $0x6,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RCX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 59a555 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x4,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 59a555 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVDQU64 0x8(%RBX),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
LEA 0x40(%RBX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPSUBQ (%RBX),%ZMM5,%ZMM3 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM0,%ZMM3,%K1 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM4{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM4,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 59a24a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVQ $0,0x40(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
JMP 59a77b | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 59a4ab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | csr_matrix.c:136-169 |
Module | exec |
nb instructions | 282 |
nb uops | 282 |
loop length | 1195 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 7 |
used zmm registers | 16 |
nb stack references | 1 |
micro-operation queue | 73.50 cycles |
front end | 73.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 46.75 | 46.67 | 25.00 | 25.00 | 15.00 | 46.83 | 46.75 | 16.00 |
cycles | 46.75 | 46.67 | 25.00 | 25.00 | 15.00 | 46.83 | 46.75 | 16.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 71.96 |
Stall cycles | 0.00 |
Front-end | 73.50 |
Dispatch | 46.83 |
Overall L1 | 73.50 |
all | 45% |
load | 45% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 95% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 35% |
all | 42% |
load | 49% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 75% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RDI),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
TEST %R12,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 59a830 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x1(%R12),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x6,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 59a83d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPTERNLOGD $-0x1,%ZMM1,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
VMOVDQA64 %ZMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RDI,%RBX,1),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x40,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a343 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a312 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a2ea | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a2c2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a29a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a272 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 59a800 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVDQU64 0x8(%R8),%ZMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM6,%ZMM7 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM7,%K2 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM8{%K2}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM8,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQU64 0x8(%R8),%ZMM9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM9,%ZMM10 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM10,%K3 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM11{%K3}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM11,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQU64 0x8(%R8),%ZMM12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM12,%ZMM13 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM13,%K4 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM14{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM14,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQU64 0x8(%R8),%ZMM15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM15,%ZMM5 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM5,%K5 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM3{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM3,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQU64 0x8(%R8),%ZMM4 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM4,%ZMM6 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM6,%K6 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM7{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM7,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVDQU64 0x8(%R8),%ZMM8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
ADD $0x40,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ -0x40(%R8),%ZMM8,%ZMM9 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM2,%ZMM9,%K7 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM10{%K7}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM10,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a473 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VEXTRACTI64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPADDQ %YMM1,%YMM0,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
AND $-0x8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VEXTRACTI64X2 $0x1,%YMM0,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPADDQ %XMM15,%XMM0,%XMM5 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPSRLDQ $0x8,%XMM5,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ %XMM3,%XMM5,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM4,%RDI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP %RAX,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a5a1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R12,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %RDX,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%R9),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x2,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 59a504 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVDQU 0x8(%RBX,%RDX,8),%YMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R9,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPSUBQ (%RBX,%RDX,8),%YMM6,%YMM7 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
ADD %R11,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x3,%R9D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPCMPGTQ %YMM8,%YMM7,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPSUBQ %YMM9,%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VEXTRACTI64X2 $0x1,%YMM10,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDQ %XMM0,%XMM10,%XMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPSRLDQ $0x8,%XMM12,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ %XMM13,%XMM12,%XMM2 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM2,%RDI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
JE 59a5a1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RAX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%RBX,%RDX,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RCX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 59a555 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %RAX,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 59a555 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x8(%RBX,%RDX,1),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x10(%RBX,%RDX,1),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA 0x3(%RAX),%R11 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP (%R9),%R8 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SETG %SIL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %RDI,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %R8,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SETG %DIL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R11,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JG 59a790 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x2,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX,%RSI,8),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (,%RSI,8),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP -0x8(%RBX,%RSI,8),%R9 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SETG %R8B | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %R8,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R12,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 59a5a1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %R9,0x8(%RBX,%RDX,1) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SETG %R10B | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD $0x2,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOVZX %R10B,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R11,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R12,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 59a5a1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x8(%RBX,%RDX,1),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RAX,0x10(%RBX,%RDX,1) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SETG %CL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOVZX %CL,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDI,0x40(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a778 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %RDI,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a778 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 5b1df0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $0x7,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a69f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a681 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a66c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a657 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a642 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a62d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a618 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV (%RBX),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R9,0x8(%RBX) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a613 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV $0x1,%R8D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R10,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a62a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R11,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a63f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RCX,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a654 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RSI,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a669 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RDI,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a67e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV (%RBX,%RDX,8),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R9,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a693 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RDX,(%RAX,%R8,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
INC %R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RDX,%R12 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59a77d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV (%RBX,%RDX,8),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R10,0x8(%RBX,%RDX,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 59a6b7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,0x38(%R13) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA -0x18(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x18(%RBX,%RDX,1),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x20(%RBX,%RDX,1),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R10,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x5(%RAX),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SETG %SIL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %RDI,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %R8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SETG %DIL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R10,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 59a7f0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x28(%RBX,%RDX,1),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x7(%RAX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9,%R11 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SETG %CL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD %RDI,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %R11,0x30(%RBX,%RDX,1) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
SETG %DIL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD $0x6,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RCX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 59a555 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0x4,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1(%RAX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 59a555 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVDQU64 0x8(%RBX),%ZMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
LEA 0x40(%RBX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPSUBQ (%RBX),%ZMM5,%ZMM3 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPNLEQ %ZMM0,%ZMM3,%K1 | |||||||||||
VMOVDQA64 %ZMM1,%ZMM4{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPSUBQ %ZMM4,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 59a24a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOVQ $0,0x40(%RDI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
JMP 59a77b | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 59a4ab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixSetRownnz– | 0.01 | 0.01 |
○Loop 3031 - csr_matrix.c:145-148 - exec | 0.01 | 0.01 |
○Loop 3030 - csr_matrix.c:161-164 - exec | 0 | 0 |