Function: hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3 | Module: libseq_mv.so | Source: csr_matvec.c:178-204 [...] | Coverage: 0.05% |
---|
Function: hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3 | Module: libseq_mv.so | Source: csr_matvec.c:178-204 [...] | Coverage: 0.05% |
---|
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 178 - 204 |
-------------------------------------------------------------------------------- |
178: #pragma omp parallel for private(i,j,jj,m,tempx) HYPRE_SMP_SCHEDULE |
179: #endif |
180: |
181: for (i = 0; i < num_rownnz; i++) |
182: { |
183: m = A_rownnz[i]; |
[...] |
191: if ( num_vectors==1 ) |
192: { |
193: tempx = 0; |
194: for (jj = A_i[m]; jj < A_i[m+1]; jj++) |
195: tempx += A_data[jj] * x_data[A_j[jj]]; |
196: y_data[m] += tempx; |
197: } |
198: else |
199: for ( j=0; j<num_vectors; ++j ) |
200: { |
201: tempx = 0; |
202: for (jj = A_i[m]; jj < A_i[m+1]; jj++) |
203: tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ]; |
204: y_data[ j*vecstride_y + m*idxstride_y] += tempx; |
0x3520 PUSH %RBP |
0x3521 MOV %RSP,%RBP |
0x3524 PUSH %R15 |
0x3526 PUSH %R14 |
0x3528 PUSH %R13 |
0x352a MOV %RDI,%R13 |
0x352d PUSH %R12 |
0x352f PUSH %RBX |
0x3530 AND $-0x20,%RSP |
0x3534 SUB $0xa0,%RSP |
0x353b CALL 2050 <omp_get_num_threads@plt> |
0x3540 MOV %EAX,%R12D |
0x3543 CALL 2100 <omp_get_thread_num@plt> |
0x3548 MOVSXD %R12D,%RDI |
0x354b MOVSXD %EAX,%RSI |
0x354e MOV 0x20(%R13),%RAX |
0x3552 CQTO |
0x3554 IDIV %RDI |
0x3557 CMP %RDX,%RSI |
0x355a JL 39f0 |
0x3560 IMUL %RAX,%RSI |
0x3564 LEA (%RSI,%RDX,1),%RCX |
0x3568 LEA (%RAX,%RCX,1),%R8 |
0x356c CMP %R8,%RCX |
0x356f JGE 39c6 |
0x3575 MOV 0x18(%R13),%R12 |
0x3579 MOV 0x58(%R13),%RBX |
0x357d MOV 0x30(%R13),%R15 |
0x3581 MOV 0x40(%R13),%R10 |
0x3585 MOV 0x38(%R13),%R11 |
0x3589 MOV 0x8(%R13),%RSI |
0x358d LEA (%R12,%RCX,8),%RDI |
0x3591 MOV %RBX,0x90(%RSP) |
0x3599 MOV 0x50(%R13),%R14 |
0x359d MOV 0x48(%R13),%R9 |
0x35a1 MOV %R15,0x50(%RSP) |
0x35a6 MOV %RDI,%RAX |
0x35a9 MOV 0x28(%R13),%R15 |
0x35ad MOV 0x10(%R13),%RBX |
0x35b1 MOV %R10,0x20(%RSP) |
0x35b6 MOV %R11,0x98(%RSP) |
0x35be MOV (%R13),%R13 |
0x35c2 MOV %RSI,0x40(%RSP) |
0x35c7 CMP $0x1,%R14 |
0x35cb JNE 39fa |
0x35d1 LEA (,%R9,8),%R9 |
0x35d9 LEA (%R12,%R8,8),%R14 |
0x35dd VPCMPEQD %XMM8,%XMM8,%XMM8 |
0x35e2 MOV (%RDI),%R12 |
0x35e5 CMPQ $0x1,0x98(%RSP) |
0x35ee MOV %R9,0x80(%RSP) |
0x35f6 VPCMPEQD %YMM9,%YMM9,%YMM9 |
0x35fb JE 40f2 |
0x3601 CMPQ $0,0x98(%RSP) |
0x360a JLE 395c |
0x3610 MOV %R14,0x38(%RSP) |
(20) 0x3615 MOV 0x40(%RSP),%RSI |
(20) 0x361a MOV 0x20(%RSP),%R8 |
(20) 0x361f MOV %RAX,0x48(%RSP) |
(20) 0x3624 MOV 0x50(%RSP),%R10 |
(20) 0x3629 MOV (%RSI,%R12,8),%R11 |
(20) 0x362d MOV 0x8(%RSI,%R12,8),%RDI |
(20) 0x3632 IMUL %R8,%R12 |
(20) 0x3636 MOV %RDI,0x88(%RSP) |
(20) 0x363e SUB %R11,%RDI |
(20) 0x3641 LEA -0x1(%RDI),%RCX |
(20) 0x3645 MOV %RDI,%R14 |
(20) 0x3648 MOV %RDI,%RDX |
(20) 0x364b MOV %RDI,0x68(%RSP) |
(20) 0x3650 AND $0x3,%EDI |
(20) 0x3653 AND $-0x4,%RDX |
(20) 0x3657 MOV %RCX,0x78(%RSP) |
(20) 0x365c SHR $0x2,%R14 |
(20) 0x3660 MOV %RDI,0x70(%RSP) |
(20) 0x3665 LEA (,%R11,8),%RCX |
(20) 0x366d LEA (%R10,%R12,8),%R9 |
(20) 0x3671 SAL $0x5,%R14 |
(20) 0x3675 MOV %RDX,0x60(%RSP) |
(20) 0x367a ADD %R11,%RDX |
(20) 0x367d LEA (%R13,%RCX,1),%RSI |
(20) 0x3682 XOR %R10D,%R10D |
(20) 0x3685 MOV %RDX,0x58(%RSP) |
(20) 0x368a ADD %RBX,%RCX |
(20) 0x368d XOR %EDX,%EDX |
(20) 0x368f NOP |
(19) 0x3690 CMP %R11,0x88(%RSP) |
(19) 0x3698 JLE 39d5 |
(19) 0x369e CMPQ $0x2,0x78(%RSP) |
(19) 0x36a4 JBE 39de |
(19) 0x36aa LEA -0x20(%R14),%R8 |
(19) 0x36ae LEA (%R15,%RDX,8),%R12 |
(19) 0x36b2 VXORPD %XMM1,%XMM1,%XMM1 |
(19) 0x36b6 XOR %EAX,%EAX |
(19) 0x36b8 SHR $0x5,%R8 |
(19) 0x36bc INC %R8 |
(19) 0x36bf AND $0x7,%R8D |
(19) 0x36c3 JE 37b1 |
(19) 0x36c9 CMP $0x1,%R8 |
(19) 0x36cd JE 378e |
(19) 0x36d3 CMP $0x2,%R8 |
(19) 0x36d7 JE 3774 |
(19) 0x36dd CMP $0x3,%R8 |
(19) 0x36e1 JE 375b |
(19) 0x36e3 CMP $0x4,%R8 |
(19) 0x36e7 JE 3742 |
(19) 0x36e9 CMP $0x5,%R8 |
(19) 0x36ed JE 3728 |
(19) 0x36ef CMP $0x6,%R8 |
(19) 0x36f3 JE 370e |
(19) 0x36f5 VMOVDQU (%RCX),%YMM11 |
(19) 0x36f9 VMOVAPD %YMM9,%YMM10 |
(19) 0x36fe MOV $0x20,%EAX |
(19) 0x3703 VGATHERQPD %YMM10,(%R12,%YMM11,8),%YMM12 |
(19) 0x3709 VFMADD231PD (%RSI),%YMM12,%YMM1 |
(19) 0x370e VMOVDQU (%RCX,%RAX,1),%YMM14 |
(19) 0x3713 VMOVAPD %YMM9,%YMM13 |
(19) 0x3718 VGATHERQPD %YMM13,(%R12,%YMM14,8),%YMM0 |
(19) 0x371e VFMADD231PD (%RSI,%RAX,1),%YMM0,%YMM1 |
(19) 0x3724 ADD $0x20,%RAX |
(19) 0x3728 VMOVDQU (%RCX,%RAX,1),%YMM3 |
(19) 0x372d VMOVAPD %YMM9,%YMM15 |
(19) 0x3732 VGATHERQPD %YMM15,(%R12,%YMM3,8),%YMM5 |
(19) 0x3738 VFMADD231PD (%RSI,%RAX,1),%YMM5,%YMM1 |
(19) 0x373e ADD $0x20,%RAX |
(19) 0x3742 VMOVDQU (%RCX,%RAX,1),%YMM7 |
(19) 0x3747 VMOVAPD %YMM9,%YMM4 |
(19) 0x374b VGATHERQPD %YMM4,(%R12,%YMM7,8),%YMM6 |
(19) 0x3751 VFMADD231PD (%RSI,%RAX,1),%YMM6,%YMM1 |
(19) 0x3757 ADD $0x20,%RAX |
(19) 0x375b VMOVDQU (%RCX,%RAX,1),%YMM10 |
(19) 0x3760 VMOVAPD %YMM9,%YMM2 |
(19) 0x3764 VGATHERQPD %YMM2,(%R12,%YMM10,8),%YMM11 |
(19) 0x376a VFMADD231PD (%RSI,%RAX,1),%YMM11,%YMM1 |
(19) 0x3770 ADD $0x20,%RAX |
(19) 0x3774 VMOVDQU (%RCX,%RAX,1),%YMM13 |
(19) 0x3779 VMOVAPD %YMM9,%YMM12 |
(19) 0x377e VGATHERQPD %YMM12,(%R12,%YMM13,8),%YMM14 |
(19) 0x3784 VFMADD231PD (%RSI,%RAX,1),%YMM14,%YMM1 |
(19) 0x378a ADD $0x20,%RAX |
(19) 0x378e VMOVDQU (%RCX,%RAX,1),%YMM3 |
(19) 0x3793 VMOVAPD %YMM9,%YMM15 |
(19) 0x3798 VGATHERQPD %YMM15,(%R12,%YMM3,8),%YMM0 |
(19) 0x379e VFMADD231PD (%RSI,%RAX,1),%YMM0,%YMM1 |
(19) 0x37a4 ADD $0x20,%RAX |
(19) 0x37a8 CMP %R14,%RAX |
(19) 0x37ab JE 3891 |
(21) 0x37b1 VMOVDQU (%RCX,%RAX,1),%YMM4 |
(21) 0x37b6 VMOVDQU 0x20(%RCX,%RAX,1),%YMM2 |
(21) 0x37bc VMOVAPD %YMM9,%YMM5 |
(21) 0x37c0 VMOVAPD %YMM9,%YMM6 |
(21) 0x37c4 VMOVDQU 0x40(%RCX,%RAX,1),%YMM12 |
(21) 0x37ca VMOVAPD %YMM9,%YMM11 |
(21) 0x37cf VMOVAPD %YMM9,%YMM14 |
(21) 0x37d4 VMOVAPD %YMM9,%YMM3 |
(21) 0x37d8 VGATHERQPD %YMM5,(%R12,%YMM4,8),%YMM7 |
(21) 0x37de VFMADD231PD (%RSI,%RAX,1),%YMM7,%YMM1 |
(21) 0x37e4 VMOVDQU 0x60(%RCX,%RAX,1),%YMM15 |
(21) 0x37ea VGATHERQPD %YMM6,(%R12,%YMM2,8),%YMM10 |
(21) 0x37f0 VFMADD231PD 0x20(%RSI,%RAX,1),%YMM10,%YMM1 |
(21) 0x37f7 VMOVAPD %YMM9,%YMM4 |
(21) 0x37fb VGATHERQPD %YMM11,(%R12,%YMM12,8),%YMM13 |
(21) 0x3801 VMOVDQU 0x80(%RCX,%RAX,1),%YMM5 |
(21) 0x380a VFMADD132PD 0x40(%RSI,%RAX,1),%YMM1,%YMM13 |
(21) 0x3811 VMOVAPD %YMM9,%YMM6 |
(21) 0x3815 VMOVAPD %YMM9,%YMM12 |
(21) 0x381a VMOVDQU 0xa0(%RCX,%RAX,1),%YMM7 |
(21) 0x3823 VGATHERQPD %YMM14,(%R12,%YMM15,8),%YMM1 |
(21) 0x3829 VFMADD132PD 0x60(%RSI,%RAX,1),%YMM13,%YMM1 |
(21) 0x3830 VMOVDQU 0xc0(%RCX,%RAX,1),%YMM10 |
(21) 0x3839 VGATHERQPD %YMM3,(%R12,%YMM5,8),%YMM0 |
(21) 0x383f VFMADD132PD 0x80(%RSI,%RAX,1),%YMM1,%YMM0 |
(21) 0x3849 VMOVDQU 0xe0(%RCX,%RAX,1),%YMM13 |
(21) 0x3852 VGATHERQPD %YMM4,(%R12,%YMM7,8),%YMM2 |
(21) 0x3858 VFMADD132PD 0xa0(%RSI,%RAX,1),%YMM0,%YMM2 |
(21) 0x3862 VGATHERQPD %YMM6,(%R12,%YMM10,8),%YMM11 |
(21) 0x3868 VFMADD132PD 0xc0(%RSI,%RAX,1),%YMM2,%YMM11 |
(21) 0x3872 VGATHERQPD %YMM12,(%R12,%YMM13,8),%YMM1 |
(21) 0x3878 VFMADD132PD 0xe0(%RSI,%RAX,1),%YMM11,%YMM1 |
(21) 0x3882 ADD $0x100,%RAX |
(21) 0x3888 CMP %R14,%RAX |
(21) 0x388b JNE 37b1 |
(19) 0x3891 VEXTRACTF128 $0x1,%YMM1,%XMM14 |
(19) 0x3897 CMPQ $0,0x70(%RSP) |
(19) 0x389d VADDPD %XMM1,%XMM14,%XMM15 |
(19) 0x38a1 VADDPD %XMM14,%XMM1,%XMM2 |
(19) 0x38a6 VUNPCKHPD %XMM15,%XMM15,%XMM3 |
(19) 0x38ab VADDPD %XMM15,%XMM3,%XMM6 |
(19) 0x38b0 JE 3910 |
(19) 0x38b2 MOV 0x60(%RSP),%RDI |
(19) 0x38b7 MOV 0x58(%RSP),%RAX |
(19) 0x38bc MOV 0x68(%RSP),%R8 |
(19) 0x38c1 SUB %RDI,%R8 |
(19) 0x38c4 CMP $0x1,%R8 |
(19) 0x38c8 JE 38fc |
(19) 0x38ca ADD %R11,%RDI |
(19) 0x38cd LEA (%R15,%RDX,8),%R12 |
(19) 0x38d1 VMOVAPD %XMM8,%XMM5 |
(19) 0x38d5 VMOVDQU (%RBX,%RDI,8),%XMM0 |
(19) 0x38da VGATHERQPD %XMM5,(%R12,%XMM0,8),%XMM4 |
(19) 0x38e0 VFMADD132PD (%R13,%RDI,8),%XMM2,%XMM4 |
(19) 0x38e7 VUNPCKHPD %XMM4,%XMM4,%XMM7 |
(19) 0x38eb VADDPD %XMM4,%XMM7,%XMM6 |
(19) 0x38ef TEST $0x1,%R8B |
(19) 0x38f3 JE 3910 |
(19) 0x38f5 AND $-0x2,%R8 |
(19) 0x38f9 ADD %R8,%RAX |
(19) 0x38fc MOV (%RBX,%RAX,8),%RDI |
(19) 0x3900 VMOVSD (%R13,%RAX,8),%XMM2 |
(19) 0x3907 ADD %RDX,%RDI |
(19) 0x390a VFMADD231SD (%R15,%RDI,8),%XMM2,%XMM6 |
(19) 0x3910 VADDSD (%R9),%XMM6,%XMM10 |
(19) 0x3915 MOV 0x80(%RSP),%RAX |
(19) 0x391d INC %R10 |
(19) 0x3920 MOV 0x90(%RSP),%R8 |
(19) 0x3928 VMOVSD %XMM10,(%R9) |
(19) 0x392d ADD %R8,%RDX |
(19) 0x3930 ADD %RAX,%R9 |
(19) 0x3933 CMP %R10,0x98(%RSP) |
(19) 0x393b JNE 3690 |
(20) 0x3941 MOV 0x48(%RSP),%RAX |
(20) 0x3946 MOV 0x38(%RSP),%R11 |
(20) 0x394b ADD $0x8,%RAX |
(20) 0x394f CMP %R11,%RAX |
(20) 0x3952 JE 39c3 |
(20) 0x3954 MOV (%RAX),%R12 |
(20) 0x3957 JMP 3615 |
0x395c MOV %R14,%R15 |
0x395f SUB %RDI,%R15 |
0x3962 SUB $0x8,%R15 |
0x3966 SHR $0x3,%R15 |
0x396a INC %R15 |
0x396d AND $0x7,%R15D |
0x3971 JE 39ba |
0x3973 CMP $0x1,%R15 |
0x3977 JE 39b1 |
0x3979 CMP $0x2,%R15 |
0x397d JE 39ad |
0x397f CMP $0x3,%R15 |
0x3983 JE 39a9 |
0x3985 CMP $0x4,%R15 |
0x3989 JE 39a5 |
0x398b CMP $0x5,%R15 |
0x398f JE 39a1 |
0x3991 ADD $0x8,%RDI |
0x3995 CMP $0x6,%R15 |
0x3999 CMOVNE %RDI,%RAX |
0x399d ADD $0x8,%RAX |
0x39a1 ADD $0x8,%RAX |
0x39a5 ADD $0x8,%RAX |
0x39a9 ADD $0x8,%RAX |
0x39ad ADD $0x8,%RAX |
0x39b1 ADD $0x8,%RAX |
0x39b5 CMP %R14,%RAX |
0x39b8 JE 39c3 |
(18) 0x39ba ADD $0x40,%RAX |
(18) 0x39be CMP %R14,%RAX |
(18) 0x39c1 JNE 39ba |
0x39c3 VZEROUPPER |
0x39c6 LEA -0x28(%RBP),%RSP |
0x39ca POP %RBX |
0x39cb POP %R12 |
0x39cd POP %R13 |
0x39cf POP %R14 |
0x39d1 POP %R15 |
0x39d3 POP %RBP |
0x39d4 RET |
(19) 0x39d5 VXORPD %XMM6,%XMM6,%XMM6 |
(19) 0x39d9 JMP 3910 |
(19) 0x39de MOV %R11,%RAX |
(19) 0x39e1 VXORPD %XMM2,%XMM2,%XMM2 |
(19) 0x39e5 VXORPD %XMM6,%XMM6,%XMM6 |
(19) 0x39e9 XOR %EDI,%EDI |
(19) 0x39eb JMP 38bc |
0x39f0 INC %RAX |
0x39f3 XOR %EDX,%EDX |
0x39f5 JMP 3560 |
0x39fa LEA (%R12,%R8,8),%RDX |
0x39fe MOV %RDI,0x70(%RSP) |
0x3a03 MOV (%RDI),%R11 |
0x3a06 VPCMPEQD %XMM3,%XMM3,%XMM3 |
0x3a0a LEA (,%R9,8),%RCX |
0x3a12 MOV %RDX,0x18(%RSP) |
0x3a17 VPCMPEQD %YMM2,%YMM2,%YMM2 |
0x3a1b MOV %RDI,%R10 |
0x3a1e CMPQ $0x1,0x98(%RSP) |
0x3a27 MOV %RCX,0x88(%RSP) |
0x3a2f JE 3f58 |
0x3a35 CMPQ $0,0x98(%RSP) |
0x3a3e JLE 43e8 |
0x3a44 VMOVQ %R14,%XMM3 |
0x3a49 MOV %RBX,0x8(%RSP) |
0x3a4e VPCMPEQD %YMM2,%YMM2,%YMM2 |
0x3a52 MOV %R13,%RCX |
0x3a55 VPBROADCASTQ %XMM3,%YMM8 |
0x3a5a VPSRLQ $0x20,%YMM8,%YMM9 |
(14) 0x3a60 MOV 0x40(%RSP),%RDX |
(14) 0x3a65 MOV 0x20(%RSP),%RSI |
(14) 0x3a6a MOV %RCX,0x10(%RSP) |
(14) 0x3a6f MOV 0x50(%RSP),%R8 |
(14) 0x3a74 MOV 0x8(%RSP),%R10 |
(14) 0x3a79 MOV 0x8(%RDX,%R11,8),%RBX |
(14) 0x3a7e MOV (%RDX,%R11,8),%R12 |
(14) 0x3a82 IMUL %RSI,%R11 |
(14) 0x3a86 MOV %RBX,%RAX |
(14) 0x3a89 LEA (,%R12,8),%RSI |
(14) 0x3a91 SUB %R12,%RAX |
(14) 0x3a94 LEA (%R8,%R11,8),%R9 |
(14) 0x3a98 LEA (%RCX,%RSI,1),%RDI |
(14) 0x3a9c ADD %R10,%RSI |
(14) 0x3a9f MOV %RAX,%R11 |
(14) 0x3aa2 LEA -0x1(%RAX),%R13 |
(14) 0x3aa6 MOV %RAX,0x78(%RSP) |
(14) 0x3aab AND $-0x4,%RAX |
(14) 0x3aaf SHR $0x2,%R11 |
(14) 0x3ab3 ADD %R12,%RAX |
(14) 0x3ab6 MOV %R13,0x80(%RSP) |
(14) 0x3abe SAL $0x5,%R11 |
(14) 0x3ac2 CMP $0x2,%R13 |
(14) 0x3ac6 CMOVBE %R12,%RAX |
(14) 0x3aca LEA (,%RAX,8),%RDX |
(14) 0x3ad2 LEA 0x1(%RAX),%R13 |
(14) 0x3ad6 LEA (%RCX,%RDX,1),%R8 |
(14) 0x3ada ADD %R10,%RDX |
(14) 0x3add LEA 0x2(%RAX),%RAX |
(14) 0x3ae1 MOV %RDX,0x60(%RSP) |
(14) 0x3ae6 LEA (,%R13,8),%RDX |
(14) 0x3aee MOV %R8,0x68(%RSP) |
(14) 0x3af3 LEA (%RCX,%RDX,1),%R8 |
(14) 0x3af7 ADD %R10,%RDX |
(14) 0x3afa MOV %R8,0x38(%RSP) |
(14) 0x3aff LEA (,%RAX,8),%R8 |
(14) 0x3b07 MOV %RAX,0x58(%RSP) |
(14) 0x3b0c MOV 0x78(%RSP),%RAX |
(14) 0x3b11 LEA (%R10,%R8,1),%R10 |
(14) 0x3b15 MOV %RDX,0x48(%RSP) |
(14) 0x3b1a LEA (%RCX,%R8,1),%RDX |
(14) 0x3b1e XOR %R8D,%R8D |
(14) 0x3b21 AND $0x3,%EAX |
(14) 0x3b24 MOV %RDX,0x30(%RSP) |
(14) 0x3b29 MOV %RAX,0x78(%RSP) |
(14) 0x3b2e MOV %R10,0x28(%RSP) |
(14) 0x3b33 XOR %R10D,%R10D |
(14) 0x3b36 NOPW %CS:(%RAX,%RAX,1) |
(13) 0x3b40 CMP %RBX,%R12 |
(13) 0x3b43 JGE 40e0 |
(13) 0x3b49 CMPQ $0x2,0x80(%RSP) |
(13) 0x3b52 JBE 40e9 |
(13) 0x3b58 LEA -0x20(%R11),%RDX |
(13) 0x3b5c LEA (%R15,%R8,8),%RCX |
(13) 0x3b60 VXORPD %XMM0,%XMM0,%XMM0 |
(13) 0x3b64 XOR %EAX,%EAX |
(13) 0x3b66 SHR $0x5,%RDX |
(13) 0x3b6a INC %RDX |
(13) 0x3b6d AND $0x3,%EDX |
(13) 0x3b70 JE 3c3c |
(13) 0x3b76 CMP $0x1,%RDX |
(13) 0x3b7a JE 3bf8 |
(13) 0x3b7c CMP $0x2,%RDX |
(13) 0x3b80 JE 3bbd |
(13) 0x3b82 VMOVDQU (%RSI),%YMM5 |
(13) 0x3b86 VMOVAPD %YMM2,%YMM15 |
(13) 0x3b8a MOV $0x20,%EAX |
(13) 0x3b8f VPSRLQ $0x20,%YMM5,%YMM1 |
(13) 0x3b94 VPMULUDQ %YMM5,%YMM9,%YMM11 |
(13) 0x3b98 VPMULUDQ %YMM8,%YMM1,%YMM10 |
(13) 0x3b9d VPMULUDQ %YMM8,%YMM5,%YMM7 |
(13) 0x3ba2 VPADDQ %YMM11,%YMM10,%YMM12 |
(13) 0x3ba7 VPSLLQ $0x20,%YMM12,%YMM13 |
(13) 0x3bad VPADDQ %YMM13,%YMM7,%YMM14 |
(13) 0x3bb2 VGATHERQPD %YMM15,(%RCX,%YMM14,8),%YMM4 |
(13) 0x3bb8 VFMADD231PD (%RDI),%YMM4,%YMM0 |
(13) 0x3bbd VMOVDQU (%RSI,%RAX,1),%YMM3 |
(13) 0x3bc2 VMOVAPD %YMM2,%YMM13 |
(13) 0x3bc6 VPSRLQ $0x20,%YMM3,%YMM5 |
(13) 0x3bcb VPMULUDQ %YMM3,%YMM9,%YMM1 |
(13) 0x3bcf VPMULUDQ %YMM8,%YMM5,%YMM7 |
(13) 0x3bd4 VPMULUDQ %YMM8,%YMM3,%YMM6 |
(13) 0x3bd9 VPADDQ %YMM1,%YMM7,%YMM10 |
(13) 0x3bdd VPSLLQ $0x20,%YMM10,%YMM11 |
(13) 0x3be3 VPADDQ %YMM11,%YMM6,%YMM12 |
(13) 0x3be8 VGATHERQPD %YMM13,(%RCX,%YMM12,8),%YMM14 |
(13) 0x3bee VFMADD231PD (%RDI,%RAX,1),%YMM14,%YMM0 |
(13) 0x3bf4 ADD $0x20,%RAX |
(13) 0x3bf8 VMOVDQU (%RSI,%RAX,1),%YMM15 |
(13) 0x3bfd VMOVAPD %YMM2,%YMM11 |
(13) 0x3c01 VPSRLQ $0x20,%YMM15,%YMM4 |
(13) 0x3c07 VPMULUDQ %YMM15,%YMM9,%YMM5 |
(13) 0x3c0c VPMULUDQ %YMM8,%YMM4,%YMM6 |
(13) 0x3c11 VPMULUDQ %YMM8,%YMM15,%YMM3 |
(13) 0x3c16 VPADDQ %YMM5,%YMM6,%YMM7 |
(13) 0x3c1a VPSLLQ $0x20,%YMM7,%YMM1 |
(13) 0x3c1f VPADDQ %YMM1,%YMM3,%YMM10 |
(13) 0x3c23 VGATHERQPD %YMM11,(%RCX,%YMM10,8),%YMM12 |
(13) 0x3c29 VFMADD231PD (%RDI,%RAX,1),%YMM12,%YMM0 |
(13) 0x3c2f ADD $0x20,%RAX |
(13) 0x3c33 CMP %R11,%RAX |
(13) 0x3c36 JE 3d2d |
(15) 0x3c3c VMOVDQU (%RSI,%RAX,1),%YMM13 |
(15) 0x3c41 VMOVDQU 0x20(%RSI,%RAX,1),%YMM11 |
(15) 0x3c47 VMOVAPD %YMM2,%YMM1 |
(15) 0x3c4b VPSRLQ $0x20,%YMM13,%YMM15 |
(15) 0x3c51 VPMULUDQ %YMM13,%YMM9,%YMM4 |
(15) 0x3c56 VPMULUDQ %YMM8,%YMM15,%YMM3 |
(15) 0x3c5b VPMULUDQ %YMM8,%YMM13,%YMM14 |
(15) 0x3c60 VPSRLQ $0x20,%YMM11,%YMM13 |
(15) 0x3c66 VPMULUDQ %YMM11,%YMM9,%YMM15 |
(15) 0x3c6b VPMULUDQ %YMM8,%YMM11,%YMM12 |
(15) 0x3c70 VPADDQ %YMM4,%YMM3,%YMM6 |
(15) 0x3c74 VPSLLQ $0x20,%YMM6,%YMM5 |
(15) 0x3c79 VPADDQ %YMM5,%YMM14,%YMM7 |
(15) 0x3c7d VPMULUDQ %YMM8,%YMM13,%YMM14 |
(15) 0x3c82 VMOVAPD %YMM2,%YMM5 |
(15) 0x3c86 VGATHERQPD %YMM1,(%RCX,%YMM7,8),%YMM10 |
(15) 0x3c8c VMOVDQU 0x40(%RSI,%RAX,1),%YMM1 |
(15) 0x3c92 VFMADD231PD (%RDI,%RAX,1),%YMM10,%YMM0 |
(15) 0x3c98 VPMULUDQ %YMM8,%YMM1,%YMM10 |
(15) 0x3c9d VPADDQ %YMM15,%YMM14,%YMM3 |
(15) 0x3ca2 VPSLLQ $0x20,%YMM3,%YMM4 |
(15) 0x3ca7 VMOVAPD %YMM2,%YMM3 |
(15) 0x3cab VPADDQ %YMM4,%YMM12,%YMM7 |
(15) 0x3caf VPMULUDQ %YMM1,%YMM9,%YMM12 |
(15) 0x3cb3 VMOVDQU 0x60(%RSI,%RAX,1),%YMM4 |
(15) 0x3cb9 VGATHERQPD %YMM5,(%RCX,%YMM7,8),%YMM6 |
(15) 0x3cbf VFMADD132PD 0x20(%RDI,%RAX,1),%YMM0,%YMM6 |
(15) 0x3cc6 VPSRLQ $0x20,%YMM1,%YMM0 |
(15) 0x3ccb VPMULUDQ %YMM8,%YMM4,%YMM7 |
(15) 0x3cd0 VPMULUDQ %YMM8,%YMM0,%YMM11 |
(15) 0x3cd5 VPADDQ %YMM12,%YMM11,%YMM13 |
(15) 0x3cda VPSLLQ $0x20,%YMM13,%YMM14 |
(15) 0x3ce0 VMOVAPD %YMM2,%YMM13 |
(15) 0x3ce4 VPADDQ %YMM14,%YMM10,%YMM15 |
(15) 0x3ce9 VPMULUDQ %YMM4,%YMM9,%YMM10 |
(15) 0x3ced VGATHERQPD %YMM3,(%RCX,%YMM15,8),%YMM5 |
(15) 0x3cf3 VFMADD132PD 0x40(%RDI,%RAX,1),%YMM6,%YMM5 |
(15) 0x3cfa VPSRLQ $0x20,%YMM4,%YMM6 |
(15) 0x3cff VPMULUDQ %YMM8,%YMM6,%YMM1 |
(15) 0x3d04 VPADDQ %YMM10,%YMM1,%YMM0 |
(15) 0x3d09 VPSLLQ $0x20,%YMM0,%YMM11 |
(15) 0x3d0e VPADDQ %YMM11,%YMM7,%YMM12 |
(15) 0x3d13 VGATHERQPD %YMM13,(%RCX,%YMM12,8),%YMM0 |
(15) 0x3d19 VFMADD132PD 0x60(%RDI,%RAX,1),%YMM5,%YMM0 |
(15) 0x3d20 SUB $-0x80,%RAX |
(15) 0x3d24 CMP %R11,%RAX |
(15) 0x3d27 JNE 3c3c |
(13) 0x3d2d VEXTRACTF128 $0x1,%YMM0,%XMM14 |
(13) 0x3d33 CMPQ $0,0x78(%RSP) |
(13) 0x3d39 VADDPD %XMM0,%XMM14,%XMM15 |
(13) 0x3d3d VUNPCKHPD %XMM15,%XMM15,%XMM3 |
(13) 0x3d42 VADDPD %XMM15,%XMM3,%XMM6 |
(13) 0x3d47 JE 3db2 |
(13) 0x3d49 MOV 0x60(%RSP),%RCX |
(13) 0x3d4e MOV 0x68(%RSP),%RAX |
(13) 0x3d53 MOV (%RCX),%RDX |
(13) 0x3d56 VMOVSD (%RAX),%XMM5 |
(13) 0x3d5a IMUL %R14,%RDX |
(13) 0x3d5e ADD %R8,%RDX |
(13) 0x3d61 VFMADD231SD (%R15,%RDX,8),%XMM5,%XMM6 |
(13) 0x3d67 CMP %R13,%RBX |
(13) 0x3d6a JLE 3db2 |
(13) 0x3d6c MOV 0x48(%RSP),%RCX |
(13) 0x3d71 MOV 0x38(%RSP),%RAX |
(13) 0x3d76 MOV (%RCX),%RDX |
(13) 0x3d79 MOV 0x58(%RSP),%RCX |
(13) 0x3d7e VMOVSD (%RAX),%XMM4 |
(13) 0x3d82 IMUL %R14,%RDX |
(13) 0x3d86 ADD %R8,%RDX |
(13) 0x3d89 VFMADD231SD (%R15,%RDX,8),%XMM4,%XMM6 |
(13) 0x3d8f CMP %RCX,%RBX |
(13) 0x3d92 JLE 3db2 |
(13) 0x3d94 MOV 0x28(%RSP),%RDX |
(13) 0x3d99 MOV 0x30(%RSP),%RCX |
(13) 0x3d9e MOV (%RDX),%RAX |
(13) 0x3da1 VMOVSD (%RCX),%XMM7 |
(13) 0x3da5 IMUL %R14,%RAX |
(13) 0x3da9 ADD %R8,%RAX |
(13) 0x3dac VFMADD231SD (%R15,%RAX,8),%XMM7,%XMM6 |
(13) 0x3db2 VADDSD (%R9),%XMM6,%XMM6 |
(13) 0x3db7 MOV 0x88(%RSP),%RDX |
(13) 0x3dbf INC %R10 |
(13) 0x3dc2 MOV 0x90(%RSP),%RAX |
(13) 0x3dca VMOVSD %XMM6,(%R9) |
(13) 0x3dcf ADD %RAX,%R8 |
(13) 0x3dd2 ADD %RDX,%R9 |
(13) 0x3dd5 CMP %R10,0x98(%RSP) |
(13) 0x3ddd JNE 3b40 |
(14) 0x3de3 ADDQ $0x8,0x70(%RSP) |
(14) 0x3de9 MOV 0x10(%RSP),%RCX |
(14) 0x3dee MOV 0x70(%RSP),%R12 |
(14) 0x3df3 CMP %R12,0x18(%RSP) |
(14) 0x3df8 JE 39c3 |
(14) 0x3dfe MOV (%R12),%R11 |
(14) 0x3e02 JMP 3a60 |
(10) 0x3e07 VMOVDQU (%RDX),%YMM5 |
(10) 0x3e0b VMOVAPD %YMM2,%YMM6 |
(10) 0x3e0f MOV $0x20,%EAX |
(10) 0x3e14 VGATHERQPD %YMM6,(%R15,%YMM5,8),%YMM1 |
(10) 0x3e1a VFMADD231PD (%RSI),%YMM1,%YMM0 |
(10) 0x3e1f VMOVDQU (%RDX,%RAX,1),%YMM8 |
(10) 0x3e24 VMOVAPD %YMM2,%YMM7 |
(10) 0x3e28 VGATHERQPD %YMM7,(%R15,%YMM8,8),%YMM9 |
(10) 0x3e2e VFMADD231PD (%RSI,%RAX,1),%YMM9,%YMM0 |
(10) 0x3e34 ADD $0x20,%RAX |
(10) 0x3e38 VMOVDQU (%RDX,%RAX,1),%YMM11 |
(10) 0x3e3d VMOVAPD %YMM2,%YMM10 |
(10) 0x3e41 VGATHERQPD %YMM10,(%R15,%YMM11,8),%YMM12 |
(10) 0x3e47 VFMADD231PD (%RSI,%RAX,1),%YMM12,%YMM0 |
(10) 0x3e4d ADD $0x20,%RAX |
(10) 0x3e51 VMOVDQU (%RDX,%RAX,1),%YMM14 |
(10) 0x3e56 VMOVAPD %YMM2,%YMM13 |
(10) 0x3e5a VGATHERQPD %YMM13,(%R15,%YMM14,8),%YMM15 |
(10) 0x3e60 VFMADD231PD (%RSI,%RAX,1),%YMM15,%YMM0 |
(10) 0x3e66 ADD $0x20,%RAX |
(10) 0x3e6a VMOVDQU (%RDX,%RAX,1),%YMM6 |
(10) 0x3e6f VMOVAPD %YMM2,%YMM4 |
(10) 0x3e73 VGATHERQPD %YMM4,(%R15,%YMM6,8),%YMM5 |
(10) 0x3e79 VFMADD231PD (%RSI,%RAX,1),%YMM5,%YMM0 |
(10) 0x3e7f ADD $0x20,%RAX |
(10) 0x3e83 VMOVDQU (%RDX,%RAX,1),%YMM7 |
(10) 0x3e88 VMOVAPD %YMM2,%YMM8 |
(10) 0x3e8c VGATHERQPD %YMM8,(%R15,%YMM7,8),%YMM1 |
(10) 0x3e92 VFMADD231PD (%RSI,%RAX,1),%YMM1,%YMM0 |
(10) 0x3e98 ADD $0x20,%RAX |
(10) 0x3e9c VMOVDQU (%RDX,%RAX,1),%YMM10 |
(10) 0x3ea1 VMOVAPD %YMM2,%YMM9 |
(10) 0x3ea5 VGATHERQPD %YMM9,(%R15,%YMM10,8),%YMM11 |
(10) 0x3eab VFMADD231PD (%RSI,%RAX,1),%YMM11,%YMM0 |
(10) 0x3eb1 ADD $0x20,%RAX |
(10) 0x3eb5 CMP %RAX,%RCX |
(10) 0x3eb8 JNE 3ffe |
(10) 0x3ebe VEXTRACTF128 $0x1,%YMM0,%XMM7 |
(10) 0x3ec4 MOV %RDI,%R12 |
(10) 0x3ec7 VADDPD %XMM0,%XMM7,%XMM9 |
(10) 0x3ecb AND $-0x4,%R12 |
(10) 0x3ecf VADDPD %XMM7,%XMM0,%XMM4 |
(10) 0x3ed3 ADD %R12,%R9 |
(10) 0x3ed6 VUNPCKHPD %XMM9,%XMM9,%XMM1 |
(10) 0x3edb VADDPD %XMM9,%XMM1,%XMM14 |
(10) 0x3ee0 TEST $0x3,%DIL |
(10) 0x3ee4 JE 3f31 |
(10) 0x3ee6 SUB %R12,%RDI |
(10) 0x3ee9 CMP $0x1,%RDI |
(10) 0x3eed JE 3f20 |
(10) 0x3eef ADD %R11,%R12 |
(10) 0x3ef2 VMOVAPD %XMM3,%XMM10 |
(10) 0x3ef6 VMOVDQU (%RBX,%R12,8),%XMM11 |
(10) 0x3efc VGATHERQPD %XMM10,(%R15,%XMM11,8),%XMM12 |
(10) 0x3f02 VFMADD132PD (%R13,%R12,8),%XMM4,%XMM12 |
(10) 0x3f09 VUNPCKHPD %XMM12,%XMM12,%XMM13 |
(10) 0x3f0e VADDPD %XMM12,%XMM13,%XMM14 |
(10) 0x3f13 TEST $0x1,%DIL |
(10) 0x3f17 JE 3f31 |
(10) 0x3f19 AND $-0x2,%RDI |
(10) 0x3f1d ADD %RDI,%R9 |
(10) 0x3f20 MOV (%RBX,%R9,8),%RDI |
(10) 0x3f24 VMOVSD (%R13,%R9,8),%XMM15 |
(10) 0x3f2b VFMADD231SD (%R15,%RDI,8),%XMM15,%XMM14 |
(10) 0x3f31 MOV 0x50(%RSP),%R9 |
(10) 0x3f36 ADD $0x8,%R10 |
(10) 0x3f3a ADD %R9,%R14 |
(10) 0x3f3d VADDSD (%R14),%XMM14,%XMM4 |
(10) 0x3f42 VMOVSD %XMM4,(%R14) |
(10) 0x3f47 MOV 0x18(%RSP),%R14 |
(10) 0x3f4c CMP %R14,%R10 |
(10) 0x3f4f JE 39c3 |
(10) 0x3f55 MOV (%R10),%R11 |
(10) 0x3f58 MOV 0x40(%RSP),%R8 |
(10) 0x3f5d LEA (,%R11,8),%R14 |
(10) 0x3f65 MOV (%R8,%R11,8),%R9 |
(10) 0x3f69 MOV 0x8(%R8,%R14,1),%RDI |
(10) 0x3f6e CMP %RDI,%R9 |
(10) 0x3f71 JGE 43de |
(10) 0x3f77 SUB %R9,%RDI |
(10) 0x3f7a MOV %R9,%R11 |
(10) 0x3f7d LEA -0x1(%RDI),%RAX |
(10) 0x3f81 CMP $0x2,%RAX |
(10) 0x3f85 JBE 4480 |
(10) 0x3f8b MOV %RDI,%RCX |
(10) 0x3f8e LEA (,%R9,8),%RDX |
(10) 0x3f96 VXORPD %XMM0,%XMM0,%XMM0 |
(10) 0x3f9a XOR %EAX,%EAX |
(10) 0x3f9c SHR $0x2,%RCX |
(10) 0x3fa0 LEA (%R13,%RDX,1),%RSI |
(10) 0x3fa5 ADD %RBX,%RDX |
(10) 0x3fa8 SAL $0x5,%RCX |
(10) 0x3fac LEA -0x20(%RCX),%R8 |
(10) 0x3fb0 SHR $0x5,%R8 |
(10) 0x3fb4 INC %R8 |
(10) 0x3fb7 AND $0x7,%R8D |
(10) 0x3fbb JE 3ffe |
(10) 0x3fbd CMP $0x1,%R8 |
(10) 0x3fc1 JE 3e9c |
(10) 0x3fc7 CMP $0x2,%R8 |
(10) 0x3fcb JE 3e83 |
(10) 0x3fd1 CMP $0x3,%R8 |
(10) 0x3fd5 JE 3e6a |
(10) 0x3fdb CMP $0x4,%R8 |
(10) 0x3fdf JE 3e51 |
(10) 0x3fe5 CMP $0x5,%R8 |
(10) 0x3fe9 JE 3e38 |
(10) 0x3fef CMP $0x6,%R8 |
(10) 0x3ff3 JE 3e1f |
(10) 0x3ff9 JMP 3e07 |
(11) 0x3ffe VMOVDQU (%RDX,%RAX,1),%YMM13 |
(11) 0x4003 VMOVDQU 0x20(%RDX,%RAX,1),%YMM4 |
(11) 0x4009 VMOVAPD %YMM2,%YMM12 |
(11) 0x400d VMOVAPD %YMM2,%YMM15 |
(11) 0x4011 VMOVDQU 0x40(%RDX,%RAX,1),%YMM6 |
(11) 0x4017 VMOVDQU 0x60(%RDX,%RAX,1),%YMM7 |
(11) 0x401d VMOVAPD %YMM2,%YMM5 |
(11) 0x4021 VMOVAPD %YMM2,%YMM1 |
(11) 0x4025 VGATHERQPD %YMM12,(%R15,%YMM13,8),%YMM14 |
(11) 0x402b VFMADD132PD (%RSI,%RAX,1),%YMM0,%YMM14 |
(11) 0x4031 VMOVAPD %YMM2,%YMM10 |
(11) 0x4035 VMOVDQU 0x80(%RDX,%RAX,1),%YMM11 |
(11) 0x403e VGATHERQPD %YMM15,(%R15,%YMM4,8),%YMM0 |
(11) 0x4044 VFMADD132PD 0x20(%RSI,%RAX,1),%YMM14,%YMM0 |
(11) 0x404b VMOVAPD %YMM2,%YMM13 |
(11) 0x404f VMOVDQU 0xa0(%RDX,%RAX,1),%YMM14 |
(11) 0x4058 VGATHERQPD %YMM5,(%R15,%YMM6,8),%YMM8 |
(11) 0x405e VFMADD132PD 0x40(%RSI,%RAX,1),%YMM0,%YMM8 |
(11) 0x4065 VMOVAPD %YMM2,%YMM4 |
(11) 0x4069 VMOVDQU 0xc0(%RDX,%RAX,1),%YMM0 |
(11) 0x4072 VGATHERQPD %YMM1,(%R15,%YMM7,8),%YMM9 |
(11) 0x4078 VFMADD132PD 0x60(%RSI,%RAX,1),%YMM8,%YMM9 |
(11) 0x407f VMOVAPD %YMM2,%YMM8 |
(11) 0x4083 VMOVDQU 0xe0(%RDX,%RAX,1),%YMM5 |
(11) 0x408c VGATHERQPD %YMM10,(%R15,%YMM11,8),%YMM12 |
(11) 0x4092 VFMADD132PD 0x80(%RSI,%RAX,1),%YMM9,%YMM12 |
(11) 0x409c VGATHERQPD %YMM13,(%R15,%YMM14,8),%YMM15 |
(11) 0x40a2 VFMADD132PD 0xa0(%RSI,%RAX,1),%YMM12,%YMM15 |
(11) 0x40ac VGATHERQPD %YMM4,(%R15,%YMM0,8),%YMM6 |
(11) 0x40b2 VGATHERQPD %YMM8,(%R15,%YMM5,8),%YMM0 |
(11) 0x40b8 VFMADD132PD 0xc0(%RSI,%RAX,1),%YMM15,%YMM6 |
(11) 0x40c2 VFMADD132PD 0xe0(%RSI,%RAX,1),%YMM6,%YMM0 |
(11) 0x40cc ADD $0x100,%RAX |
(11) 0x40d2 CMP %RAX,%RCX |
(11) 0x40d5 JNE 3ffe |
(10) 0x40db JMP 3ebe |
(13) 0x40e0 VXORPD %XMM6,%XMM6,%XMM6 |
(13) 0x40e4 JMP 3db2 |
(13) 0x40e9 VXORPD %XMM6,%XMM6,%XMM6 |
(13) 0x40ed JMP 3d49 |
0x40f2 MOV %R14,0x98(%RSP) |
0x40fa MOV 0x40(%RSP),%R14 |
(16) 0x40ff LEA (,%R12,8),%R10 |
(16) 0x4107 MOV (%R14,%R12,8),%RCX |
(16) 0x410b MOV 0x8(%R14,%R10,1),%RSI |
(16) 0x4110 CMP %RCX,%RSI |
(16) 0x4113 JLE 43c6 |
(16) 0x4119 SUB %RCX,%RSI |
(16) 0x411c MOV %RCX,%R12 |
(16) 0x411f LEA -0x1(%RSI),%R9 |
(16) 0x4123 CMP $0x2,%R9 |
(16) 0x4127 JBE 43cd |
(16) 0x412d MOV %RSI,%R11 |
(16) 0x4130 LEA (,%RCX,8),%RDI |
(16) 0x4138 VXORPD %XMM1,%XMM1,%XMM1 |
(16) 0x413c XOR %EDX,%EDX |
(16) 0x413e SHR $0x2,%R11 |
(16) 0x4142 LEA (%R13,%RDI,1),%R9 |
(16) 0x4147 ADD %RBX,%RDI |
(16) 0x414a SAL $0x5,%R11 |
(16) 0x414e LEA -0x20(%R11),%R8 |
(16) 0x4152 SHR $0x5,%R8 |
(16) 0x4156 INC %R8 |
(16) 0x4159 AND $0x7,%R8D |
(16) 0x415d JE 424a |
(16) 0x4163 CMP $0x1,%R8 |
(16) 0x4167 JE 4228 |
(16) 0x416d CMP $0x2,%R8 |
(16) 0x4171 JE 420e |
(16) 0x4177 CMP $0x3,%R8 |
(16) 0x417b JE 41f4 |
(16) 0x417d CMP $0x4,%R8 |
(16) 0x4181 JE 41db |
(16) 0x4183 CMP $0x5,%R8 |
(16) 0x4187 JE 41c2 |
(16) 0x4189 CMP $0x6,%R8 |
(16) 0x418d JE 41a8 |
(16) 0x418f VMOVDQU (%RDI),%YMM13 |
(16) 0x4193 VMOVAPD %YMM9,%YMM12 |
(16) 0x4198 MOV $0x20,%EDX |
(16) 0x419d VGATHERQPD %YMM12,(%R15,%YMM13,8),%YMM14 |
(16) 0x41a3 VFMADD231PD (%R9),%YMM14,%YMM1 |
(16) 0x41a8 VMOVDQU (%RDI,%RDX,1),%YMM3 |
(16) 0x41ad VMOVAPD %YMM9,%YMM15 |
(16) 0x41b2 VGATHERQPD %YMM15,(%R15,%YMM3,8),%YMM5 |
(16) 0x41b8 VFMADD231PD (%R9,%RDX,1),%YMM5,%YMM1 |
(16) 0x41be ADD $0x20,%RDX |
(16) 0x41c2 VMOVDQU (%RDI,%RDX,1),%YMM7 |
(16) 0x41c7 VMOVAPD %YMM9,%YMM4 |
(16) 0x41cb VGATHERQPD %YMM4,(%R15,%YMM7,8),%YMM0 |
(16) 0x41d1 VFMADD231PD (%R9,%RDX,1),%YMM0,%YMM1 |
(16) 0x41d7 ADD $0x20,%RDX |
(16) 0x41db VMOVDQU (%RDI,%RDX,1),%YMM2 |
(16) 0x41e0 VMOVAPD %YMM9,%YMM6 |
(16) 0x41e4 VGATHERQPD %YMM6,(%R15,%YMM2,8),%YMM10 |
(16) 0x41ea VFMADD231PD (%R9,%RDX,1),%YMM10,%YMM1 |
(16) 0x41f0 ADD $0x20,%RDX |
(16) 0x41f4 VMOVDQU (%RDI,%RDX,1),%YMM12 |
(16) 0x41f9 VMOVAPD %YMM9,%YMM11 |
(16) 0x41fe VGATHERQPD %YMM11,(%R15,%YMM12,8),%YMM13 |
(16) 0x4204 VFMADD231PD (%R9,%RDX,1),%YMM13,%YMM1 |
(16) 0x420a ADD $0x20,%RDX |
(16) 0x420e VMOVDQU (%RDI,%RDX,1),%YMM15 |
(16) 0x4213 VMOVAPD %YMM9,%YMM14 |
(16) 0x4218 VGATHERQPD %YMM14,(%R15,%YMM15,8),%YMM3 |
(16) 0x421e VFMADD231PD (%R9,%RDX,1),%YMM3,%YMM1 |
(16) 0x4224 ADD $0x20,%RDX |
(16) 0x4228 VMOVDQU (%RDI,%RDX,1),%YMM5 |
(16) 0x422d VMOVAPD %YMM9,%YMM4 |
(16) 0x4231 VGATHERQPD %YMM4,(%R15,%YMM5,8),%YMM7 |
(16) 0x4237 VFMADD231PD (%R9,%RDX,1),%YMM7,%YMM1 |
(16) 0x423d ADD $0x20,%RDX |
(16) 0x4241 CMP %R11,%RDX |
(16) 0x4244 JE 432c |
(17) 0x424a VMOVDQU (%RDI,%RDX,1),%YMM2 |
(17) 0x424f VMOVDQU 0x20(%RDI,%RDX,1),%YMM11 |
(17) 0x4255 VMOVAPD %YMM9,%YMM6 |
(17) 0x4259 VMOVAPD %YMM9,%YMM10 |
(17) 0x425e VMOVDQU 0x40(%RDI,%RDX,1),%YMM14 |
(17) 0x4264 VMOVDQU 0x60(%RDI,%RDX,1),%YMM4 |
(17) 0x426a VMOVAPD %YMM9,%YMM13 |
(17) 0x426f VMOVAPD %YMM9,%YMM3 |
(17) 0x4273 VGATHERQPD %YMM6,(%R15,%YMM2,8),%YMM0 |
(17) 0x4279 VFMADD231PD (%R9,%RDX,1),%YMM0,%YMM1 |
(17) 0x427f VMOVAPD %YMM9,%YMM5 |
(17) 0x4283 VMOVDQU 0x80(%RDI,%RDX,1),%YMM7 |
(17) 0x428c VGATHERQPD %YMM10,(%R15,%YMM11,8),%YMM12 |
(17) 0x4292 VFMADD231PD 0x20(%R9,%RDX,1),%YMM12,%YMM1 |
(17) 0x4299 VMOVAPD %YMM9,%YMM6 |
(17) 0x429d VMOVDQU 0xa0(%RDI,%RDX,1),%YMM0 |
(17) 0x42a6 VGATHERQPD %YMM13,(%R15,%YMM14,8),%YMM15 |
(17) 0x42ac VFMADD132PD 0x40(%R9,%RDX,1),%YMM1,%YMM15 |
(17) 0x42b3 VMOVAPD %YMM9,%YMM11 |
(17) 0x42b8 VMOVDQU 0xc0(%RDI,%RDX,1),%YMM12 |
(17) 0x42c1 VGATHERQPD %YMM3,(%R15,%YMM4,8),%YMM1 |
(17) 0x42c7 VFMADD132PD 0x60(%R9,%RDX,1),%YMM15,%YMM1 |
(17) 0x42ce VMOVAPD %YMM9,%YMM14 |
(17) 0x42d3 VMOVDQU 0xe0(%RDI,%RDX,1),%YMM15 |
(17) 0x42dc VGATHERQPD %YMM5,(%R15,%YMM7,8),%YMM2 |
(17) 0x42e2 VFMADD132PD 0x80(%R9,%RDX,1),%YMM1,%YMM2 |
(17) 0x42ec VGATHERQPD %YMM6,(%R15,%YMM0,8),%YMM10 |
(17) 0x42f2 VFMADD132PD 0xa0(%R9,%RDX,1),%YMM2,%YMM10 |
(17) 0x42fc VGATHERQPD %YMM11,(%R15,%YMM12,8),%YMM13 |
(17) 0x4302 VGATHERQPD %YMM14,(%R15,%YMM15,8),%YMM1 |
(17) 0x4308 VFMADD132PD 0xc0(%R9,%RDX,1),%YMM10,%YMM13 |
(17) 0x4312 VFMADD132PD 0xe0(%R9,%RDX,1),%YMM13,%YMM1 |
(17) 0x431c ADD $0x100,%RDX |
(17) 0x4323 CMP %R11,%RDX |
(17) 0x4326 JNE 424a |
(16) 0x432c VEXTRACTF128 $0x1,%YMM1,%XMM3 |
(16) 0x4332 MOV %RSI,%RDX |
(16) 0x4335 VADDPD %XMM1,%XMM3,%XMM4 |
(16) 0x4339 AND $-0x4,%RDX |
(16) 0x433d VADDPD %XMM3,%XMM1,%XMM11 |
(16) 0x4341 ADD %RDX,%RCX |
(16) 0x4344 VUNPCKHPD %XMM4,%XMM4,%XMM5 |
(16) 0x4348 VADDPD %XMM4,%XMM5,%XMM10 |
(16) 0x434c TEST $0x3,%SIL |
(16) 0x4350 JE 439a |
(16) 0x4352 SUB %RDX,%RSI |
(16) 0x4355 CMP $0x1,%RSI |
(16) 0x4359 JE 4389 |
(16) 0x435b ADD %R12,%RDX |
(16) 0x435e VMOVAPD %XMM8,%XMM7 |
(16) 0x4362 VMOVDQU (%RBX,%RDX,8),%XMM2 |
(16) 0x4367 VGATHERQPD %XMM7,(%R15,%XMM2,8),%XMM6 |
(16) 0x436d VFMADD132PD (%R13,%RDX,8),%XMM11,%XMM6 |
(16) 0x4374 VUNPCKHPD %XMM6,%XMM6,%XMM0 |
(16) 0x4378 VADDPD %XMM6,%XMM0,%XMM10 |
(16) 0x437c TEST $0x1,%SIL |
(16) 0x4380 JE 439a |
(16) 0x4382 AND $-0x2,%RSI |
(16) 0x4386 ADD %RSI,%RCX |
(16) 0x4389 MOV (%RBX,%RCX,8),%RSI |
(16) 0x438d VMOVSD (%R13,%RCX,8),%XMM11 |
(16) 0x4394 VFMADD231SD (%R15,%RSI,8),%XMM11,%XMM10 |
(16) 0x439a MOV 0x50(%RSP),%RCX |
(16) 0x439f ADD $0x8,%RAX |
(16) 0x43a3 ADD %RCX,%R10 |
(16) 0x43a6 VADDSD (%R10),%XMM10,%XMM12 |
(16) 0x43ab VMOVSD %XMM12,(%R10) |
(16) 0x43b0 CMP %RAX,0x98(%RSP) |
(16) 0x43b8 JE 39c3 |
(16) 0x43be MOV (%RAX),%R12 |
(16) 0x43c1 JMP 40ff |
(16) 0x43c6 VXORPD %XMM10,%XMM10,%XMM10 |
(16) 0x43cb JMP 439a |
(16) 0x43cd VXORPD %XMM11,%XMM11,%XMM11 |
(16) 0x43d2 XOR %EDX,%EDX |
(16) 0x43d4 VXORPD %XMM10,%XMM10,%XMM10 |
(16) 0x43d9 JMP 4352 |
(10) 0x43de VXORPD %XMM14,%XMM14,%XMM14 |
(10) 0x43e3 JMP 3f31 |
0x43e8 MOV 0x18(%RSP),%R15 |
0x43ed SUB %RDI,%R15 |
0x43f0 SUB $0x8,%R15 |
0x43f4 SHR $0x3,%R15 |
0x43f8 INC %R15 |
0x43fb AND $0x7,%R15D |
0x43ff JE 4491 |
0x4405 CMP $0x1,%R15 |
0x4409 JE 4453 |
0x440b CMP $0x2,%R15 |
0x440f JE 444d |
0x4411 CMP $0x3,%R15 |
0x4415 JE 4447 |
0x4417 CMP $0x4,%R15 |
0x441b JE 4441 |
0x441d CMP $0x5,%R15 |
0x4421 JE 443b |
0x4423 MOV %RDI,%RBX |
0x4426 ADD $0x8,%RDI |
0x442a CMP $0x6,%R15 |
0x442e CMOVNE %RDI,%RBX |
0x4432 ADD $0x8,%RBX |
0x4436 MOV %RBX,0x70(%RSP) |
0x443b ADDQ $0x8,0x70(%RSP) |
0x4441 ADDQ $0x8,0x70(%RSP) |
0x4447 ADDQ $0x8,0x70(%RSP) |
0x444d ADDQ $0x8,0x70(%RSP) |
0x4453 ADDQ $0x8,0x70(%RSP) |
0x4459 MOV 0x18(%RSP),%R10 |
0x445e MOV 0x70(%RSP),%R13 |
0x4463 CMP %R10,%R13 |
0x4466 JE 39c6 |
(12) 0x446c MOV 0x18(%RSP),%R12 |
(12) 0x4471 ADD $0x40,%R13 |
(12) 0x4475 CMP %R12,%R13 |
(12) 0x4478 JNE 446c |
0x447a JMP 39c6 |
0x447f NOP |
(10) 0x4480 VXORPD %XMM4,%XMM4,%XMM4 |
(10) 0x4484 XOR %R12D,%R12D |
(10) 0x4487 VXORPD %XMM14,%XMM14,%XMM14 |
(10) 0x448c JMP 3ee6 |
0x4491 MOV 0x70(%RSP),%R13 |
0x4496 JMP 446c |
0x4498 NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○96.16 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○3.84 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | csr_matvec.c:178-204 |
Module | libseq_mv.so |
nb instructions | 154 |
nb uops | 159 |
loop length | 635 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 26.50 cycles |
front end | 26.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 19.25 | 19.25 | 19.00 | 19.00 | 14.50 | 14.33 | 14.33 | 14.33 | 1.75 | 1.75 | 1.75 | 1.75 | 0.00 | 0.00 |
cycles | 19.25 | 19.25 | 19.00 | 19.00 | 14.50 | 14.33 | 14.33 | 14.33 | 1.75 | 1.75 | 1.75 | 1.75 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 7.00-12.00 |
Front-end | 26.50 |
Dispatch | 19.25 |
DIV/SQRT | 7.00-12.00 |
Overall L1 | 26.50 |
all | 9% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 18% |
all | 14% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 17% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SUB $0xa0,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CALL 2050 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL 2100 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOVSXD %R12D,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOVSXD %EAX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x20(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CQTO | scal (12.5%) | |||||||||||||||||
IDIV %RDI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 | scal (12.5%) |
CMP %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JL 39f0 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4d0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
IMUL %RAX,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
LEA (%RSI,%RDX,1),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%RAX,%RCX,1),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 39c6 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x18(%R13),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x58(%R13),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%R13),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x40(%R13),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x38(%R13),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x8(%R13),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%R12,%RCX,8),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RBX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x50(%R13),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x48(%R13),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R15,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x28(%R13),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x10(%R13),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %R10,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R11,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%R13),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CMP $0x1,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 39fa <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4da> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA (,%R9,8),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%R12,%R8,8),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPCMPEQD %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
MOV (%RDI),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMPQ $0x1,0x98(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
MOV %R9,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VPCMPEQD %YMM9,%YMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (50.0%) |
JE 40f2 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xbd2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMPQ $0,0x98(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JLE 395c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x43c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R14,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R14,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
SUB %RDI,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
SUB $0x8,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
SHR $0x3,%R15 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
INC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
AND $0x7,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JE 39ba <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x49a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x1,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39b1 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x491> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x2,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39ad <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x48d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x3,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39a9 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x489> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39a5 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x485> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x5,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39a1 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x481> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD $0x8,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x6,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
CMOVNE %RDI,%RAX | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R14,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39c3 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 3560 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x40> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
LEA (%R12,%R8,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RDI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%RDI),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
VPCMPEQD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
LEA (,%R9,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VPCMPEQD %YMM2,%YMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (50.0%) |
MOV %RDI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CMPQ $0x1,0x98(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
MOV %RCX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JE 3f58 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xa38> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMPQ $0,0x98(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JLE 43e8 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xec8> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVQ %R14,%XMM3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
MOV %RBX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VPCMPEQD %YMM2,%YMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (50.0%) |
MOV %R13,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VPBROADCASTQ %XMM3,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
VPSRLQ $0x20,%YMM8,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV %R14,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x40(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x18(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %RDI,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
SUB $0x8,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
SHR $0x3,%R15 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
INC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
AND $0x7,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JE 4491 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf71> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x1,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 4453 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf33> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x2,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 444d <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf2d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x3,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 4447 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf27> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 4441 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf21> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x5,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 443b <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf1b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
ADD $0x8,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x6,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
CMOVNE %RDI,%RBX | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
ADD $0x8,%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV %RBX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
ADDQ $0x8,0x70(%RSP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
ADDQ $0x8,0x70(%RSP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
ADDQ $0x8,0x70(%RSP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
ADDQ $0x8,0x70(%RSP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
ADDQ $0x8,0x70(%RSP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x18(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x70(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R10,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 39c6 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 39c6 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
MOV 0x70(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 446c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf4c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Source file and lines | csr_matvec.c:178-204 |
Module | libseq_mv.so |
nb instructions | 154 |
nb uops | 159 |
loop length | 635 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 26.50 cycles |
front end | 26.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 19.25 | 19.25 | 19.00 | 19.00 | 14.50 | 14.33 | 14.33 | 14.33 | 1.75 | 1.75 | 1.75 | 1.75 | 0.00 | 0.00 |
cycles | 19.25 | 19.25 | 19.00 | 19.00 | 14.50 | 14.33 | 14.33 | 14.33 | 1.75 | 1.75 | 1.75 | 1.75 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 7.00-12.00 |
Front-end | 26.50 |
Dispatch | 19.25 |
DIV/SQRT | 7.00-12.00 |
Overall L1 | 26.50 |
all | 9% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 18% |
all | 14% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 17% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SUB $0xa0,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CALL 2050 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CALL 2100 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOVSXD %R12D,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOVSXD %EAX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x20(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CQTO | scal (12.5%) | |||||||||||||||||
IDIV %RDI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 | scal (12.5%) |
CMP %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JL 39f0 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4d0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
IMUL %RAX,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | N/A |
LEA (%RSI,%RDX,1),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%RAX,%RCX,1),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 39c6 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x18(%R13),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x58(%R13),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%R13),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x40(%R13),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x38(%R13),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x8(%R13),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%R12,%RCX,8),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RBX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x50(%R13),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x48(%R13),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R15,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x28(%R13),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x10(%R13),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %R10,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R11,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%R13),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CMP $0x1,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 39fa <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4da> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA (,%R9,8),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%R12,%R8,8),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPCMPEQD %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
MOV (%RDI),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMPQ $0x1,0x98(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
MOV %R9,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VPCMPEQD %YMM9,%YMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (50.0%) |
JE 40f2 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xbd2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMPQ $0,0x98(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JLE 395c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x43c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R14,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R14,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
SUB %RDI,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
SUB $0x8,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
SHR $0x3,%R15 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
INC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
AND $0x7,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JE 39ba <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x49a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x1,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39b1 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x491> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x2,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39ad <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x48d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x3,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39a9 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x489> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39a5 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x485> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x5,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39a1 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x481> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD $0x8,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x6,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
CMOVNE %RDI,%RAX | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R14,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 39c3 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 3560 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x40> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
LEA (%R12,%R8,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RDI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%RDI),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
VPCMPEQD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
LEA (,%R9,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VPCMPEQD %YMM2,%YMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (50.0%) |
MOV %RDI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CMPQ $0x1,0x98(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
MOV %RCX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JE 3f58 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xa38> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMPQ $0,0x98(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JLE 43e8 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xec8> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVQ %R14,%XMM3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (12.5%) |
MOV %RBX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
VPCMPEQD %YMM2,%YMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (50.0%) |
MOV %R13,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VPBROADCASTQ %XMM3,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
VPSRLQ $0x20,%YMM8,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV %R14,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x40(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x18(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %RDI,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
SUB $0x8,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
SHR $0x3,%R15 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
INC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
AND $0x7,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JE 4491 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf71> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x1,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 4453 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf33> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x2,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 444d <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf2d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x3,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 4447 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf27> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 4441 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf21> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x5,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 443b <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf1b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
ADD $0x8,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x6,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
CMOVNE %RDI,%RBX | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
ADD $0x8,%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV %RBX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
ADDQ $0x8,0x70(%RSP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
ADDQ $0x8,0x70(%RSP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
ADDQ $0x8,0x70(%RSP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
ADDQ $0x8,0x70(%RSP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
ADDQ $0x8,0x70(%RSP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x18(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x70(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R10,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 39c6 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 39c6 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0x4a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
MOV 0x70(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 446c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3+0xf4c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMatvecOutOfPlace._omp_fn.3– | 0.05 | 0.04 |
▼Loop 10 - csr_matvec.c:183-196 - libseq_mv.so– | 0.03 | 0.02 |
○Loop 11 - csr_matvec.c:194-195 - libseq_mv.so | 0.00 | 0.01 |
▼Loop 20 - csr_matvec.c:183-204 - libseq_mv.so– | 0.00 | 0.00 |
▼Loop 19 - csr_matvec.c:199-204 - libseq_mv.so– | 0.00 | 0.00 |
○Loop 21 - csr_matvec.c:202-203 - libseq_mv.so | 0.00 | 0.00 |
▼Loop 16 - csr_matvec.c:183-196 - libseq_mv.so– | 0.00 | 0.01 |
○Loop 17 - csr_matvec.c:194-195 - libseq_mv.so | 0.01 | 0.02 |
○Loop 12 - csr_matvec.c:193-193 - libseq_mv.so | 0.00 | 0.00 |
○Loop 18 - csr_matvec.c:191-191 - libseq_mv.so | 0.00 | 0.00 |
▼Loop 14 - csr_matvec.c:183-204 - libseq_mv.so– | 0.00 | 0.01 |
▼Loop 13 - csr_matvec.c:199-204 - libseq_mv.so– | 0.00 | 0.00 |
○Loop 15 - csr_matvec.c:202-203 - libseq_mv.so | 0.00 | 0.01 |