Loop Id: 6542 | Module: exec | Source: csr_matvec.c:560-567 | Coverage: 0.34% |
---|
Loop Id: 6542 | Module: exec | Source: csr_matvec.c:560-567 | Coverage: 0.34% |
---|
0x578ab0 MOV -0x68(%RBP),%RDX |
0x578ab4 ADD $0x4,%R11 |
0x578ab8 CMP %RDX,%R11 |
0x578abb JE 5787b1 |
0x578ac1 MOV -0x40(%RBP),%RAX |
0x578ac5 MOV %R11,-0x58(%RBP) |
0x578ac9 LEA (%RAX,%R11,1),%RDI |
0x578acd MOV 0x8(%RSI,%RDI,8),%R8 |
0x578ad2 MOV (%RSI,%RDI,8),%R10 |
0x578ad6 MOV %R8,%RCX |
0x578ad9 SUB %R10,%RCX |
0x578adc JLE 578be6 |
0x578ae2 MOV %RCX,%R9 |
0x578ae5 AND $-0x8,%R9 |
0x578ae9 JE 578bb7 |
0x578aef LEA -0x1(%R9),%R11 |
0x578af3 XOR %EAX,%EAX |
0x578af5 JMP 578b3a |
(6553) 0x578b00 KXNORW %K0,%K0,%K1 |
(6553) 0x578b04 VXORPD %XMM7,%XMM7,%XMM7 |
(6553) 0x578b08 VPSLLQ $0x3,%ZMM5,%ZMM6 |
(6553) 0x578b0f ADD $0x8,%RAX |
(6553) 0x578b13 VGATHERQPD (%RBX,%ZMM5,8),%ZMM7{%K1} |
(6553) 0x578b1a VPADDQ %ZMM6,%ZMM0,%ZMM6 |
(6553) 0x578b20 KXNORW %K0,%K0,%K1 |
(6553) 0x578b24 VADDPD %ZMM4,%ZMM7,%ZMM4 |
(6553) 0x578b2a VSCATTERQPD %ZMM4,(,%ZMM6,1){%K1} |
(6553) 0x578b35 CMP %R11,%RAX |
(6553) 0x578b38 JA 578bb0 |
(6553) 0x578b3a VBROADCASTSD (%R15,%RDI,8),%ZMM4 |
(6553) 0x578b41 LEA (%R10,%RAX,1),%RDX |
(6553) 0x578b45 VPADDQ (%R14,%RDX,8),%ZMM1,%ZMM5 |
(6553) 0x578b4c VPCONFLICTQ %ZMM5,%ZMM6 |
(6553) 0x578b52 VPTESTMQ %ZMM6,%ZMM6,%K1 |
(6553) 0x578b58 KORTESTB %K1,%K1 |
(6553) 0x578b5c VMULPD (%R12,%RDX,8),%ZMM4,%ZMM4 |
(6553) 0x578b63 JE 578b00 |
(6553) 0x578b65 VPLZCNTQ %ZMM6,%ZMM6 |
(6553) 0x578b6b VPSUBQ %ZMM6,%ZMM2,%ZMM6 |
(6553) 0x578b71 NOPW %CS:(%RAX,%RAX,1) |
(6554) 0x578b80 VPERMQ %ZMM4,%ZMM6,%ZMM7 |
(6554) 0x578b86 VPERMQ %ZMM6,%ZMM6,%ZMM6{%K1} |
(6554) 0x578b8c VADDPD %ZMM7,%ZMM4,%ZMM4{%K1} |
(6554) 0x578b92 VPCMPNEQQ %ZMM3,%ZMM6,%K1 |
(6554) 0x578b99 KORTESTB %K1,%K1 |
(6554) 0x578b9d JNE 578b80 |
(6553) 0x578b9f JMP 578b00 |
0x578bb0 CMP %R9,%RCX |
0x578bb3 JNE 578bba |
0x578bb5 JMP 578be6 |
0x578bb7 XOR %R9D,%R9D |
0x578bba ADD %R10,%R9 |
0x578bbd NOPL (%RAX) |
(6552) 0x578bc0 VMOVSD (%R15,%RDI,8),%XMM4 |
(6552) 0x578bc6 MOV (%R14,%R9,8),%RAX |
(6552) 0x578bca VMOVSD (%R12,%R9,8),%XMM5 |
(6552) 0x578bd0 INC %R9 |
(6552) 0x578bd3 ADD %R13,%RAX |
(6552) 0x578bd6 VFMADD213SD (%RBX,%RAX,8),%XMM4,%XMM5 |
(6552) 0x578bdc VMOVSD %XMM5,(%RBX,%RAX,8) |
(6552) 0x578be1 CMP %R9,%R8 |
(6552) 0x578be4 JNE 578bc0 |
0x578be6 MOV 0x10(%RSI,%RDI,8),%R8 |
0x578beb MOV 0x8(%RSI,%RDI,8),%R11 |
0x578bf0 MOV %R8,%RCX |
0x578bf3 SUB %R11,%RCX |
0x578bf6 JLE 578d16 |
0x578bfc MOV -0x40(%RBP),%RAX |
0x578c00 MOV -0x58(%RBP),%RDX |
0x578c04 MOV %RCX,%R10 |
0x578c07 AND $-0x8,%R10 |
0x578c0b LEA 0x1(%RDX,%RAX,1),%R9 |
0x578c10 JE 578cdb |
0x578c16 LEA -0x1(%R10),%RSI |
0x578c1a XOR %EAX,%EAX |
0x578c1c JMP 578c5a |
(6550) 0x578c20 KXNORW %K0,%K0,%K1 |
(6550) 0x578c24 VXORPD %XMM7,%XMM7,%XMM7 |
(6550) 0x578c28 VPSLLQ $0x3,%ZMM5,%ZMM6 |
(6550) 0x578c2f ADD $0x8,%RAX |
(6550) 0x578c33 VGATHERQPD (%RBX,%ZMM5,8),%ZMM7{%K1} |
(6550) 0x578c3a VPADDQ %ZMM6,%ZMM0,%ZMM6 |
(6550) 0x578c40 KXNORW %K0,%K0,%K1 |
(6550) 0x578c44 VADDPD %ZMM4,%ZMM7,%ZMM4 |
(6550) 0x578c4a VSCATTERQPD %ZMM4,(,%ZMM6,1){%K1} |
(6550) 0x578c55 CMP %RSI,%RAX |
(6550) 0x578c58 JA 578cd0 |
(6550) 0x578c5a VBROADCASTSD (%R15,%R9,8),%ZMM4 |
(6550) 0x578c61 LEA (%R11,%RAX,1),%RDX |
(6550) 0x578c65 VPADDQ (%R14,%RDX,8),%ZMM1,%ZMM5 |
(6550) 0x578c6c VPCONFLICTQ %ZMM5,%ZMM6 |
(6550) 0x578c72 VPTESTMQ %ZMM6,%ZMM6,%K1 |
(6550) 0x578c78 KORTESTB %K1,%K1 |
(6550) 0x578c7c VMULPD (%R12,%RDX,8),%ZMM4,%ZMM4 |
(6550) 0x578c83 JE 578c20 |
(6550) 0x578c85 VPLZCNTQ %ZMM6,%ZMM6 |
(6550) 0x578c8b VPSUBQ %ZMM6,%ZMM2,%ZMM6 |
(6550) 0x578c91 NOPW %CS:(%RAX,%RAX,1) |
(6551) 0x578ca0 VPERMQ %ZMM4,%ZMM6,%ZMM7 |
(6551) 0x578ca6 VPERMQ %ZMM6,%ZMM6,%ZMM6{%K1} |
(6551) 0x578cac VADDPD %ZMM7,%ZMM4,%ZMM4{%K1} |
(6551) 0x578cb2 VPCMPNEQQ %ZMM3,%ZMM6,%K1 |
(6551) 0x578cb9 KORTESTB %K1,%K1 |
(6551) 0x578cbd JNE 578ca0 |
(6550) 0x578cbf JMP 578c20 |
0x578cd0 MOV -0x50(%RBP),%RSI |
0x578cd4 CMP %R10,%RCX |
0x578cd7 JNE 578cde |
0x578cd9 JMP 578d16 |
0x578cdb XOR %R10D,%R10D |
0x578cde ADD %R11,%R10 |
0x578ce1 NOPW %CS:(%RAX,%RAX,1) |
(6549) 0x578cf0 VMOVSD (%R15,%R9,8),%XMM4 |
(6549) 0x578cf6 MOV (%R14,%R10,8),%RAX |
(6549) 0x578cfa VMOVSD (%R12,%R10,8),%XMM5 |
(6549) 0x578d00 INC %R10 |
(6549) 0x578d03 ADD %R13,%RAX |
(6549) 0x578d06 VFMADD213SD (%RBX,%RAX,8),%XMM4,%XMM5 |
(6549) 0x578d0c VMOVSD %XMM5,(%RBX,%RAX,8) |
(6549) 0x578d11 CMP %R10,%R8 |
(6549) 0x578d14 JNE 578cf0 |
0x578d16 MOV 0x18(%RSI,%RDI,8),%R8 |
0x578d1b MOV 0x10(%RSI,%RDI,8),%R11 |
0x578d20 MOV %R8,%RCX |
0x578d23 SUB %R11,%RCX |
0x578d26 JLE 578e46 |
0x578d2c MOV -0x40(%RBP),%RAX |
0x578d30 MOV -0x58(%RBP),%RDX |
0x578d34 MOV %RCX,%R10 |
0x578d37 AND $-0x8,%R10 |
0x578d3b LEA 0x2(%RDX,%RAX,1),%R9 |
0x578d40 JE 578e0b |
0x578d46 LEA -0x1(%R10),%RSI |
0x578d4a XOR %EAX,%EAX |
0x578d4c JMP 578d8a |
(6547) 0x578d50 KXNORW %K0,%K0,%K1 |
(6547) 0x578d54 VXORPD %XMM7,%XMM7,%XMM7 |
(6547) 0x578d58 VPSLLQ $0x3,%ZMM5,%ZMM6 |
(6547) 0x578d5f ADD $0x8,%RAX |
(6547) 0x578d63 VGATHERQPD (%RBX,%ZMM5,8),%ZMM7{%K1} |
(6547) 0x578d6a VPADDQ %ZMM6,%ZMM0,%ZMM6 |
(6547) 0x578d70 KXNORW %K0,%K0,%K1 |
(6547) 0x578d74 VADDPD %ZMM4,%ZMM7,%ZMM4 |
(6547) 0x578d7a VSCATTERQPD %ZMM4,(,%ZMM6,1){%K1} |
(6547) 0x578d85 CMP %RSI,%RAX |
(6547) 0x578d88 JA 578e00 |
(6547) 0x578d8a VBROADCASTSD (%R15,%R9,8),%ZMM4 |
(6547) 0x578d91 LEA (%R11,%RAX,1),%RDX |
(6547) 0x578d95 VPADDQ (%R14,%RDX,8),%ZMM1,%ZMM5 |
(6547) 0x578d9c VPCONFLICTQ %ZMM5,%ZMM6 |
(6547) 0x578da2 VPTESTMQ %ZMM6,%ZMM6,%K1 |
(6547) 0x578da8 KORTESTB %K1,%K1 |
(6547) 0x578dac VMULPD (%R12,%RDX,8),%ZMM4,%ZMM4 |
(6547) 0x578db3 JE 578d50 |
(6547) 0x578db5 VPLZCNTQ %ZMM6,%ZMM6 |
(6547) 0x578dbb VPSUBQ %ZMM6,%ZMM2,%ZMM6 |
(6547) 0x578dc1 NOPW %CS:(%RAX,%RAX,1) |
(6548) 0x578dd0 VPERMQ %ZMM4,%ZMM6,%ZMM7 |
(6548) 0x578dd6 VPERMQ %ZMM6,%ZMM6,%ZMM6{%K1} |
(6548) 0x578ddc VADDPD %ZMM7,%ZMM4,%ZMM4{%K1} |
(6548) 0x578de2 VPCMPNEQQ %ZMM3,%ZMM6,%K1 |
(6548) 0x578de9 KORTESTB %K1,%K1 |
(6548) 0x578ded JNE 578dd0 |
(6547) 0x578def JMP 578d50 |
0x578e00 MOV -0x50(%RBP),%RSI |
0x578e04 CMP %R10,%RCX |
0x578e07 JNE 578e0e |
0x578e09 JMP 578e46 |
0x578e0b XOR %R10D,%R10D |
0x578e0e ADD %R11,%R10 |
0x578e11 NOPW %CS:(%RAX,%RAX,1) |
(6546) 0x578e20 VMOVSD (%R15,%R9,8),%XMM4 |
(6546) 0x578e26 MOV (%R14,%R10,8),%RAX |
(6546) 0x578e2a VMOVSD (%R12,%R10,8),%XMM5 |
(6546) 0x578e30 INC %R10 |
(6546) 0x578e33 ADD %R13,%RAX |
(6546) 0x578e36 VFMADD213SD (%RBX,%RAX,8),%XMM4,%XMM5 |
(6546) 0x578e3c VMOVSD %XMM5,(%RBX,%RAX,8) |
(6546) 0x578e41 CMP %R10,%R8 |
(6546) 0x578e44 JNE 578e20 |
0x578e46 MOV 0x18(%RSI,%RDI,8),%R10 |
0x578e4b MOV 0x20(%RSI,%RDI,8),%RDI |
0x578e50 MOV -0x58(%RBP),%R11 |
0x578e54 MOV %RDI,%RCX |
0x578e57 SUB %R10,%RCX |
0x578e5a JLE 578ab0 |
0x578e60 MOV -0x40(%RBP),%RAX |
0x578e64 MOV %RCX,%R9 |
0x578e67 AND $-0x8,%R9 |
0x578e6b LEA 0x3(%R11,%RAX,1),%R8 |
0x578e70 JE 578f3f |
0x578e76 LEA -0x1(%R9),%RSI |
0x578e7a XOR %EAX,%EAX |
0x578e7c JMP 578eba |
(6544) 0x578e80 KXNORW %K0,%K0,%K1 |
(6544) 0x578e84 VXORPD %XMM7,%XMM7,%XMM7 |
(6544) 0x578e88 VPSLLQ $0x3,%ZMM5,%ZMM6 |
(6544) 0x578e8f ADD $0x8,%RAX |
(6544) 0x578e93 VGATHERQPD (%RBX,%ZMM5,8),%ZMM7{%K1} |
(6544) 0x578e9a VPADDQ %ZMM6,%ZMM0,%ZMM6 |
(6544) 0x578ea0 KXNORW %K0,%K0,%K1 |
(6544) 0x578ea4 VADDPD %ZMM4,%ZMM7,%ZMM4 |
(6544) 0x578eaa VSCATTERQPD %ZMM4,(,%ZMM6,1){%K1} |
(6544) 0x578eb5 CMP %RSI,%RAX |
(6544) 0x578eb8 JA 578f30 |
(6544) 0x578eba VBROADCASTSD (%R15,%R8,8),%ZMM4 |
(6544) 0x578ec1 LEA (%R10,%RAX,1),%RDX |
(6544) 0x578ec5 VPADDQ (%R14,%RDX,8),%ZMM1,%ZMM5 |
(6544) 0x578ecc VPCONFLICTQ %ZMM5,%ZMM6 |
(6544) 0x578ed2 VPTESTMQ %ZMM6,%ZMM6,%K1 |
(6544) 0x578ed8 KORTESTB %K1,%K1 |
(6544) 0x578edc VMULPD (%R12,%RDX,8),%ZMM4,%ZMM4 |
(6544) 0x578ee3 JE 578e80 |
(6544) 0x578ee5 VPLZCNTQ %ZMM6,%ZMM6 |
(6544) 0x578eeb VPSUBQ %ZMM6,%ZMM2,%ZMM6 |
(6544) 0x578ef1 NOPW %CS:(%RAX,%RAX,1) |
(6545) 0x578f00 VPERMQ %ZMM4,%ZMM6,%ZMM7 |
(6545) 0x578f06 VPERMQ %ZMM6,%ZMM6,%ZMM6{%K1} |
(6545) 0x578f0c VADDPD %ZMM7,%ZMM4,%ZMM4{%K1} |
(6545) 0x578f12 VPCMPNEQQ %ZMM3,%ZMM6,%K1 |
(6545) 0x578f19 KORTESTB %K1,%K1 |
(6545) 0x578f1d JNE 578f00 |
(6544) 0x578f1f JMP 578e80 |
0x578f30 MOV -0x50(%RBP),%RSI |
0x578f34 CMP %R9,%RCX |
0x578f37 JE 578ab0 |
0x578f3d JMP 578f42 |
0x578f3f XOR %R9D,%R9D |
0x578f42 ADD %R10,%R9 |
0x578f45 NOPW %CS:(%RAX,%RAX,1) |
(6543) 0x578f50 VMOVSD (%R15,%R8,8),%XMM4 |
(6543) 0x578f56 MOV (%R14,%R9,8),%RAX |
(6543) 0x578f5a VMOVSD (%R12,%R9,8),%XMM5 |
(6543) 0x578f60 INC %R9 |
(6543) 0x578f63 ADD %R13,%RAX |
(6543) 0x578f66 VFMADD213SD (%RBX,%RAX,8),%XMM4,%XMM5 |
(6543) 0x578f6c VMOVSD %XMM5,(%RBX,%RAX,8) |
(6543) 0x578f71 CMP %R9,%RDI |
(6543) 0x578f74 JNE 578f50 |
0x578f76 JMP 578ab0 |
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 560 - 567 |
-------------------------------------------------------------------------------- |
560: #pragma omp for HYPRE_SMP_SCHEDULE |
561: #endif |
562: for (i = 0; i < num_rows; i++) |
563: { |
564: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
565: { |
566: j = A_j[jj]; |
567: y_data_expand[offset + j] += A_data[jj] * x_data[i]; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.32 |
Bottlenecks | micro-operation queue, |
Function | hypre_CSRMatrixMatvecT.extracted.49 |
Source | csr_matvec.c:560-564 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 14.50 |
CQA cycles if no scalar integer | 14.50 |
CQA cycles if FP arith vectorized | 14.50 |
CQA cycles if fully vectorized | 1.81 |
Front-end cycles | 14.50 |
DIV/SQRT cycles | 11.00 |
P0 cycles | 8.75 |
P1 cycles | 8.75 |
P2 cycles | 8.50 |
P3 cycles | 11.00 |
P4 cycles | 7.67 |
P5 cycles | 7.67 |
P6 cycles | 7.67 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.00 |
P11 cycles | 0.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 88.00 |
Nb uops | 87.00 |
Nb loads | 19.00 |
Nb stores | 1.00 |
Nb stack references | 4.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 11.03 |
Bytes prefetched | 0.00 |
Bytes loaded | 152.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.32 |
Bottlenecks | micro-operation queue, |
Function | hypre_CSRMatrixMatvecT.extracted.49 |
Source | csr_matvec.c:560-564 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 14.50 |
CQA cycles if no scalar integer | 14.50 |
CQA cycles if FP arith vectorized | 14.50 |
CQA cycles if fully vectorized | 1.81 |
Front-end cycles | 14.50 |
DIV/SQRT cycles | 11.00 |
P0 cycles | 8.75 |
P1 cycles | 8.75 |
P2 cycles | 8.50 |
P3 cycles | 11.00 |
P4 cycles | 7.67 |
P5 cycles | 7.67 |
P6 cycles | 7.67 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.00 |
P11 cycles | 0.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 88.00 |
Nb uops | 87.00 |
Nb loads | 19.00 |
Nb stores | 1.00 |
Nb stack references | 4.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 11.03 |
Bytes prefetched | 0.00 |
Bytes loaded | 152.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Path / |
Function | hypre_CSRMatrixMatvecT.extracted.49 |
Source file and lines | csr_matvec.c:560-567 |
Module | exec |
nb instructions | 88 |
nb uops | 87 |
loop length | 356 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 14.50 cycles |
front end | 14.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.00 | 8.75 | 8.75 | 8.50 | 11.00 | 7.67 | 7.67 | 7.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 11.00 | 8.75 | 8.75 | 8.50 | 11.00 | 7.67 | 7.67 | 7.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 14.50 |
Dispatch | 11.00 |
Overall L1 | 14.50 |
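Vectorization ratio all | 0% |
Vectorization ratio load | 0% |
Vectorization ratio store | 0% |
Vectorization ratio mul | NA (no mul vectorizable/vectorized instructions) |
Vectorization ratio add-sub | 0% |
Vectorization ratio fma | NA (no fma vectorizable/vectorized instructions) |
Vectorization ratio div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
Vectorization ratio other | 0% |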
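Vector-efficiency ratio all | 12% |
Vector-efficiency ratio load | 12% |
Vector-efficiency ratio store | 12% |
Vector-efficiency ratio mul | NA (no mul vectorizable/vectorized instructions) |
Vector-efficiency ratio add-sub | 12% |
Vector-efficiency ratio fma | NA (no fma vectorizable/vectorized instructions) |
Vector-efficiency ratio div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
Vector-efficiency ratio other | 12% |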
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x68(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD $0x4,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 5787b1 <hypre_CSRMatrixMatvecT.extracted.49+0x111> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R11,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA (%RAX,%R11,1),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x8(%RSI,%RDI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV (%RSI,%RDI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
SUB %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 578be6 <hypre_CSRMatrixMatvecT.extracted.49+0x546> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RCX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 578bb7 <hypre_CSRMatrixMatvecT.extracted.49+0x517> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA -0x1(%R9),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 578b3a <hypre_CSRMatrixMatvecT.extracted.49+0x49a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
CMP %R9,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 578bba <hypre_CSRMatrixMatvecT.extracted.49+0x51a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 578be6 <hypre_CSRMatrixMatvecT.extracted.49+0x546> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
ADD %R10,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
MOV 0x10(%RSI,%RDI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x8(%RSI,%RDI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
SUB %R11,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 578d16 <hypre_CSRMatrixMatvecT.extracted.49+0x676> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x1(%RDX,%RAX,1),%R9 | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
JE 578cdb <hypre_CSRMatrixMatvecT.extracted.49+0x63b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA -0x1(%R10),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 578c5a <hypre_CSRMatrixMatvecT.extracted.49+0x5ba> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 578cde <hypre_CSRMatrixMatvecT.extracted.49+0x63e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 578d16 <hypre_CSRMatrixMatvecT.extracted.49+0x676> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
ADD %R11,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
MOV 0x18(%RSI,%RDI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x10(%RSI,%RDI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
SUB %R11,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 578e46 <hypre_CSRMatrixMatvecT.extracted.49+0x7a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x2(%RDX,%RAX,1),%R9 | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
JE 578e0b <hypre_CSRMatrixMatvecT.extracted.49+0x76b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA -0x1(%R10),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 578d8a <hypre_CSRMatrixMatvecT.extracted.49+0x6ea> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 578e0e <hypre_CSRMatrixMatvecT.extracted.49+0x76e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 578e46 <hypre_CSRMatrixMatvecT.extracted.49+0x7a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
ADD %R11,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
MOV 0x18(%RSI,%RDI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x20(%RSI,%RDI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x58(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
SUB %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 578ab0 <hypre_CSRMatrixMatvecT.extracted.49+0x410> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x3(%R11,%RAX,1),%R8 | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
JE 578f3f <hypre_CSRMatrixMatvecT.extracted.49+0x89f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA -0x1(%R9),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 578eba <hypre_CSRMatrixMatvecT.extracted.49+0x81a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R9,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 578ab0 <hypre_CSRMatrixMatvecT.extracted.49+0x410> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 578f42 <hypre_CSRMatrixMatvecT.extracted.49+0x8a2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
ADD %R10,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
JMP 578ab0 <hypre_CSRMatrixMatvecT.extracted.49+0x410> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Function | hypre_CSRMatrixMatvecT.extracted.49 |
Source file and lines | csr_matvec.c:560-567 |
Module | exec |
nb instructions | 88 |
nb uops | 87 |
loop length | 356 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 14.50 cycles |
front end | 14.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.00 | 8.75 | 8.75 | 8.50 | 11.00 | 7.67 | 7.67 | 7.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 11.00 | 8.75 | 8.75 | 8.50 | 11.00 | 7.67 | 7.67 | 7.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 14.50 |
Dispatch | 11.00 |
Overall L1 | 14.50 |
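Vectorization ratio all | 0% |
Vectorization ratio load | 0% |
Vectorization ratio store | 0% |
Vectorization ratio mul | NA (no mul vectorizable/vectorized instructions) |
Vectorization ratio add-sub | 0% |
Vectorization ratio fma | NA (no fma vectorizable/vectorized instructions) |
Vectorization ratio div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
Vectorization ratio other | 0% |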
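Vector-efficiency ratio all | 12% |
Vector-efficiency ratio load | 12% |
Vector-efficiency ratio store | 12% |
Vector-efficiency ratio mul | NA (no mul vectorizable/vectorized instructions) |
Vector-efficiency ratio add-sub | 12% |
Vector-efficiency ratio fma | NA (no fma vectorizable/vectorized instructions) |
Vector-efficiency ratio div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
Vector-efficiency ratio other | 12% |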
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x68(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD $0x4,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 5787b1 <hypre_CSRMatrixMatvecT.extracted.49+0x111> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R11,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA (%RAX,%R11,1),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x8(%RSI,%RDI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV (%RSI,%RDI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
SUB %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 578be6 <hypre_CSRMatrixMatvecT.extracted.49+0x546> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RCX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 578bb7 <hypre_CSRMatrixMatvecT.extracted.49+0x517> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA -0x1(%R9),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 578b3a <hypre_CSRMatrixMatvecT.extracted.49+0x49a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
CMP %R9,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 578bba <hypre_CSRMatrixMatvecT.extracted.49+0x51a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 578be6 <hypre_CSRMatrixMatvecT.extracted.49+0x546> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
ADD %R10,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
MOV 0x10(%RSI,%RDI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x8(%RSI,%RDI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
SUB %R11,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 578d16 <hypre_CSRMatrixMatvecT.extracted.49+0x676> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x1(%RDX,%RAX,1),%R9 | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
JE 578cdb <hypre_CSRMatrixMatvecT.extracted.49+0x63b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA -0x1(%R10),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 578c5a <hypre_CSRMatrixMatvecT.extracted.49+0x5ba> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 578cde <hypre_CSRMatrixMatvecT.extracted.49+0x63e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 578d16 <hypre_CSRMatrixMatvecT.extracted.49+0x676> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
ADD %R11,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
MOV 0x18(%RSI,%RDI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x10(%RSI,%RDI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
SUB %R11,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 578e46 <hypre_CSRMatrixMatvecT.extracted.49+0x7a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x2(%RDX,%RAX,1),%R9 | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
JE 578e0b <hypre_CSRMatrixMatvecT.extracted.49+0x76b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA -0x1(%R10),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 578d8a <hypre_CSRMatrixMatvecT.extracted.49+0x6ea> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 578e0e <hypre_CSRMatrixMatvecT.extracted.49+0x76e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 578e46 <hypre_CSRMatrixMatvecT.extracted.49+0x7a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
ADD %R11,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
MOV 0x18(%RSI,%RDI,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x20(%RSI,%RDI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x58(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
SUB %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 578ab0 <hypre_CSRMatrixMatvecT.extracted.49+0x410> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x3(%R11,%RAX,1),%R8 | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
JE 578f3f <hypre_CSRMatrixMatvecT.extracted.49+0x89f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA -0x1(%R9),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 578eba <hypre_CSRMatrixMatvecT.extracted.49+0x81a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R9,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 578ab0 <hypre_CSRMatrixMatvecT.extracted.49+0x410> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 578f42 <hypre_CSRMatrixMatvecT.extracted.49+0x8a2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
ADD %R10,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
JMP 578ab0 <hypre_CSRMatrixMatvecT.extracted.49+0x410> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |