Loop Id: 147 | Module: libseq_mv.so | Source: csr_matvec.c:307-314 | Coverage: 12.08% |
---|
Loop Id: 147 | Module: libseq_mv.so | Source: csr_matvec.c:307-314 | Coverage: 12.08% |
---|
0xdef8 MOV 0x30(%RSP),%R15 |
0xdefd MOV (%R8,%R12,8),%RDX |
0xdf01 MOV 0x8(%R8,%R12,8),%RCX |
0xdf06 VMOVSD (%R15,%R12,8),%XMM4 |
0xdf0c CMP %RCX,%RDX |
0xdf0f JGE e768 |
0xdf15 SUB %RDX,%RCX |
0xdf18 MOV %RDX,%R15 |
0xdf1b LEA -0x1(%RCX),%RSI |
0xdf1f CMP $0x2,%RSI |
0xdf23 JBE ed41 |
0xdf29 MOV %RCX,%R11 |
0xdf2c LEA (,%RDX,8),%RSI |
0xdf34 VXORPD %XMM6,%XMM6,%XMM6 |
0xdf38 XOR %EAX,%EAX |
0xdf3a SHR $0x2,%R11 |
0xdf3e LEA (%R14,%RSI,1),%R10 |
0xdf42 ADD %R13,%RSI |
0xdf45 SAL $0x5,%R11 |
0xdf49 LEA -0x20(%R11),%RDI |
0xdf4d SHR $0x5,%RDI |
0xdf51 INC %RDI |
0xdf54 AND $0x7,%EDI |
0xdf57 JE e045 |
0xdf5d CMP $0x1,%RDI |
0xdf61 JE e022 |
0xdf67 CMP $0x2,%RDI |
0xdf6b JE e008 |
0xdf71 CMP $0x3,%RDI |
0xdf75 JE dfee |
0xdf77 CMP $0x4,%RDI |
0xdf7b JE dfd5 |
0xdf7d CMP $0x5,%RDI |
0xdf81 JE dfbb |
0xdf83 CMP $0x6,%RDI |
0xdf87 JE dfa1 |
0xdf89 VMOVDQU (%RSI),%YMM9 |
0xdf8d VMOVAPD %YMM12,%YMM1 |
0xdf91 MOV $0x20,%EAX |
0xdf96 VGATHERQPD %YMM1,(%RBX,%YMM9,8),%YMM8 |
0xdf9c VFNMADD231PD (%R10),%YMM8,%YMM6 |
0xdfa1 VMOVDQU (%RSI,%RAX,1),%YMM11 |
0xdfa6 VMOVAPD %YMM12,%YMM14 |
0xdfab VGATHERQPD %YMM14,(%RBX,%YMM11,8),%YMM2 |
0xdfb1 VFNMADD231PD (%R10,%RAX,1),%YMM2,%YMM6 |
0xdfb7 ADD $0x20,%RAX |
0xdfbb VMOVDQU (%RSI,%RAX,1),%YMM10 |
0xdfc0 VMOVAPD %YMM12,%YMM15 |
0xdfc5 VGATHERQPD %YMM15,(%RBX,%YMM10,8),%YMM0 |
0xdfcb VFNMADD231PD (%R10,%RAX,1),%YMM0,%YMM6 |
0xdfd1 ADD $0x20,%RAX |
0xdfd5 VMOVDQU (%RSI,%RAX,1),%YMM7 |
0xdfda VMOVAPD %YMM12,%YMM3 |
0xdfde VGATHERQPD %YMM3,(%RBX,%YMM7,8),%YMM13 |
0xdfe4 VFNMADD231PD (%R10,%RAX,1),%YMM13,%YMM6 |
0xdfea ADD $0x20,%RAX |
0xdfee VMOVDQU (%RSI,%RAX,1),%YMM8 |
0xdff3 VMOVAPD %YMM12,%YMM9 |
0xdff8 VGATHERQPD %YMM9,(%RBX,%YMM8,8),%YMM1 |
0xdffe VFNMADD231PD (%R10,%RAX,1),%YMM1,%YMM6 |
0xe004 ADD $0x20,%RAX |
0xe008 VMOVDQU (%RSI,%RAX,1),%YMM11 |
0xe00d VMOVAPD %YMM12,%YMM14 |
0xe012 VGATHERQPD %YMM14,(%RBX,%YMM11,8),%YMM2 |
0xe018 VFNMADD231PD (%R10,%RAX,1),%YMM2,%YMM6 |
0xe01e ADD $0x20,%RAX |
0xe022 VMOVDQU (%RSI,%RAX,1),%YMM10 |
0xe027 VMOVAPD %YMM12,%YMM15 |
0xe02c VGATHERQPD %YMM15,(%RBX,%YMM10,8),%YMM0 |
0xe032 VFNMADD231PD (%R10,%RAX,1),%YMM0,%YMM6 |
0xe038 ADD $0x20,%RAX |
0xe03c CMP %R11,%RAX |
0xe03f JE e126 |
(148) 0xe045 VMOVDQU (%RSI,%RAX,1),%YMM7 |
(148) 0xe04a VMOVDQU 0x20(%RSI,%RAX,1),%YMM9 |
(148) 0xe050 VMOVAPD %YMM12,%YMM3 |
(148) 0xe054 VMOVAPD %YMM12,%YMM1 |
(148) 0xe058 VMOVDQU 0x40(%RSI,%RAX,1),%YMM14 |
(148) 0xe05e VMOVDQU 0x60(%RSI,%RAX,1),%YMM2 |
(148) 0xe064 VMOVAPD %YMM12,%YMM15 |
(148) 0xe069 VGATHERQPD %YMM3,(%RBX,%YMM7,8),%YMM13 |
(148) 0xe06f VFNMADD132PD (%R10,%RAX,1),%YMM6,%YMM13 |
(148) 0xe075 VMOVAPD %YMM12,%YMM6 |
(148) 0xe079 VMOVDQU 0x80(%RSI,%RAX,1),%YMM7 |
(148) 0xe082 VGATHERQPD %YMM6,(%RBX,%YMM9,8),%YMM8 |
(148) 0xe088 VFNMADD132PD 0x20(%R10,%RAX,1),%YMM13,%YMM8 |
(148) 0xe08f VMOVAPD %YMM12,%YMM3 |
(148) 0xe093 VMOVDQU 0xa0(%RSI,%RAX,1),%YMM6 |
(148) 0xe09c VGATHERQPD %YMM1,(%RBX,%YMM14,8),%YMM11 |
(148) 0xe0a2 VFNMADD132PD 0x40(%R10,%RAX,1),%YMM8,%YMM11 |
(148) 0xe0a9 VMOVAPD %YMM12,%YMM13 |
(148) 0xe0ae VMOVDQU 0xc0(%RSI,%RAX,1),%YMM1 |
(148) 0xe0b7 VGATHERQPD %YMM15,(%RBX,%YMM2,8),%YMM10 |
(148) 0xe0bd VFNMADD132PD 0x60(%R10,%RAX,1),%YMM11,%YMM10 |
(148) 0xe0c4 VMOVAPD %YMM12,%YMM8 |
(148) 0xe0c9 VMOVDQU 0xe0(%RSI,%RAX,1),%YMM15 |
(148) 0xe0d2 VGATHERQPD %YMM3,(%RBX,%YMM7,8),%YMM0 |
(148) 0xe0d8 VMOVAPD %YMM12,%YMM11 |
(148) 0xe0dd VGATHERQPD %YMM13,(%RBX,%YMM6,8),%YMM9 |
(148) 0xe0e3 VFNMADD132PD 0x80(%R10,%RAX,1),%YMM10,%YMM0 |
(148) 0xe0ed VGATHERQPD %YMM8,(%RBX,%YMM1,8),%YMM14 |
(148) 0xe0f3 VGATHERQPD %YMM11,(%RBX,%YMM15,8),%YMM6 |
(148) 0xe0f9 VFNMADD132PD 0xa0(%R10,%RAX,1),%YMM0,%YMM9 |
(148) 0xe103 VFNMADD132PD 0xc0(%R10,%RAX,1),%YMM9,%YMM14 |
(148) 0xe10d VFNMADD132PD 0xe0(%R10,%RAX,1),%YMM14,%YMM6 |
(148) 0xe117 ADD $0x100,%RAX |
(148) 0xe11d CMP %R11,%RAX |
(148) 0xe120 JNE e045 |
0xe126 VEXTRACTF128 $0x1,%YMM6,%XMM10 |
0xe12c MOV %RCX,%R10 |
0xe12f VADDPD %XMM6,%XMM10,%XMM2 |
0xe133 AND $-0x4,%R10 |
0xe137 VADDPD %XMM6,%XMM10,%XMM13 |
0xe13b ADD %R10,%RDX |
0xe13e VUNPCKHPD %XMM2,%XMM2,%XMM3 |
0xe142 VADDPD %XMM2,%XMM3,%XMM7 |
0xe146 VADDSD %XMM7,%XMM4,%XMM14 |
0xe14a TEST $0x3,%CL |
0xe14d JE e19c |
0xe14f SUB %R10,%RCX |
0xe152 CMP $0x1,%RCX |
0xe156 JE e18c |
0xe158 ADD %R15,%R10 |
0xe15b VMOVAPD %XMM5,%XMM0 |
0xe15f VMOVDQU (%R13,%R10,8),%XMM9 |
0xe166 VGATHERQPD %XMM0,(%RBX,%XMM9,8),%XMM8 |
0xe16c VFNMADD132PD (%R14,%R10,8),%XMM13,%XMM8 |
0xe172 VUNPCKHPD %XMM8,%XMM8,%XMM13 |
0xe177 VADDPD %XMM8,%XMM13,%XMM1 |
0xe17c VADDSD %XMM1,%XMM4,%XMM14 |
0xe180 TEST $0x1,%CL |
0xe183 JE e19c |
0xe185 AND $-0x2,%RCX |
0xe189 ADD %RCX,%RDX |
0xe18c MOV (%R13,%RDX,8),%RCX |
0xe191 VMOVSD (%RBX,%RCX,8),%XMM4 |
0xe196 VFNMADD231SD (%R14,%RDX,8),%XMM4,%XMM14 |
0xe19c MOV 0x38(%RSP),%RDX |
0xe1a1 VMOVSD %XMM14,(%RDX,%R12,8) |
0xe1a7 INC %R12 |
0xe1aa CMP %R12,%R9 |
0xe1ad JNE def8 |
0xe768 VMOVSD %XMM4,%XMM4,%XMM14 |
0xe76c JMP e19c |
0xed41 VMOVSD %XMM4,%XMM4,%XMM14 |
0xed45 VXORPD %XMM13,%XMM13,%XMM13 |
0xed4a XOR %R10D,%R10D |
0xed4d JMP e14f |
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 307 - 314 |
-------------------------------------------------------------------------------- |
307: for (i = iBegin; i < iEnd; i++) |
308: { |
309: tempx = b_data[i]; |
310: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
311: { |
312: tempx -= A_data[jj] * x_data[A_j[jj]]; |
313: } |
314: y_data[i] = tempx; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○95.76 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○4.24 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.20 |
CQA speedup if FP arith vectorized | 2.23 |
CQA speedup if fully vectorized | 4.36 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 2.42 |
Bottlenecks | micro-operation queue |
Function | hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6 |
Source | csr_matvec.c:307-314 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 48.50 |
CQA cycles if no scalar integer | 40.50 |
CQA cycles if FP arith vectorized | 21.70 |
CQA cycles if fully vectorized | 11.13 |
Front-end cycles | 48.50 |
DIV/SQRT cycles | 11.50 |
P0 cycles | 11.50 |
P1 cycles | 11.50 |
P2 cycles | 11.50 |
P3 cycles | 8.00 |
P4 cycles | 8.33 |
P5 cycles | 8.33 |
P6 cycles | 8.33 |
P7 cycles | 20.00 |
P8 cycles | 20.00 |
P9 cycles | 20.00 |
P10 cycles | 20.00 |
P11 cycles | 19.50 |
P12 cycles | 19.50 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 113.00 |
Nb uops | 291.00 |
Nb loads | 32.00 |
Nb stores | 1.00 |
Nb stack references | 2.00 |
FLOP/cycle | 1.48 |
Nb FLOP add-sub | 10.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 31.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 16.33 |
Bytes prefetched | 0.00 |
Bytes loaded | 784.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 62.90 |
Vectorization ratio load | 88.89 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 57.14 |
Vectorization ratio fma | 88.89 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 54.29 |
Vector-efficiency ratio all | 31.55 |
Vector-efficiency ratio load | 43.06 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 19.64 |
Vector-efficiency ratio fma | 43.06 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 29.11 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.20 |
CQA speedup if FP arith vectorized | 2.23 |
CQA speedup if fully vectorized | 4.36 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 2.42 |
Bottlenecks | micro-operation queue |
Function | hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6 |
Source | csr_matvec.c:307-314 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 48.50 |
CQA cycles if no scalar integer | 40.50 |
CQA cycles if FP arith vectorized | 21.70 |
CQA cycles if fully vectorized | 11.13 |
Front-end cycles | 48.50 |
DIV/SQRT cycles | 11.50 |
P0 cycles | 11.50 |
P1 cycles | 11.50 |
P2 cycles | 11.50 |
P3 cycles | 8.00 |
P4 cycles | 8.33 |
P5 cycles | 8.33 |
P6 cycles | 8.33 |
P7 cycles | 20.00 |
P8 cycles | 20.00 |
P9 cycles | 20.00 |
P10 cycles | 20.00 |
P11 cycles | 19.50 |
P12 cycles | 19.50 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 113.00 |
Nb uops | 291.00 |
Nb loads | 32.00 |
Nb stores | 1.00 |
Nb stack references | 2.00 |
FLOP/cycle | 1.48 |
Nb FLOP add-sub | 10.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 31.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 16.33 |
Bytes prefetched | 0.00 |
Bytes loaded | 784.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 62.90 |
Vectorization ratio load | 88.89 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 57.14 |
Vectorization ratio fma | 88.89 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 54.29 |
Vector-efficiency ratio all | 31.55 |
Vector-efficiency ratio load | 43.06 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 19.64 |
Vector-efficiency ratio fma | 43.06 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 29.11 |
Path / |
Function | hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6 |
Source file and lines | csr_matvec.c:307-314 |
Module | libseq_mv.so |
nb instructions | 113 |
nb uops | 291 |
loop length | 500 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 13 |
used ymm registers | 14 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 48.50 cycles |
front end | 48.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.50 | 11.50 | 11.50 | 11.50 | 8.00 | 8.33 | 8.33 | 8.33 | 20.00 | 20.00 | 20.00 | 20.00 | 19.50 | 19.50 |
cycles | 11.50 | 11.50 | 11.50 | 11.50 | 8.00 | 8.33 | 8.33 | 8.33 | 20.00 | 20.00 | 20.00 | 20.00 | 19.50 | 19.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 48.50 |
Dispatch | 20.00 |
Overall L1 | 48.50 |
all | 38% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 75% |
load | 84% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | 88% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 82% |
all | 62% |
load | 88% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 57% |
fma | 88% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 54% |
all | 25% |
load | 46% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 34% |
load | 41% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 20% |
fma | 43% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 38% |
all | 31% |
load | 43% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 19% |
fma | 43% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 29% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%R8,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%R8,%R12,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%R15,%R12,8),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE e768 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x1f68> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
SUB %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA -0x1(%RCX),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x2,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JBE ed41 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x2541> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA (,%RDX,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
SHR $0x2,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R14,%RSI,1),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R13,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SAL $0x5,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA -0x20(%R11),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SHR $0x5,%RDI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
INC %RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
AND $0x7,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JE e045 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x1845> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x1,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE e022 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x1822> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE e008 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x1808> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x3,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE dfee <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x17ee> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE dfd5 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x17d5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x5,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE dfbb <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x17bb> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x6,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE dfa1 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x17a1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVDQU (%RSI),%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
MOV $0x20,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VGATHERQPD %YMM1,(%RBX,%YMM9,8),%YMM8 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10),%YMM8,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
VMOVDQU (%RSI,%RAX,1),%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM14,(%RBX,%YMM11,8),%YMM2 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM2,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RAX,1),%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM15,(%RBX,%YMM10,8),%YMM0 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM0,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RAX,1),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM3,(%RBX,%YMM7,8),%YMM13 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM13,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RAX,1),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM9,(%RBX,%YMM8,8),%YMM1 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM1,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RAX,1),%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM14,(%RBX,%YMM11,8),%YMM2 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM2,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RAX,1),%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM15,(%RBX,%YMM10,8),%YMM0 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM0,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R11,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE e126 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x1926> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VEXTRACTF128 $0x1,%YMM6,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VADDPD %XMM6,%XMM10,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
AND $-0x4,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VADDPD %XMM6,%XMM10,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
ADD %R10,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VUNPCKHPD %XMM2,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM2,%XMM3,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM7,%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x3,%CL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE e19c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x199c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
SUB %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x1,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE e18c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x198c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %R15,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVAPD %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VMOVDQU (%R13,%R10,8),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VGATHERQPD %XMM0,(%RBX,%XMM9,8),%XMM8 | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.25 | 1.58 | 0.58 | 0.58 | 1.50 | 1.50 | 0-16 | 3 | vect (25.0%) |
VFNMADD132PD (%R14,%R10,8),%XMM13,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
VUNPCKHPD %XMM8,%XMM8,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM8,%XMM13,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM1,%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x1,%CL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE e19c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x199c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
AND $-0x2,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV (%R13,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%RBX,%RCX,8),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
VFNMADD231SD (%R14,%RDX,8),%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x38(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD %XMM14,(%RDX,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
INC %R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R12,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JNE def8 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x16f8> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVSD %XMM4,%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JMP e19c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x199c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVSD %XMM4,%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP e14f <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x194f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Function | hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6 |
Source file and lines | csr_matvec.c:307-314 |
Module | libseq_mv.so |
nb instructions | 113 |
nb uops | 291 |
loop length | 500 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 13 |
used ymm registers | 14 |
used zmm registers | 0 |
nb stack references | 2 |
micro-operation queue | 48.50 cycles |
front end | 48.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.50 | 11.50 | 11.50 | 11.50 | 8.00 | 8.33 | 8.33 | 8.33 | 20.00 | 20.00 | 20.00 | 20.00 | 19.50 | 19.50 |
cycles | 11.50 | 11.50 | 11.50 | 11.50 | 8.00 | 8.33 | 8.33 | 8.33 | 20.00 | 20.00 | 20.00 | 20.00 | 19.50 | 19.50 |
Cycles executing div or sqrt instructions | NA |
Front-end | 48.50 |
Dispatch | 20.00 |
Overall L1 | 48.50 |
all | 38% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 75% |
load | 84% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | 88% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 82% |
all | 62% |
load | 88% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 57% |
fma | 88% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 54% |
all | 25% |
load | 46% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 34% |
load | 41% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 20% |
fma | 43% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 38% |
all | 31% |
load | 43% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 19% |
fma | 43% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 29% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%R8,%R12,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%R8,%R12,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%R15,%R12,8),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE e768 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x1f68> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
SUB %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA -0x1(%RCX),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x2,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JBE ed41 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x2541> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA (,%RDX,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
SHR $0x2,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R14,%RSI,1),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R13,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SAL $0x5,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA -0x20(%R11),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SHR $0x5,%RDI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
INC %RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
AND $0x7,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
JE e045 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x1845> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x1,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE e022 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x1822> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE e008 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x1808> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x3,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE dfee <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x17ee> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE dfd5 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x17d5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x5,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE dfbb <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x17bb> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x6,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE dfa1 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x17a1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVDQU (%RSI),%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
MOV $0x20,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VGATHERQPD %YMM1,(%RBX,%YMM9,8),%YMM8 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10),%YMM8,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
VMOVDQU (%RSI,%RAX,1),%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM14,(%RBX,%YMM11,8),%YMM2 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM2,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RAX,1),%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM15,(%RBX,%YMM10,8),%YMM0 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM0,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RAX,1),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM3,(%RBX,%YMM7,8),%YMM13 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM13,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RAX,1),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM9,(%RBX,%YMM8,8),%YMM1 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM1,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RAX,1),%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM14,(%RBX,%YMM11,8),%YMM2 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM2,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RAX,1),%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM12,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM15,(%RBX,%YMM10,8),%YMM0 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%R10,%RAX,1),%YMM0,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R11,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE e126 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x1926> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VEXTRACTF128 $0x1,%YMM6,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VADDPD %XMM6,%XMM10,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
AND $-0x4,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VADDPD %XMM6,%XMM10,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
ADD %R10,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VUNPCKHPD %XMM2,%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM2,%XMM3,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM7,%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x3,%CL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE e19c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x199c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
SUB %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x1,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE e18c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x198c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %R15,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVAPD %XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VMOVDQU (%R13,%R10,8),%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VGATHERQPD %XMM0,(%RBX,%XMM9,8),%XMM8 | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.25 | 1.58 | 0.58 | 0.58 | 1.50 | 1.50 | 0-16 | 3 | vect (25.0%) |
VFNMADD132PD (%R14,%R10,8),%XMM13,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
VUNPCKHPD %XMM8,%XMM8,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM8,%XMM13,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM1,%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x1,%CL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE e19c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x199c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
AND $-0x2,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV (%R13,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%RBX,%RCX,8),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
VFNMADD231SD (%R14,%RDX,8),%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x38(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD %XMM14,(%RDX,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
INC %R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R12,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JNE def8 <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x16f8> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVSD %XMM4,%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JMP e19c <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x199c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVSD %XMM4,%XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP e14f <hypre_CSRMatrixMatvecOutOfPlace._omp_fn.6+0x194f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |