Loop Id: 140 | Module: exec | Source: accelerate.cpp:40-53 [...] | Coverage: 3.33% |
---|
Loop Id: 140 | Module: exec | Source: accelerate.cpp:40-53 [...] | Coverage: 3.33% |
---|
0x41a9e0 VMOVDQA64 %ZMM17,%ZMM0 |
0x41a9e6 VMOVDQA64 %ZMM16,%ZMM1 |
0x41a9ec LEA 0x410cd(%RIP),%RAX |
0x41a9f3 CALL %RAX |
0x41a9f5 VPMOVQD %ZMM0,%YMM0 |
0x41a9fb VPADDD 0x160(%RSP),%YMM0,%YMM31 [19] |
0x41aa03 VMOVDQA64 %ZMM17,%ZMM0 |
0x41aa09 VMOVDQA64 %ZMM16,%ZMM1 |
0x41aa0f CALLQ 0x715bb(%RIP) |
0x41aa15 VPMOVQD %ZMM0,%YMM0 |
0x41aa1b VPADDD 0x140(%RSP),%YMM0,%YMM2 [19] |
0x41aa24 VPCMPEQD %YMM1,%YMM1,%YMM1 |
0x41aa28 VPADDD %YMM1,%YMM2,%YMM0 |
0x41aa2c VPADDD %YMM1,%YMM31,%YMM1 |
0x41aa32 VPMOVSXDQ %YMM1,%ZMM1 |
0x41aa38 VPXOR %XMM5,%XMM5,%XMM5 |
0x41aa3c VPMULLQ %ZMM1,%ZMM20,%ZMM5 |
0x41aa42 VPMOVSXDQ %YMM0,%ZMM0 |
0x41aa48 KXNORW %K0,%K0,%K1 |
0x41aa4c VXORPD %XMM4,%XMM4,%XMM4 |
0x41aa50 VPMULLQ %ZMM1,%ZMM21,%ZMM8 |
0x41aa56 VPADDQ %ZMM0,%ZMM5,%ZMM7 |
0x41aa5c KXNORW %K0,%K0,%K2 |
0x41aa60 VPMOVSXDQ %YMM31,%ZMM3 |
0x41aa66 VPMULLQ %ZMM3,%ZMM20,%ZMM9 |
0x41aa6c VPADDQ %ZMM0,%ZMM8,%ZMM10 |
0x41aa72 VXORPD %XMM6,%XMM6,%XMM6 |
0x41aa76 VGATHERQPD (%R15,%ZMM7,8),%ZMM4{%K1} [25] |
0x41aa7d VPADDQ %ZMM0,%ZMM9,%ZMM11 |
0x41aa83 VGATHERQPD (%R12,%ZMM10,8),%ZMM6{%K2} [8] |
0x41aa8a KXNORW %K0,%K0,%K1 |
0x41aa8e VXORPD %XMM7,%XMM7,%XMM7 |
0x41aa92 VPXOR %XMM10,%XMM10,%XMM10 |
0x41aa97 VPMULLQ %ZMM3,%ZMM21,%ZMM10 |
0x41aa9d VPADDQ %ZMM0,%ZMM10,%ZMM12 |
0x41aaa3 KXNORW %K0,%K0,%K2 |
0x41aaa7 VGATHERQPD (%R15,%ZMM11,8),%ZMM7{%K1} [11] |
0x41aaae VXORPD %XMM11,%XMM11,%XMM11 |
0x41aab3 VGATHERQPD (%R12,%ZMM12,8),%ZMM11{%K2} [24] |
0x41aaba VPMOVSXDQ %YMM2,%ZMM2 |
0x41aac0 VPADDQ %ZMM2,%ZMM9,%ZMM9 |
0x41aac6 KXNORW %K0,%K0,%K1 |
0x41aaca VXORPD %XMM12,%XMM12,%XMM12 |
0x41aacf VPADDQ %ZMM2,%ZMM10,%ZMM10 |
0x41aad5 KXNORW %K0,%K0,%K2 |
0x41aad9 VGATHERQPD (%R15,%ZMM9,8),%ZMM12{%K1} [21] |
0x41aae0 VXORPD %XMM9,%XMM9,%XMM9 |
0x41aae5 VGATHERQPD (%R12,%ZMM10,8),%ZMM9{%K2} [7] |
0x41aaec VPADDQ %ZMM2,%ZMM5,%ZMM5 |
0x41aaf2 KXNORW %K0,%K0,%K1 |
0x41aaf6 VXORPD %XMM10,%XMM10,%XMM10 |
0x41aafb VPADDQ %ZMM2,%ZMM8,%ZMM8 |
0x41ab01 KXNORW %K0,%K0,%K2 |
0x41ab05 VGATHERQPD (%R15,%ZMM5,8),%ZMM10{%K1} [26] |
0x41ab0c VXORPD %XMM13,%XMM13,%XMM13 |
0x41ab11 VGATHERQPD (%R12,%ZMM8,8),%ZMM13{%K2} [6] |
0x41ab18 VPXOR %XMM5,%XMM5,%XMM5 |
0x41ab1c VPMULLQ %ZMM3,%ZMM24,%ZMM5 |
0x41ab22 KXNORW %K0,%K0,%K1 |
0x41ab26 VXORPD %XMM8,%XMM8,%XMM8 |
0x41ab2b VPADDQ %ZMM2,%ZMM5,%ZMM5 |
0x41ab31 VPMULLQ %ZMM3,%ZMM25,%ZMM14 |
0x41ab37 KXNORW %K0,%K0,%K2 |
0x41ab3b VXORPD %XMM15,%XMM15,%XMM15 |
0x41ab40 VPADDQ %ZMM2,%ZMM14,%ZMM31 |
0x41ab46 VPADDQ %ZMM0,%ZMM14,%ZMM14 |
0x41ab4c VGATHERQPD (%R14,%ZMM5,8),%ZMM8{%K1} [23] |
0x41ab53 KXNORW %K0,%K0,%K1 |
0x41ab57 VGATHERQPD (%RBX,%ZMM31,8),%ZMM15{%K2} [13] |
0x41ab5e VXORPD %XMM18,%XMM18,%XMM18 |
0x41ab64 VGATHERQPD (%RBX,%ZMM14,8),%ZMM18{%K1} [2] |
0x41ab6b VPMULLQ %ZMM1,%ZMM25,%ZMM19 |
0x41ab71 KXNORW %K0,%K0,%K1 |
0x41ab75 VPADDQ %ZMM2,%ZMM19,%ZMM22 |
0x41ab7b VXORPD %XMM23,%XMM23,%XMM23 |
0x41ab81 VGATHERQPD (%RBX,%ZMM22,8),%ZMM23{%K1} [10] |
0x41ab88 VPADDQ %ZMM0,%ZMM19,%ZMM19 |
0x41ab8e KXNORW %K0,%K0,%K1 |
0x41ab92 VMULPD %ZMM4,%ZMM6,%ZMM4 |
0x41ab98 VXORPD %XMM26,%XMM26,%XMM26 |
0x41ab9e VGATHERQPD (%RBX,%ZMM19,8),%ZMM26{%K1} [3] |
0x41aba5 VFMADD213PD %ZMM4,%ZMM7,%ZMM11 |
0x41abab VMOVDQU64 0x1c0(%RSP),%ZMM4 [19] |
0x41abb3 VPMULLQ %ZMM3,%ZMM4,%ZMM4 |
0x41abb9 KXNORW %K0,%K0,%K1 |
0x41abbd VFMADD213PD %ZMM11,%ZMM12,%ZMM9 |
0x41abc3 VXORPD %XMM7,%XMM7,%XMM7 |
0x41abc7 VPXOR %XMM6,%XMM6,%XMM6 |
0x41abcb VPMULLQ %ZMM1,%ZMM24,%ZMM6 |
0x41abd1 VPADDQ %ZMM2,%ZMM6,%ZMM6 |
0x41abd7 VPADDQ %ZMM2,%ZMM4,%ZMM4 |
0x41abdd KXNORW %K0,%K0,%K2 |
0x41abe1 MOV 0x70(%RSP),%RAX [19] |
0x41abe6 VGATHERQPD (%RAX,%ZMM4,8),%ZMM7{%K1} [5] |
0x41abed VXORPD %XMM11,%XMM11,%XMM11 |
0x41abf2 VGATHERQPD (%R14,%ZMM6,8),%ZMM11{%K2} [4] |
0x41abf9 VFMADD213PD %ZMM9,%ZMM10,%ZMM13 |
0x41abff VSUBPD %ZMM15,%ZMM18,%ZMM9 |
0x41ac05 VMULPD 0x52461(%RIP){1to8},%ZMM13,%ZMM4 [16] |
0x41ac0f VMOVUPD 0x200(%RSP),%ZMM10 [19] |
0x41ac17 VDIVPD %ZMM4,%ZMM10,%ZMM4 |
0x41ac1d VMULPD %ZMM8,%ZMM9,%ZMM8 |
0x41ac23 VSUBPD %ZMM23,%ZMM26,%ZMM9 |
0x41ac29 VMOVDQU64 0x180(%RSP),%ZMM10 [19] |
0x41ac31 VPMULLQ %ZMM3,%ZMM10,%ZMM10 |
0x41ac37 VFMADD213PD %ZMM8,%ZMM11,%ZMM9 |
0x41ac3d VPADDQ %ZMM2,%ZMM10,%ZMM8 |
0x41ac43 KXNORW %K0,%K0,%K1 |
0x41ac47 VPXOR %XMM10,%XMM10,%XMM10 |
0x41ac4c VPMULLQ %ZMM3,%ZMM27,%ZMM10 |
0x41ac52 VFMADD213PD %ZMM7,%ZMM4,%ZMM9 |
0x41ac58 VPXOR %XMM11,%XMM11,%XMM11 |
0x41ac5d VPMULLQ %ZMM3,%ZMM28,%ZMM11 |
0x41ac63 KXNORW %K0,%K0,%K2 |
0x41ac67 VXORPD %XMM12,%XMM12,%XMM12 |
0x41ac6c MOV 0x48(%RSP),%RCX [19] |
0x41ac71 VSCATTERQPD %ZMM9,(%RCX,%ZMM8,8){%K1} [14] |
0x41ac78 KXNORW %K0,%K0,%K1 |
0x41ac7c VGATHERQPD (%RBX,%ZMM31,8),%ZMM12{%K2} [13] |
0x41ac83 VXORPD %XMM13,%XMM13,%XMM13 |
0x41ac88 VGATHERQPD (%RBX,%ZMM22,8),%ZMM13{%K1} [10] |
0x41ac8f VPADDQ %ZMM2,%ZMM11,%ZMM7 |
0x41ac95 KXNORW %K0,%K0,%K1 |
0x41ac99 VXORPD %XMM15,%XMM15,%XMM15 |
0x41ac9e KXNORW %K0,%K0,%K2 |
0x41aca2 VXORPD %XMM18,%XMM18,%XMM18 |
0x41aca8 VGATHERQPD (%RDI,%ZMM7,8),%ZMM15{%K1} [15] |
0x41acaf KXNORW %K0,%K0,%K1 |
0x41acb3 VGATHERQPD (%RBX,%ZMM14,8),%ZMM18{%K2} [2] |
0x41acba VXORPD %XMM14,%XMM14,%XMM14 |
0x41acbf VGATHERQPD (%RBX,%ZMM19,8),%ZMM14{%K1} [3] |
0x41acc6 KXNORW %K0,%K0,%K1 |
0x41acca VXORPD %XMM19,%XMM19,%XMM19 |
0x41acd0 VPADDQ %ZMM0,%ZMM11,%ZMM9 |
0x41acd6 VPADDQ %ZMM2,%ZMM10,%ZMM10 |
0x41acdc KXNORW %K0,%K0,%K2 |
0x41ace0 MOV 0xe0(%RSP),%RAX [19] |
0x41ace8 VGATHERQPD (%RAX,%ZMM10,8),%ZMM19{%K1} [12] |
0x41acef VXORPD %XMM10,%XMM10,%XMM10 |
0x41acf4 VGATHERQPD (%RDI,%ZMM9,8),%ZMM10{%K2} [22] |
0x41acfb VSUBPD %ZMM12,%ZMM13,%ZMM11 |
0x41ad01 VMULPD %ZMM15,%ZMM11,%ZMM11 |
0x41ad07 VSUBPD %ZMM18,%ZMM14,%ZMM12 |
0x41ad0d VPXOR %XMM13,%XMM13,%XMM13 |
0x41ad12 VPMULLQ %ZMM3,%ZMM29,%ZMM13 |
0x41ad18 VFMADD213PD %ZMM11,%ZMM10,%ZMM12 |
0x41ad1e VPADDQ %ZMM2,%ZMM13,%ZMM10 |
0x41ad24 VPMULLQ %ZMM3,%ZMM30,%ZMM3 |
0x41ad2a VFMADD213PD %ZMM19,%ZMM4,%ZMM12 |
0x41ad30 KXNORW %K0,%K0,%K1 |
0x41ad34 VPADDQ %ZMM2,%ZMM3,%ZMM11 |
0x41ad3a KXNORW %K0,%K0,%K2 |
0x41ad3e VPXOR %XMM13,%XMM13,%XMM13 |
0x41ad43 VPADDQ %ZMM0,%ZMM3,%ZMM3 |
0x41ad49 MOV 0x68(%RSP),%RAX [19] |
0x41ad4e VSCATTERQPD %ZMM12,(%RAX,%ZMM10,8){%K1} [9] |
0x41ad55 KXNORW %K0,%K0,%K1 |
0x41ad59 VGATHERQPD (%RSI,%ZMM11,8),%ZMM13{%K2} [1] |
0x41ad60 VXORPD %XMM12,%XMM12,%XMM12 |
0x41ad65 VGATHERQPD (%RSI,%ZMM3,8),%ZMM12{%K1} [18] |
0x41ad6c VPMULLQ %ZMM1,%ZMM30,%ZMM1 |
0x41ad72 KXNORW %K0,%K0,%K1 |
0x41ad76 VXORPD %XMM14,%XMM14,%XMM14 |
0x41ad7b VGATHERQPD (%R14,%ZMM5,8),%ZMM14{%K1} [23] |
0x41ad82 KXNORW %K0,%K0,%K1 |
0x41ad86 VPADDQ %ZMM2,%ZMM1,%ZMM2 |
0x41ad8c KXNORW %K0,%K0,%K2 |
0x41ad90 VPADDQ %ZMM0,%ZMM1,%ZMM0 |
0x41ad96 VPXOR %XMM1,%XMM1,%XMM1 |
0x41ad9a VXORPD %XMM5,%XMM5,%XMM5 |
0x41ad9e VGATHERQPD (%R14,%ZMM6,8),%ZMM1{%K1} [4] |
0x41ada5 KXNORW %K0,%K0,%K1 |
0x41ada9 VGATHERQPD (%RSI,%ZMM2,8),%ZMM5{%K2} [20] |
0x41adb0 VXORPD %XMM6,%XMM6,%XMM6 |
0x41adb4 VGATHERQPD (%RSI,%ZMM0,8),%ZMM6{%K1} [17] |
0x41adbb KXNORW %K0,%K0,%K1 |
0x41adbf VSUBPD %ZMM13,%ZMM12,%ZMM12 |
0x41adc5 VXORPD %XMM13,%XMM13,%XMM13 |
0x41adca VGATHERQPD (%RCX,%ZMM8,8),%ZMM13{%K1} [14] |
0x41add1 VMULPD %ZMM14,%ZMM12,%ZMM12 |
0x41add7 VSUBPD %ZMM5,%ZMM6,%ZMM5 |
0x41addd VFMADD213PD %ZMM12,%ZMM1,%ZMM5 |
0x41ade3 VFMADD213PD %ZMM13,%ZMM4,%ZMM5 |
0x41ade9 KXNORW %K0,%K0,%K1 |
0x41aded VSCATTERQPD %ZMM5,(%RCX,%ZMM8,8){%K1} [14] |
0x41adf4 KXNORW %K0,%K0,%K1 |
0x41adf8 VXORPD %XMM1,%XMM1,%XMM1 |
0x41adfc KXNORW %K0,%K0,%K2 |
0x41ae00 VGATHERQPD (%RSI,%ZMM11,8),%ZMM1{%K1} [1] |
0x41ae07 VXORPD %XMM5,%XMM5,%XMM5 |
0x41ae0b VGATHERQPD (%RSI,%ZMM2,8),%ZMM5{%K2} [20] |
0x41ae12 KXNORW %K0,%K0,%K1 |
0x41ae16 VXORPD %XMM2,%XMM2,%XMM2 |
0x41ae1a VGATHERQPD (%RDI,%ZMM7,8),%ZMM2{%K1} [15] |
0x41ae21 KXNORW %K0,%K0,%K1 |
0x41ae25 VXORPD %XMM6,%XMM6,%XMM6 |
0x41ae29 KXNORW %K0,%K0,%K2 |
0x41ae2d VGATHERQPD (%RSI,%ZMM3,8),%ZMM6{%K1} [18] |
0x41ae34 VXORPD %XMM3,%XMM3,%XMM3 |
0x41ae38 VGATHERQPD (%RSI,%ZMM0,8),%ZMM3{%K2} [17] |
0x41ae3f KXNORW %K0,%K0,%K1 |
0x41ae43 VXORPD %XMM0,%XMM0,%XMM0 |
0x41ae47 VGATHERQPD (%RDI,%ZMM9,8),%ZMM0{%K1} [22] |
0x41ae4e KXNORW %K0,%K0,%K1 |
0x41ae52 VSUBPD %ZMM1,%ZMM5,%ZMM1 |
0x41ae58 VXORPD %XMM5,%XMM5,%XMM5 |
0x41ae5c VGATHERQPD (%RAX,%ZMM10,8),%ZMM5{%K1} [9] |
0x41ae63 VMULPD %ZMM2,%ZMM1,%ZMM1 |
0x41ae69 VSUBPD %ZMM6,%ZMM3,%ZMM2 |
0x41ae6f VFMADD213PD %ZMM1,%ZMM0,%ZMM2 |
0x41ae75 VFMADD213PD %ZMM5,%ZMM4,%ZMM2 |
0x41ae7b KXNORW %K0,%K0,%K1 |
0x41ae7f VSCATTERQPD %ZMM2,(%RAX,%ZMM10,8){%K1} [9] |
0x41ae86 VPADDQ 0x521e8(%RIP){1to8},%ZMM17,%ZMM17 [16] |
0x41ae90 ADD $0x8,%R13 |
0x41ae94 CMP 0x38(%RSP),%R13 [19] |
0x41ae99 JB 41a9e0 |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/accelerate.cpp: 40 - 53 |
-------------------------------------------------------------------------------- |
40: #pragma omp parallel for simd collapse(2) |
41: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
42: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
43: double stepbymass_s = halfdt / ((density0(i - 1, j - 1) * volume(i - 1, j - 1) + density0(i - 1, j + 0) * volume(i - 1, j + 0) + |
44: density0(i, j) * volume(i, j) + density0(i + 0, j - 1) * volume(i + 0, j - 1)) * |
45: 0.25); |
46: xvel1(i, j) = xvel0(i, j) - stepbymass_s * (xarea(i, j) * (pressure(i, j) - pressure(i - 1, j + 0)) + |
47: xarea(i + 0, j - 1) * (pressure(i + 0, j - 1) - pressure(i - 1, j - 1))); |
48: yvel1(i, j) = yvel0(i, j) - stepbymass_s * (yarea(i, j) * (pressure(i, j) - pressure(i + 0, j - 1)) + |
49: yarea(i - 1, j + 0) * (pressure(i - 1, j + 0) - pressure(i - 1, j - 1))); |
50: xvel1(i, j) = xvel1(i, j) - stepbymass_s * (xarea(i, j) * (viscosity(i, j) - viscosity(i - 1, j + 0)) + |
51: xarea(i + 0, j - 1) * (viscosity(i + 0, j - 1) - viscosity(i - 1, j - 1))); |
52: yvel1(i, j) = yvel1(i, j) - stepbymass_s * (yarea(i, j) * (viscosity(i, j) - viscosity(i + 0, j - 1)) + |
53: yarea(i - 1, j + 0) * (viscosity(i - 1, j + 0) - viscosity(i - 1, j - 1))); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.18 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.30 |
Bottlenecks | P0, P5, |
Function | _Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted |
Source | accelerate.cpp:40-53,context.h:69-69 |
Source loop unroll info | unrolled by 8 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 8 |
CQA cycles | 130.00 |
CQA cycles if no scalar integer | 110.00 |
CQA cycles if FP arith vectorized | 130.00 |
CQA cycles if fully vectorized | 130.00 |
Front-end cycles | 83.50 |
DIV/SQRT cycles | 130.00 |
P0 cycles | 12.50 |
P1 cycles | 100.33 |
P2 cycles | 100.33 |
P3 cycles | 17.00 |
P4 cycles | 130.00 |
P5 cycles | 4.00 |
P6 cycles | 17.00 |
P7 cycles | 17.00 |
P8 cycles | 17.00 |
P9 cycles | 4.00 |
P10 cycles | 100.33 |
P11 cycles | 16.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | 137.35 - 264.73 |
Stall cycles (UFS) | 75.72 - 202.72 |
Nb insns | 217.00 |
Nb uops | 501.00 |
Nb loads | 49.00 |
Nb stores | 4.00 |
Nb stack references | 10.00 |
FLOP/cycle | 2.28 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 48.00 |
Nb FLOP fma | 88.00 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 22.15 |
Bytes prefetched | 0.00 |
Bytes loaded | 2624.00 |
Bytes stored | 256.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 2.00 |
Stride indirect | 20.00 |
Vectorization ratio all | 100.00 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 100.00 |
Vector-efficiency ratio all | 77.99 |
Vector-efficiency ratio load | 97.67 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 94.59 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 61.39 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.18 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.30 |
Bottlenecks | P0, P5, |
Function | _Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted |
Source | accelerate.cpp:40-53,context.h:69-69 |
Source loop unroll info | unrolled by 8 |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | main |
Unroll factor | 8 |
CQA cycles | 130.00 |
CQA cycles if no scalar integer | 110.00 |
CQA cycles if FP arith vectorized | 130.00 |
CQA cycles if fully vectorized | 130.00 |
Front-end cycles | 83.50 |
DIV/SQRT cycles | 130.00 |
P0 cycles | 12.50 |
P1 cycles | 100.33 |
P2 cycles | 100.33 |
P3 cycles | 17.00 |
P4 cycles | 130.00 |
P5 cycles | 4.00 |
P6 cycles | 17.00 |
P7 cycles | 17.00 |
P8 cycles | 17.00 |
P9 cycles | 4.00 |
P10 cycles | 100.33 |
P11 cycles | 16.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | 137.35 - 264.73 |
Stall cycles (UFS) | 75.72 - 202.72 |
Nb insns | 217.00 |
Nb uops | 501.00 |
Nb loads | 49.00 |
Nb stores | 4.00 |
Nb stack references | 10.00 |
FLOP/cycle | 2.28 |
Nb FLOP add-sub | 64.00 |
Nb FLOP mul | 48.00 |
Nb FLOP fma | 88.00 |
Nb FLOP div | 8.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 22.15 |
Bytes prefetched | 0.00 |
Bytes loaded | 2624.00 |
Bytes stored | 256.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 2.00 |
Stride indirect | 20.00 |
Vectorization ratio all | 100.00 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 100.00 |
Vector-efficiency ratio all | 77.99 |
Vector-efficiency ratio load | 97.67 |
Vector-efficiency ratio store | 100.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 94.59 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 61.39 |
Path / |
Function | _Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted |
Source file and lines | accelerate.cpp:40-53 |
Module | exec |
nb instructions | 217 |
nb uops | 501 |
loop length | 1215 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 20 |
used ymm registers | 4 |
used zmm registers | 32 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 83.50 cycles |
front end | 83.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 130.00 | 5.00 | 100.33 | 100.33 | 17.00 | 130.00 | 4.00 | 17.00 | 17.00 | 17.00 | 4.00 | 100.33 |
cycles | 130.00 | 12.50 | 100.33 | 100.33 | 17.00 | 130.00 | 4.00 | 17.00 | 17.00 | 17.00 | 4.00 | 100.33 |
Cycles executing div or sqrt instructions | 16.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
FE+BE cycles | 137.35-264.73 |
Stall cycles | 75.72-202.72 |
RS full (events) | 132.61-35.02 |
Front-end | 83.50 |
Dispatch | 130.00 |
DIV/SQRT | 16.00 |
Data deps. | 1.00 |
Overall L1 | 130.00 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 82% |
load | 80% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 93% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 53% |
all | 74% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 63% |
all | 77% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 94% |
fma | 100% |
div/sqrt | 100% |
other | 61% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVDQA64 %ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA64 %ZMM16,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
LEA 0x410cd(%RIP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL %RAX | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VPMOVQD %ZMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x160(%RSP),%YMM0,%YMM31 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VMOVDQA64 %ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA64 %ZMM16,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
CALLQ 0x715bb(%RIP) | 3 | 0.70 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.70 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 0 | 2.27 |
VPMOVQD %ZMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x140(%RSP),%YMM0,%YMM2 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPCMPEQD %YMM1,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDD %YMM1,%YMM2,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDD %YMM1,%YMM31,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPMOVSXDQ %YMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM20,%ZMM5 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMOVSXDQ %YMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM21,%ZMM8 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM5,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPMOVSXDQ %YMM31,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPMULLQ %ZMM3,%ZMM20,%ZMM9 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM8,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R15,%ZMM7,8),%ZMM4{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM0,%ZMM9,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R12,%ZMM10,8),%ZMM6{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM21,%ZMM10 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM10,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM11,8),%ZMM7{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R12,%ZMM12,8),%ZMM11{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPMOVSXDQ %YMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %ZMM2,%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM9,8),%ZMM12{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R12,%ZMM10,8),%ZMM9{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM2,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM8,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM5,8),%ZMM10{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R12,%ZMM8,8),%ZMM13{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM24,%ZMM5 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM3,%ZMM25,%ZMM14 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM14,%ZMM31 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM0,%ZMM14,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R14,%ZMM5,8),%ZMM8{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RBX,%ZMM31,8),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM14,8),%ZMM18{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPMULLQ %ZMM1,%ZMM25,%ZMM19 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM19,%ZMM22 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM22,8),%ZMM23{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM0,%ZMM19,%ZMM19 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM4,%ZMM6,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM26,%XMM26,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM19,8),%ZMM26{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VFMADD213PD %ZMM4,%ZMM7,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU64 0x1c0(%RSP),%ZMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPMULLQ %ZMM3,%ZMM4,%ZMM4 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VFMADD213PD %ZMM11,%ZMM12,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM24,%ZMM6 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM2,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x70(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VGATHERQPD (%RAX,%ZMM4,8),%ZMM7{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM6,8),%ZMM11{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VFMADD213PD %ZMM9,%ZMM10,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM15,%ZMM18,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD 0x52461(%RIP){1to8},%ZMM13,%ZMM4 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0x200(%RSP),%ZMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VDIVPD %ZMM4,%ZMM10,%ZMM4 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 22-24 | 16 |
VMULPD %ZMM8,%ZMM9,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM23,%ZMM26,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVDQU64 0x180(%RSP),%ZMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPMULLQ %ZMM3,%ZMM10,%ZMM10 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM8,%ZMM11,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ %ZMM2,%ZMM10,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM27,%ZMM10 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM7,%ZMM4,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPXOR %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM28,%ZMM11 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x48(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %ZMM9,(%RCX,%ZMM8,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RBX,%ZMM31,8),%ZMM12{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM22,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM2,%ZMM11,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM7,8),%ZMM15{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RBX,%ZMM14,8),%ZMM18{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM19,8),%ZMM14{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM19,%XMM19,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM0,%ZMM11,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0xe0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VGATHERQPD (%RAX,%ZMM10,8),%ZMM19{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM9,8),%ZMM10{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VSUBPD %ZMM12,%ZMM13,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %ZMM15,%ZMM11,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM18,%ZMM14,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPXOR %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM29,%ZMM13 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM11,%ZMM10,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ %ZMM2,%ZMM13,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM3,%ZMM30,%ZMM3 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM19,%ZMM4,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM3,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM0,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %ZMM12,(%RAX,%ZMM10,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM11,8),%ZMM13{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM3,8),%ZMM12{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPMULLQ %ZMM1,%ZMM30,%ZMM1 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM5,8),%ZMM14{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM0,%ZMM1,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM6,8),%ZMM1{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM2,8),%ZMM5{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM0,8),%ZMM6{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM13,%ZMM12,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RCX,%ZMM8,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VMULPD %ZMM14,%ZMM12,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM5,%ZMM6,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213PD %ZMM12,%ZMM1,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %ZMM13,%ZMM4,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %ZMM5,(%RCX,%ZMM8,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM11,8),%ZMM1{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM2,8),%ZMM5{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM7,8),%ZMM2{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM3,8),%ZMM6{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM0,8),%ZMM3{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM9,8),%ZMM0{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM1,%ZMM5,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM10,8),%ZMM5{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VMULPD %ZMM2,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM6,%ZMM3,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213PD %ZMM1,%ZMM0,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %ZMM5,%ZMM4,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %ZMM2,(%RAX,%ZMM10,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPADDQ 0x521e8(%RIP){1to8},%ZMM17,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
ADD $0x8,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP 0x38(%RSP),%R13 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JB 41a9e0 <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0x340> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
Function | _Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted |
Source file and lines | accelerate.cpp:40-53 |
Module | exec |
nb instructions | 217 |
nb uops | 501 |
loop length | 1215 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 20 |
used ymm registers | 4 |
used zmm registers | 32 |
nb stack references | 10 |
ADD-SUB / MUL ratio | 1.33 |
micro-operation queue | 83.50 cycles |
front end | 83.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 130.00 | 5.00 | 100.33 | 100.33 | 17.00 | 130.00 | 4.00 | 17.00 | 17.00 | 17.00 | 4.00 | 100.33 |
cycles | 130.00 | 12.50 | 100.33 | 100.33 | 17.00 | 130.00 | 4.00 | 17.00 | 17.00 | 17.00 | 4.00 | 100.33 |
Cycles executing div or sqrt instructions | 16.00 |
Longest recurrence chain latency (RecMII) | 1.00 |
FE+BE cycles | 137.35-264.73 |
Stall cycles | 75.72-202.72 |
RS full (events) | 132.61-35.02 |
Front-end | 83.50 |
Dispatch | 130.00 |
DIV/SQRT | 16.00 |
Data deps. | 1.00 |
Overall L1 | 130.00 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 82% |
load | 80% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 93% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 53% |
all | 74% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 63% |
all | 77% |
load | 97% |
store | 100% |
mul | 100% |
add-sub | 94% |
fma | 100% |
div/sqrt | 100% |
other | 61% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVDQA64 %ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA64 %ZMM16,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
LEA 0x410cd(%RIP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL %RAX | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 2.14 |
VPMOVQD %ZMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x160(%RSP),%YMM0,%YMM31 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VMOVDQA64 %ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVDQA64 %ZMM16,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
CALLQ 0x715bb(%RIP) | 3 | 0.70 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.70 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 0 | 2.27 |
VPMOVQD %ZMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDD 0x140(%RSP),%YMM0,%YMM2 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPCMPEQD %YMM1,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPADDD %YMM1,%YMM2,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPADDD %YMM1,%YMM31,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPMOVSXDQ %YMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM20,%ZMM5 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPMOVSXDQ %YMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM21,%ZMM8 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM5,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPMOVSXDQ %YMM31,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPMULLQ %ZMM3,%ZMM20,%ZMM9 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM8,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R15,%ZMM7,8),%ZMM4{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM0,%ZMM9,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R12,%ZMM10,8),%ZMM6{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM21,%ZMM10 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM0,%ZMM10,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM11,8),%ZMM7{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R12,%ZMM12,8),%ZMM11{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPMOVSXDQ %YMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %ZMM2,%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM9,8),%ZMM12{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R12,%ZMM10,8),%ZMM9{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM2,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM8,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%R15,%ZMM5,8),%ZMM10{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R12,%ZMM8,8),%ZMM13{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM24,%ZMM5 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM3,%ZMM25,%ZMM14 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM2,%ZMM14,%ZMM31 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM0,%ZMM14,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VGATHERQPD (%R14,%ZMM5,8),%ZMM8{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RBX,%ZMM31,8),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM14,8),%ZMM18{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPMULLQ %ZMM1,%ZMM25,%ZMM19 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM19,%ZMM22 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM22,8),%ZMM23{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM0,%ZMM19,%ZMM19 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMULPD %ZMM4,%ZMM6,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM26,%XMM26,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM19,8),%ZMM26{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VFMADD213PD %ZMM4,%ZMM7,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVDQU64 0x1c0(%RSP),%ZMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPMULLQ %ZMM3,%ZMM4,%ZMM4 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VFMADD213PD %ZMM11,%ZMM12,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM1,%ZMM24,%ZMM6 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPADDQ %ZMM2,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x70(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VGATHERQPD (%RAX,%ZMM4,8),%ZMM7{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM6,8),%ZMM11{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VFMADD213PD %ZMM9,%ZMM10,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM15,%ZMM18,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD 0x52461(%RIP){1to8},%ZMM13,%ZMM4 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0x200(%RSP),%ZMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VDIVPD %ZMM4,%ZMM10,%ZMM4 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 22-24 | 16 |
VMULPD %ZMM8,%ZMM9,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM23,%ZMM26,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVDQU64 0x180(%RSP),%ZMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPMULLQ %ZMM3,%ZMM10,%ZMM10 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM8,%ZMM11,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ %ZMM2,%ZMM10,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM27,%ZMM10 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM7,%ZMM4,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPXOR %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM28,%ZMM11 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x48(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %ZMM9,(%RCX,%ZMM8,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RBX,%ZMM31,8),%ZMM12{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM22,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPADDQ %ZMM2,%ZMM11,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM7,8),%ZMM15{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RBX,%ZMM14,8),%ZMM18{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RBX,%ZMM19,8),%ZMM14{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM19,%XMM19,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM0,%ZMM11,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM10,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0xe0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VGATHERQPD (%RAX,%ZMM10,8),%ZMM19{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM10,%XMM10,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM9,8),%ZMM10{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VSUBPD %ZMM12,%ZMM13,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD %ZMM15,%ZMM11,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM18,%ZMM14,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPXOR %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPMULLQ %ZMM3,%ZMM29,%ZMM13 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM11,%ZMM10,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ %ZMM2,%ZMM13,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM3,%ZMM30,%ZMM3 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VFMADD213PD %ZMM19,%ZMM4,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM3,%ZMM11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPXOR %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPADDQ %ZMM0,%ZMM3,%ZMM3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSCATTERQPD %ZMM12,(%RAX,%ZMM10,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM11,8),%ZMM13{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM12,%XMM12,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM3,8),%ZMM12{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VPMULLQ %ZMM1,%ZMM30,%ZMM1 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM14,%XMM14,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM5,8),%ZMM14{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM2,%ZMM1,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPADDQ %ZMM0,%ZMM1,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPXOR %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%R14,%ZMM6,8),%ZMM1{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM2,8),%ZMM5{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM0,8),%ZMM6{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM13,%ZMM12,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RCX,%ZMM8,8),%ZMM13{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VMULPD %ZMM14,%ZMM12,%ZMM12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM5,%ZMM6,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213PD %ZMM12,%ZMM1,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %ZMM13,%ZMM4,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %ZMM5,(%RCX,%ZMM8,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM11,8),%ZMM1{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM2,8),%ZMM5{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM7,8),%ZMM2{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
KXNORW %K0,%K0,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VGATHERQPD (%RSI,%ZMM3,8),%ZMM6{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RSI,%ZMM0,8),%ZMM3{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RDI,%ZMM9,8),%ZMM0{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSUBPD %ZMM1,%ZMM5,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD (%RAX,%ZMM10,8),%ZMM5{%K1} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
VMULPD %ZMM2,%ZMM1,%ZMM1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBPD %ZMM6,%ZMM3,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD213PD %ZMM1,%ZMM0,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %ZMM5,%ZMM4,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KXNORW %K0,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VSCATTERQPD %ZMM2,(%RAX,%ZMM10,8){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
VPADDQ 0x521e8(%RIP){1to8},%ZMM17,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
ADD $0x8,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP 0x38(%RSP),%R13 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JB 41a9e0 <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0x340> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |