Function: _Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted | Module: exec | Source: viscosity.cpp:36-64 [...] | Coverage: 2.98% |
---|
Function: _Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted | Module: exec | Source: viscosity.cpp:36-64 [...] | Coverage: 2.98% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/viscosity.cpp: 36 - 64 |
-------------------------------------------------------------------------------- |
36: #pragma omp parallel for simd collapse(2) |
37: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
38: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
39: double ugrad = (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1)) - (xvel0(i, j) + xvel0(i + 0, j + 1)); |
40: double vgrad = (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1)) - (yvel0(i, j) + yvel0(i + 1, j + 0)); |
41: double div = (celldx[i] * (ugrad) + celldy[j] * (vgrad)); |
42: double strain2 = 0.5 * (xvel0(i + 0, j + 1) + xvel0(i + 1, j + 1) - xvel0(i, j) - xvel0(i + 1, j + 0)) / celldy[j] + |
43: 0.5 * (yvel0(i + 1, j + 0) + yvel0(i + 1, j + 1) - yvel0(i, j) - yvel0(i + 0, j + 1)) / celldx[i]; |
44: double pgradx = (pressure(i + 1, j + 0) - pressure(i - 1, j + 0)) / (celldx[i] + celldx[i + 1]); |
45: double pgrady = (pressure(i + 0, j + 1) - pressure(i + 0, j - 1)) / (celldy[j] + celldy[j + 2]); |
46: double pgradx2 = pgradx * pgradx; |
47: double pgrady2 = pgrady * pgrady; |
48: double limiter = ((0.5 * (ugrad) / celldx[i]) * pgradx2 + (0.5 * (vgrad) / celldy[j]) * pgrady2 + strain2 * pgradx * pgrady) / |
49: std::fmax(pgradx2 + pgrady2, g_small); |
50: if ((limiter > 0.0) || (div >= 0.0)) { |
51: viscosity(i, j) = 0.0; |
52: } else { |
53: double dirx = 1.0; |
54: if (pgradx < 0.0) dirx = -1.0; |
55: pgradx = dirx * std::fmax(g_small, std::fabs(pgradx)); |
56: double diry = 1.0; |
57: if (pgradx < 0.0) diry = -1.0; |
58: pgrady = diry * std::fmax(g_small, std::fabs(pgrady)); |
59: double pgrad = std::sqrt(pgradx * pgradx + pgrady * pgrady); |
60: double xgrad = std::fabs(celldx[i] * pgrad / pgradx); |
61: double ygrad = std::fabs(celldy[j] * pgrad / pgrady); |
62: double grad = std::fmin(xgrad, ygrad); |
63: double grad2 = grad * grad; |
64: viscosity(i, j) = 2.0 * density0(i, j) * grad2 * limiter * limiter; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x459fa0 PUSH %RBP |
0x459fa1 MOV %RSP,%RBP |
0x459fa4 PUSH %R15 |
0x459fa6 PUSH %R14 |
0x459fa8 PUSH %R13 |
0x459faa PUSH %R12 |
0x459fac PUSH %RBX |
0x459fad AND $-0x40,%RSP |
0x459fb1 SUB $0x1c0,%RSP |
0x459fb8 MOV %R8,0x20(%RSP) |
0x459fbd MOV 0x48(%RBP),%RAX |
0x459fc1 MOV 0x38(%RBP),%RSI |
0x459fc5 MOV %RSI,0x40(%RSP) |
0x459fca MOV 0x30(%RBP),%R12 |
0x459fce MOV 0x28(%RBP),%RSI |
0x459fd2 MOV %RSI,0x28(%RSP) |
0x459fd7 MOV 0x20(%RBP),%RBX |
0x459fdb MOV 0x18(%RBP),%R14 |
0x459fdf MOV 0x10(%RBP),%R13 |
0x459fe3 MOVL $0,0x34(%RSP) |
0x459feb TEST %RAX,%RAX |
0x459fee JS 45a622 |
0x459ff4 MOV %RDX,0x60(%RSP) |
0x459ff9 MOV %RCX,0x18(%RSP) |
0x459ffe MOV %R9,%R15 |
0x45a001 MOV (%RDI),%ESI |
0x45a003 MOVQ $0,0x50(%RSP) |
0x45a00c MOV %RAX,0x48(%RSP) |
0x45a011 MOVQ $0x1,0x58(%RSP) |
0x45a01a SUB $0x8,%RSP |
0x45a01e LEA 0x60(%RSP),%RAX |
0x45a023 LEA 0x3c(%RSP),%RCX |
0x45a028 LEA 0x58(%RSP),%R8 |
0x45a02d LEA 0x50(%RSP),%R9 |
0x45a032 MOV $0x4905d0,%EDI |
0x45a037 MOV %ESI,0x38(%RSP) |
0x45a03b MOV $0x22,%EDX |
0x45a040 PUSH $0x1 |
0x45a042 PUSH $0x1 |
0x45a044 PUSH %RAX |
0x45a045 CALL 4031d0 <__kmpc_for_static_init_8@plt> |
0x45a04a ADD $0x20,%RSP |
0x45a04e MOV 0x50(%RSP),%RCX |
0x45a053 MOV 0x48(%RSP),%RDX |
0x45a058 CMP %RDX,%RCX |
0x45a05b JA 45a646 |
0x45a061 LEA 0x1(%RCX),%RAX |
0x45a065 INC %RDX |
0x45a068 CMP %RDX,%RAX |
0x45a06b CMOVG %RAX,%RDX |
0x45a06f MOV (%R14),%RAX |
0x45a072 VPBROADCASTQ %RAX,%ZMM19 |
0x45a078 MOV (%RBX),%RAX |
0x45a07b VPBROADCASTQ %RAX,%ZMM20 |
0x45a081 MOV (%R15),%RAX |
0x45a084 VPBROADCASTQ %RAX,%ZMM21 |
0x45a08a MOV (%R13),%RAX |
0x45a08e VPBROADCASTQ %RAX,%ZMM0 |
0x45a094 VMOVDQU64 %ZMM0,0x140(%RSP) |
0x45a09c MOV 0x10(%R14),%RDI |
0x45a0a0 MOV 0x10(%RBX),%R14 |
0x45a0a4 MOV 0x60(%RSP),%RAX |
0x45a0a9 MOV 0x8(%RAX),%RAX |
0x45a0ad MOV %RAX,0x38(%RSP) |
0x45a0b2 MOV 0x18(%RSP),%RAX |
0x45a0b7 MOV 0x8(%RAX),%RBX |
0x45a0bb MOV 0x10(%R15),%R15 |
0x45a0bf MOV 0x10(%R13),%RAX |
0x45a0c3 MOV %RAX,0x18(%RSP) |
0x45a0c8 SUB %RCX,%RDX |
0x45a0cb MOV 0x28(%RSP),%RAX |
0x45a0d0 VPBROADCASTD %EAX,%YMM0 |
0x45a0d6 VMOVDQU %YMM0,0x60(%RSP) |
0x45a0dc VPBROADCASTD %R12D,%YMM0 |
0x45a0e2 VMOVDQU %YMM0,0xa0(%RSP) |
0x45a0eb TEST $-0x8,%EDX |
0x45a0f1 JE 45a665 |
0x45a0f7 MOV %RCX,0xc0(%RSP) |
0x45a0ff VPBROADCASTQ %RCX,%ZMM0 |
0x45a105 VMOVDQU64 0x14671(%RIP),%ZMM26 |
0x45a10f VMOVDQU64 %ZMM0,0x100(%RSP) |
0x45a117 VPADDQ %ZMM26,%ZMM0,%ZMM17 |
0x45a11d MOV %RDX,0x80(%RSP) |
0x45a125 MOV %EDX,%ESI |
0x45a127 AND $-0x8,%ESI |
0x45a12a XOR %R13D,%R13D |
0x45a12d VBROADCASTSD 0x12f31(%RIP),%ZMM28 |
0x45a137 VBROADCASTSD 0x12ed7(%RIP),%ZMM31 |
0x45a141 VPBROADCASTQ 0x12f2d(%RIP),%ZMM27 |
0x45a14b VBROADCASTSD 0x12f03(%RIP),%ZMM30 |
0x45a155 JMP 45a24e |
0x45a15a NOPW (%RAX,%RAX,1) |
(807) 0x45a160 VANDPD %ZMM30,%ZMM6,%ZMM10 |
(807) 0x45a166 VMAXPD %ZMM31,%ZMM10,%ZMM10 |
(807) 0x45a16c VFPCLASSPD $0x50,%ZMM6,%K2 |
(807) 0x45a173 VBROADCASTSD 0x12f13(%RIP),%ZMM11 |
(807) 0x45a17d VXORPD %ZMM11,%ZMM10,%ZMM10{%K2} |
(807) 0x45a183 VANDPD %ZMM30,%ZMM7,%ZMM6 |
(807) 0x45a189 VMAXPD %ZMM31,%ZMM6,%ZMM6 |
(807) 0x45a18f VFPCLASSPD $0x50,%ZMM10,%K2 |
(807) 0x45a196 VXORPD %ZMM11,%ZMM6,%ZMM6{%K2} |
(807) 0x45a19c VMULPD %ZMM10,%ZMM10,%ZMM7 |
(807) 0x45a1a2 VFMADD231PD %ZMM6,%ZMM6,%ZMM7 |
(807) 0x45a1a8 VSQRTPD %ZMM7,%ZMM7 |
(807) 0x45a1ae VMULPD %ZMM3,%ZMM7,%ZMM3 |
(807) 0x45a1b4 VDIVPD %ZMM10,%ZMM3,%ZMM3 |
(807) 0x45a1ba VANDPD %ZMM30,%ZMM3,%ZMM3 |
(807) 0x45a1c0 VMULPD %ZMM4,%ZMM7,%ZMM4 |
(807) 0x45a1c6 VDIVPD %ZMM6,%ZMM4,%ZMM4 |
(807) 0x45a1cc VANDPD %ZMM30,%ZMM4,%ZMM4 |
(807) 0x45a1d2 VPBROADCASTQ %XMM8,%ZMM6 |
(807) 0x45a1d8 VPBROADCASTQ %XMM9,%ZMM7 |
(807) 0x45a1de VPMULLQ %ZMM1,%ZMM7,%ZMM1 |
(807) 0x45a1e4 VPADDQ %ZMM2,%ZMM1,%ZMM1 |
(807) 0x45a1ea VPSLLQ $0x3,%ZMM1,%ZMM1 |
(807) 0x45a1f1 VPADDQ %ZMM1,%ZMM6,%ZMM1 |
(807) 0x45a1f7 VPXOR %XMM2,%XMM2,%XMM2 |
(807) 0x45a1fb KMOVQ %K1,%K2 |
(807) 0x45a200 VGATHERQPD (,%ZMM1,1),%ZMM2{%K2} |
(807) 0x45a20b VMINPD %ZMM4,%ZMM3,%ZMM1 |
(807) 0x45a211 VMULPD %ZMM0,%ZMM1,%ZMM0 |
(807) 0x45a217 VMULPD %ZMM0,%ZMM0,%ZMM0 |
(807) 0x45a21d VADDPD %ZMM2,%ZMM2,%ZMM1 |
(807) 0x45a223 VMULPD %ZMM1,%ZMM0,%ZMM0 |
(807) 0x45a229 MOV 0x18(%RSP),%RAX |
(807) 0x45a22e VSCATTERQPD %ZMM0,(%RAX,%ZMM5,8){%K1} |
(807) 0x45a235 VPADDQ %ZMM27,%ZMM16,%ZMM17 |
(807) 0x45a23b VPADDQ %ZMM27,%ZMM26,%ZMM26 |
(807) 0x45a241 ADD $0x8,%R13 |
(807) 0x45a245 CMP %RSI,%R13 |
(807) 0x45a248 JAE 45a631 |
(807) 0x45a24e VPADDQ 0x100(%RSP),%ZMM26,%ZMM16 |
(807) 0x45a256 MOV 0x40(%RSP),%RAX |
(807) 0x45a25b SUB %R12D,%EAX |
(807) 0x45a25e VPBROADCASTQ %RAX,%ZMM18 |
(807) 0x45a264 VMOVDQA64 %ZMM16,%ZMM0 |
(807) 0x45a26a VMOVDQA64 %ZMM18,%ZMM1 |
(807) 0x45a270 LEA 0x16c9(%RIP),%RAX |
(807) 0x45a277 CALL %RAX |
(807) 0x45a279 LEA 0x1(%R12),%EAX |
(807) 0x45a27e VPBROADCASTD %EAX,%ZMM1 |
(807) 0x45a284 VPMOVQD %ZMM0,%YMM22 |
(807) 0x45a28a VPADDQ %ZMM0,%ZMM1,%ZMM0 |
(807) 0x45a290 VPSLLQ $0x20,%ZMM0,%ZMM0 |
(807) 0x45a297 VPSRAQ $0x20,%ZMM0,%ZMM23 |
(807) 0x45a29e VMOVDQA64 %ZMM17,%ZMM0 |
(807) 0x45a2a4 VMOVDQA64 %ZMM18,%ZMM1 |
(807) 0x45a2aa CALLQ 0x31cf8(%RIP) |
(807) 0x45a2b0 VPMOVQD %ZMM0,%YMM1 |
(807) 0x45a2b6 VPADDD 0x60(%RSP),%YMM1,%YMM5 |
(807) 0x45a2bc VPMOVSXDQ %YMM5,%ZMM1 |
(807) 0x45a2c2 VPXOR %XMM3,%XMM3,%XMM3 |
(807) 0x45a2c6 VPMULLQ %ZMM1,%ZMM19,%ZMM3 |
(807) 0x45a2cc VPADDQ %ZMM3,%ZMM23,%ZMM2 |
(807) 0x45a2d2 VXORPD %XMM6,%XMM6,%XMM6 |
(807) 0x45a2d6 KXNORW %K0,%K0,%K1 |
(807) 0x45a2da VGATHERQPD (%RDI,%ZMM2,8),%ZMM6{%K1} |
(807) 0x45a2e1 MOV 0x28(%RSP),%RCX |
(807) 0x45a2e6 LEA 0x1(%RCX),%EAX |
(807) 0x45a2e9 VPBROADCASTD %EAX,%ZMM2 |
(807) 0x45a2ef VPADDQ %ZMM0,%ZMM2,%ZMM2 |
(807) 0x45a2f5 VPSLLQ $0x20,%ZMM2,%ZMM2 |
(807) 0x45a2fc VPSRAQ $0x20,%ZMM2,%ZMM7 |
(807) 0x45a303 VPXOR %XMM4,%XMM4,%XMM4 |
(807) 0x45a307 VPMULLQ %ZMM7,%ZMM19,%ZMM4 |
(807) 0x45a30d VPADDQ %ZMM4,%ZMM23,%ZMM2 |
(807) 0x45a313 VXORPD %XMM8,%XMM8,%XMM8 |
(807) 0x45a318 KXNORW %K0,%K0,%K1 |
(807) 0x45a31c VGATHERQPD (%RDI,%ZMM2,8),%ZMM8{%K1} |
(807) 0x45a323 VPADDD 0xa0(%RSP),%YMM22,%YMM9 |
(807) 0x45a32b VPMOVSXDQ %YMM9,%ZMM2 |
(807) 0x45a331 VPADDQ %ZMM2,%ZMM3,%ZMM3 |
(807) 0x45a337 VXORPD %XMM10,%XMM10,%XMM10 |
(807) 0x45a33c KXNORW %K0,%K0,%K1 |
(807) 0x45a340 VGATHERQPD (%RDI,%ZMM3,8),%ZMM10{%K1} |
(807) 0x45a347 VPADDQ %ZMM2,%ZMM4,%ZMM3 |
(807) 0x45a34d VXORPD %XMM11,%XMM11,%XMM11 |
(807) 0x45a352 KXNORW %K0,%K0,%K1 |
(807) 0x45a356 VGATHERQPD (%RDI,%ZMM3,8),%ZMM11{%K1} |
(807) 0x45a35d VPXOR %XMM3,%XMM3,%XMM3 |
(807) 0x45a361 VPMULLQ %ZMM7,%ZMM20,%ZMM3 |
(807) 0x45a367 VPADDQ %ZMM2,%ZMM3,%ZMM4 |
(807) 0x45a36d VXORPD %XMM12,%XMM12,%XMM12 |
(807) 0x45a372 KXNORW %K0,%K0,%K1 |
(807) 0x45a376 VGATHERQPD (%R14,%ZMM4,8),%ZMM12{%K1} |
(807) 0x45a37d VPADDQ %ZMM3,%ZMM23,%ZMM3 |
(807) 0x45a383 VXORPD %XMM13,%XMM13,%XMM13 |
(807) 0x45a388 KXNORW %K0,%K0,%K1 |
(807) 0x45a38c VGATHERQPD (%R14,%ZMM3,8),%ZMM13{%K1} |
(807) 0x45a393 VPXOR %XMM3,%XMM3,%XMM3 |
(807) 0x45a397 VPMULLQ %ZMM1,%ZMM20,%ZMM3 |
(807) 0x45a39d VPADDQ %ZMM2,%ZMM3,%ZMM4 |
(807) 0x45a3a3 VXORPD %XMM14,%XMM14,%XMM14 |
(807) 0x45a3a8 KXNORW %K0,%K0,%K1 |
(807) 0x45a3ac VGATHERQPD (%R14,%ZMM4,8),%ZMM14{%K1} |
(807) 0x45a3b3 VPADDQ %ZMM3,%ZMM23,%ZMM3 |
(807) 0x45a3b9 VXORPD %XMM15,%XMM15,%XMM15 |
(807) 0x45a3be KXNORW %K0,%K0,%K1 |
(807) 0x45a3c2 VGATHERQPD (%R14,%ZMM3,8),%ZMM15{%K1} |
(807) 0x45a3c9 VXORPD %XMM3,%XMM3,%XMM3 |
(807) 0x45a3cd KXNORW %K0,%K0,%K1 |
(807) 0x45a3d1 MOV 0x38(%RSP),%RDX |
(807) 0x45a3d6 VGATHERDPD (%RDX,%YMM9,8),%ZMM3{%K1} |
(807) 0x45a3dd VXORPD %XMM4,%XMM4,%XMM4 |
(807) 0x45a3e1 KXNORW %K0,%K0,%K1 |
(807) 0x45a3e5 VGATHERDPD (%RBX,%YMM5,8),%ZMM4{%K1} |
(807) 0x45a3ec VBROADCASTSD 0x12c52(%RIP),%ZMM25 |
(807) 0x45a3f6 VDIVPD %ZMM3,%ZMM25,%ZMM17 |
(807) 0x45a3fc VADDPD %ZMM14,%ZMM12,%ZMM18 |
(807) 0x45a402 VADDPD %ZMM13,%ZMM15,%ZMM24 |
(807) 0x45a408 VDIVPD %ZMM4,%ZMM25,%ZMM25 |
(807) 0x45a40e VSUBPD %ZMM18,%ZMM24,%ZMM18 |
(807) 0x45a414 VPXORD %XMM24,%XMM24,%XMM24 |
(807) 0x45a41a VPMULLQ %ZMM1,%ZMM21,%ZMM24 |
(807) 0x45a420 VPADDQ %ZMM24,%ZMM23,%ZMM23 |
(807) 0x45a426 VXORPD %XMM29,%XMM29,%XMM29 |
(807) 0x45a42c KXNORW %K0,%K0,%K1 |
(807) 0x45a430 VGATHERQPD (%R15,%ZMM23,8),%ZMM29{%K1} |
(807) 0x45a437 LEA -0x1(%R12),%EAX |
(807) 0x45a43c VPBROADCASTD %EAX,%YMM23 |
(807) 0x45a442 VPADDD %YMM22,%YMM23,%YMM22 |
(807) 0x45a448 VPMOVSXDQ %YMM22,%ZMM22 |
(807) 0x45a44e VPADDQ %ZMM22,%ZMM24,%ZMM22 |
(807) 0x45a454 VPXORD %XMM23,%XMM23,%XMM23 |
(807) 0x45a45a KXNORW %K0,%K0,%K1 |
(807) 0x45a45e VGATHERQPD (%R15,%ZMM22,8),%ZMM23{%K1} |
(807) 0x45a465 VMULPD %ZMM17,%ZMM28,%ZMM22 |
(807) 0x45a46b VMULPD %ZMM22,%ZMM18,%ZMM18 |
(807) 0x45a471 VADDPD %ZMM10,%ZMM6,%ZMM22 |
(807) 0x45a477 VADDPD %ZMM8,%ZMM11,%ZMM24 |
(807) 0x45a47d VSUBPD %ZMM22,%ZMM24,%ZMM22 |
(807) 0x45a483 VMULPD %ZMM28,%ZMM25,%ZMM24 |
(807) 0x45a489 VFMADD213PD %ZMM18,%ZMM22,%ZMM24 |
(807) 0x45a48f VADDPD %ZMM6,%ZMM8,%ZMM6 |
(807) 0x45a495 VADDPD %ZMM11,%ZMM10,%ZMM8 |
(807) 0x45a49b VSUBPD %ZMM8,%ZMM6,%ZMM8 |
(807) 0x45a4a1 VPCMPEQD %YMM6,%YMM6,%YMM6 |
(807) 0x45a4a5 VPSUBD %YMM6,%YMM9,%YMM6 |
(807) 0x45a4a9 VXORPD %XMM9,%XMM9,%XMM9 |
(807) 0x45a4ae KXNORW %K0,%K0,%K1 |
(807) 0x45a4b2 VGATHERDPD (%RDX,%YMM6,8),%ZMM9{%K1} |
(807) 0x45a4b9 VADDPD %ZMM12,%ZMM13,%ZMM10 |
(807) 0x45a4bf VPXOR %XMM6,%XMM6,%XMM6 |
(807) 0x45a4c3 VPMULLQ %ZMM7,%ZMM21,%ZMM6 |
(807) 0x45a4c9 VPADDQ %ZMM2,%ZMM6,%ZMM6 |
(807) 0x45a4cf VPXOR %XMM7,%XMM7,%XMM7 |
(807) 0x45a4d3 KXNORW %K0,%K0,%K1 |
(807) 0x45a4d7 VGATHERQPD (%R15,%ZMM6,8),%ZMM7{%K1} |
(807) 0x45a4de VADDPD %ZMM15,%ZMM14,%ZMM11 |
(807) 0x45a4e4 VSUBPD %ZMM23,%ZMM29,%ZMM6 |
(807) 0x45a4ea MOV $-0x1,%EAX |
(807) 0x45a4ef LEA (%RAX,%RCX,1),%EAX |
(807) 0x45a4f2 VPBROADCASTD %EAX,%ZMM12 |
(807) 0x45a4f8 VPADDQ %ZMM0,%ZMM12,%ZMM0 |
(807) 0x45a4fe VPSLLQ $0x20,%ZMM0,%ZMM0 |
(807) 0x45a505 VPSRAQ $0x20,%ZMM0,%ZMM0 |
(807) 0x45a50c VPMULLQ %ZMM0,%ZMM21,%ZMM0 |
(807) 0x45a512 VPADDQ %ZMM2,%ZMM0,%ZMM0 |
(807) 0x45a518 VPXOR %XMM12,%XMM12,%XMM12 |
(807) 0x45a51d KXNORW %K0,%K0,%K1 |
(807) 0x45a521 VGATHERQPD (%R15,%ZMM0,8),%ZMM12{%K1} |
(807) 0x45a528 VADDPD %ZMM3,%ZMM9,%ZMM0 |
(807) 0x45a52e VDIVPD %ZMM0,%ZMM6,%ZMM6 |
(807) 0x45a534 VPADDD 0x142ca(%RIP){1to8},%YMM5,%YMM0 |
(807) 0x45a53e VXORPD %XMM5,%XMM5,%XMM5 |
(807) 0x45a542 KXNORW %K0,%K0,%K1 |
(807) 0x45a546 VGATHERDPD (%RBX,%YMM0,8),%ZMM5{%K1} |
(807) 0x45a54d VSUBPD %ZMM11,%ZMM10,%ZMM0 |
(807) 0x45a553 VSUBPD %ZMM12,%ZMM7,%ZMM7 |
(807) 0x45a559 VADDPD %ZMM4,%ZMM5,%ZMM5 |
(807) 0x45a55f VDIVPD %ZMM5,%ZMM7,%ZMM7 |
(807) 0x45a565 VMULPD %ZMM24,%ZMM7,%ZMM5 |
(807) 0x45a56b VMULPD %ZMM28,%ZMM8,%ZMM9 |
(807) 0x45a571 VMULPD %ZMM17,%ZMM6,%ZMM10 |
(807) 0x45a577 VFMADD213PD %ZMM5,%ZMM9,%ZMM10 |
(807) 0x45a57d VMULPD %ZMM7,%ZMM7,%ZMM5 |
(807) 0x45a583 VMULPD %ZMM6,%ZMM10,%ZMM9 |
(807) 0x45a589 VMULPD %ZMM28,%ZMM0,%ZMM10 |
(807) 0x45a58f VMULPD %ZMM5,%ZMM25,%ZMM11 |
(807) 0x45a595 VFMADD213PD %ZMM9,%ZMM10,%ZMM11 |
(807) 0x45a59b VMULPD %ZMM8,%ZMM3,%ZMM8 |
(807) 0x45a5a1 VFMADD231PD %ZMM0,%ZMM4,%ZMM8 |
(807) 0x45a5a7 VFMADD231PD %ZMM6,%ZMM6,%ZMM5 |
(807) 0x45a5ad VMAXPD %ZMM31,%ZMM5,%ZMM0 |
(807) 0x45a5b3 VDIVPD %ZMM0,%ZMM11,%ZMM0 |
(807) 0x45a5b9 VFPCLASSPD $0x56,%ZMM0,%K1 |
(807) 0x45a5c0 VFPCLASSPD $0x50,%ZMM8,%K1{%K1} |
(807) 0x45a5c7 KNOTB %K1,%K2 |
(807) 0x45a5cb VMOVDQU64 0x140(%RSP),%ZMM5 |
(807) 0x45a5d3 VPMULLQ %ZMM1,%ZMM5,%ZMM5 |
(807) 0x45a5d9 VPADDQ %ZMM2,%ZMM5,%ZMM5 |
(807) 0x45a5df MOV 0x18(%RSP),%RAX |
(807) 0x45a5e4 VXORPD %XMM8,%XMM8,%XMM8 |
(807) 0x45a5e9 VSCATTERQPD %ZMM8,(%RAX,%ZMM5,8){%K2} |
(807) 0x45a5f0 KORTESTB %K1,%K1 |
(807) 0x45a5f4 JE 45a235 |
(807) 0x45a5fa KORTESTB %K1,%K1 |
(807) 0x45a5fe JE 45a60a |
(807) 0x45a600 MOV 0x20(%RSP),%RAX |
(807) 0x45a605 VMOVQ 0x10(%RAX),%XMM8 |
(807) 0x45a60a KORTESTB %K1,%K1 |
(807) 0x45a60e JE 45a160 |
(807) 0x45a614 MOV 0x20(%RSP),%RAX |
(807) 0x45a619 VMOVQ (%RAX),%XMM9 |
(807) 0x45a61d JMP 45a160 |
0x45a622 LEA -0x28(%RBP),%RSP |
0x45a626 POP %RBX |
0x45a627 POP %R12 |
0x45a629 POP %R13 |
0x45a62b POP %R14 |
0x45a62d POP %R15 |
0x45a62f POP %RBP |
0x45a630 RET |
0x45a631 MOV 0x80(%RSP),%RDX |
0x45a639 CMP %RSI,%RDX |
0x45a63c MOV 0xc0(%RSP),%RCX |
0x45a644 JNE 45a66c |
0x45a646 MOV $0x4905f0,%EDI |
0x45a64b MOV 0x30(%RSP),%ESI |
0x45a64f LEA -0x28(%RBP),%RSP |
0x45a653 POP %RBX |
0x45a654 POP %R12 |
0x45a656 POP %R13 |
0x45a658 POP %R14 |
0x45a65a POP %R15 |
0x45a65c POP %RBP |
0x45a65d VZEROUPPER |
0x45a660 JMP 403050 |
0x45a665 XOR %ESI,%ESI |
0x45a667 MOV %RCX,%RAX |
0x45a66a JMP 45a670 |
0x45a66c LEA (%RCX,%RSI,1),%RAX |
0x45a670 VPBROADCASTQ %RAX,%ZMM0 |
0x45a676 VMOVDQU64 0x14100(%RIP),%ZMM25 |
0x45a680 VPADDQ %ZMM25,%ZMM0,%ZMM16 |
0x45a686 SUB %RSI,%RDX |
0x45a689 VPBROADCASTQ %RDX,%ZMM26 |
0x45a68f VPBROADCASTQ 0x129df(%RIP),%ZMM28 |
0x45a699 ADD %RSI,%RCX |
0x45a69c VPBROADCASTQ %RCX,%ZMM0 |
0x45a6a2 VMOVDQU64 %ZMM0,0xc0(%RSP) |
0x45a6aa LEA 0x128f(%RIP),%RSI |
0x45a6b1 LEA 0x1408(%RIP),%R13 |
0x45a6b8 VBROADCASTSD 0x129a6(%RIP),%ZMM31 |
0x45a6c2 LEA -0x1(%R12),%EAX |
0x45a6c7 VPBROADCASTD %EAX,%YMM0 |
0x45a6cd VMOVDQU %YMM0,0x80(%RSP) |
0x45a6d6 JMP 45a7ec |
0x45a6db NOPL (%RAX,%RAX,1) |
(806) 0x45a6e0 VBROADCASTSD 0x1296e(%RIP),%ZMM12 |
(806) 0x45a6ea VANDPD %ZMM12,%ZMM6,%ZMM10 |
(806) 0x45a6f0 VBROADCASTSD 0x1291e(%RIP),%ZMM11 |
(806) 0x45a6fa VMAXPD %ZMM11,%ZMM10,%ZMM10 |
(806) 0x45a700 VFPCLASSPD $0x50,%ZMM6,%K2 |
(806) 0x45a707 VBROADCASTSD 0x1297f(%RIP),%ZMM13 |
(806) 0x45a711 VXORPD %ZMM13,%ZMM10,%ZMM10{%K2} |
(806) 0x45a717 VANDPD %ZMM12,%ZMM7,%ZMM6 |
(806) 0x45a71d VMAXPD %ZMM11,%ZMM6,%ZMM6 |
(806) 0x45a723 VFPCLASSPD $0x50,%ZMM10,%K2 |
(806) 0x45a72a VXORPD %ZMM13,%ZMM6,%ZMM6{%K2} |
(806) 0x45a730 VMULPD %ZMM10,%ZMM10,%ZMM7 |
(806) 0x45a736 VFMADD231PD %ZMM6,%ZMM6,%ZMM7 |
(806) 0x45a73c VSQRTPD %ZMM7,%ZMM7 |
(806) 0x45a742 VMULPD %ZMM3,%ZMM7,%ZMM3 |
(806) 0x45a748 VDIVPD %ZMM10,%ZMM3,%ZMM3 |
(806) 0x45a74e VANDPD %ZMM12,%ZMM3,%ZMM3 |
(806) 0x45a754 VMULPD %ZMM4,%ZMM7,%ZMM4 |
(806) 0x45a75a VDIVPD %ZMM6,%ZMM4,%ZMM4 |
(806) 0x45a760 VANDPD %ZMM12,%ZMM4,%ZMM4 |
(806) 0x45a766 VPBROADCASTQ %XMM8,%ZMM6 |
(806) 0x45a76c VPBROADCASTQ %XMM9,%ZMM7 |
(806) 0x45a772 VPMULLQ %ZMM1,%ZMM7,%ZMM1 |
(806) 0x45a778 VPADDQ %ZMM2,%ZMM1,%ZMM1 |
(806) 0x45a77e VPSLLQ $0x3,%ZMM1,%ZMM1 |
(806) 0x45a785 VPADDQ %ZMM1,%ZMM6,%ZMM1 |
(806) 0x45a78b KMOVQ %K1,%K2 |
(806) 0x45a790 VPXOR %XMM2,%XMM2,%XMM2 |
(806) 0x45a794 VGATHERQPD (,%ZMM1,1),%ZMM2{%K2} |
(806) 0x45a79f VMINPD %ZMM4,%ZMM3,%ZMM1 |
(806) 0x45a7a5 VMULPD %ZMM0,%ZMM1,%ZMM0 |
(806) 0x45a7ab VMULPD %ZMM0,%ZMM0,%ZMM0 |
(806) 0x45a7b1 VADDPD %ZMM2,%ZMM2,%ZMM1 |
(806) 0x45a7b7 VMULPD %ZMM1,%ZMM0,%ZMM0 |
(806) 0x45a7bd MOV 0x18(%RSP),%RAX |
(806) 0x45a7c2 VSCATTERQPD %ZMM0,(%RAX,%ZMM5,8){%K1} |
(806) 0x45a7c9 VPADDQ %ZMM28,%ZMM17,%ZMM0 |
(806) 0x45a7cf VMOVDQA64 %ZMM0,%ZMM16{%K3} |
(806) 0x45a7d5 VPADDQ %ZMM28,%ZMM25,%ZMM25 |
(806) 0x45a7db VPCMPLTUQ %ZMM26,%ZMM25,%K0 |
(806) 0x45a7e2 KORTESTB %K0,%K0 |
(806) 0x45a7e6 JE 45a646 |
(806) 0x45a7ec VPCMPLTUQ %ZMM26,%ZMM25,%K3 |
(806) 0x45a7f3 KORTESTB %K3,%K3 |
(806) 0x45a7f7 VPXOR %XMM0,%XMM0,%XMM0 |
(806) 0x45a7fb JE 45a7cf |
(806) 0x45a7fd VPADDQ 0xc0(%RSP),%ZMM25,%ZMM17 |
(806) 0x45a805 MOV 0x40(%RSP),%RAX |
(806) 0x45a80a SUB %R12D,%EAX |
(806) 0x45a80d VPBROADCASTQ 0x12811(%RIP),%ZMM18 |
(806) 0x45a817 VPBROADCASTQ %RAX,%ZMM18{%K3} |
(806) 0x45a81d VMOVDQA64 %ZMM17,%ZMM0 |
(806) 0x45a823 VMOVDQA64 %ZMM18,%ZMM1 |
(806) 0x45a829 KMOVW %K3,0x100(%RSP) |
(806) 0x45a832 CALL %RSI |
(806) 0x45a834 LEA 0x1(%R12),%EAX |
(806) 0x45a839 VPBROADCASTD %EAX,%ZMM1 |
(806) 0x45a83f VPMOVQD %ZMM0,%YMM22 |
(806) 0x45a845 VPADDQ %ZMM0,%ZMM1,%ZMM0 |
(806) 0x45a84b VPSLLQ $0x20,%ZMM0,%ZMM0 |
(806) 0x45a852 VPSRAQ $0x20,%ZMM0,%ZMM23 |
(806) 0x45a859 VMOVDQA64 %ZMM16,%ZMM0 |
(806) 0x45a85f VMOVDQA64 %ZMM18,%ZMM1 |
(806) 0x45a865 CALL %R13 |
(806) 0x45a868 KMOVW 0x100(%RSP),%K3 |
(806) 0x45a871 VPMOVQD %ZMM0,%YMM1 |
(806) 0x45a877 VPADDD 0x60(%RSP),%YMM1,%YMM5 |
(806) 0x45a87d VPMOVSXDQ %YMM5,%ZMM1 |
(806) 0x45a883 VPXOR %XMM3,%XMM3,%XMM3 |
(806) 0x45a887 VPMULLQ %ZMM1,%ZMM19,%ZMM3 |
(806) 0x45a88d VPADDQ %ZMM3,%ZMM23,%ZMM2 |
(806) 0x45a893 KMOVQ %K3,%K1 |
(806) 0x45a898 VXORPD %XMM6,%XMM6,%XMM6 |
(806) 0x45a89c VGATHERQPD (%RDI,%ZMM2,8),%ZMM6{%K1} |
(806) 0x45a8a3 MOV 0x28(%RSP),%RCX |
(806) 0x45a8a8 LEA 0x1(%RCX),%EAX |
(806) 0x45a8ab VPBROADCASTD %EAX,%ZMM2 |
(806) 0x45a8b1 VPADDQ %ZMM0,%ZMM2,%ZMM2 |
(806) 0x45a8b7 VPSLLQ $0x20,%ZMM2,%ZMM2 |
(806) 0x45a8be VPSRAQ $0x20,%ZMM2,%ZMM7 |
(806) 0x45a8c5 VPXOR %XMM4,%XMM4,%XMM4 |
(806) 0x45a8c9 VPMULLQ %ZMM7,%ZMM19,%ZMM4 |
(806) 0x45a8cf VPADDQ %ZMM4,%ZMM23,%ZMM2 |
(806) 0x45a8d5 KMOVQ %K3,%K1 |
(806) 0x45a8da VXORPD %XMM8,%XMM8,%XMM8 |
(806) 0x45a8df VGATHERQPD (%RDI,%ZMM2,8),%ZMM8{%K1} |
(806) 0x45a8e6 VPADDD 0xa0(%RSP),%YMM22,%YMM9 |
(806) 0x45a8ee VPMOVSXDQ %YMM9,%ZMM2 |
(806) 0x45a8f4 VPADDQ %ZMM2,%ZMM3,%ZMM3 |
(806) 0x45a8fa KMOVQ %K3,%K1 |
(806) 0x45a8ff VXORPD %XMM10,%XMM10,%XMM10 |
(806) 0x45a904 VGATHERQPD (%RDI,%ZMM3,8),%ZMM10{%K1} |
(806) 0x45a90b VPADDQ %ZMM2,%ZMM4,%ZMM3 |
(806) 0x45a911 KMOVQ %K3,%K1 |
(806) 0x45a916 VXORPD %XMM11,%XMM11,%XMM11 |
(806) 0x45a91b VGATHERQPD (%RDI,%ZMM3,8),%ZMM11{%K1} |
(806) 0x45a922 VPXOR %XMM3,%XMM3,%XMM3 |
(806) 0x45a926 VPMULLQ %ZMM7,%ZMM20,%ZMM3 |
(806) 0x45a92c VPADDQ %ZMM2,%ZMM3,%ZMM4 |
(806) 0x45a932 KMOVQ %K3,%K1 |
(806) 0x45a937 VXORPD %XMM12,%XMM12,%XMM12 |
(806) 0x45a93c VGATHERQPD (%R14,%ZMM4,8),%ZMM12{%K1} |
(806) 0x45a943 VPADDQ %ZMM3,%ZMM23,%ZMM3 |
(806) 0x45a949 KMOVQ %K3,%K1 |
(806) 0x45a94e VXORPD %XMM13,%XMM13,%XMM13 |
(806) 0x45a953 VGATHERQPD (%R14,%ZMM3,8),%ZMM13{%K1} |
(806) 0x45a95a VPXOR %XMM3,%XMM3,%XMM3 |
(806) 0x45a95e VPMULLQ %ZMM1,%ZMM20,%ZMM3 |
(806) 0x45a964 VPADDQ %ZMM2,%ZMM3,%ZMM4 |
(806) 0x45a96a KMOVQ %K3,%K1 |
(806) 0x45a96f VXORPD %XMM14,%XMM14,%XMM14 |
(806) 0x45a974 VGATHERQPD (%R14,%ZMM4,8),%ZMM14{%K1} |
(806) 0x45a97b VPADDQ %ZMM3,%ZMM23,%ZMM3 |
(806) 0x45a981 KMOVQ %K3,%K1 |
(806) 0x45a986 VXORPD %XMM15,%XMM15,%XMM15 |
(806) 0x45a98b VGATHERQPD (%R14,%ZMM3,8),%ZMM15{%K1} |
(806) 0x45a992 KMOVQ %K3,%K1 |
(806) 0x45a997 VXORPD %XMM3,%XMM3,%XMM3 |
(806) 0x45a99b MOV 0x38(%RSP),%RAX |
(806) 0x45a9a0 VGATHERDPD (%RAX,%YMM9,8),%ZMM3{%K1} |
(806) 0x45a9a7 KMOVQ %K3,%K1 |
(806) 0x45a9ac VXORPD %XMM4,%XMM4,%XMM4 |
(806) 0x45a9b0 VGATHERDPD (%RBX,%YMM5,8),%ZMM4{%K1} |
(806) 0x45a9b7 VBROADCASTSD 0x12687(%RIP),%ZMM27 |
(806) 0x45a9c1 VDIVPD %ZMM3,%ZMM27,%ZMM18 |
(806) 0x45a9c7 VADDPD %ZMM14,%ZMM12,%ZMM24 |
(806) 0x45a9cd VPMULLQ %ZMM1,%ZMM21,%ZMM30 |
(806) 0x45a9d3 VPADDQ %ZMM30,%ZMM23,%ZMM23 |
(806) 0x45a9d9 KMOVQ %K3,%K1 |
(806) 0x45a9de VXORPD %XMM29,%XMM29,%XMM29 |
(806) 0x45a9e4 VGATHERQPD (%R15,%ZMM23,8),%ZMM29{%K1} |
(806) 0x45a9eb VADDPD %ZMM13,%ZMM15,%ZMM23 |
(806) 0x45a9f1 VSUBPD %ZMM24,%ZMM23,%ZMM23 |
(806) 0x45a9f7 VMULPD %ZMM18,%ZMM31,%ZMM24 |
(806) 0x45a9fd VPADDD 0x80(%RSP),%YMM22,%YMM22 |
(806) 0x45aa05 VPMOVSXDQ %YMM22,%ZMM22 |
(806) 0x45aa0b VPADDQ %ZMM22,%ZMM30,%ZMM22 |
(806) 0x45aa11 KMOVQ %K3,%K1 |
(806) 0x45aa16 VPXORD %XMM30,%XMM30,%XMM30 |
(806) 0x45aa1c VGATHERQPD (%R15,%ZMM22,8),%ZMM30{%K1} |
(806) 0x45aa23 VMULPD %ZMM24,%ZMM23,%ZMM22 |
(806) 0x45aa29 VDIVPD %ZMM4,%ZMM27,%ZMM23 |
(806) 0x45aa2f VADDPD %ZMM10,%ZMM6,%ZMM24 |
(806) 0x45aa35 VADDPD %ZMM8,%ZMM11,%ZMM27 |
(806) 0x45aa3b VSUBPD %ZMM24,%ZMM27,%ZMM24 |
(806) 0x45aa41 VMULPD %ZMM31,%ZMM23,%ZMM27 |
(806) 0x45aa47 VFMADD213PD %ZMM22,%ZMM24,%ZMM27 |
(806) 0x45aa4d VADDPD %ZMM6,%ZMM8,%ZMM6 |
(806) 0x45aa53 VADDPD %ZMM11,%ZMM10,%ZMM8 |
(806) 0x45aa59 VPCMPEQD %YMM10,%YMM10,%YMM10 |
(806) 0x45aa5e VPSUBD %YMM10,%YMM9,%YMM9 |
(806) 0x45aa63 KMOVQ %K3,%K1 |
(806) 0x45aa68 VPXOR %XMM10,%XMM10,%XMM10 |
(806) 0x45aa6d VGATHERDPD (%RAX,%YMM9,8),%ZMM10{%K1} |
(806) 0x45aa74 VSUBPD %ZMM8,%ZMM6,%ZMM8 |
(806) 0x45aa7a VADDPD %ZMM12,%ZMM13,%ZMM6 |
(806) 0x45aa80 VPMULLQ %ZMM7,%ZMM21,%ZMM7 |
(806) 0x45aa86 VPADDQ %ZMM2,%ZMM7,%ZMM7 |
(806) 0x45aa8c KMOVQ %K3,%K1 |
(806) 0x45aa91 VXORPD %XMM9,%XMM9,%XMM9 |
(806) 0x45aa96 VGATHERQPD (%R15,%ZMM7,8),%ZMM9{%K1} |
(806) 0x45aa9d VADDPD %ZMM15,%ZMM14,%ZMM7 |
(806) 0x45aaa3 VSUBPD %ZMM7,%ZMM6,%ZMM11 |
(806) 0x45aaa9 VSUBPD %ZMM30,%ZMM29,%ZMM6 |
(806) 0x45aaaf MOV $-0x1,%EAX |
(806) 0x45aab4 LEA (%RAX,%RCX,1),%EAX |
(806) 0x45aab7 VPBROADCASTD %EAX,%ZMM7 |
(806) 0x45aabd VPADDQ %ZMM0,%ZMM7,%ZMM0 |
(806) 0x45aac3 VPSLLQ $0x20,%ZMM0,%ZMM0 |
(806) 0x45aaca VPSRAQ $0x20,%ZMM0,%ZMM0 |
(806) 0x45aad1 VPMULLQ %ZMM0,%ZMM21,%ZMM0 |
(806) 0x45aad7 VPADDQ %ZMM2,%ZMM0,%ZMM0 |
(806) 0x45aadd KMOVQ %K3,%K1 |
(806) 0x45aae2 VPXOR %XMM7,%XMM7,%XMM7 |
(806) 0x45aae6 VGATHERQPD (%R15,%ZMM0,8),%ZMM7{%K1} |
(806) 0x45aaed VADDPD %ZMM3,%ZMM10,%ZMM0 |
(806) 0x45aaf3 VDIVPD %ZMM0,%ZMM6,%ZMM6 |
(806) 0x45aaf9 VSUBPD %ZMM7,%ZMM9,%ZMM0 |
(806) 0x45aaff VPADDD 0x13cff(%RIP){1to8},%YMM5,%YMM5 |
(806) 0x45ab09 KMOVQ %K3,%K1 |
(806) 0x45ab0e VXORPD %XMM7,%XMM7,%XMM7 |
(806) 0x45ab12 VGATHERDPD (%RBX,%YMM5,8),%ZMM7{%K1} |
(806) 0x45ab19 VADDPD %ZMM4,%ZMM7,%ZMM5 |
(806) 0x45ab1f VDIVPD %ZMM5,%ZMM0,%ZMM7 |
(806) 0x45ab25 VMULPD %ZMM27,%ZMM7,%ZMM0 |
(806) 0x45ab2b VMULPD %ZMM31,%ZMM8,%ZMM5 |
(806) 0x45ab31 VMULPD %ZMM18,%ZMM6,%ZMM9 |
(806) 0x45ab37 VFMADD213PD %ZMM0,%ZMM5,%ZMM9 |
(806) 0x45ab3d VMULPD %ZMM7,%ZMM7,%ZMM0 |
(806) 0x45ab43 VMULPD %ZMM6,%ZMM9,%ZMM5 |
(806) 0x45ab49 VMULPD %ZMM31,%ZMM11,%ZMM9 |
(806) 0x45ab4f VMULPD %ZMM0,%ZMM23,%ZMM10 |
(806) 0x45ab55 VFMADD213PD %ZMM5,%ZMM9,%ZMM10 |
(806) 0x45ab5b VFMADD231PD %ZMM6,%ZMM6,%ZMM0 |
(806) 0x45ab61 VMAXPD 0x124ad(%RIP){1to8},%ZMM0,%ZMM0 |
(806) 0x45ab6b VDIVPD %ZMM0,%ZMM10,%ZMM0 |
(806) 0x45ab71 VMULPD %ZMM8,%ZMM3,%ZMM5 |
(806) 0x45ab77 VFMADD231PD %ZMM11,%ZMM4,%ZMM5 |
(806) 0x45ab7d VFPCLASSPD $0x56,%ZMM0,%K1 |
(806) 0x45ab84 VFPCLASSPD $0x50,%ZMM5,%K0{%K1} |
(806) 0x45ab8b KANDNB %K3,%K0,%K1 |
(806) 0x45ab8f VMOVDQU64 0x140(%RSP),%ZMM5 |
(806) 0x45ab97 VPMULLQ %ZMM1,%ZMM5,%ZMM5 |
(806) 0x45ab9d VPADDQ %ZMM2,%ZMM5,%ZMM5 |
(806) 0x45aba3 MOV 0x18(%RSP),%RAX |
(806) 0x45aba8 VXORPD %XMM8,%XMM8,%XMM8 |
(806) 0x45abad VSCATTERQPD %ZMM8,(%RAX,%ZMM5,8){%K1} |
(806) 0x45abb4 KANDB %K0,%K3,%K1 |
(806) 0x45abb8 KORTESTB %K1,%K1 |
(806) 0x45abbc JE 45a7c9 |
(806) 0x45abc2 KORTESTB %K1,%K1 |
(806) 0x45abc6 JE 45abd2 |
(806) 0x45abc8 MOV 0x20(%RSP),%RAX |
(806) 0x45abcd VMOVQ 0x10(%RAX),%XMM8 |
(806) 0x45abd2 KORTESTB %K1,%K1 |
(806) 0x45abd6 JE 45a6e0 |
(806) 0x45abdc MOV 0x20(%RSP),%RAX |
(806) 0x45abe1 VMOVQ (%RAX),%XMM9 |
(806) 0x45abe5 JMP 45a6e0 |
0x45abea NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | viscosity.cpp:36-64 |
Module | exec |
nb instructions | 137 |
nb uops | 139 |
loop length | 644 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 1 |
used zmm registers | 12 |
nb stack references | 25 |
micro-operation queue | 23.17 cycles |
front end | 23.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 5.80 | 15.67 | 15.67 | 15.00 | 11.00 | 5.70 | 15.00 | 15.00 | 15.00 | 5.80 | 15.67 |
cycles | 5.70 | 5.80 | 15.67 | 15.67 | 15.00 | 11.00 | 5.70 | 15.00 | 15.00 | 15.00 | 5.80 | 15.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.98 |
Stall cycles | 0.00 |
Front-end | 23.17 |
Dispatch | 15.67 |
Overall L1 | 23.17 |
all | 22% |
load | 33% |
store | 30% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 20% |
load | 20% |
store | 30% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 4% |
all | 26% |
load | 41% |
store | 30% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 47% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 25% |
load | 30% |
store | 30% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 47% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 45a622 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x682> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x60(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x58(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x50(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4905d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031d0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 45a646 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6a6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV 0x10(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %R12D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
TEST $-0x8,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 45a665 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RCX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x14671(%RIP),%ZMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPADDQ %ZMM26,%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x12f31(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x12ed7(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x12f2d(%RIP),%ZMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x12f03(%RIP),%ZMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 45a24e <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x2ae> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV 0x80(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xc0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 45a66c <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6cc> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x4905f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 45a670 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6d0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RCX,%RSI,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x14100(%RIP),%ZMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPADDQ %ZMM25,%ZMM0,%ZMM16 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RDX,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x129df(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
ADD %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RCX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
LEA 0x128f(%RIP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x1408(%RIP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x129a6(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
LEA -0x1(%R12),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JMP 45a7ec <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x84c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | viscosity.cpp:36-64 |
Module | exec |
nb instructions | 137 |
nb uops | 139 |
loop length | 644 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 1 |
used zmm registers | 12 |
nb stack references | 25 |
micro-operation queue | 23.17 cycles |
front end | 23.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 5.80 | 15.67 | 15.67 | 15.00 | 11.00 | 5.70 | 15.00 | 15.00 | 15.00 | 5.80 | 15.67 |
cycles | 5.70 | 5.80 | 15.67 | 15.67 | 15.00 | 11.00 | 5.70 | 15.00 | 15.00 | 15.00 | 5.80 | 15.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.98 |
Stall cycles | 0.00 |
Front-end | 23.17 |
Dispatch | 15.67 |
Overall L1 | 23.17 |
all | 22% |
load | 33% |
store | 30% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 20% |
load | 20% |
store | 30% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 4% |
all | 26% |
load | 41% |
store | 30% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 47% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 25% |
load | 30% |
store | 30% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 47% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 45a622 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x682> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x60(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x58(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x50(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4905d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031d0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 45a646 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6a6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%RCX),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV 0x10(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %R12D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
TEST $-0x8,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 45a665 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RCX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x14671(%RIP),%ZMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPADDQ %ZMM26,%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x12f31(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x12ed7(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x12f2d(%RIP),%ZMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x12f03(%RIP),%ZMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 45a24e <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x2ae> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV 0x80(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xc0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 45a66c <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6cc> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x4905f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 45a670 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x6d0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RCX,%RSI,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 0x14100(%RIP),%ZMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPADDQ %ZMM25,%ZMM0,%ZMM16 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RDX,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x129df(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
ADD %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RCX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
LEA 0x128f(%RIP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x1408(%RIP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x129a6(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
LEA -0x1(%R12),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
JMP 45a7ec <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x84c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted– | 2.98 | 2.43 |
○Loop 807 - viscosity.cpp:36-64 - exec | 2.98 | 2.42 |
○Loop 806 - viscosity.cpp:37-64 - exec | 0 | 0 |