Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:157-202 [...] | Coverage: 2.56% |
---|
Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:157-202 [...] | Coverage: 2.56% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 157 - 202 |
-------------------------------------------------------------------------------- |
157: #pragma omp parallel for simd collapse(2) |
158: for (int j = (y_min + 1); j < (y_max + 2 + 2); j++) { |
159: for (int i = (x_min + 1); i < (x_max + 2); i++) |
160: ({ |
161: int upwind, donor, downwind, dif; |
162: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
163: if (vol_flux_y(i, j) > 0.0) { |
164: upwind = j - 2; |
165: donor = j - 1; |
166: downwind = j; |
167: dif = donor; |
168: } else { |
169: upwind = std::min(j + 1, y_max + 2); |
170: donor = j; |
171: downwind = j - 1; |
172: dif = upwind; |
173: } |
174: sigmat = std::fabs(vol_flux_y(i, j)) / pre_vol(i, donor); |
175: sigma3 = (1.0 + sigmat) * (vertexdy[j] / vertexdy[dif]); |
176: sigma4 = 2.0 - sigmat; |
177: sigmav = sigmat; |
178: diffuw = density1(i, donor) - density1(i, upwind); |
179: diffdw = density1(i, downwind) - density1(i, donor); |
180: wind = 1.0; |
181: if (diffdw <= 0.0) wind = -1.0; |
182: if (diffuw * diffdw > 0.0) { |
183: limiter = (1.0 - sigmav) * wind * |
184: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
185: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
186: } else { |
187: limiter = 0.0; |
188: } |
189: mass_flux_y(i, j) = vol_flux_y(i, j) * (density1(i, donor) + limiter); |
190: sigmam = std::fabs(mass_flux_y(i, j)) / (density1(i, donor) * pre_vol(i, donor)); |
191: diffuw = energy1(i, donor) - energy1(i, upwind); |
192: diffdw = energy1(i, downwind) - energy1(i, donor); |
193: wind = 1.0; |
194: if (diffdw <= 0.0) wind = -1.0; |
195: if (diffuw * diffdw > 0.0) { |
196: limiter = (1.0 - sigmam) * wind * |
197: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
198: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
199: } else { |
200: limiter = 0.0; |
201: } |
202: ener_flux(i, j) = mass_flux_y(i, j) * (energy1(i, donor) + limiter); |
0x41cf80 PUSH %RBP |
0x41cf81 MOV %RSP,%RBP |
0x41cf84 PUSH %R15 |
0x41cf86 PUSH %R14 |
0x41cf88 PUSH %R13 |
0x41cf8a PUSH %R12 |
0x41cf8c PUSH %RBX |
0x41cf8d AND $-0x40,%RSP |
0x41cf91 SUB $0x180,%RSP |
0x41cf98 MOV 0x50(%RBP),%RAX |
0x41cf9c MOV 0x40(%RBP),%R15 |
0x41cfa0 MOV 0x38(%RBP),%RSI |
0x41cfa4 MOV 0x28(%RBP),%R14 |
0x41cfa8 MOV 0x20(%RBP),%RBX |
0x41cfac MOV 0x18(%RBP),%R12 |
0x41cfb0 MOV 0x10(%RBP),%R13 |
0x41cfb4 MOV 0x30(%RBP),%R10D |
0x41cfb8 MOV %R10D,0x14(%RSP) |
0x41cfbd MOVL $0,0x3c(%RSP) |
0x41cfc5 TEST %RAX,%RAX |
0x41cfc8 JS 41d55a |
0x41cfce MOV %RSI,0x18(%RSP) |
0x41cfd3 MOV %RDX,0x30(%RSP) |
0x41cfd8 MOV %RCX,0x20(%RSP) |
0x41cfdd MOV %R8,0x28(%RSP) |
0x41cfe2 MOV %R9,0x8(%RSP) |
0x41cfe7 MOV (%RDI),%ESI |
0x41cfe9 MOVQ $0,0x78(%RSP) |
0x41cff2 MOV %RAX,0x70(%RSP) |
0x41cff7 MOVQ $0x1,0x98(%RSP) |
0x41d003 SUB $0x8,%RSP |
0x41d007 LEA 0xa0(%RSP),%RAX |
0x41d00f LEA 0x44(%RSP),%RCX |
0x41d014 LEA 0x80(%RSP),%R8 |
0x41d01c LEA 0x78(%RSP),%R9 |
0x41d021 MOV $0x48c470,%EDI |
0x41d026 MOV %ESI,0x40(%RSP) |
0x41d02a MOV $0x22,%EDX |
0x41d02f PUSH $0x1 |
0x41d031 PUSH $0x1 |
0x41d033 PUSH %RAX |
0x41d034 CALL 4031d0 <__kmpc_for_static_init_8@plt> |
0x41d039 ADD $0x20,%RSP |
0x41d03d MOV 0x78(%RSP),%RSI |
0x41d042 MOV 0x70(%RSP),%RAX |
0x41d047 MOV %RAX,0x60(%RSP) |
0x41d04c CMP %RAX,%RSI |
0x41d04f JA 41d578 |
0x41d055 MOV %R15,%RDX |
0x41d058 SUB 0x18(%RSP),%EDX |
0x41d05c MOV (%R12),%R11 |
0x41d060 MOV 0x10(%R12),%RAX |
0x41d065 MOV %RAX,0x68(%RSP) |
0x41d06a MOV (%RBX),%RAX |
0x41d06d MOV (%RAX),%RCX |
0x41d070 MOV %RCX,0x58(%RSP) |
0x41d075 MOV 0x10(%RAX),%R8 |
0x41d079 MOV 0x20(%RSP),%RAX |
0x41d07e MOV 0x8(%RAX),%R12 |
0x41d082 MOV 0x28(%RSP),%RAX |
0x41d087 MOV (%RAX),%RAX |
0x41d08a MOV (%RAX),%RBX |
0x41d08d MOV 0x10(%RAX),%RDI |
0x41d091 MOV (%R13),%RAX |
0x41d095 MOV %RAX,0x50(%RSP) |
0x41d09a MOV 0x10(%R13),%RAX |
0x41d09e MOV %RAX,0x28(%RSP) |
0x41d0a3 MOV 0x30(%RSP),%RCX |
0x41d0a8 ADD $0x2,%ECX |
0x41d0ab LEA 0x1(%RSI),%RAX |
0x41d0af MOV 0x60(%RSP),%R9 |
0x41d0b4 MOV %R14,%R10 |
0x41d0b7 LEA 0x1(%R9),%R14 |
0x41d0bb CMP %R14,%RAX |
0x41d0be CMOVG %RAX,%R14 |
0x41d0c2 MOV 0x8(%RSP),%RAX |
0x41d0c7 MOV (%RAX),%R9 |
0x41d0ca MOV 0x10(%RAX),%R15 |
0x41d0ce MOV (%R10),%R13 |
0x41d0d1 MOV 0x10(%R10),%RAX |
0x41d0d5 MOV %R8,%R10 |
0x41d0d8 MOV %RAX,0x20(%RSP) |
0x41d0dd SUB %RSI,%R14 |
0x41d0e0 TEST $-0x8,%R14D |
0x41d0e7 MOV %RCX,0x30(%RSP) |
0x41d0ec MOV %RDX,0x48(%RSP) |
0x41d0f1 MOV %R11,0x40(%RSP) |
0x41d0f6 MOV %R8,0x8(%RSP) |
0x41d0fb MOV %R9,0x90(%RSP) |
0x41d103 MOV %R13,0x88(%RSP) |
0x41d10b JE 41d597 |
0x41d111 MOV %R11,%R8 |
0x41d114 VPBROADCASTQ %RDX,%ZMM16 |
0x41d11a MOV 0x14(%RSP),%EAX |
0x41d11e VPBROADCASTD %EAX,%YMM0 |
0x41d124 VMOVDQU %YMM0,0xa0(%RSP) |
0x41d12d MOV 0x18(%RSP),%RAX |
0x41d132 VPBROADCASTQ %RAX,%ZMM19 |
0x41d138 VPBROADCASTQ %R11,%ZMM20 |
0x41d13e VPBROADCASTD %ECX,%YMM21 |
0x41d144 MOV 0x58(%RSP),%RAX |
0x41d149 VPBROADCASTQ %RAX,%ZMM22 |
0x41d14f VPBROADCASTQ %RBX,%ZMM23 |
0x41d155 MOV 0x50(%RSP),%RAX |
0x41d15a VPBROADCASTQ %RAX,%ZMM24 |
0x41d160 VPBROADCASTQ %R9,%ZMM25 |
0x41d166 VPBROADCASTQ %R13,%ZMM26 |
0x41d16c VPBROADCASTQ %RSI,%ZMM0 |
0x41d172 VPADDQ 0x50404(%RIP),%ZMM0,%ZMM17 |
0x41d17c MOV %R14,0x80(%RSP) |
0x41d184 MOV %R14D,%R13D |
0x41d187 AND $-0x8,%R13D |
0x41d18b XOR %R14D,%R14D |
0x41d18e VBROADCASTSD 0x4fec0(%RIP),%ZMM28 |
0x41d198 VPBROADCASTQ %R10,%ZMM0 |
0x41d19e VMOVDQU64 %ZMM0,0x100(%RSP) |
0x41d1a6 VBROADCASTSD 0x4fe98(%RIP),%ZMM31 |
0x41d1b0 VPBROADCASTQ %RDI,%ZMM0 |
0x41d1b6 VMOVDQU64 %ZMM0,0xc0(%RSP) |
0x41d1be VBROADCASTSD 0x4fec0(%RIP),%ZMM29 |
0x41d1c8 VBROADCASTSD 0x4febe(%RIP),%ZMM18 |
0x41d1d2 VXORPD %XMM27,%XMM27,%XMM27 |
0x41d1d8 JMP 41d21f |
0x41d1da NOPW (%RAX,%RAX,1) |
(154) 0x41d1e0 VADDPD %ZMM10,%ZMM7,%ZMM7{%K1} |
(154) 0x41d1e6 VMULPD %ZMM5,%ZMM7,%ZMM2 |
(154) 0x41d1ec VPMULLQ %ZMM1,%ZMM26,%ZMM1 |
(154) 0x41d1f2 VPADDQ %ZMM0,%ZMM1,%ZMM0 |
(154) 0x41d1f8 KXNORW %K0,%K0,%K1 |
(154) 0x41d1fc MOV 0x20(%RSP),%RAX |
(154) 0x41d201 VSCATTERQPD %ZMM2,(%RAX,%ZMM0,8){%K1} |
(154) 0x41d208 VPADDQ 0x4fe66(%RIP){1to8},%ZMM17,%ZMM17 |
(154) 0x41d212 ADD $0x8,%R14 |
(154) 0x41d216 CMP %R13,%R14 |
(154) 0x41d219 JAE 41d569 |
(154) 0x41d21f VMOVDQA64 %ZMM17,%ZMM0 |
(154) 0x41d225 VMOVDQA64 %ZMM16,%ZMM1 |
(154) 0x41d22b LEA 0x3e88e(%RIP),%RAX |
(154) 0x41d232 CALL %RAX |
(154) 0x41d234 VPMOVQD %ZMM0,%YMM0 |
(154) 0x41d23a VPADDD 0xa0(%RSP),%YMM0,%YMM30 |
(154) 0x41d242 VMOVDQA64 %ZMM17,%ZMM0 |
(154) 0x41d248 VMOVDQA64 %ZMM16,%ZMM1 |
(154) 0x41d24e CALLQ 0x6ed7c(%RIP) |
(154) 0x41d254 VPADDQ %ZMM19,%ZMM0,%ZMM0 |
(154) 0x41d25a VPSLLQ $0x20,%ZMM0,%ZMM0 |
(154) 0x41d261 VPSRAQ $0x20,%ZMM0,%ZMM0 |
(154) 0x41d268 VPMOVSXDQ %YMM30,%ZMM1 |
(154) 0x41d26e VPXOR %XMM2,%XMM2,%XMM2 |
(154) 0x41d272 VPMULLQ %ZMM1,%ZMM20,%ZMM2 |
(154) 0x41d278 VPADDQ %ZMM0,%ZMM2,%ZMM2 |
(154) 0x41d27e VXORPD %XMM5,%XMM5,%XMM5 |
(154) 0x41d282 KXNORW %K0,%K0,%K1 |
(154) 0x41d286 MOV 0x68(%RSP),%RAX |
(154) 0x41d28b VGATHERQPD (%RAX,%ZMM2,8),%ZMM5{%K1} |
(154) 0x41d292 VCMPPD $0x1,%ZMM5,%ZMM27,%K1 |
(154) 0x41d299 VPCMPEQD %YMM6,%YMM6,%YMM6 |
(154) 0x41d29d VPADDD %YMM6,%YMM30,%YMM2 |
(154) 0x41d2a3 VPMOVSXDQ %YMM2,%ZMM4 |
(154) 0x41d2a9 VPBLENDMQ %ZMM4,%ZMM1,%ZMM7{%K1} |
(154) 0x41d2af VPXOR %XMM2,%XMM2,%XMM2 |
(154) 0x41d2b3 VPMULLQ %ZMM7,%ZMM22,%ZMM2 |
(154) 0x41d2b9 VPADDQ %ZMM0,%ZMM2,%ZMM3 |
(154) 0x41d2bf VPXOR %XMM2,%XMM2,%XMM2 |
(154) 0x41d2c3 KXNORW %K0,%K0,%K2 |
(154) 0x41d2c7 MOV 0x8(%RSP),%RAX |
(154) 0x41d2cc VGATHERQPD (%RAX,%ZMM3,8),%ZMM2{%K2} |
(154) 0x41d2d3 VPSUBD %YMM6,%YMM30,%YMM6 |
(154) 0x41d2d9 VPMINSD %YMM6,%YMM21,%YMM6 |
(154) 0x41d2df VPMOVSXDQ %YMM6,%ZMM6 |
(154) 0x41d2e5 VMOVDQA64 %ZMM6,%ZMM8 |
(154) 0x41d2eb VXORPD %XMM9,%XMM9,%XMM9 |
(154) 0x41d2f0 KXNORW %K0,%K0,%K2 |
(154) 0x41d2f4 VGATHERDPD (%R12,%YMM30,8),%ZMM9{%K2} |
(154) 0x41d2fb VMOVDQA64 %ZMM4,%ZMM6{%K1} |
(154) 0x41d301 VANDPD %ZMM28,%ZMM5,%ZMM10 |
(154) 0x41d307 VDIVPD %ZMM2,%ZMM10,%ZMM10 |
(154) 0x41d30d VXORPD %XMM2,%XMM2,%XMM2 |
(154) 0x41d311 KXNORW %K0,%K0,%K2 |
(154) 0x41d315 VGATHERQPD (%R12,%ZMM6,8),%ZMM2{%K2} |
(154) 0x41d31c VFMADD213PD %ZMM9,%ZMM10,%ZMM9 |
(154) 0x41d322 VDIVPD %ZMM2,%ZMM9,%ZMM2 |
(154) 0x41d328 VPADDD 0x514ce(%RIP){1to8},%YMM30,%YMM9 |
(154) 0x41d332 VPXOR %XMM6,%XMM6,%XMM6 |
(154) 0x41d336 VPMULLQ %ZMM7,%ZMM23,%ZMM6 |
(154) 0x41d33c VPADDQ %ZMM0,%ZMM6,%ZMM6 |
(154) 0x41d342 VXORPD %XMM11,%XMM11,%XMM11 |
(154) 0x41d347 KXNORW %K0,%K0,%K2 |
(154) 0x41d34b VGATHERQPD (%RDI,%ZMM6,8),%ZMM11{%K2} |
(154) 0x41d352 VPMOVSXDQ %YMM9,%ZMM8{%K1} |
(154) 0x41d358 VPXOR %XMM9,%XMM9,%XMM9 |
(154) 0x41d35d VPMULLQ %ZMM8,%ZMM23,%ZMM9 |
(154) 0x41d363 VPADDQ %ZMM0,%ZMM9,%ZMM9 |
(154) 0x41d369 VXORPD %XMM12,%XMM12,%XMM12 |
(154) 0x41d36e KXNORW %K0,%K0,%K2 |
(154) 0x41d372 VGATHERQPD (%RDI,%ZMM9,8),%ZMM12{%K2} |
(154) 0x41d379 VPBLENDMQ %ZMM1,%ZMM4,%ZMM9{%K1} |
(154) 0x41d37f VPXOR %XMM4,%XMM4,%XMM4 |
(154) 0x41d383 VPMULLQ %ZMM9,%ZMM23,%ZMM4 |
(154) 0x41d389 VPADDQ %ZMM0,%ZMM4,%ZMM4 |
(154) 0x41d38f VXORPD %XMM13,%XMM13,%XMM13 |
(154) 0x41d394 KXNORW %K0,%K0,%K1 |
(154) 0x41d398 VGATHERQPD (%RDI,%ZMM4,8),%ZMM13{%K1} |
(154) 0x41d39f VBROADCASTSD 0x4fcd7(%RIP),%ZMM4 |
(154) 0x41d3a9 VSUBPD %ZMM10,%ZMM4,%ZMM4 |
(154) 0x41d3af VSUBPD %ZMM12,%ZMM11,%ZMM12 |
(154) 0x41d3b5 VSUBPD %ZMM11,%ZMM13,%ZMM13 |
(154) 0x41d3bb VMULPD %ZMM12,%ZMM13,%ZMM14 |
(154) 0x41d3c1 VCMPPD $0x1,%ZMM14,%ZMM27,%K1 |
(154) 0x41d3c8 VANDPD %ZMM28,%ZMM12,%ZMM12 |
(154) 0x41d3ce VANDPD %ZMM28,%ZMM13,%ZMM14 |
(154) 0x41d3d4 VMINPD %ZMM14,%ZMM12,%ZMM15 |
(154) 0x41d3da VMULPD %ZMM2,%ZMM12,%ZMM12 |
(154) 0x41d3e0 VFMADD231PD %ZMM14,%ZMM4,%ZMM12 |
(154) 0x41d3e6 VMULPD %ZMM29,%ZMM12,%ZMM12 |
(154) 0x41d3ec VMINPD %ZMM12,%ZMM15,%ZMM12 |
(154) 0x41d3f2 VCMPPD $0x1,%ZMM13,%ZMM27,%K2 |
(154) 0x41d3f9 VSUBPD %ZMM10,%ZMM31,%ZMM10 |
(154) 0x41d3ff VXORPD %ZMM18,%ZMM10,%ZMM13 |
(154) 0x41d405 VMOVAPD %ZMM10,%ZMM13{%K2} |
(154) 0x41d40b VFMADD231PD %ZMM13,%ZMM12,%ZMM11{%K1} |
(154) 0x41d411 VMULPD %ZMM5,%ZMM11,%ZMM5 |
(154) 0x41d417 VPXOR %XMM10,%XMM10,%XMM10 |
(154) 0x41d41c VPMULLQ %ZMM1,%ZMM24,%ZMM10 |
(154) 0x41d422 VPADDQ %ZMM0,%ZMM10,%ZMM10 |
(154) 0x41d428 KXNORW %K0,%K0,%K1 |
(154) 0x41d42c MOV 0x28(%RSP),%RAX |
(154) 0x41d431 VSCATTERQPD %ZMM5,(%RAX,%ZMM10,8){%K1} |
(154) 0x41d438 VPMULLQ %ZMM7,%ZMM25,%ZMM7 |
(154) 0x41d43e VPADDQ %ZMM0,%ZMM7,%ZMM10 |
(154) 0x41d444 VPXOR %XMM7,%XMM7,%XMM7 |
(154) 0x41d448 KXNORW %K0,%K0,%K1 |
(154) 0x41d44c VGATHERQPD (%R15,%ZMM10,8),%ZMM7{%K1} |
(154) 0x41d453 VPMULLQ %ZMM8,%ZMM25,%ZMM8 |
(154) 0x41d459 VPADDQ %ZMM0,%ZMM8,%ZMM8 |
(154) 0x41d45f VXORPD %XMM10,%XMM10,%XMM10 |
(154) 0x41d464 KXNORW %K0,%K0,%K1 |
(154) 0x41d468 VGATHERQPD (%R15,%ZMM8,8),%ZMM10{%K1} |
(154) 0x41d46f VPXOR %XMM8,%XMM8,%XMM8 |
(154) 0x41d474 VPMULLQ %ZMM9,%ZMM25,%ZMM8 |
(154) 0x41d47a VPADDQ %ZMM0,%ZMM8,%ZMM8 |
(154) 0x41d480 VXORPD %XMM11,%XMM11,%XMM11 |
(154) 0x41d485 KXNORW %K0,%K0,%K1 |
(154) 0x41d489 VGATHERQPD (%R15,%ZMM8,8),%ZMM11{%K1} |
(154) 0x41d490 VSUBPD %ZMM10,%ZMM7,%ZMM9 |
(154) 0x41d496 VSUBPD %ZMM7,%ZMM11,%ZMM8 |
(154) 0x41d49c VMULPD %ZMM9,%ZMM8,%ZMM10 |
(154) 0x41d4a2 VCMPPD $0x1,%ZMM10,%ZMM27,%K1 |
(154) 0x41d4a9 KORTESTB %K1,%K1 |
(154) 0x41d4ad VXORPD %XMM10,%XMM10,%XMM10 |
(154) 0x41d4b2 JE 41d1e0 |
(154) 0x41d4b8 VPSLLQ $0x3,%ZMM3,%ZMM3 |
(154) 0x41d4bf VPADDQ 0x100(%RSP),%ZMM3,%ZMM3 |
(154) 0x41d4c7 VPSLLQ $0x3,%ZMM6,%ZMM6 |
(154) 0x41d4ce VPADDQ 0xc0(%RSP),%ZMM6,%ZMM6 |
(154) 0x41d4d6 KMOVQ %K1,%K2 |
(154) 0x41d4db VGATHERQPD (,%ZMM6,1),%ZMM10{%K2} |
(154) 0x41d4e6 VXORPD %XMM6,%XMM6,%XMM6 |
(154) 0x41d4ea KMOVQ %K1,%K2 |
(154) 0x41d4ef VGATHERQPD (,%ZMM3,1),%ZMM6{%K2} |
(154) 0x41d4fa VANDPD %ZMM28,%ZMM5,%ZMM3 |
(154) 0x41d500 VMULPD %ZMM10,%ZMM6,%ZMM6 |
(154) 0x41d506 VDIVPD %ZMM6,%ZMM3,%ZMM3 |
(154) 0x41d50c VCMPPD $0x1,%ZMM8,%ZMM27,%K2 |
(154) 0x41d513 VSUBPD %ZMM3,%ZMM31,%ZMM3 |
(154) 0x41d519 VXORPD %ZMM18,%ZMM3,%ZMM6 |
(154) 0x41d51f VMOVAPD %ZMM3,%ZMM6{%K2} |
(154) 0x41d525 VANDPD %ZMM28,%ZMM9,%ZMM3 |
(154) 0x41d52b VANDPD %ZMM28,%ZMM8,%ZMM8 |
(154) 0x41d531 VMINPD %ZMM8,%ZMM3,%ZMM9 |
(154) 0x41d537 VMULPD %ZMM2,%ZMM3,%ZMM2 |
(154) 0x41d53d VFMADD213PD %ZMM2,%ZMM8,%ZMM4 |
(154) 0x41d543 VMULPD %ZMM29,%ZMM4,%ZMM2 |
(154) 0x41d549 VMINPD %ZMM2,%ZMM9,%ZMM2 |
(154) 0x41d54f VMULPD %ZMM2,%ZMM6,%ZMM10 |
(154) 0x41d555 JMP 41d1e0 |
0x41d55a LEA -0x28(%RBP),%RSP |
0x41d55e POP %RBX |
0x41d55f POP %R12 |
0x41d561 POP %R13 |
0x41d563 POP %R14 |
0x41d565 POP %R15 |
0x41d567 POP %RBP |
0x41d568 RET |
0x41d569 CMP %R13,0x80(%RSP) |
0x41d571 MOV 0x8(%RSP),%R14 |
0x41d576 JNE 41d59c |
0x41d578 MOV $0x48c490,%EDI |
0x41d57d MOV 0x38(%RSP),%ESI |
0x41d581 LEA -0x28(%RBP),%RSP |
0x41d585 POP %RBX |
0x41d586 POP %R12 |
0x41d588 POP %R13 |
0x41d58a POP %R14 |
0x41d58c POP %R15 |
0x41d58e POP %RBP |
0x41d58f VZEROUPPER |
0x41d592 JMP 403050 |
0x41d597 MOV %R10,%R14 |
0x41d59a JMP 41d59f |
0x41d59c ADD %R13,%RSI |
0x41d59f VPXOR %XMM0,%XMM0,%XMM0 |
0x41d5a3 VMOVDDUP 0x4faad(%RIP),%XMM1 |
0x41d5ab VMOVSD 0x4facd(%RIP),%XMM2 |
0x41d5b3 VMOVSD 0x4fa8d(%RIP),%XMM3 |
0x41d5bb VMOVDDUP 0x4facd(%RIP),%XMM4 |
0x41d5c3 VMOVDDUP 0x4fa8d(%RIP),%XMM5 |
0x41d5cb VMOVSD 0x4fab5(%RIP),%XMM6 |
0x41d5d3 JMP 41d610 |
0x41d5d5 NOPW %CS:(%RAX,%RAX,1) |
(153) 0x41d5e0 VADDSD %XMM11,%XMM10,%XMM8 |
(153) 0x41d5e5 VMULSD %XMM7,%XMM8,%XMM7 |
(153) 0x41d5e9 IMUL 0x88(%RSP),%RDX |
(153) 0x41d5f2 ADD %RAX,%RDX |
(153) 0x41d5f5 MOV 0x20(%RSP),%RAX |
(153) 0x41d5fa VMOVSD %XMM7,(%RAX,%RDX,8) |
(153) 0x41d5ff INC %RSI |
(153) 0x41d602 CMP 0x60(%RSP),%RSI |
(153) 0x41d607 MOV %R13,%RBX |
(153) 0x41d60a JG 41d578 |
(153) 0x41d610 MOV %RSI,%R8 |
(153) 0x41d613 SHR $0x20,%R8 |
(153) 0x41d617 JE 41d630 |
(153) 0x41d619 MOV %RSI,%RAX |
(153) 0x41d61c XOR %EDX,%EDX |
(153) 0x41d61e MOV 0x48(%RSP),%R9 |
(153) 0x41d623 DIV %R9 |
(153) 0x41d626 MOV %RAX,%RCX |
(153) 0x41d629 JMP 41d63e |
0x41d62b NOPL (%RAX,%RAX,1) |
(153) 0x41d630 MOV %ESI,%EAX |
(153) 0x41d632 XOR %EDX,%EDX |
(153) 0x41d634 MOV 0x48(%RSP),%R9 |
(153) 0x41d639 DIV %R9D |
(153) 0x41d63c MOV %EAX,%ECX |
(153) 0x41d63e MOV 0x18(%RSP),%R10 |
(153) 0x41d643 MOV 0x40(%RSP),%R11 |
(153) 0x41d648 TEST %R8,%R8 |
(153) 0x41d64b JE 41d660 |
(153) 0x41d64d MOV %RSI,%RAX |
(153) 0x41d650 CQTO |
(153) 0x41d652 IDIV %R9 |
(153) 0x41d655 JMP 41d667 |
0x41d657 NOPW (%RAX,%RAX,1) |
(153) 0x41d660 MOV %ESI,%EAX |
(153) 0x41d662 XOR %EDX,%EDX |
(153) 0x41d664 DIV %R9D |
(153) 0x41d667 ADD 0x14(%RSP),%ECX |
(153) 0x41d66b ADD %R10D,%EDX |
(153) 0x41d66e MOVSXD %EDX,%RAX |
(153) 0x41d671 MOVSXD %ECX,%RDX |
(153) 0x41d674 MOV %R11,%R8 |
(153) 0x41d677 IMUL %RDX,%R8 |
(153) 0x41d67b ADD %RAX,%R8 |
(153) 0x41d67e MOV 0x68(%RSP),%R9 |
(153) 0x41d683 VMOVSD (%R9,%R8,8),%XMM7 |
(153) 0x41d689 VUCOMISD %XMM7,%XMM0 |
(153) 0x41d68d JAE 41d6b0 |
(153) 0x41d68f LEA -0x2(%RCX),%R8D |
(153) 0x41d693 DEC %ECX |
(153) 0x41d695 MOVSXD %ECX,%R10 |
(153) 0x41d698 MOVSXD %R8D,%R8 |
(153) 0x41d69b MOV %RDX,%RCX |
(153) 0x41d69e MOV %R10,%R11 |
(153) 0x41d6a1 JMP 41d6ce |
0x41d6a3 NOPW %CS:(%RAX,%RAX,1) |
(153) 0x41d6b0 LEA 0x1(%RCX),%R8D |
(153) 0x41d6b4 MOV 0x30(%RSP),%R9 |
(153) 0x41d6b9 CMP %R8D,%R9D |
(153) 0x41d6bc CMOVL %R9D,%R8D |
(153) 0x41d6c0 DEC %ECX |
(153) 0x41d6c2 MOVSXD %ECX,%RCX |
(153) 0x41d6c5 MOVSXD %R8D,%R8 |
(153) 0x41d6c8 MOV %R8,%R10 |
(153) 0x41d6cb MOV %RDX,%R11 |
(153) 0x41d6ce VANDPD %XMM1,%XMM7,%XMM8 |
(153) 0x41d6d2 MOV 0x58(%RSP),%R9 |
(153) 0x41d6d7 IMUL %R11,%R9 |
(153) 0x41d6db ADD %RAX,%R9 |
(153) 0x41d6de VDIVSD (%R14,%R9,8),%XMM8,%XMM12 |
(153) 0x41d6e4 VMOVSD (%R12,%RDX,8),%XMM8 |
(153) 0x41d6ea VFMADD213SD %XMM8,%XMM12,%XMM8 |
(153) 0x41d6ef VDIVSD (%R12,%R10,8),%XMM8,%XMM9 |
(153) 0x41d6f5 VSUBSD %XMM12,%XMM2,%XMM8 |
(153) 0x41d6fa MOV %RBX,%R10 |
(153) 0x41d6fd IMUL %R11,%R10 |
(153) 0x41d701 ADD %RAX,%R10 |
(153) 0x41d704 VMOVSD (%RDI,%R10,8),%XMM11 |
(153) 0x41d70a MOV %RBX,%R14 |
(153) 0x41d70d IMUL %R8,%R14 |
(153) 0x41d711 ADD %RAX,%R14 |
(153) 0x41d714 VSUBSD (%RDI,%R14,8),%XMM11,%XMM13 |
(153) 0x41d71a MOV %RBX,%R13 |
(153) 0x41d71d MOV %RBX,%R14 |
(153) 0x41d720 IMUL %RCX,%R14 |
(153) 0x41d724 ADD %RAX,%R14 |
(153) 0x41d727 VMOVSD (%RDI,%R14,8),%XMM10 |
(153) 0x41d72d VSUBSD %XMM11,%XMM10,%XMM14 |
(153) 0x41d732 VMULSD %XMM13,%XMM14,%XMM15 |
(153) 0x41d737 VXORPD %XMM10,%XMM10,%XMM10 |
(153) 0x41d73c VUCOMISD %XMM10,%XMM15 |
(153) 0x41d741 VXORPD %XMM15,%XMM15,%XMM15 |
(153) 0x41d746 JBE 41d783 |
(153) 0x41d748 VSUBSD %XMM12,%XMM3,%XMM12 |
(153) 0x41d74d VXORPD %XMM4,%XMM12,%XMM15 |
(153) 0x41d751 VCMPSD $0x1,%XMM14,%XMM0,%K1 |
(153) 0x41d758 VMOVSD %XMM12,%XMM15,%XMM15{%K1} |
(153) 0x41d75e VANDPD %XMM5,%XMM13,%XMM12 |
(153) 0x41d762 VANDPD %XMM5,%XMM14,%XMM13 |
(153) 0x41d766 VMINSD %XMM13,%XMM12,%XMM14 |
(153) 0x41d76b VMULSD %XMM9,%XMM12,%XMM12 |
(153) 0x41d770 VFMADD231SD %XMM13,%XMM8,%XMM12 |
(153) 0x41d775 VMULSD %XMM6,%XMM12,%XMM12 |
(153) 0x41d779 VMINSD %XMM12,%XMM14,%XMM12 |
(153) 0x41d77e VMULSD %XMM15,%XMM12,%XMM15 |
(153) 0x41d783 VADDSD %XMM11,%XMM15,%XMM11 |
(153) 0x41d788 VMULSD %XMM7,%XMM11,%XMM7 |
(153) 0x41d78c MOV 0x50(%RSP),%R14 |
(153) 0x41d791 IMUL %RDX,%R14 |
(153) 0x41d795 ADD %RAX,%R14 |
(153) 0x41d798 MOV 0x28(%RSP),%RBX |
(153) 0x41d79d VMOVSD %XMM7,(%RBX,%R14,8) |
(153) 0x41d7a3 MOV 0x90(%RSP),%RBX |
(153) 0x41d7ab IMUL %RBX,%R11 |
(153) 0x41d7af ADD %RAX,%R11 |
(153) 0x41d7b2 VMOVSD (%R15,%R11,8),%XMM11 |
(153) 0x41d7b8 IMUL %RBX,%R8 |
(153) 0x41d7bc ADD %RAX,%R8 |
(153) 0x41d7bf VSUBSD (%R15,%R8,8),%XMM11,%XMM12 |
(153) 0x41d7c5 IMUL %RBX,%RCX |
(153) 0x41d7c9 ADD %RAX,%RCX |
(153) 0x41d7cc VMOVSD (%R15,%RCX,8),%XMM13 |
(153) 0x41d7d2 VSUBSD %XMM11,%XMM13,%XMM13 |
(153) 0x41d7d7 VMULSD %XMM12,%XMM13,%XMM14 |
(153) 0x41d7dc VUCOMISD %XMM10,%XMM14 |
(153) 0x41d7e1 MOV 0x8(%RSP),%R14 |
(153) 0x41d7e6 JBE 41d5e0 |
(153) 0x41d7ec VANDPD %XMM5,%XMM7,%XMM10 |
(153) 0x41d7f0 VMOVSD (%R14,%R9,8),%XMM14 |
(153) 0x41d7f6 VMULSD (%RDI,%R10,8),%XMM14,%XMM14 |
(153) 0x41d7fc VDIVSD %XMM14,%XMM10,%XMM10 |
(153) 0x41d801 VSUBSD %XMM10,%XMM3,%XMM10 |
(153) 0x41d806 VXORPD %XMM4,%XMM10,%XMM14 |
(153) 0x41d80a VCMPSD $0x1,%XMM13,%XMM0,%K1 |
(153) 0x41d811 VMOVSD %XMM10,%XMM14,%XMM14{%K1} |
(153) 0x41d817 VANDPD %XMM5,%XMM12,%XMM10 |
(153) 0x41d81b VANDPD %XMM5,%XMM13,%XMM12 |
(153) 0x41d81f VMINSD %XMM12,%XMM10,%XMM13 |
(153) 0x41d824 VMULSD %XMM9,%XMM10,%XMM9 |
(153) 0x41d829 VFMADD213SD %XMM9,%XMM12,%XMM8 |
(153) 0x41d82e VMULSD %XMM6,%XMM8,%XMM8 |
(153) 0x41d832 VMINSD %XMM8,%XMM13,%XMM8 |
(153) 0x41d837 VMULSD %XMM8,%XMM14,%XMM10 |
(153) 0x41d83c JMP 41d5e0 |
0x41d841 NOPW %CS:(%RAX,%RAX,1) |
0x41d84b NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | advec_cell.cpp:157-202 |
Module | exec |
nb instructions | 163 |
nb uops | 165 |
loop length | 784 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 2 |
used zmm registers | 14 |
nb stack references | 33 |
micro-operation queue | 27.50 cycles |
front end | 27.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.60 | 21.00 | 21.00 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.00 |
cycles | 5.50 | 5.60 | 21.00 | 21.00 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 27.32 |
Stall cycles | 0.00 |
Front-end | 27.50 |
Dispatch | 21.00 |
Overall L1 | 27.50 |
all | 9% |
load | 9% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 9% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 9% |
load | 4% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
all | 16% |
load | 19% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 31% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 15% |
load | 16% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 31% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x180,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41d55a <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x5da> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x44(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x80(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x78(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x48c470,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031d0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x78(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41d578 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x5f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x18(%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x60(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R14,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x1(%R9),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R10),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R10),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
TEST $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV %RCX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41d597 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x617> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RDX,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %ECX,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R13,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x50404(%RIP),%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
MOV %R14,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x4fec0(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R10,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x4fe98(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %RDI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x4fec0(%RIP),%ZMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x4febe(%RIP),%ZMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VXORPD %XMM27,%XMM27,%XMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41d21f <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x29f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %R13,0x80(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41d59c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x61c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x48c490,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R10,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 41d59f <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x61f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
ADD %R13,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x4faad(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4facd(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4fa8d(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4facd(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4fa8d(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4fab5(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41d610 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x690> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_cell.cpp:157-202 |
Module | exec |
nb instructions | 163 |
nb uops | 165 |
loop length | 784 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 2 |
used zmm registers | 14 |
nb stack references | 33 |
micro-operation queue | 27.50 cycles |
front end | 27.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.60 | 21.00 | 21.00 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.00 |
cycles | 5.50 | 5.60 | 21.00 | 21.00 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 27.32 |
Stall cycles | 0.00 |
Front-end | 27.50 |
Dispatch | 21.00 |
Overall L1 | 27.50 |
all | 9% |
load | 9% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 9% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 9% |
load | 4% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 9% |
all | 16% |
load | 19% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 31% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 15% |
load | 16% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 31% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x180,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41d55a <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x5da> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x44(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x80(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x78(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x48c470,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031d0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x78(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41d578 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x5f8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x18(%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x60(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R14,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x1(%R9),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R10),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R10),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
TEST $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV %RCX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41d597 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x617> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RDX,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %ECX,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R13,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x50404(%RIP),%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
MOV %R14,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x4fec0(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R10,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x4fe98(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %RDI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x4fec0(%RIP),%ZMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x4febe(%RIP),%ZMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VXORPD %XMM27,%XMM27,%XMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41d21f <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x29f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %R13,0x80(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41d59c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x61c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x48c490,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R10,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 41d59f <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x61f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
ADD %R13,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x4faad(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4facd(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4fa8d(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4facd(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4fa8d(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x4fab5(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41d610 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7+0x690> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.7– | 2.56 | 2.09 |
○Loop 154 - advec_cell.cpp:157-202 - exec | 2.56 | 2.09 |
○Loop 153 - advec_cell.cpp:157-202 - exec | 0 | 0 |