Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:208-217 [...] | Coverage: 0.76% |
---|
Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:208-217 [...] | Coverage: 0.76% |
---|
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 208 - 217 |
-------------------------------------------------------------------------------- |
208: #pragma omp parallel for simd collapse(2) |
209: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
210: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
211: double pre_mass_s = density1(i, j) * pre_vol(i, j); |
212: double post_mass_s = pre_mass_s + mass_flux_y(i, j) - mass_flux_y(i + 0, j + 1); |
213: double post_ener_s = (energy1(i, j) * pre_mass_s + ener_flux(i, j) - ener_flux(i + 0, j + 1)) / post_mass_s; |
214: double advec_vol_s = pre_vol(i, j) + vol_flux_y(i, j) - vol_flux_y(i + 0, j + 1); |
215: density1(i, j) = post_mass_s / advec_vol_s; |
216: energy1(i, j) = post_ener_s; |
217: } |
0x41e020 PUSH %RBP |
0x41e021 MOV %RSP,%RBP |
0x41e024 PUSH %R15 |
0x41e026 PUSH %R14 |
0x41e028 PUSH %R13 |
0x41e02a PUSH %R12 |
0x41e02c PUSH %RBX |
0x41e02d AND $-0x20,%RSP |
0x41e031 SUB $0x120,%RSP |
0x41e038 MOV %RCX,%RSI |
0x41e03b MOV 0x40(%RBP),%RAX |
0x41e03f MOV 0x30(%RBP),%R15 |
0x41e043 MOV 0x28(%RBP),%R11 |
0x41e047 MOV 0x18(%RBP),%RBX |
0x41e04b MOV 0x10(%RBP),%R14 |
0x41e04f MOV 0x20(%RBP),%ECX |
0x41e052 MOV %ECX,0x14(%RSP) |
0x41e056 MOVL $0,0x3c(%RSP) |
0x41e05e TEST %RAX,%RAX |
0x41e061 JS 41e5e2 |
0x41e067 MOV %R8,%R12 |
0x41e06a MOV %RDX,%R13 |
0x41e06d MOV %RSI,0x18(%RSP) |
0x41e072 MOV %R9,0x20(%RSP) |
0x41e077 MOV (%RDI),%ESI |
0x41e079 MOVQ $0,0x58(%RSP) |
0x41e082 MOV %RAX,0x50(%RSP) |
0x41e087 MOVQ $0x1,0x98(%RSP) |
0x41e093 SUB $0x8,%RSP |
0x41e097 LEA 0xa0(%RSP),%RAX |
0x41e09f LEA 0x44(%RSP),%RCX |
0x41e0a4 LEA 0x60(%RSP),%R8 |
0x41e0a9 LEA 0x58(%RSP),%R9 |
0x41e0ae MOV $0x6823f0,%EDI |
0x41e0b3 MOV %ESI,0x40(%RSP) |
0x41e0b7 MOV $0x22,%EDX |
0x41e0bc PUSH $0x1 |
0x41e0be PUSH $0x1 |
0x41e0c0 PUSH %RAX |
0x41e0c1 MOV %R11,0x48(%RSP) |
0x41e0c6 CALL 403020 <__kmpc_for_static_init_8@plt> |
0x41e0cb ADD $0x20,%RSP |
0x41e0cf MOV 0x58(%RSP),%RSI |
0x41e0d4 MOV 0x50(%RSP),%RAX |
0x41e0d9 MOV %RAX,0x48(%RSP) |
0x41e0de CMP %RAX,%RSI |
0x41e0e1 JA 41e5c3 |
0x41e0e7 MOV %R15,%R10 |
0x41e0ea SUB 0x28(%RSP),%R10D |
0x41e0ef MOV (%R13),%RAX |
0x41e0f3 MOV %RAX,0x40(%RSP) |
0x41e0f8 MOV 0x10(%R13),%R13 |
0x41e0fc MOV (%R14),%RDI |
0x41e0ff MOV 0x10(%R14),%RAX |
0x41e103 MOV %RAX,0x30(%RSP) |
0x41e108 MOV (%R12),%R9 |
0x41e10c MOV 0x10(%R12),%R12 |
0x41e111 MOV 0x18(%RSP),%RAX |
0x41e116 MOV (%RAX),%R8 |
0x41e119 MOV 0x10(%RAX),%R15 |
0x41e11d MOV (%RBX),%R11 |
0x41e120 MOV 0x10(%RBX),%R14 |
0x41e124 LEA 0x1(%RSI),%RAX |
0x41e128 MOV 0x48(%RSP),%RCX |
0x41e12d LEA 0x1(%RCX),%RDX |
0x41e131 CMP %RDX,%RAX |
0x41e134 CMOVG %RAX,%RDX |
0x41e138 MOV 0x20(%RSP),%RAX |
0x41e13d MOV (%RAX),%RCX |
0x41e140 MOV 0x10(%RAX),%RBX |
0x41e144 SUB %RSI,%RDX |
0x41e147 MOV $-0x8,%EAX |
0x41e14c MOV %RDX,0x70(%RSP) |
0x41e151 AND %RDX,%RAX |
0x41e154 MOV %RDI,0x20(%RSP) |
0x41e159 MOV %R9,0x18(%RSP) |
0x41e15e MOV %R11,0x88(%RSP) |
0x41e166 MOV %RCX,0x80(%RSP) |
0x41e16e MOV %R8,0x90(%RSP) |
0x41e176 JE 41e6dd |
0x41e17c MOV %RAX,%RDX |
0x41e17f MOV %R10,0x68(%RSP) |
0x41e184 VPBROADCASTQ %R10,%YMM8 |
0x41e18a MOV 0x14(%RSP),%EAX |
0x41e18e VPBROADCASTD %EAX,%YMM0 |
0x41e194 VMOVDQU %YMM0,0xe0(%RSP) |
0x41e19d MOV 0x28(%RSP),%RAX |
0x41e1a2 VPBROADCASTQ %RAX,%YMM0 |
0x41e1a8 VMOVDQU %YMM0,0xc0(%RSP) |
0x41e1b1 MOV 0x40(%RSP),%RAX |
0x41e1b6 VPBROADCASTQ %RAX,%YMM0 |
0x41e1bc VMOVDQU %YMM0,0xa0(%RSP) |
0x41e1c5 VPBROADCASTQ %RDI,%YMM15 |
0x41e1cb VPBROADCASTQ %R9,%YMM16 |
0x41e1d1 VPBROADCASTQ %R8,%YMM17 |
0x41e1d7 VPBROADCASTQ %R11,%YMM18 |
0x41e1dd MOV %RSI,0x60(%RSP) |
0x41e1e2 VPBROADCASTQ %RSI,%YMM0 |
0x41e1e8 VPADDQ 0x48090(%RIP),%YMM0,%YMM9 |
0x41e1f0 VPADDQ 0x47f08(%RIP),%YMM0,%YMM10 |
0x41e1f8 VPBROADCASTQ %RCX,%YMM19 |
0x41e1fe XOR %ESI,%ESI |
0x41e200 MOV %RBX,0x78(%RSP) |
0x41e205 NOPW %CS:(%RAX,%RAX,1) |
(103) 0x41e210 VMOVDQA %YMM10,%YMM0 |
(103) 0x41e214 VMOVDQA %YMM8,%YMM1 |
(103) 0x41e218 MOV %R14,%RBX |
(103) 0x41e21b MOV %R15,%R14 |
(103) 0x41e21e MOV %R12,%R15 |
(103) 0x41e221 MOV %R13,%R12 |
(103) 0x41e224 MOV %RDX,%R13 |
(103) 0x41e227 MOV $0x454690,%RDI |
(103) 0x41e22e CALL %RDI |
(103) 0x41e230 VMOVDQA %YMM0,%YMM11 |
(103) 0x41e234 VMOVDQA %YMM9,%YMM0 |
(103) 0x41e238 VMOVDQA %YMM8,%YMM1 |
(103) 0x41e23c CALL %RDI |
(103) 0x41e23e VPMOVQD %YMM11,%XMM1 |
(103) 0x41e244 VPMOVQD %YMM0,%XMM0 |
(103) 0x41e24a VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(103) 0x41e250 VPADDD 0xe0(%RSP),%YMM0,%YMM21 |
(103) 0x41e258 VMOVDQA %YMM10,%YMM0 |
(103) 0x41e25c VMOVDQA %YMM8,%YMM1 |
(103) 0x41e260 MOV $0x454460,%RDI |
(103) 0x41e267 CALL %RDI |
(103) 0x41e269 VMOVDQA %YMM0,%YMM11 |
(103) 0x41e26d VMOVDQA %YMM9,%YMM0 |
(103) 0x41e271 VMOVDQA %YMM8,%YMM1 |
(103) 0x41e275 CALL %RDI |
(103) 0x41e277 MOV %R13,%RDX |
(103) 0x41e27a MOV %R12,%R13 |
(103) 0x41e27d MOV %R15,%R12 |
(103) 0x41e280 MOV %R14,%R15 |
(103) 0x41e283 MOV %RBX,%R14 |
(103) 0x41e286 MOV 0x78(%RSP),%RBX |
(103) 0x41e28b VEXTRACTI32X4 $0x1,%YMM21,%XMM1 |
(103) 0x41e292 VPMOVSXDQ %XMM1,%YMM3 |
(103) 0x41e297 VMOVDQU 0xa0(%RSP),%YMM1 |
(103) 0x41e2a0 VXORPS %XMM6,%XMM6,%XMM6 |
(103) 0x41e2a4 VPMULLQ %YMM3,%YMM1,%YMM6 |
(103) 0x41e2aa VPMOVSXDQ %XMM21,%YMM24 |
(103) 0x41e2b0 VPMULLQ %YMM24,%YMM1,%YMM1 |
(103) 0x41e2b6 VMOVDQU 0xc0(%RSP),%YMM12 |
(103) 0x41e2bf VPADDQ %YMM12,%YMM11,%YMM2 |
(103) 0x41e2c4 VPSLLQ $0x20,%YMM2,%YMM2 |
(103) 0x41e2c9 VPSRAQ $0x20,%YMM2,%YMM11 |
(103) 0x41e2d0 VXORPS %XMM4,%XMM4,%XMM4 |
(103) 0x41e2d4 VPMULLQ %YMM24,%YMM15,%YMM4 |
(103) 0x41e2da VPADDQ %YMM1,%YMM11,%YMM1 |
(103) 0x41e2de KXNORW %K0,%K0,%K2 |
(103) 0x41e2e2 VPXOR %XMM2,%XMM2,%XMM2 |
(103) 0x41e2e6 VPMULLQ %YMM24,%YMM16,%YMM5 |
(103) 0x41e2ec VPADDQ %YMM4,%YMM11,%YMM7 |
(103) 0x41e2f0 KXNORW %K0,%K0,%K3 |
(103) 0x41e2f4 VPXOR %XMM4,%XMM4,%XMM4 |
(103) 0x41e2f8 VPADDQ %YMM11,%YMM5,%YMM23 |
(103) 0x41e2fe KXNORW %K0,%K0,%K1 |
(103) 0x41e302 VPXOR %XMM5,%XMM5,%XMM5 |
(103) 0x41e306 VGATHERQPD (%R13,%YMM1,8),%YMM2{%K2} |
(103) 0x41e30e VPCMPEQD %YMM13,%YMM13,%YMM13 |
(103) 0x41e313 VPSUBD %YMM13,%YMM21,%YMM22 |
(103) 0x41e319 VPMOVSXDQ %XMM22,%YMM25 |
(103) 0x41e31f VPMULLQ %YMM25,%YMM16,%YMM26 |
(103) 0x41e325 MOV 0x30(%RSP),%RCX |
(103) 0x41e32a VGATHERQPD (%RCX,%YMM7,8),%YMM4{%K3} |
(103) 0x41e331 VXORPS %XMM7,%XMM7,%XMM7 |
(103) 0x41e335 VPMULLQ %YMM24,%YMM17,%YMM7 |
(103) 0x41e33b VPADDQ %YMM7,%YMM11,%YMM7 |
(103) 0x41e33f KXNORW %K0,%K0,%K2 |
(103) 0x41e343 VGATHERQPD (%R12,%YMM23,8),%YMM5{%K1} |
(103) 0x41e34a VPXORD %XMM21,%XMM21,%XMM21 |
(103) 0x41e350 VPXORD %XMM23,%XMM23,%XMM23 |
(103) 0x41e356 VPMULLQ %YMM24,%YMM18,%YMM23 |
(103) 0x41e35c VPADDQ %YMM11,%YMM23,%YMM27 |
(103) 0x41e362 VGATHERQPD (%R15,%YMM7,8),%YMM21{%K2} |
(103) 0x41e369 KXNORW %K0,%K0,%K1 |
(103) 0x41e36d VPXORD %XMM23,%XMM23,%XMM23 |
(103) 0x41e373 VPMULLQ %YMM24,%YMM19,%YMM24 |
(103) 0x41e379 VGATHERQPD (%R14,%YMM27,8),%YMM23{%K1} |
(103) 0x41e380 VPADDQ %YMM11,%YMM26,%YMM26 |
(103) 0x41e386 KXNORW %K0,%K0,%K1 |
(103) 0x41e38a VPADDQ %YMM11,%YMM24,%YMM27 |
(103) 0x41e390 KXNORW %K0,%K0,%K2 |
(103) 0x41e394 VPXORD %XMM24,%XMM24,%XMM24 |
(103) 0x41e39a VGATHERQPD (%RBX,%YMM27,8),%YMM24{%K2} |
(103) 0x41e3a1 VXORPD %XMM27,%XMM27,%XMM27 |
(103) 0x41e3a7 VPMULLQ %YMM25,%YMM18,%YMM28 |
(103) 0x41e3ad VPADDQ %YMM11,%YMM28,%YMM28 |
(103) 0x41e3b3 VGATHERQPD (%R12,%YMM26,8),%YMM27{%K1} |
(103) 0x41e3ba KXNORW %K0,%K0,%K1 |
(103) 0x41e3be VXORPD %XMM26,%XMM26,%XMM26 |
(103) 0x41e3c4 VPMULLQ %YMM25,%YMM19,%YMM25 |
(103) 0x41e3ca VGATHERQPD (%R14,%YMM28,8),%YMM26{%K1} |
(103) 0x41e3d1 VPADDQ %YMM0,%YMM12,%YMM0 |
(103) 0x41e3d5 VPSLLQ $0x20,%YMM0,%YMM0 |
(103) 0x41e3da VPADDQ %YMM11,%YMM25,%YMM11 |
(103) 0x41e3e0 KXNORW %K0,%K0,%K1 |
(103) 0x41e3e4 VPXORD %XMM25,%XMM25,%XMM25 |
(103) 0x41e3ea VGATHERQPD (%RBX,%YMM11,8),%YMM25{%K1} |
(103) 0x41e3f1 VPSRAQ $0x20,%YMM0,%YMM11 |
(103) 0x41e3f8 KXNORW %K0,%K0,%K1 |
(103) 0x41e3fc VPXORD %XMM28,%XMM28,%XMM28 |
(103) 0x41e402 VPMULLQ %YMM3,%YMM15,%YMM28 |
(103) 0x41e408 VPADDQ %YMM6,%YMM11,%YMM0 |
(103) 0x41e40c VPXOR %XMM6,%XMM6,%XMM6 |
(103) 0x41e410 VPADDQ %YMM11,%YMM28,%YMM28 |
(103) 0x41e416 VPMULLQ %YMM3,%YMM16,%YMM29 |
(103) 0x41e41c VGATHERQPD (%R13,%YMM0,8),%YMM6{%K1} |
(103) 0x41e424 KXNORW %K0,%K0,%K1 |
(103) 0x41e428 VXORPD %XMM30,%XMM30,%XMM30 |
(103) 0x41e42e VPADDQ %YMM11,%YMM29,%YMM29 |
(103) 0x41e434 KXNORW %K0,%K0,%K2 |
(103) 0x41e438 VXORPD %XMM31,%XMM31,%XMM31 |
(103) 0x41e43e VGATHERQPD (%RCX,%YMM28,8),%YMM30{%K1} |
(103) 0x41e445 VPXORD %XMM28,%XMM28,%XMM28 |
(103) 0x41e44b VPMULLQ %YMM3,%YMM17,%YMM28 |
(103) 0x41e451 VPADDQ %YMM11,%YMM28,%YMM28 |
(103) 0x41e457 KXNORW %K0,%K0,%K1 |
(103) 0x41e45b VGATHERQPD (%R12,%YMM29,8),%YMM31{%K2} |
(103) 0x41e462 VXORPD %XMM29,%XMM29,%XMM29 |
(103) 0x41e468 VPMULLQ %YMM3,%YMM18,%YMM12 |
(103) 0x41e46e VPADDQ %YMM11,%YMM12,%YMM12 |
(103) 0x41e473 VGATHERQPD (%R15,%YMM28,8),%YMM29{%K1} |
(103) 0x41e47a KXNORW %K0,%K0,%K1 |
(103) 0x41e47e VXORPD %XMM20,%XMM20,%XMM20 |
(103) 0x41e484 VPMULLQ %YMM3,%YMM19,%YMM3 |
(103) 0x41e48a VGATHERQPD (%R14,%YMM12,8),%YMM20{%K1} |
(103) 0x41e491 VEXTRACTI32X4 $0x1,%YMM22,%XMM12 |
(103) 0x41e498 VPADDQ %YMM3,%YMM11,%YMM3 |
(103) 0x41e49c KXNORW %K0,%K0,%K1 |
(103) 0x41e4a0 VPXORD %XMM22,%XMM22,%XMM22 |
(103) 0x41e4a6 VGATHERQPD (%RBX,%YMM3,8),%YMM22{%K1} |
(103) 0x41e4ad VPMOVSXDQ %XMM12,%YMM3 |
(103) 0x41e4b2 VXORPS %XMM12,%XMM12,%XMM12 |
(103) 0x41e4b7 VPMULLQ %YMM3,%YMM16,%YMM12 |
(103) 0x41e4bd KXNORW %K0,%K0,%K1 |
(103) 0x41e4c1 VPADDQ %YMM11,%YMM12,%YMM12 |
(103) 0x41e4c6 VPXOR %XMM13,%XMM13,%XMM13 |
(103) 0x41e4cb VPMULLQ %YMM3,%YMM18,%YMM14 |
(103) 0x41e4d1 VPADDQ %YMM11,%YMM14,%YMM14 |
(103) 0x41e4d6 VGATHERQPD (%R12,%YMM12,8),%YMM13{%K1} |
(103) 0x41e4dd KXNORW %K0,%K0,%K1 |
(103) 0x41e4e1 VXORPD %XMM12,%XMM12,%XMM12 |
(103) 0x41e4e6 VPMULLQ %YMM3,%YMM19,%YMM3 |
(103) 0x41e4ec VGATHERQPD (%R14,%YMM14,8),%YMM12{%K1} |
(103) 0x41e4f3 VPADDQ %YMM3,%YMM11,%YMM3 |
(103) 0x41e4f7 KXNORW %K0,%K0,%K1 |
(103) 0x41e4fb VPXOR %XMM11,%XMM11,%XMM11 |
(103) 0x41e500 VGATHERQPD (%RBX,%YMM3,8),%YMM11{%K1} |
(103) 0x41e507 VMULPD %YMM2,%YMM4,%YMM2 |
(103) 0x41e50b VADDPD %YMM2,%YMM5,%YMM3 |
(103) 0x41e50f VSUBPD %YMM27,%YMM3,%YMM3 |
(103) 0x41e515 VMULPD %YMM6,%YMM30,%YMM5 |
(103) 0x41e51b VADDPD %YMM5,%YMM31,%YMM6 |
(103) 0x41e521 VSUBPD %YMM13,%YMM6,%YMM6 |
(103) 0x41e526 VFMADD231PD %YMM29,%YMM5,%YMM20 |
(103) 0x41e52c VFMADD231PD %YMM21,%YMM2,%YMM23 |
(103) 0x41e532 VSUBPD %YMM12,%YMM20,%YMM2 |
(103) 0x41e538 VSUBPD %YMM26,%YMM23,%YMM5 |
(103) 0x41e53e VADDPD %YMM30,%YMM22,%YMM12 |
(103) 0x41e544 VDIVPD %YMM3,%YMM5,%YMM5 |
(103) 0x41e548 VADDPD %YMM4,%YMM24,%YMM4 |
(103) 0x41e54e VSUBPD %YMM11,%YMM12,%YMM11 |
(103) 0x41e553 VSUBPD %YMM25,%YMM4,%YMM4 |
(103) 0x41e559 VDIVPD %YMM11,%YMM6,%YMM11 |
(103) 0x41e55e VDIVPD %YMM4,%YMM3,%YMM3 |
(103) 0x41e562 VDIVPD %YMM6,%YMM2,%YMM2 |
(103) 0x41e566 KXNORW %K0,%K0,%K1 |
(103) 0x41e56a VSCATTERQPD %YMM3,(%R13,%YMM1,8){%K1} |
(103) 0x41e572 KXNORW %K0,%K0,%K1 |
(103) 0x41e576 VSCATTERQPD %YMM11,(%R13,%YMM0,8){%K1} |
(103) 0x41e57e KXNORW %K0,%K0,%K1 |
(103) 0x41e582 VSCATTERQPD %YMM5,(%R15,%YMM7,8){%K1} |
(103) 0x41e589 KXNORW %K0,%K0,%K1 |
(103) 0x41e58d VSCATTERQPD %YMM2,(%R15,%YMM28,8){%K1} |
(103) 0x41e594 VPBROADCASTQ 0x47d03(%RIP),%YMM0 |
(103) 0x41e59d VPADDQ %YMM0,%YMM10,%YMM10 |
(103) 0x41e5a1 VPADDQ %YMM0,%YMM9,%YMM9 |
(103) 0x41e5a5 ADD $0x8,%RSI |
(103) 0x41e5a9 CMP %RDX,%RSI |
(103) 0x41e5ac JB 41e210 |
0x41e5b2 CMP %RDX,0x70(%RSP) |
0x41e5b7 MOV 0x68(%RSP),%R10 |
0x41e5bc MOV 0x60(%RSP),%RSI |
0x41e5c1 JNE 41e5f1 |
0x41e5c3 MOV $0x682410,%EDI |
0x41e5c8 MOV 0x38(%RSP),%ESI |
0x41e5cc LEA -0x28(%RBP),%RSP |
0x41e5d0 POP %RBX |
0x41e5d1 POP %R12 |
0x41e5d3 POP %R13 |
0x41e5d5 POP %R14 |
0x41e5d7 POP %R15 |
0x41e5d9 POP %RBP |
0x41e5da VZEROUPPER |
0x41e5dd JMP 402e90 |
0x41e5e2 LEA -0x28(%RBP),%RSP |
0x41e5e6 POP %RBX |
0x41e5e7 POP %R12 |
0x41e5e9 POP %R13 |
0x41e5eb POP %R14 |
0x41e5ed POP %R15 |
0x41e5ef POP %RBP |
0x41e5f0 RET |
0x41e5f1 ADD %RDX,%RSI |
0x41e5f4 JMP 41e6dd |
0x41e5f9 NOPL (%RAX) |
(102) 0x41e600 MOV %RSI,%RAX |
(102) 0x41e603 CQTO |
(102) 0x41e605 IDIV %R10 |
(102) 0x41e608 MOV 0x20(%RSP),%RDI |
(102) 0x41e60d ADD 0x14(%RSP),%ECX |
(102) 0x41e611 ADD %R11D,%EDX |
(102) 0x41e614 MOVSXD %EDX,%RDX |
(102) 0x41e617 MOVSXD %ECX,%RCX |
(102) 0x41e61a MOV 0x40(%RSP),%RAX |
(102) 0x41e61f IMUL %RCX,%RAX |
(102) 0x41e623 ADD %RDX,%RAX |
(102) 0x41e626 IMUL %RCX,%RDI |
(102) 0x41e62a ADD %RDX,%RDI |
(102) 0x41e62d VMOVSD (%R8,%RDI,8),%XMM0 |
(102) 0x41e633 VMULSD (%R13,%RAX,8),%XMM0,%XMM1 |
(102) 0x41e63a MOV %R9,%RDI |
(102) 0x41e63d IMUL %RCX,%RDI |
(102) 0x41e641 ADD %RDX,%RDI |
(102) 0x41e644 VADDSD (%R12,%RDI,8),%XMM1,%XMM2 |
(102) 0x41e64a LEA 0x1(%RCX),%EDI |
(102) 0x41e64d MOVSXD %EDI,%RDI |
(102) 0x41e650 MOV %R9,%R8 |
(102) 0x41e653 IMUL %RDI,%R8 |
(102) 0x41e657 ADD %RDX,%R8 |
(102) 0x41e65a VSUBSD (%R12,%R8,8),%XMM2,%XMM2 |
(102) 0x41e660 MOV 0x90(%RSP),%R8 |
(102) 0x41e668 IMUL %RCX,%R8 |
(102) 0x41e66c ADD %RDX,%R8 |
(102) 0x41e66f VMOVSD (%R15,%R8,8),%XMM3 |
(102) 0x41e675 MOV 0x88(%RSP),%R11 |
(102) 0x41e67d MOV %R11,%R9 |
(102) 0x41e680 IMUL %RCX,%R9 |
(102) 0x41e684 ADD %RDX,%R9 |
(102) 0x41e687 VFMADD213SD (%R14,%R9,8),%XMM1,%XMM3 |
(102) 0x41e68d IMUL %RDI,%R11 |
(102) 0x41e691 ADD %RDX,%R11 |
(102) 0x41e694 VSUBSD (%R14,%R11,8),%XMM3,%XMM1 |
(102) 0x41e69a MOV 0x80(%RSP),%R9 |
(102) 0x41e6a2 IMUL %R9,%RCX |
(102) 0x41e6a6 ADD %RDX,%RCX |
(102) 0x41e6a9 VADDSD (%RBX,%RCX,8),%XMM0,%XMM0 |
(102) 0x41e6ae IMUL %R9,%RDI |
(102) 0x41e6b2 ADD %RDX,%RDI |
(102) 0x41e6b5 VSUBSD (%RBX,%RDI,8),%XMM0,%XMM0 |
(102) 0x41e6ba VDIVSD %XMM0,%XMM2,%XMM0 |
(102) 0x41e6be VMOVSD %XMM0,(%R13,%RAX,8) |
(102) 0x41e6c5 VDIVSD %XMM2,%XMM1,%XMM0 |
(102) 0x41e6c9 VMOVSD %XMM0,(%R15,%R8,8) |
(102) 0x41e6cf INC %RSI |
(102) 0x41e6d2 CMP 0x48(%RSP),%RSI |
(102) 0x41e6d7 JG 41e5c3 |
(102) 0x41e6dd MOV %RSI,%RDI |
(102) 0x41e6e0 SHR $0x20,%RDI |
(102) 0x41e6e4 JE 41e700 |
(102) 0x41e6e6 MOV %RSI,%RAX |
(102) 0x41e6e9 XOR %EDX,%EDX |
(102) 0x41e6eb DIV %R10 |
(102) 0x41e6ee MOV %RAX,%RCX |
(102) 0x41e6f1 JMP 41e709 |
0x41e6f3 NOPW %CS:(%RAX,%RAX,1) |
(102) 0x41e700 MOV %ESI,%EAX |
(102) 0x41e702 XOR %EDX,%EDX |
(102) 0x41e704 DIV %R10D |
(102) 0x41e707 MOV %EAX,%ECX |
(102) 0x41e709 MOV 0x30(%RSP),%R8 |
(102) 0x41e70e MOV 0x18(%RSP),%R9 |
(102) 0x41e713 MOV 0x28(%RSP),%R11 |
(102) 0x41e718 TEST %RDI,%RDI |
(102) 0x41e71b JNE 41e600 |
(102) 0x41e721 MOV %ESI,%EAX |
(102) 0x41e723 XOR %EDX,%EDX |
(102) 0x41e725 DIV %R10D |
(102) 0x41e728 JMP 41e608 |
0x41e72d NOPL (%RAX) |
Path / |
Source file and lines | advec_cell.cpp:208-217 |
Module | exec |
nb instructions | 132 |
nb uops | 134 |
loop length | 590 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 9 |
used zmm registers | 0 |
nb stack references | 30 |
micro-operation queue | 22.33 cycles |
front end | 22.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 15.67 | 15.67 | 17.00 | 10.00 | 5.40 | 17.00 | 17.00 | 17.00 | 5.40 | 15.67 |
cycles | 5.60 | 5.60 | 15.67 | 15.67 | 17.00 | 10.00 | 5.40 | 17.00 | 17.00 | 17.00 | 5.40 | 15.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.14 |
Stall cycles | 0.00 |
Front-end | 22.33 |
Dispatch | 17.00 |
Overall L1 | 22.33 |
all | 12% |
load | 18% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
all | 15% |
load | 18% |
store | 15% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 35% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x120,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %ECX,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41e5e2 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x5c2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x44(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x58(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6823f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R11,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41e5c3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x5a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x28(%RSP),%R10D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x48(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41e6dd <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x6bd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R10,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDI,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x48090(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x47f08(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPBROADCASTQ %RCX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,0x70(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41e5f1 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x5d1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x682410,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 41e6dd <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x6bd> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_cell.cpp:208-217 |
Module | exec |
nb instructions | 132 |
nb uops | 134 |
loop length | 590 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 9 |
used zmm registers | 0 |
nb stack references | 30 |
micro-operation queue | 22.33 cycles |
front end | 22.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 15.67 | 15.67 | 17.00 | 10.00 | 5.40 | 17.00 | 17.00 | 17.00 | 5.40 | 15.67 |
cycles | 5.60 | 5.60 | 15.67 | 15.67 | 17.00 | 10.00 | 5.40 | 17.00 | 17.00 | 17.00 | 5.40 | 15.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.14 |
Stall cycles | 0.00 |
Front-end | 22.33 |
Dispatch | 17.00 |
Overall L1 | 22.33 |
all | 12% |
load | 18% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
all | 15% |
load | 18% |
store | 15% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 35% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x120,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %ECX,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41e5e2 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x5c2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x44(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x58(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6823f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R11,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41e5c3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x5a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x28(%RSP),%R10D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x48(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41e6dd <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x6bd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R10,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x14(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDI,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x48090(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x47f08(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPBROADCASTQ %RCX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,0x70(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41e5f1 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x5d1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x682410,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 41e6dd <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted+0x6bd> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 0.76 | 1.21 |
○Loop 103 - advec_cell.cpp:209-217 - exec | 0.76 | 1.2 |
○Loop 102 - advec_cell.cpp:209-217 - exec | 0 | 0 |