Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:218-221 [...] | Coverage: 3.45% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:218-221 [...] | Coverage: 3.45% |
---|
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 218 - 221 |
-------------------------------------------------------------------------------- |
218: #pragma omp parallel for simd collapse(2) |
219: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
220: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
221: vel1(i, j) = (vel1(i, j) * node_mass_pre(i, j) + mom_flux(i + 0, j - 1) - mom_flux(i, j)) / node_mass_post(i, j); |
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x420e10 PUSH %RBP |
0x420e11 MOV %RSP,%RBP |
0x420e14 PUSH %R15 |
0x420e16 PUSH %R14 |
0x420e18 PUSH %R13 |
0x420e1a PUSH %R12 |
0x420e1c PUSH %RBX |
0x420e1d AND $-0x40,%RSP |
0x420e21 ADD $-0x80,%RSP |
0x420e25 MOV 0x28(%RDI),%EAX |
0x420e28 MOV 0x2c(%RDI),%EDX |
0x420e2b MOV 0x20(%RDI),%ESI |
0x420e2e MOV 0x24(%RDI),%EBX |
0x420e31 ADD $0x3,%EDX |
0x420e34 LEA 0x1(%RAX),%R15D |
0x420e38 INC %ESI |
0x420e3a MOV %EDX,0x3c(%RSP) |
0x420e3e MOV %ESI,0x38(%RSP) |
0x420e42 CMP %EDX,%R15D |
0x420e45 JGE 421313 |
0x420e4b LEA 0x3(%RBX),%R14D |
0x420e4f MOV %EDX,%EBX |
0x420e51 SUB %R15D,%EBX |
0x420e54 CMP %R14D,%ESI |
0x420e57 JGE 421313 |
0x420e5d MOV %R14D,%ECX |
0x420e60 MOV %RDI,%R12 |
0x420e63 SUB %ESI,%ECX |
0x420e65 MOV %ECX,0x58(%RSP) |
0x420e69 CALL 4046c0 <omp_get_num_threads@plt> |
0x420e6e MOV %EAX,%R13D |
0x420e71 CALL 4045b0 <omp_get_thread_num@plt> |
0x420e76 XOR %EDX,%EDX |
0x420e78 MOV %EAX,%EDI |
0x420e7a MOV 0x58(%RSP),%EAX |
0x420e7e IMUL %EBX,%EAX |
0x420e81 DIV %R13D |
0x420e84 MOV %EAX,%ECX |
0x420e86 CMP %EDX,%EDI |
0x420e88 JB 421333 |
0x420e8e IMUL %ECX,%EDI |
0x420e91 LEA (%RDI,%RDX,1),%R11D |
0x420e95 LEA (%RCX,%R11,1),%R8D |
0x420e99 MOV %R8D,0x34(%RSP) |
0x420e9e CMP %R8D,%R11D |
0x420ea1 JAE 421313 |
0x420ea7 MOV %R11D,%EAX |
0x420eaa XOR %EDX,%EDX |
0x420eac MOV 0x38(%RSP),%R9D |
0x420eb1 MOV (%R12),%R10 |
0x420eb5 DIVL 0x58(%RSP) |
0x420eb9 MOV 0x18(%R12),%RSI |
0x420ebe MOV %R10,0x28(%RSP) |
0x420ec3 MOV %RSI,0x18(%RSP) |
0x420ec8 ADD %EDX,%R9D |
0x420ecb ADD %R15D,%EAX |
0x420ece MOV %R14D,%EDX |
0x420ed1 MOV 0x10(%R12),%R15 |
0x420ed6 MOV 0x8(%R12),%R14 |
0x420edb MOV %R9D,0x74(%RSP) |
0x420ee0 SUB %R9D,%EDX |
0x420ee3 MOVSXD %EAX,%R12 |
0x420ee6 MOV %EAX,0x70(%RSP) |
0x420eea MOV %R15,0x20(%RSP) |
0x420eef MOV %R14,0x10(%RSP) |
0x420ef4 NOPL (%RAX) |
(138) 0x420ef8 CMP %EDX,%ECX |
(138) 0x420efa CMOVBE %ECX,%EDX |
(138) 0x420efd LEA (%R11,%RDX,1),%EBX |
(138) 0x420f01 MOV %EBX,0x5c(%RSP) |
(138) 0x420f05 CMP %EBX,%R11D |
(138) 0x420f08 JAE 4212e4 |
(138) 0x420f0e MOV 0x20(%RSP),%RDI |
(138) 0x420f13 MOV 0x28(%RSP),%RCX |
(138) 0x420f18 MOV 0x18(%RSP),%R8 |
(138) 0x420f1d MOV 0x70(%RSP),%EAX |
(138) 0x420f21 MOV (%RDI),%RBX |
(138) 0x420f24 MOV 0x10(%RDI),%R14 |
(138) 0x420f28 MOV 0x10(%RSP),%RDI |
(138) 0x420f2d MOV (%R8),%R9 |
(138) 0x420f30 DEC %EAX |
(138) 0x420f32 MOV (%RCX),%R13 |
(138) 0x420f35 MOV 0x10(%R8),%RSI |
(138) 0x420f39 MOVSXD %EAX,%R10 |
(138) 0x420f3c IMUL %R12,%RBX |
(138) 0x420f40 MOV (%RDI),%R8 |
(138) 0x420f43 IMUL %R9,%R10 |
(138) 0x420f47 MOV 0x10(%RCX),%R15 |
(138) 0x420f4b LEA -0x1(%RDX),%EAX |
(138) 0x420f4e IMUL %R12,%R13 |
(138) 0x420f52 MOV 0x10(%RDI),%RCX |
(138) 0x420f56 IMUL %R12,%R9 |
(138) 0x420f5a MOV %RBX,0x48(%RSP) |
(138) 0x420f5f IMUL %R12,%R8 |
(138) 0x420f63 MOV %R10,0x50(%RSP) |
(138) 0x420f68 MOV %R13,0x40(%RSP) |
(138) 0x420f6d MOV %R9,0x68(%RSP) |
(138) 0x420f72 MOV %RCX,0x78(%RSP) |
(138) 0x420f77 MOV %R8,0x60(%RSP) |
(138) 0x420f7c CMP $0x6,%EAX |
(138) 0x420f7f JBE 421328 |
(138) 0x420f85 MOVSXD 0x74(%RSP),%RAX |
(138) 0x420f8a LEA (%RAX,%R13,1),%R13 |
(138) 0x420f8e ADD %RAX,%RBX |
(138) 0x420f91 LEA (%RAX,%R9,1),%R9 |
(138) 0x420f95 LEA (%R15,%R13,8),%RCX |
(138) 0x420f99 LEA (%R14,%RBX,8),%R13 |
(138) 0x420f9d LEA (%RAX,%R10,1),%RBX |
(138) 0x420fa1 ADD %R8,%RAX |
(138) 0x420fa4 MOV 0x78(%RSP),%R8 |
(138) 0x420fa9 LEA (%RSI,%RBX,8),%R10 |
(138) 0x420fad LEA (%RSI,%R9,8),%RBX |
(138) 0x420fb1 LEA (%R8,%RAX,8),%R9 |
(138) 0x420fb5 MOV %EDX,%R8D |
(138) 0x420fb8 XOR %EAX,%EAX |
(138) 0x420fba SHR $0x3,%R8D |
(138) 0x420fbe SAL $0x6,%R8 |
(138) 0x420fc2 LEA -0x40(%R8),%RDI |
(138) 0x420fc6 SHR $0x6,%RDI |
(138) 0x420fca INC %RDI |
(138) 0x420fcd AND $0x3,%EDI |
(138) 0x420fd0 JE 421073 |
(138) 0x420fd6 CMP $0x1,%RDI |
(138) 0x420fda JE 42103b |
(138) 0x420fdc CMP $0x2,%RDI |
(138) 0x420fe0 JE 42100c |
(138) 0x420fe2 VMOVUPD (%RCX),%ZMM0 |
(138) 0x420fe8 VMOVUPD (%RBX),%ZMM3 |
(138) 0x420fee MOV $0x40,%EAX |
(138) 0x420ff3 VFMSUB132PD (%R13),%ZMM3,%ZMM0 |
(138) 0x420ffa VADDPD (%R10),%ZMM0,%ZMM1 |
(138) 0x421000 VDIVPD (%R9),%ZMM1,%ZMM2 |
(138) 0x421006 VMOVUPD %ZMM2,(%RCX) |
(138) 0x42100c VMOVUPD (%RCX,%RAX,1),%ZMM4 |
(138) 0x421013 VMOVUPD (%RBX,%RAX,1),%ZMM5 |
(138) 0x42101a VFMSUB132PD (%R13,%RAX,1),%ZMM5,%ZMM4 |
(138) 0x421022 VADDPD (%R10,%RAX,1),%ZMM4,%ZMM6 |
(138) 0x421029 VDIVPD (%R9,%RAX,1),%ZMM6,%ZMM7 |
(138) 0x421030 VMOVUPD %ZMM7,(%RCX,%RAX,1) |
(138) 0x421037 ADD $0x40,%RAX |
(138) 0x42103b VMOVUPD (%RCX,%RAX,1),%ZMM8 |
(138) 0x421042 VMOVUPD (%RBX,%RAX,1),%ZMM9 |
(138) 0x421049 VFMSUB132PD (%R13,%RAX,1),%ZMM9,%ZMM8 |
(138) 0x421051 VADDPD (%R10,%RAX,1),%ZMM8,%ZMM10 |
(138) 0x421058 VDIVPD (%R9,%RAX,1),%ZMM10,%ZMM11 |
(138) 0x42105f VMOVUPD %ZMM11,(%RCX,%RAX,1) |
(138) 0x421066 ADD $0x40,%RAX |
(138) 0x42106a CMP %RAX,%R8 |
(138) 0x42106d JE 42113d |
(139) 0x421073 VMOVUPD (%RBX,%RAX,1),%ZMM13 |
(139) 0x42107a VMOVUPD (%RCX,%RAX,1),%ZMM12 |
(139) 0x421081 VMOVUPD 0x40(%RCX,%RAX,1),%ZMM0 |
(139) 0x421089 VMOVUPD 0x80(%RCX,%RAX,1),%ZMM5 |
(139) 0x421091 VFMSUB132PD (%R13,%RAX,1),%ZMM13,%ZMM12 |
(139) 0x421099 VMOVUPD 0xc0(%RCX,%RAX,1),%ZMM8 |
(139) 0x4210a1 VADDPD (%R10,%RAX,1),%ZMM12,%ZMM14 |
(139) 0x4210a8 VDIVPD (%R9,%RAX,1),%ZMM14,%ZMM15 |
(139) 0x4210af VMOVUPD %ZMM15,(%RCX,%RAX,1) |
(139) 0x4210b6 VMOVUPD 0x40(%RBX,%RAX,1),%ZMM3 |
(139) 0x4210be VFMSUB132PD 0x40(%R13,%RAX,1),%ZMM3,%ZMM0 |
(139) 0x4210c6 VADDPD 0x40(%R10,%RAX,1),%ZMM0,%ZMM1 |
(139) 0x4210ce VDIVPD 0x40(%R9,%RAX,1),%ZMM1,%ZMM2 |
(139) 0x4210d6 VMOVUPD %ZMM2,0x40(%RCX,%RAX,1) |
(139) 0x4210de VMOVUPD 0x80(%RBX,%RAX,1),%ZMM4 |
(139) 0x4210e6 VFMSUB132PD 0x80(%R13,%RAX,1),%ZMM4,%ZMM5 |
(139) 0x4210ee VADDPD 0x80(%R10,%RAX,1),%ZMM5,%ZMM6 |
(139) 0x4210f6 VDIVPD 0x80(%R9,%RAX,1),%ZMM6,%ZMM7 |
(139) 0x4210fe VMOVUPD %ZMM7,0x80(%RCX,%RAX,1) |
(139) 0x421106 VMOVUPD 0xc0(%RBX,%RAX,1),%ZMM9 |
(139) 0x42110e VFMSUB132PD 0xc0(%R13,%RAX,1),%ZMM9,%ZMM8 |
(139) 0x421116 VADDPD 0xc0(%R10,%RAX,1),%ZMM8,%ZMM10 |
(139) 0x42111e VDIVPD 0xc0(%R9,%RAX,1),%ZMM10,%ZMM11 |
(139) 0x421126 VMOVUPD %ZMM11,0xc0(%RCX,%RAX,1) |
(139) 0x42112e ADD $0x100,%RAX |
(139) 0x421134 CMP %RAX,%R8 |
(139) 0x421137 JNE 421073 |
(138) 0x42113d MOV 0x74(%RSP),%R13D |
(138) 0x421142 MOV %EDX,%ECX |
(138) 0x421144 AND $-0x8,%ECX |
(138) 0x421147 ADD %ECX,%R11D |
(138) 0x42114a LEA (%RCX,%R13,1),%EDI |
(138) 0x42114e TEST $0x7,%DL |
(138) 0x421151 JE 4212df |
(138) 0x421157 SUB %ECX,%EDX |
(138) 0x421159 LEA -0x1(%RDX),%R10D |
(138) 0x42115d CMP $0x2,%R10D |
(138) 0x421161 JBE 4211da |
(138) 0x421163 MOVSXD 0x74(%RSP),%RAX |
(138) 0x421168 MOV 0x40(%RSP),%RBX |
(138) 0x42116d MOV 0x60(%RSP),%R8 |
(138) 0x421172 MOV 0x48(%RSP),%R10 |
(138) 0x421177 LEA (%RBX,%RAX,1),%R9 |
(138) 0x42117b MOV 0x68(%RSP),%R13 |
(138) 0x421180 ADD %RCX,%R9 |
(138) 0x421183 LEA (%R8,%RAX,1),%R8 |
(138) 0x421187 ADD %RAX,%R10 |
(138) 0x42118a LEA (%R15,%R9,8),%RBX |
(138) 0x42118e MOV 0x50(%RSP),%R9 |
(138) 0x421193 ADD %RCX,%R10 |
(138) 0x421196 ADD %RCX,%R8 |
(138) 0x421199 VMOVUPD (%RBX),%YMM12 |
(138) 0x42119d ADD %RAX,%R9 |
(138) 0x4211a0 ADD %R13,%RAX |
(138) 0x4211a3 ADD %RCX,%RAX |
(138) 0x4211a6 ADD %RCX,%R9 |
(138) 0x4211a9 MOV 0x78(%RSP),%RCX |
(138) 0x4211ae VMOVUPD (%RSI,%RAX,8),%YMM13 |
(138) 0x4211b3 VFMSUB132PD (%R14,%R10,8),%YMM13,%YMM12 |
(138) 0x4211b9 VADDPD (%RSI,%R9,8),%YMM12,%YMM14 |
(138) 0x4211bf VDIVPD (%RCX,%R8,8),%YMM14,%YMM15 |
(138) 0x4211c5 VMOVUPD %YMM15,(%RBX) |
(138) 0x4211c9 TEST $0x3,%DL |
(138) 0x4211cc JE 4212df |
(138) 0x4211d2 AND $-0x4,%EDX |
(138) 0x4211d5 ADD %EDX,%R11D |
(138) 0x4211d8 ADD %EDX,%EDI |
(138) 0x4211da MOV 0x48(%RSP),%R13 |
(138) 0x4211df MOV 0x68(%RSP),%R8 |
(138) 0x4211e4 MOVSXD %EDI,%RAX |
(138) 0x4211e7 MOV 0x40(%RSP),%RBX |
(138) 0x4211ec MOV 0x50(%RSP),%R10 |
(138) 0x4211f1 LEA (%R13,%RAX,1),%R9 |
(138) 0x4211f6 ADD %RAX,%R8 |
(138) 0x4211f9 VMOVSD (%R14,%R9,8),%XMM0 |
(138) 0x4211ff VMOVSD (%RSI,%R8,8),%XMM3 |
(138) 0x421205 LEA (%RBX,%RAX,1),%RDX |
(138) 0x421209 LEA (%R10,%RAX,1),%RCX |
(138) 0x42120d LEA (%R15,%RDX,8),%RDX |
(138) 0x421211 MOV 0x78(%RSP),%R9 |
(138) 0x421216 MOV 0x5c(%RSP),%R8D |
(138) 0x42121b VFMSUB132SD (%RDX),%XMM3,%XMM0 |
(138) 0x421220 VADDSD (%RSI,%RCX,8),%XMM0,%XMM1 |
(138) 0x421225 MOV 0x60(%RSP),%RCX |
(138) 0x42122a ADD %RCX,%RAX |
(138) 0x42122d VDIVSD (%R9,%RAX,8),%XMM1,%XMM2 |
(138) 0x421233 VMOVSD %XMM2,(%RDX) |
(138) 0x421237 LEA 0x1(%R11),%EDX |
(138) 0x42123b LEA 0x1(%RDI),%EAX |
(138) 0x42123e CMP %R8D,%EDX |
(138) 0x421241 JAE 4212df |
(138) 0x421247 MOV 0x68(%RSP),%R8 |
(138) 0x42124c CLTQ |
(138) 0x42124e ADD $0x2,%R11D |
(138) 0x421252 ADD $0x2,%EDI |
(138) 0x421255 LEA (%R13,%RAX,1),%R9 |
(138) 0x42125a LEA (%RBX,%RAX,1),%RCX |
(138) 0x42125e ADD %RAX,%R8 |
(138) 0x421261 VMOVSD (%R14,%R9,8),%XMM5 |
(138) 0x421267 LEA (%R15,%RCX,8),%RDX |
(138) 0x42126b LEA (%R10,%RAX,1),%RCX |
(138) 0x42126f VMOVSD (%RSI,%R8,8),%XMM4 |
(138) 0x421275 MOV 0x60(%RSP),%R9 |
(138) 0x42127a VFMSUB132SD (%RDX),%XMM4,%XMM5 |
(138) 0x42127f ADD %R9,%RAX |
(138) 0x421282 VADDSD (%RSI,%RCX,8),%XMM5,%XMM6 |
(138) 0x421287 MOV 0x78(%RSP),%RCX |
(138) 0x42128c VDIVSD (%RCX,%RAX,8),%XMM6,%XMM7 |
(138) 0x421291 MOV 0x5c(%RSP),%EAX |
(138) 0x421295 VMOVSD %XMM7,(%RDX) |
(138) 0x421299 CMP %EAX,%R11D |
(138) 0x42129c JAE 4212df |
(138) 0x42129e MOVSXD %EDI,%R11 |
(138) 0x4212a1 MOV 0x68(%RSP),%RDI |
(138) 0x4212a6 ADD %R11,%RBX |
(138) 0x4212a9 ADD %R11,%R13 |
(138) 0x4212ac ADD %R11,%R10 |
(138) 0x4212af ADD %R11,%R9 |
(138) 0x4212b2 LEA (%R15,%RBX,8),%R15 |
(138) 0x4212b6 ADD %R11,%RDI |
(138) 0x4212b9 VMOVSD (%R15),%XMM8 |
(138) 0x4212be VMOVSD (%RSI,%RDI,8),%XMM9 |
(138) 0x4212c3 VFMSUB132SD (%R14,%R13,8),%XMM9,%XMM8 |
(138) 0x4212c9 MOV 0x78(%RSP),%R14 |
(138) 0x4212ce VADDSD (%RSI,%R10,8),%XMM8,%XMM10 |
(138) 0x4212d4 VDIVSD (%R14,%R9,8),%XMM10,%XMM11 |
(138) 0x4212da VMOVSD %XMM11,(%R15) |
(138) 0x4212df MOV 0x5c(%RSP),%R11D |
(138) 0x4212e4 INCL 0x70(%RSP) |
(138) 0x4212e8 INC %R12 |
(138) 0x4212eb MOV 0x70(%RSP),%ESI |
(138) 0x4212ef CMP %ESI,0x3c(%RSP) |
(138) 0x4212f3 JLE 421310 |
(138) 0x4212f5 MOV 0x34(%RSP),%ECX |
(138) 0x4212f9 MOV 0x38(%RSP),%R8D |
(138) 0x4212fe MOV 0x58(%RSP),%EDX |
(138) 0x421302 MOV %R8D,0x74(%RSP) |
(138) 0x421307 SUB %R11D,%ECX |
(138) 0x42130a JMP 420ef8 |
0x42130f NOP |
0x421310 VZEROUPPER |
0x421313 LEA -0x28(%RBP),%RSP |
0x421317 POP %RBX |
0x421318 POP %R12 |
0x42131a POP %R13 |
0x42131c POP %R14 |
0x42131e POP %R15 |
0x421320 POP %RBP |
0x421321 RET |
0x421322 NOPW (%RAX,%RAX,1) |
(138) 0x421328 MOV 0x74(%RSP),%EDI |
(138) 0x42132c XOR %ECX,%ECX |
(138) 0x42132e JMP 421157 |
0x421333 INC %ECX |
0x421335 XOR %EDX,%EDX |
0x421337 JMP 420e8e |
0x42133c NOPL (%RAX) |
Path / |
Source file and lines | advec_mom.cpp:218-221 |
Module | exec |
nb instructions | 81 |
nb uops | 91 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.17 cycles |
front end | 15.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.43-15.14 |
Stall cycles | 0.00-0.37 |
Front-end | 15.17 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 9% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
INC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 421313 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x3(%RBX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 421313 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 421333 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x523> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R11,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 421313 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x58(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 420e8e <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x7e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:218-221 |
Module | exec |
nb instructions | 81 |
nb uops | 91 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.17 cycles |
front end | 15.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.43-15.14 |
Stall cycles | 0.00-0.37 |
Front-end | 15.17 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 9% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
INC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 421313 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x3(%RBX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 421313 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 421333 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x523> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R11,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 421313 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x58(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 420e8e <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x7e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 3.45 | 1.1 |
▼Loop 138 - advec_mom.cpp:220-221 - exec– | 0.01 | 0.01 |
○Loop 139 - advec_mom.cpp:221-221 - exec | 3.44 | 1.1 |