Function: _Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0 | Module: exec | Source: ideal_gas.cpp:37-45 [...] | Coverage: 4.86% |
---|
Function: _Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0 | Module: exec | Source: ideal_gas.cpp:37-45 [...] | Coverage: 4.86% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/ideal_gas.cpp: 37 - 45 |
-------------------------------------------------------------------------------- |
37: #pragma omp parallel for simd collapse(2) |
38: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
39: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
40: double v = 1.0 / density(i, j); |
41: pressure(i, j) = (1.4 - 1.0) * density(i, j) * energy(i, j); |
42: double pressurebyenergy = (1.4 - 1.0) * density(i, j); |
43: double pressurebyvolume = -density(i, j) * pressure(i, j); |
44: double sound_speed_squared = v * v * (pressure(i, j) * pressurebyenergy - pressurebyvolume); |
45: soundspeed(i, j) = std::sqrt(sound_speed_squared); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x450bc0 PUSH %RBP |
0x450bc1 MOV %RSP,%RBP |
0x450bc4 PUSH %R15 |
0x450bc6 PUSH %R14 |
0x450bc8 PUSH %R13 |
0x450bca PUSH %R12 |
0x450bcc PUSH %RBX |
0x450bcd AND $-0x40,%RSP |
0x450bd1 ADD $-0x80,%RSP |
0x450bd5 MOV 0x28(%RDI),%EAX |
0x450bd8 MOV 0x2c(%RDI),%EDX |
0x450bdb MOV 0x20(%RDI),%EBX |
0x450bde MOV 0x24(%RDI),%ECX |
0x450be1 ADD $0x2,%EDX |
0x450be4 LEA 0x1(%RAX),%R15D |
0x450be8 LEA 0x1(%RBX),%ESI |
0x450beb MOV %EDX,0x50(%RSP) |
0x450bef MOV %ESI,0x4c(%RSP) |
0x450bf3 CMP %EDX,%R15D |
0x450bf6 JGE 4511cb |
0x450bfc MOV %EDX,%EBX |
0x450bfe LEA 0x2(%RCX),%R14D |
0x450c02 SUB %R15D,%EBX |
0x450c05 CMP %R14D,%ESI |
0x450c08 JGE 4511cb |
0x450c0e MOV %RDI,%R13 |
0x450c11 MOV %R14D,%EDI |
0x450c14 SUB %ESI,%EDI |
0x450c16 MOV %EDI,0x54(%RSP) |
0x450c1a CALL 4046c0 <omp_get_num_threads@plt> |
0x450c1f MOV %EAX,%R12D |
0x450c22 CALL 4045b0 <omp_get_thread_num@plt> |
0x450c27 XOR %EDX,%EDX |
0x450c29 MOV %EAX,%R8D |
0x450c2c MOV 0x54(%RSP),%EAX |
0x450c30 IMUL %EBX,%EAX |
0x450c33 DIV %R12D |
0x450c36 MOV %EAX,%R12D |
0x450c39 CMP %EDX,%R8D |
0x450c3c JB 4511eb |
0x450c42 IMUL %R12D,%R8D |
0x450c46 LEA (%R8,%RDX,1),%R9D |
0x450c4a LEA (%R12,%R9,1),%R10D |
0x450c4e MOV %R10D,0x48(%RSP) |
0x450c53 CMP %R10D,%R9D |
0x450c56 JAE 4511cb |
0x450c5c MOV %R9D,%EAX |
0x450c5f XOR %EDX,%EDX |
0x450c61 MOV 0x4c(%RSP),%R11D |
0x450c66 MOV 0x8(%R13),%RSI |
0x450c6a DIVL 0x54(%RSP) |
0x450c6e MOV 0x18(%R13),%RBX |
0x450c72 VMOVSD 0x1295e(%RIP),%XMM7 |
0x450c7a VMOVSD 0x1295e(%RIP),%XMM6 |
0x450c82 VMOVSD 0x1295e(%RIP),%XMM5 |
0x450c8a MOV %RSI,0x38(%RSP) |
0x450c8f MOV %RBX,0x28(%RSP) |
0x450c94 VBROADCASTSD %XMM7,%YMM10 |
0x450c99 VBROADCASTSD %XMM6,%YMM9 |
0x450c9e VBROADCASTSD %XMM5,%YMM8 |
0x450ca3 VBROADCASTSD %XMM7,%ZMM4 |
0x450ca9 VBROADCASTSD %XMM6,%ZMM3 |
0x450caf VBROADCASTSD %XMM5,%ZMM2 |
0x450cb5 ADD %EDX,%R11D |
0x450cb8 ADD %R15D,%EAX |
0x450cbb MOV %R14D,%EDX |
0x450cbe MOV (%R13),%R15 |
0x450cc2 MOV 0x10(%R13),%R14 |
0x450cc6 MOV %R11D,0x7c(%RSP) |
0x450ccb SUB %R11D,%EDX |
0x450cce MOVSXD %EAX,%RBX |
0x450cd1 MOV %R15,0x40(%RSP) |
0x450cd6 MOV %R14,0x30(%RSP) |
0x450cdb NOPL (%RAX,%RAX,1) |
(417) 0x450ce0 CMP %EDX,%R12D |
(417) 0x450ce3 CMOVBE %R12D,%EDX |
(417) 0x450ce7 LEA (%R9,%RDX,1),%ECX |
(417) 0x450ceb MOV %ECX,0x78(%RSP) |
(417) 0x450cef CMP %ECX,%R9D |
(417) 0x450cf2 JAE 45119c |
(417) 0x450cf8 MOV 0x40(%RSP),%R13 |
(417) 0x450cfd MOV 0x38(%RSP),%RDI |
(417) 0x450d02 LEA -0x1(%RDX),%EAX |
(417) 0x450d05 MOV 0x30(%RSP),%R12 |
(417) 0x450d0a MOV 0x28(%RSP),%R8 |
(417) 0x450d0f MOV (%R13),%R11 |
(417) 0x450d13 MOV (%RDI),%RSI |
(417) 0x450d16 MOV (%R12),%R10 |
(417) 0x450d1a MOV 0x10(%RDI),%R14 |
(417) 0x450d1e MOV (%R8),%RDI |
(417) 0x450d21 IMUL %RBX,%R11 |
(417) 0x450d25 MOV 0x10(%R13),%R15 |
(417) 0x450d29 IMUL %RBX,%RSI |
(417) 0x450d2d MOV 0x10(%R12),%R13 |
(417) 0x450d32 MOV 0x10(%R8),%R12 |
(417) 0x450d36 IMUL %RBX,%R10 |
(417) 0x450d3a IMUL %RBX,%RDI |
(417) 0x450d3e MOV %R11,0x58(%RSP) |
(417) 0x450d43 MOV %RSI,0x60(%RSP) |
(417) 0x450d48 MOV %R10,0x68(%RSP) |
(417) 0x450d4d MOV %RDI,0x70(%RSP) |
(417) 0x450d52 CMP $0x6,%EAX |
(417) 0x450d55 JBE 4511e0 |
(417) 0x450d5b MOVSXD 0x7c(%RSP),%R8 |
(417) 0x450d60 LEA (%RSI,%R8,1),%RAX |
(417) 0x450d64 LEA (%R11,%R8,1),%R11 |
(417) 0x450d68 LEA (%R10,%R8,1),%R10 |
(417) 0x450d6c ADD %RDI,%R8 |
(417) 0x450d6f LEA (%R15,%R11,8),%RCX |
(417) 0x450d73 MOV %EDX,%EDI |
(417) 0x450d75 SHR $0x3,%EDI |
(417) 0x450d78 LEA (%R14,%RAX,8),%R11 |
(417) 0x450d7c LEA (%R13,%R10,8),%R10 |
(417) 0x450d81 XOR %EAX,%EAX |
(417) 0x450d83 SAL $0x6,%RDI |
(417) 0x450d87 LEA (%R12,%R8,8),%R8 |
(417) 0x450d8b LEA -0x40(%RDI),%RSI |
(417) 0x450d8f SHR $0x6,%RSI |
(417) 0x450d93 INC %RSI |
(417) 0x450d96 AND $0x3,%ESI |
(417) 0x450d99 JE 450e95 |
(417) 0x450d9f CMP $0x1,%RSI |
(417) 0x450da3 JE 450e41 |
(417) 0x450da9 CMP $0x2,%RSI |
(417) 0x450dad JE 450df6 |
(417) 0x450daf VMOVUPD (%RCX),%ZMM1 |
(417) 0x450db5 MOV $0x40,%EAX |
(417) 0x450dba VMULPD (%R11),%ZMM1,%ZMM0 |
(417) 0x450dc0 VDIVPD %ZMM1,%ZMM4,%ZMM11 |
(417) 0x450dc6 VMULPD %ZMM11,%ZMM11,%ZMM14 |
(417) 0x450dcc VMULPD %ZMM3,%ZMM0,%ZMM12 |
(417) 0x450dd2 VMOVUPD %ZMM12,(%R10) |
(417) 0x450dd8 VMULPD (%RCX),%ZMM2,%ZMM13 |
(417) 0x450dde VMULPD %ZMM14,%ZMM13,%ZMM15 |
(417) 0x450de4 VMULPD %ZMM12,%ZMM15,%ZMM1 |
(417) 0x450dea VSQRTPD %ZMM1,%ZMM11 |
(417) 0x450df0 VMOVUPD %ZMM11,(%R8) |
(417) 0x450df6 VMOVUPD (%RCX,%RAX,1),%ZMM0 |
(417) 0x450dfd VMULPD (%R11,%RAX,1),%ZMM0,%ZMM13 |
(417) 0x450e04 VDIVPD %ZMM0,%ZMM4,%ZMM12 |
(417) 0x450e0a VMULPD %ZMM12,%ZMM12,%ZMM1 |
(417) 0x450e10 VMULPD %ZMM3,%ZMM13,%ZMM14 |
(417) 0x450e16 VMOVUPD %ZMM14,(%R10,%RAX,1) |
(417) 0x450e1d VMULPD (%RCX,%RAX,1),%ZMM2,%ZMM15 |
(417) 0x450e24 VMULPD %ZMM1,%ZMM15,%ZMM11 |
(417) 0x450e2a VMULPD %ZMM14,%ZMM11,%ZMM0 |
(417) 0x450e30 VSQRTPD %ZMM0,%ZMM12 |
(417) 0x450e36 VMOVUPD %ZMM12,(%R8,%RAX,1) |
(417) 0x450e3d ADD $0x40,%RAX |
(417) 0x450e41 VMOVUPD (%RCX,%RAX,1),%ZMM13 |
(417) 0x450e48 VMULPD (%R11,%RAX,1),%ZMM13,%ZMM15 |
(417) 0x450e4f VDIVPD %ZMM13,%ZMM4,%ZMM14 |
(417) 0x450e55 VMULPD %ZMM14,%ZMM14,%ZMM11 |
(417) 0x450e5b VMULPD %ZMM3,%ZMM15,%ZMM1 |
(417) 0x450e61 VMOVUPD %ZMM1,(%R10,%RAX,1) |
(417) 0x450e68 VMULPD (%RCX,%RAX,1),%ZMM2,%ZMM0 |
(417) 0x450e6f VMULPD %ZMM11,%ZMM0,%ZMM12 |
(417) 0x450e75 VMULPD %ZMM1,%ZMM12,%ZMM13 |
(417) 0x450e7b VSQRTPD %ZMM13,%ZMM14 |
(417) 0x450e81 VMOVUPD %ZMM14,(%R8,%RAX,1) |
(417) 0x450e88 ADD $0x40,%RAX |
(417) 0x450e8c CMP %RDI,%RAX |
(417) 0x450e8f JE 450fcf |
(418) 0x450e95 VMOVUPD (%RCX,%RAX,1),%ZMM15 |
(418) 0x450e9c VMULPD (%R11,%RAX,1),%ZMM15,%ZMM1 |
(418) 0x450ea3 VDIVPD %ZMM15,%ZMM4,%ZMM11 |
(418) 0x450ea9 VMULPD %ZMM11,%ZMM11,%ZMM13 |
(418) 0x450eaf VMULPD %ZMM3,%ZMM1,%ZMM12 |
(418) 0x450eb5 VMOVUPD %ZMM12,(%R10,%RAX,1) |
(418) 0x450ebc VMULPD (%RCX,%RAX,1),%ZMM2,%ZMM0 |
(418) 0x450ec3 VMULPD %ZMM13,%ZMM0,%ZMM14 |
(418) 0x450ec9 VMULPD %ZMM12,%ZMM14,%ZMM15 |
(418) 0x450ecf VSQRTPD %ZMM15,%ZMM11 |
(418) 0x450ed5 VMOVUPD %ZMM11,(%R8,%RAX,1) |
(418) 0x450edc VMOVUPD 0x40(%RCX,%RAX,1),%ZMM1 |
(418) 0x450ee4 VMULPD 0x40(%R11,%RAX,1),%ZMM1,%ZMM0 |
(418) 0x450eec VDIVPD %ZMM1,%ZMM4,%ZMM12 |
(418) 0x450ef2 VMULPD %ZMM12,%ZMM12,%ZMM15 |
(418) 0x450ef8 VMULPD %ZMM3,%ZMM0,%ZMM13 |
(418) 0x450efe VMOVUPD %ZMM13,0x40(%R10,%RAX,1) |
(418) 0x450f06 VMULPD 0x40(%RCX,%RAX,1),%ZMM2,%ZMM14 |
(418) 0x450f0e VMULPD %ZMM15,%ZMM14,%ZMM11 |
(418) 0x450f14 VMULPD %ZMM13,%ZMM11,%ZMM1 |
(418) 0x450f1a VSQRTPD %ZMM1,%ZMM12 |
(418) 0x450f20 VMOVUPD %ZMM12,0x40(%R8,%RAX,1) |
(418) 0x450f28 VMOVUPD 0x80(%RCX,%RAX,1),%ZMM0 |
(418) 0x450f30 VMULPD 0x80(%R11,%RAX,1),%ZMM0,%ZMM14 |
(418) 0x450f38 VDIVPD %ZMM0,%ZMM4,%ZMM13 |
(418) 0x450f3e VMULPD %ZMM13,%ZMM13,%ZMM11 |
(418) 0x450f44 VMULPD %ZMM3,%ZMM14,%ZMM15 |
(418) 0x450f4a VMOVUPD %ZMM15,0x80(%R10,%RAX,1) |
(418) 0x450f52 VMULPD 0x80(%RCX,%RAX,1),%ZMM2,%ZMM1 |
(418) 0x450f5a VMULPD %ZMM11,%ZMM1,%ZMM12 |
(418) 0x450f60 VMULPD %ZMM15,%ZMM12,%ZMM0 |
(418) 0x450f66 VSQRTPD %ZMM0,%ZMM13 |
(418) 0x450f6c VMOVUPD %ZMM13,0x80(%R8,%RAX,1) |
(418) 0x450f74 VMOVUPD 0xc0(%RCX,%RAX,1),%ZMM14 |
(418) 0x450f7c VMULPD 0xc0(%R11,%RAX,1),%ZMM14,%ZMM1 |
(418) 0x450f84 VDIVPD %ZMM14,%ZMM4,%ZMM15 |
(418) 0x450f8a VMULPD %ZMM15,%ZMM15,%ZMM11 |
(418) 0x450f90 VMULPD %ZMM3,%ZMM1,%ZMM12 |
(418) 0x450f96 VMOVUPD %ZMM12,0xc0(%R10,%RAX,1) |
(418) 0x450f9e VMULPD 0xc0(%RCX,%RAX,1),%ZMM2,%ZMM0 |
(418) 0x450fa6 VMULPD %ZMM11,%ZMM0,%ZMM13 |
(418) 0x450fac VMULPD %ZMM12,%ZMM13,%ZMM14 |
(418) 0x450fb2 VSQRTPD %ZMM14,%ZMM15 |
(418) 0x450fb8 VMOVUPD %ZMM15,0xc0(%R8,%RAX,1) |
(418) 0x450fc0 ADD $0x100,%RAX |
(418) 0x450fc6 CMP %RDI,%RAX |
(418) 0x450fc9 JNE 450e95 |
(417) 0x450fcf MOV 0x7c(%RSP),%R11D |
(417) 0x450fd4 MOV %EDX,%ECX |
(417) 0x450fd6 AND $-0x8,%ECX |
(417) 0x450fd9 ADD %ECX,%R9D |
(417) 0x450fdc LEA (%RCX,%R11,1),%ESI |
(417) 0x450fe0 TEST $0x7,%DL |
(417) 0x450fe3 JE 451197 |
(417) 0x450fe9 SUB %ECX,%EDX |
(417) 0x450feb LEA -0x1(%RDX),%R10D |
(417) 0x450fef CMP $0x2,%R10D |
(417) 0x450ff3 JBE 451079 |
(417) 0x450ff9 MOVSXD 0x7c(%RSP),%RAX |
(417) 0x450ffe MOV 0x58(%RSP),%R8 |
(417) 0x451003 MOV 0x60(%RSP),%R11 |
(417) 0x451008 MOV 0x70(%RSP),%RDI |
(417) 0x45100d ADD %RAX,%R8 |
(417) 0x451010 ADD %RCX,%R8 |
(417) 0x451013 ADD %RAX,%RDI |
(417) 0x451016 LEA (%R15,%R8,8),%R10 |
(417) 0x45101a LEA (%R11,%RAX,1),%R8 |
(417) 0x45101e MOV 0x68(%RSP),%R11 |
(417) 0x451023 ADD %RCX,%RDI |
(417) 0x451026 VMOVUPD (%R10),%YMM1 |
(417) 0x45102b ADD %RCX,%R8 |
(417) 0x45102e ADD %R11,%RAX |
(417) 0x451031 VDIVPD %YMM1,%YMM10,%YMM12 |
(417) 0x451035 ADD %RCX,%RAX |
(417) 0x451038 VMULPD (%R14,%R8,8),%YMM1,%YMM0 |
(417) 0x45103e VMULPD %YMM9,%YMM0,%YMM13 |
(417) 0x451043 VMOVUPD %YMM13,(%R13,%RAX,8) |
(417) 0x45104a VMULPD (%R10),%YMM8,%YMM14 |
(417) 0x45104f VMULPD %YMM12,%YMM12,%YMM11 |
(417) 0x451054 VMULPD %YMM11,%YMM14,%YMM15 |
(417) 0x451059 VMULPD %YMM13,%YMM15,%YMM1 |
(417) 0x45105e VSQRTPD %YMM1,%YMM12 |
(417) 0x451062 VMOVUPD %YMM12,(%R12,%RDI,8) |
(417) 0x451068 TEST $0x3,%DL |
(417) 0x45106b JE 451197 |
(417) 0x451071 AND $-0x4,%EDX |
(417) 0x451074 ADD %EDX,%R9D |
(417) 0x451077 ADD %EDX,%ESI |
(417) 0x451079 MOV 0x58(%RSP),%RDI |
(417) 0x45107e MOVSXD %ESI,%RAX |
(417) 0x451081 MOV 0x60(%RSP),%R11 |
(417) 0x451086 MOV 0x68(%RSP),%R10 |
(417) 0x45108b LEA (%RDI,%RAX,1),%RDX |
(417) 0x45108f LEA (%R11,%RAX,1),%RCX |
(417) 0x451093 LEA (%R15,%RDX,8),%R8 |
(417) 0x451097 LEA (%R10,%RAX,1),%RDX |
(417) 0x45109b VMOVSD (%R8),%XMM0 |
(417) 0x4510a0 VDIVSD %XMM0,%XMM7,%XMM13 |
(417) 0x4510a4 VMULSD (%R14,%RCX,8),%XMM0,%XMM14 |
(417) 0x4510aa MOV 0x78(%RSP),%ECX |
(417) 0x4510ae VMULSD %XMM6,%XMM14,%XMM15 |
(417) 0x4510b2 VMOVSD %XMM15,(%R13,%RDX,8) |
(417) 0x4510b9 LEA 0x1(%R9),%EDX |
(417) 0x4510bd VMULSD (%R8),%XMM5,%XMM1 |
(417) 0x4510c2 MOV 0x70(%RSP),%R8 |
(417) 0x4510c7 ADD %R8,%RAX |
(417) 0x4510ca VMULSD %XMM13,%XMM13,%XMM11 |
(417) 0x4510cf VMULSD %XMM11,%XMM1,%XMM12 |
(417) 0x4510d4 VMULSD %XMM15,%XMM12,%XMM0 |
(417) 0x4510d9 VSQRTSD %XMM0,%XMM0,%XMM0 |
(417) 0x4510dd VMOVSD %XMM0,(%R12,%RAX,8) |
(417) 0x4510e3 LEA 0x1(%RSI),%EAX |
(417) 0x4510e6 CMP %ECX,%EDX |
(417) 0x4510e8 JAE 451197 |
(417) 0x4510ee CLTQ |
(417) 0x4510f0 ADD $0x2,%R9D |
(417) 0x4510f4 ADD $0x2,%ESI |
(417) 0x4510f7 LEA (%RDI,%RAX,1),%RDX |
(417) 0x4510fb LEA (%R11,%RAX,1),%RCX |
(417) 0x4510ff LEA (%R15,%RDX,8),%RDX |
(417) 0x451103 VMOVSD (%RDX),%XMM13 |
(417) 0x451107 VDIVSD %XMM13,%XMM7,%XMM14 |
(417) 0x45110c VMULSD (%R14,%RCX,8),%XMM13,%XMM15 |
(417) 0x451112 LEA (%R10,%RAX,1),%RCX |
(417) 0x451116 ADD %R8,%RAX |
(417) 0x451119 VMULSD %XMM6,%XMM15,%XMM1 |
(417) 0x45111d VMOVSD %XMM1,(%R13,%RCX,8) |
(417) 0x451124 VMULSD (%RDX),%XMM5,%XMM12 |
(417) 0x451128 VMULSD %XMM14,%XMM14,%XMM11 |
(417) 0x45112d VMULSD %XMM11,%XMM12,%XMM0 |
(417) 0x451132 VMULSD %XMM1,%XMM0,%XMM13 |
(417) 0x451136 VSQRTSD %XMM13,%XMM13,%XMM13 |
(417) 0x45113b VMOVSD %XMM13,(%R12,%RAX,8) |
(417) 0x451141 MOV 0x78(%RSP),%EAX |
(417) 0x451145 CMP %EAX,%R9D |
(417) 0x451148 JAE 451197 |
(417) 0x45114a MOVSXD %ESI,%R9 |
(417) 0x45114d ADD %R9,%RDI |
(417) 0x451150 ADD %R9,%R11 |
(417) 0x451153 ADD %R9,%R10 |
(417) 0x451156 ADD %R9,%R8 |
(417) 0x451159 LEA (%R15,%RDI,8),%R15 |
(417) 0x45115d VMOVSD (%R15),%XMM14 |
(417) 0x451162 VDIVSD %XMM14,%XMM7,%XMM15 |
(417) 0x451167 VMULSD (%R14,%R11,8),%XMM14,%XMM1 |
(417) 0x45116d VMULSD %XMM6,%XMM1,%XMM12 |
(417) 0x451171 VMOVSD %XMM12,(%R13,%R10,8) |
(417) 0x451178 VMULSD (%R15),%XMM5,%XMM0 |
(417) 0x45117d VMULSD %XMM15,%XMM15,%XMM11 |
(417) 0x451182 VMULSD %XMM11,%XMM0,%XMM13 |
(417) 0x451187 VMULSD %XMM12,%XMM13,%XMM14 |
(417) 0x45118c VSQRTSD %XMM14,%XMM14,%XMM14 |
(417) 0x451191 VMOVSD %XMM14,(%R12,%R8,8) |
(417) 0x451197 MOV 0x78(%RSP),%R9D |
(417) 0x45119c INC %RBX |
(417) 0x45119f LEA (%RBX),%R14D |
(417) 0x4511a2 CMP %R14D,0x50(%RSP) |
(417) 0x4511a7 JLE 4511c8 |
(417) 0x4511a9 MOV 0x48(%RSP),%R12D |
(417) 0x4511ae MOV 0x4c(%RSP),%R13D |
(417) 0x4511b3 MOV 0x54(%RSP),%EDX |
(417) 0x4511b7 MOV %R13D,0x7c(%RSP) |
(417) 0x4511bc SUB %R9D,%R12D |
(417) 0x4511bf JMP 450ce0 |
0x4511c4 NOPL (%RAX) |
0x4511c8 VZEROUPPER |
0x4511cb LEA -0x28(%RBP),%RSP |
0x4511cf POP %RBX |
0x4511d0 POP %R12 |
0x4511d2 POP %R13 |
0x4511d4 POP %R14 |
0x4511d6 POP %R15 |
0x4511d8 POP %RBP |
0x4511d9 RET |
0x4511da NOPW (%RAX,%RAX,1) |
(417) 0x4511e0 MOV 0x7c(%RSP),%ESI |
(417) 0x4511e4 XOR %ECX,%ECX |
(417) 0x4511e6 JMP 450fe9 |
0x4511eb INC %R12D |
0x4511ee XOR %EDX,%EDX |
0x4511f0 JMP 450c42 |
0x4511f5 NOP |
0x4511f6 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | ideal_gas.cpp:37-45 |
Module | exec |
nb instructions | 90 |
nb uops | 100 |
loop length | 337 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 3 |
used zmm registers | 3 |
nb stack references | 10 |
micro-operation queue | 16.67 cycles |
front end | 16.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.30 | 8.00 | 7.00 | 7.00 | 8.50 | 7.20 | 7.30 | 8.50 | 8.50 | 8.50 | 7.20 | 7.00 |
cycles | 7.30 | 11.73 | 7.00 | 7.00 | 8.50 | 7.20 | 7.30 | 8.50 | 8.50 | 8.50 | 7.20 | 7.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.92-16.01 |
Stall cycles | 0.00 |
Front-end | 16.67 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 16.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4511cb <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4511cb <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x54(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4511eb <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x62b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %R12D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%R12,%R9,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R10D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4511cb <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x4c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x54(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1295e(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1295e(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1295e(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM7,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11D,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R11D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%RBX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 450c42 <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x82> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | ideal_gas.cpp:37-45 |
Module | exec |
nb instructions | 90 |
nb uops | 100 |
loop length | 337 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 3 |
used zmm registers | 3 |
nb stack references | 10 |
micro-operation queue | 16.67 cycles |
front end | 16.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.30 | 8.00 | 7.00 | 7.00 | 8.50 | 7.20 | 7.30 | 8.50 | 8.50 | 8.50 | 7.20 | 7.00 |
cycles | 7.30 | 11.73 | 7.00 | 7.00 | 8.50 | 7.20 | 7.30 | 8.50 | 8.50 | 8.50 | 7.20 | 7.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.92-16.01 |
Stall cycles | 0.00 |
Front-end | 16.67 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 16.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4511cb <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4511cb <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x54(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4511eb <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x62b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %R12D,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%R12,%R9,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R10D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4511cb <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x4c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x54(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1295e(%RIP),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1295e(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x1295e(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD %XMM7,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11D,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R11D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%RBX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 450c42 <_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0+0x82> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16ideal_gas_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_._omp_fn.0.lto_priv.0– | 4.86 | 3.63 |
▼Loop 417 - ideal_gas.cpp:40-45 - exec– | 0.01 | 0.01 |
○Loop 418 - ideal_gas.cpp:40-45 - exec | 4.85 | 3.62 |