Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage: 3.47% |
---|
Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage: 3.47% |
---|
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_algobase.h: 238 - 238 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 65 - 110 |
-------------------------------------------------------------------------------- |
65: #pragma omp parallel for simd collapse(2) |
66: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
67: for (int i = (x_min + 1); i < (x_max + 2 + 2); i++) |
68: ({ |
69: int upwind, donor, downwind, dif; |
70: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
71: if (vol_flux_x(i, j) > 0.0) { |
72: upwind = i - 2; |
73: donor = i - 1; |
74: downwind = i; |
75: dif = donor; |
76: } else { |
77: upwind = std::min(i + 1, x_max + 2); |
78: donor = i; |
79: downwind = i - 1; |
80: dif = upwind; |
81: } |
82: sigmat = std::fabs(vol_flux_x(i, j)) / pre_vol(donor, j); |
83: sigma3 = (1.0 + sigmat) * (vertexdx[i] / vertexdx[dif]); |
84: sigma4 = 2.0 - sigmat; |
85: sigmav = sigmat; |
86: diffuw = density1(donor, j) - density1(upwind, j); |
87: diffdw = density1(downwind, j) - density1(donor, j); |
88: wind = 1.0; |
89: if (diffdw <= 0.0) wind = -1.0; |
90: if (diffuw * diffdw > 0.0) { |
91: limiter = (1.0 - sigmav) * wind * |
92: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
93: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
94: } else { |
95: limiter = 0.0; |
96: } |
97: mass_flux_x(i, j) = vol_flux_x(i, j) * (density1(donor, j) + limiter); |
98: sigmam = std::fabs(mass_flux_x(i, j)) / (density1(donor, j) * pre_vol(donor, j)); |
99: diffuw = energy1(donor, j) - energy1(upwind, j); |
100: diffdw = energy1(downwind, j) - energy1(donor, j); |
101: wind = 1.0; |
102: if (diffdw <= 0.0) wind = -1.0; |
103: if (diffuw * diffdw > 0.0) { |
104: limiter = (1.0 - sigmam) * wind * |
105: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
106: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
107: } else { |
108: limiter = 0.0; |
109: } |
110: ener_flux(i, j) = mass_flux_x(i, j) * (energy1(donor, j) + limiter); |
0x425c20 PUSH %RBP |
0x425c21 MOV %RSP,%RBP |
0x425c24 PUSH %R15 |
0x425c26 PUSH %R14 |
0x425c28 PUSH %R13 |
0x425c2a PUSH %R12 |
0x425c2c MOV %RDI,%R12 |
0x425c2f PUSH %RBX |
0x425c30 AND $-0x40,%RSP |
0x425c34 SUB $0x1c0,%RSP |
0x425c3b MOV 0x40(%RDI),%EAX |
0x425c3e MOV 0x44(%RDI),%EDX |
0x425c41 MOV 0x3c(%RDI),%EBX |
0x425c44 MOV 0x38(%RDI),%EDI |
0x425c47 ADD $0x2,%EDX |
0x425c4a LEA 0x1(%RAX),%R13D |
0x425c4e INC %EDI |
0x425c50 MOV %EDX,0x54(%RSP) |
0x425c54 MOV %EDI,0x50(%RSP) |
0x425c58 CMP %EDX,%R13D |
0x425c5b JGE 4265e3 |
0x425c61 MOV %EDX,%R14D |
0x425c64 LEA 0x4(%RBX),%R15D |
0x425c68 SUB %R13D,%R14D |
0x425c6b CMP %R15D,%EDI |
0x425c6e JGE 4265e3 |
0x425c74 MOV %R15D,%ECX |
0x425c77 SUB %EDI,%ECX |
0x425c79 MOV %ECX,0x1b8(%RSP) |
0x425c80 CALL 4046c0 <omp_get_num_threads@plt> |
0x425c85 MOV %EAX,0x1bc(%RSP) |
0x425c8c CALL 4045b0 <omp_get_thread_num@plt> |
0x425c91 XOR %EDX,%EDX |
0x425c93 MOV %EAX,%ESI |
0x425c95 MOV 0x1b8(%RSP),%EAX |
0x425c9c IMUL %R14D,%EAX |
0x425ca0 DIVL 0x1bc(%RSP) |
0x425ca7 MOV %EAX,%ECX |
0x425ca9 CMP %EDX,%ESI |
0x425cab JB 426647 |
0x425cb1 IMUL %ECX,%ESI |
0x425cb4 LEA (%RSI,%RDX,1),%EAX |
0x425cb7 LEA (%RCX,%RAX,1),%R8D |
0x425cbb MOV %EAX,0x1a4(%RSP) |
0x425cc2 MOV %R8D,0x4c(%RSP) |
0x425cc7 CMP %R8D,%EAX |
0x425cca JAE 4265e3 |
0x425cd0 XOR %EDX,%EDX |
0x425cd2 MOV 0x50(%RSP),%R9D |
0x425cd7 MOV 0x20(%R12),%R11 |
0x425cdc LEA 0x2(%RBX),%EBX |
0x425cdf DIVL 0x1b8(%RSP) |
0x425ce6 MOV (%R12),%RDI |
0x425cea MOV 0x18(%R12),%R14 |
0x425cef MOV %EBX,0x194(%RSP) |
0x425cf6 MOV 0x10(%R12),%RSI |
0x425cfb VMOVSD 0x3cd93(%RIP),%XMM25 |
0x425d05 MOV %R11,0x38(%RSP) |
0x425d0a MOV %RDI,0x28(%RSP) |
0x425d0f MOV %R14,0x18(%RSP) |
0x425d14 MOV %RSI,0x10(%RSP) |
0x425d19 VMOVD %EBX,%XMM3 |
0x425d1d VPBROADCASTD %XMM3,%ZMM0 |
0x425d23 VMOVDQA32 %ZMM0,0x80(%RSP) |
0x425d2b ADD %EDX,%R9D |
0x425d2e LEA (%RAX,%R13,1),%R10D |
0x425d32 MOV %R15D,%EDX |
0x425d35 MOV 0x28(%R12),%R13 |
0x425d3a MOV 0x8(%R12),%R15 |
0x425d3f MOV 0x30(%R12),%R12 |
0x425d44 MOVSXD %R10D,%R8 |
0x425d47 MOV %R9D,0x1a0(%RSP) |
0x425d4f MOV %R10D,0x48(%RSP) |
0x425d54 SUB %R9D,%EDX |
0x425d57 MOV %R13,0x30(%RSP) |
0x425d5c MOV %R15,0x20(%RSP) |
0x425d61 MOV %R12,0x8(%RSP) |
0x425d66 MOV %R8,0x40(%RSP) |
0x425d6b MOV %R8,0x198(%RSP) |
0x425d73 NOPL (%RAX,%RAX,1) |
(109) 0x425d78 CMP %EDX,%ECX |
(109) 0x425d7a CMOVBE %ECX,%EDX |
(109) 0x425d7d MOV 0x1a4(%RSP),%ECX |
(109) 0x425d84 MOV %EDX,0x190(%RSP) |
(109) 0x425d8b ADD %ECX,%EDX |
(109) 0x425d8d MOV %EDX,0x1bc(%RSP) |
(109) 0x425d94 CMP %EDX,%ECX |
(109) 0x425d96 JAE 42658f |
(109) 0x425d9c MOV 0x38(%RSP),%RAX |
(109) 0x425da1 MOV 0x30(%RSP),%RDX |
(109) 0x425da6 MOV 0x18(%RSP),%R12 |
(109) 0x425dab MOV 0x28(%RSP),%R13 |
(109) 0x425db0 MOV 0x10(%RDX),%R9 |
(109) 0x425db4 MOV 0x10(%RAX),%R11 |
(109) 0x425db8 MOV (%RAX),%RSI |
(109) 0x425dbb MOV 0x20(%RSP),%R14 |
(109) 0x425dc0 MOV 0x10(%RSP),%RAX |
(109) 0x425dc5 MOV 0x198(%RSP),%R10 |
(109) 0x425dcd MOV %R9,0x1b0(%RSP) |
(109) 0x425dd5 MOV (%RDX),%RBX |
(109) 0x425dd8 MOV (%R12),%RCX |
(109) 0x425ddc MOV %R11,0x78(%RSP) |
(109) 0x425de1 MOV 0x8(%R13),%RDI |
(109) 0x425de5 MOV 0x10(%R14),%R15 |
(109) 0x425de9 IMUL %R10,%RSI |
(109) 0x425ded MOV 0x8(%RSP),%R13 |
(109) 0x425df2 MOV (%R14),%R14 |
(109) 0x425df5 IMUL %R10,%RBX |
(109) 0x425df9 MOV (%RAX),%R9 |
(109) 0x425dfc IMUL %R10,%RCX |
(109) 0x425e00 MOV 0x10(%R12),%R8 |
(109) 0x425e05 MOV 0x10(%R13),%RDX |
(109) 0x425e09 IMUL %R10,%R14 |
(109) 0x425e0d MOV %RSI,0x70(%RSP) |
(109) 0x425e12 IMUL %R10,%R9 |
(109) 0x425e16 MOV %R8,0x188(%RSP) |
(109) 0x425e1e MOV 0x10(%RAX),%R8 |
(109) 0x425e22 IMUL (%R13),%R10 |
(109) 0x425e27 MOV 0x190(%RSP),%R13D |
(109) 0x425e2f MOV %RBX,0x1a8(%RSP) |
(109) 0x425e37 MOV %RCX,0x68(%RSP) |
(109) 0x425e3c LEA -0x1(%R13),%R12D |
(109) 0x425e40 MOV %RDX,0x180(%RSP) |
(109) 0x425e48 MOV %R10,0x60(%RSP) |
(109) 0x425e4d CMP $0xe,%R12D |
(109) 0x425e51 JBE 426361 |
(109) 0x425e57 MOVSXD 0x1a0(%RSP),%RAX |
(109) 0x425e5f SHR $0x4,%R13D |
(109) 0x425e63 VPBROADCASTD 0x1a0(%RSP),%ZMM1 |
(109) 0x425e6b LEA (%R15,%R14,8),%RDX |
(109) 0x425e6f KXNORB %K1,%K1,%K1 |
(109) 0x425e73 SAL $0x7,%R13 |
(109) 0x425e77 VBROADCASTSD 0x3cbff(%RIP),%ZMM3 |
(109) 0x425e81 VBROADCASTSD 0x3cc0d(%RIP),%ZMM9 |
(109) 0x425e8b VXORPD %XMM8,%XMM8,%XMM8 |
(109) 0x425e90 LEA (%RSI,%RAX,1),%RSI |
(109) 0x425e94 MOV %R13,0x58(%RSP) |
(109) 0x425e99 MOV $0x10,%R13D |
(109) 0x425e9f ADD %RAX,%RCX |
(109) 0x425ea2 LEA (%R11,%RSI,8),%R12 |
(109) 0x425ea6 MOV 0x1b0(%RSP),%R11 |
(109) 0x425eae VPBROADCASTD %R13D,%ZMM5 |
(109) 0x425eb4 MOV $-0x2,%R13D |
(109) 0x425eba VPBROADCASTD %R13D,%ZMM2 |
(109) 0x425ec0 MOV $0x1,%R13D |
(109) 0x425ec6 VPADDD 0x3ca30(%RIP),%ZMM1,%ZMM27 |
(109) 0x425ed0 VBROADCASTSD 0x3cbb6(%RIP),%ZMM26 |
(109) 0x425eda LEA (%R11,%RBX,8),%RSI |
(109) 0x425ede MOV 0x188(%RSP),%R11 |
(109) 0x425ee6 LEA (%RDI,%RAX,8),%RBX |
(109) 0x425eea ADD %R10,%RAX |
(109) 0x425eed MOV 0x180(%RSP),%R10 |
(109) 0x425ef5 VPBROADCASTD %R13D,%ZMM4 |
(109) 0x425efb VBROADCASTSD 0x3ca3b(%RIP),%ZMM24 |
(109) 0x425f05 VMOVDQA32 %ZMM5,0x140(%RSP) |
(109) 0x425f0d VBROADCASTSD 0x3ca69(%RIP),%ZMM23 |
(109) 0x425f17 MOV 0x58(%RSP),%R13 |
(109) 0x425f1c LEA (%R11,%RCX,8),%R11 |
(109) 0x425f20 VMOVDQA32 %ZMM2,0x100(%RSP) |
(109) 0x425f28 LEA (%R10,%RAX,8),%R10 |
(109) 0x425f2c LEA (%R8,%R9,8),%RCX |
(109) 0x425f30 XOR %EAX,%EAX |
(109) 0x425f32 VMOVDQA32 %ZMM4,0xc0(%RSP) |
(109) 0x425f3a NOPW (%RAX,%RAX,1) |
(111) 0x425f40 VMOVDQA32 %ZMM27,%ZMM10 |
(111) 0x425f46 VMOVUPD 0x40(%R12,%RAX,1),%ZMM4 |
(111) 0x425f4e VMOVUPD (%R12,%RAX,1),%ZMM11 |
(111) 0x425f55 KMOVB %K1,%K4 |
(111) 0x425f59 KMOVB %K1,%K5 |
(111) 0x425f5d KMOVB %K1,%K6 |
(111) 0x425f61 VPADDD 0xc0(%RSP),%ZMM10,%ZMM14 |
(111) 0x425f69 VPADDD 0x3ca4d(%RIP),%ZMM10,%ZMM7 |
(111) 0x425f73 VEXTRACTI32X8 $0x1,%ZMM10,%YMM6 |
(111) 0x425f7a KMOVB %K1,%K7 |
(111) 0x425f7e VCMPPD $0xe,%ZMM8,%ZMM11,%K3 |
(111) 0x425f85 VCMPPD $0xe,%ZMM8,%ZMM4,%K2 |
(111) 0x425f8c VPMOVSXDQ %YMM10,%ZMM12 |
(111) 0x425f92 VPMINSD 0x80(%RSP),%ZMM14,%ZMM0 |
(111) 0x425f9a VPMOVSXDQ %YMM6,%ZMM13 |
(111) 0x425fa0 VPADDD 0x100(%RSP),%ZMM10,%ZMM15 |
(111) 0x425fa8 VEXTRACTI32X8 $0x1,%ZMM7,%YMM1 |
(111) 0x425faf VMOVDQA64 %ZMM12,%ZMM18 |
(111) 0x425fb5 VMOVDQA64 %ZMM13,%ZMM17 |
(111) 0x425fbb VPMOVSXDQ %YMM0,%ZMM14 |
(111) 0x425fc1 VEXTRACTI32X8 $0x1,%ZMM0,%YMM2 |
(111) 0x425fc8 VPMOVSXDQ %YMM7,%ZMM18{%K3} |
(111) 0x425fce VMOVDQA64 %ZMM14,%ZMM11 |
(111) 0x425fd4 VPMOVSXDQ %YMM1,%ZMM17{%K2} |
(111) 0x425fda VPMOVSXDQ %YMM2,%ZMM0 |
(111) 0x425fe0 VMOVUPD (%RBX,%RAX,1),%ZMM2 |
(111) 0x425fe7 VPMOVSXDQ %YMM7,%ZMM11{%K3} |
(111) 0x425fed VPMOVSXDQ %YMM7,%ZMM22 |
(111) 0x425ff3 VPMOVSXDQ %YMM1,%ZMM21 |
(111) 0x425ff9 VANDPD (%R12,%RAX,1),%ZMM3,%ZMM7 |
(111) 0x426000 VMOVDQA64 %ZMM12,%ZMM22{%K3} |
(111) 0x426006 VMOVDQA64 %ZMM13,%ZMM21{%K2} |
(111) 0x42600c VPMOVSXDQ %YMM15,%ZMM14{%K3} |
(111) 0x426012 VGATHERQPD (%RSI,%ZMM18,8),%ZMM13{%K4} |
(111) 0x426019 VEXTRACTI32X8 $0x1,%ZMM15,%YMM12 |
(111) 0x426020 KMOVB %K1,%K3 |
(111) 0x426024 KMOVB %K1,%K4 |
(111) 0x426028 VGATHERQPD (%RSI,%ZMM17,8),%ZMM15{%K5} |
(111) 0x42602f VANDPD %ZMM3,%ZMM4,%ZMM5 |
(111) 0x426035 VMOVDQA64 %ZMM0,%ZMM6 |
(111) 0x42603b VDIVPD %ZMM13,%ZMM7,%ZMM10 |
(111) 0x426041 KMOVB %K1,%K5 |
(111) 0x426045 VDIVPD %ZMM15,%ZMM5,%ZMM7 |
(111) 0x42604b VPMOVSXDQ %YMM1,%ZMM6{%K2} |
(111) 0x426051 VADDPD %ZMM9,%ZMM7,%ZMM13 |
(111) 0x426057 VGATHERQPD (%RDI,%ZMM11,8),%ZMM4{%K6} |
(111) 0x42605e KMOVB %K1,%K6 |
(111) 0x426062 VGATHERQPD (%RDI,%ZMM6,8),%ZMM1{%K7} |
(111) 0x426069 VADDPD %ZMM9,%ZMM10,%ZMM11 |
(111) 0x42606f VMOVUPD 0x40(%RBX,%RAX,1),%ZMM6 |
(111) 0x426077 VPMOVSXDQ %YMM12,%ZMM0{%K2} |
(111) 0x42607d VDIVPD %ZMM4,%ZMM2,%ZMM16 |
(111) 0x426083 KMOVB %K1,%K2 |
(111) 0x426087 VGATHERQPD (%RDX,%ZMM14,8),%ZMM15{%K4} |
(111) 0x42608e VGATHERQPD (%RDX,%ZMM17,8),%ZMM5{%K2} |
(111) 0x426095 VGATHERQPD (%RDX,%ZMM0,8),%ZMM4{%K5} |
(111) 0x42609c KMOVB %K1,%K7 |
(111) 0x4260a0 VDIVPD %ZMM1,%ZMM6,%ZMM12 |
(111) 0x4260a6 VGATHERQPD (%RDX,%ZMM18,8),%ZMM6{%K3} |
(111) 0x4260ad VSUBPD %ZMM7,%ZMM26,%ZMM20 |
(111) 0x4260b3 VSUBPD %ZMM10,%ZMM26,%ZMM2 |
(111) 0x4260b9 VMULPD %ZMM13,%ZMM12,%ZMM1 |
(111) 0x4260bf VGATHERQPD (%RDX,%ZMM21,8),%ZMM13{%K7} |
(111) 0x4260c6 VGATHERQPD (%RDX,%ZMM22,8),%ZMM12{%K6} |
(111) 0x4260cd KMOVB %K1,%K7 |
(111) 0x4260d1 KMOVB %K1,%K6 |
(111) 0x4260d5 VMULPD %ZMM11,%ZMM16,%ZMM16 |
(111) 0x4260db VSUBPD %ZMM5,%ZMM13,%ZMM13 |
(111) 0x4260e1 VSUBPD %ZMM15,%ZMM6,%ZMM11 |
(111) 0x4260e7 VSUBPD %ZMM4,%ZMM5,%ZMM15 |
(111) 0x4260ed VSUBPD %ZMM6,%ZMM12,%ZMM12 |
(111) 0x4260f3 VSUBPD %ZMM7,%ZMM9,%ZMM7 |
(111) 0x4260f9 VSUBPD %ZMM10,%ZMM9,%ZMM10 |
(111) 0x4260ff VCMPPD $0x2,%ZMM8,%ZMM13,%K2 |
(111) 0x426106 VPADDD 0x140(%RSP),%ZMM27,%ZMM27 |
(111) 0x42610e VMULPD %ZMM13,%ZMM15,%ZMM28 |
(111) 0x426114 VANDPD %ZMM3,%ZMM13,%ZMM13 |
(111) 0x42611a VANDPD %ZMM3,%ZMM15,%ZMM4 |
(111) 0x426120 VMULPD %ZMM13,%ZMM20,%ZMM19 |
(111) 0x426126 VCMPPD $0x2,%ZMM8,%ZMM12,%K3 |
(111) 0x42612d VMULPD %ZMM12,%ZMM11,%ZMM30 |
(111) 0x426133 VANDPD %ZMM3,%ZMM12,%ZMM12 |
(111) 0x426139 VANDPD %ZMM3,%ZMM11,%ZMM11 |
(111) 0x42613f VMULPD %ZMM12,%ZMM2,%ZMM15 |
(111) 0x426145 VBLENDMPD %ZMM24,%ZMM9,%ZMM31{%K2} |
(111) 0x42614b KMOVB %K1,%K2 |
(111) 0x42614f VBLENDMPD %ZMM24,%ZMM9,%ZMM29{%K3} |
(111) 0x426155 KMOVB %K1,%K3 |
(111) 0x426159 VCMPPD $0xe,%ZMM8,%ZMM28,%K5 |
(111) 0x426160 VFMADD231PD %ZMM4,%ZMM1,%ZMM19 |
(111) 0x426166 VMINPD %ZMM13,%ZMM4,%ZMM4 |
(111) 0x42616c VCMPPD $0xe,%ZMM8,%ZMM30,%K4 |
(111) 0x426173 VFMADD231PD %ZMM11,%ZMM16,%ZMM15 |
(111) 0x426179 VMINPD %ZMM12,%ZMM11,%ZMM11 |
(111) 0x42617f VMULPD %ZMM23,%ZMM19,%ZMM19 |
(111) 0x426185 VMULPD %ZMM23,%ZMM15,%ZMM15 |
(111) 0x42618b VMINPD %ZMM4,%ZMM19,%ZMM13 |
(111) 0x426191 VMULPD %ZMM31,%ZMM7,%ZMM4 |
(111) 0x426197 VMULPD %ZMM29,%ZMM10,%ZMM7 |
(111) 0x42619d VMINPD %ZMM11,%ZMM15,%ZMM12 |
(111) 0x4261a3 VFMADD231PD %ZMM13,%ZMM4,%ZMM5{%K5} |
(111) 0x4261a9 KMOVB %K1,%K5 |
(111) 0x4261ad VFMADD231PD %ZMM12,%ZMM7,%ZMM6{%K4} |
(111) 0x4261b3 KMOVB %K1,%K4 |
(111) 0x4261b7 VMULPD 0x40(%R12,%RAX,1),%ZMM5,%ZMM5 |
(111) 0x4261bf VMULPD (%R12,%RAX,1),%ZMM6,%ZMM6 |
(111) 0x4261c6 VMOVUPD %ZMM5,0x40(%R11,%RAX,1) |
(111) 0x4261ce VMOVUPD %ZMM6,(%R11,%RAX,1) |
(111) 0x4261d5 VGATHERQPD (%RCX,%ZMM18,8),%ZMM10{%K4} |
(111) 0x4261dc VGATHERQPD (%RSI,%ZMM18,8),%ZMM15{%K3} |
(111) 0x4261e3 VGATHERQPD (%RSI,%ZMM17,8),%ZMM13{%K2} |
(111) 0x4261ea KMOVB %K1,%K3 |
(111) 0x4261ee KMOVB %K1,%K2 |
(111) 0x4261f2 VGATHERQPD (%RCX,%ZMM17,8),%ZMM7{%K5} |
(111) 0x4261f9 VGATHERQPD (%RDX,%ZMM18,8),%ZMM12{%K6} |
(111) 0x426200 VGATHERQPD (%RDX,%ZMM17,8),%ZMM11{%K7} |
(111) 0x426207 KMOVB %K1,%K6 |
(111) 0x42620b KMOVB %K1,%K7 |
(111) 0x42620f VMULPD %ZMM15,%ZMM12,%ZMM12 |
(111) 0x426215 VGATHERQPD (%RCX,%ZMM14,8),%ZMM4{%K6} |
(111) 0x42621c VGATHERQPD (%RCX,%ZMM0,8),%ZMM18{%K7} |
(111) 0x426223 VGATHERQPD (%RCX,%ZMM22,8),%ZMM29{%K3} |
(111) 0x42622a VGATHERQPD (%RCX,%ZMM21,8),%ZMM14{%K2} |
(111) 0x426231 VSUBPD %ZMM4,%ZMM10,%ZMM0 |
(111) 0x426237 VSUBPD %ZMM7,%ZMM14,%ZMM4 |
(111) 0x42623d VSUBPD %ZMM10,%ZMM29,%ZMM31 |
(111) 0x426243 VMULPD %ZMM13,%ZMM11,%ZMM11 |
(111) 0x426249 VSUBPD %ZMM18,%ZMM7,%ZMM17 |
(111) 0x42624f VANDPD %ZMM3,%ZMM4,%ZMM28 |
(111) 0x426255 VCMPPD $0x2,%ZMM8,%ZMM4,%K5 |
(111) 0x42625c VANDPD %ZMM3,%ZMM31,%ZMM19 |
(111) 0x426262 VMULPD %ZMM28,%ZMM20,%ZMM20 |
(111) 0x426268 VCMPPD $0x2,%ZMM8,%ZMM31,%K4 |
(111) 0x42626f VANDPD %ZMM3,%ZMM17,%ZMM18 |
(111) 0x426275 VMULPD %ZMM19,%ZMM2,%ZMM2 |
(111) 0x42627b VMULPD %ZMM31,%ZMM0,%ZMM21 |
(111) 0x426281 VANDPD %ZMM3,%ZMM0,%ZMM0 |
(111) 0x426287 VBLENDMPD %ZMM24,%ZMM9,%ZMM22{%K5} |
(111) 0x42628d VMULPD %ZMM4,%ZMM17,%ZMM14 |
(111) 0x426293 VBLENDMPD %ZMM24,%ZMM9,%ZMM30{%K4} |
(111) 0x426299 VFMADD132PD %ZMM18,%ZMM20,%ZMM1 |
(111) 0x42629f VFMADD231PD %ZMM0,%ZMM16,%ZMM2 |
(111) 0x4262a5 VMINPD %ZMM28,%ZMM18,%ZMM16 |
(111) 0x4262ab VMINPD %ZMM19,%ZMM0,%ZMM0 |
(111) 0x4262b1 VCMPPD $0xe,%ZMM8,%ZMM21,%K6 |
(111) 0x4262b8 VCMPPD $0xe,%ZMM8,%ZMM14,%K7 |
(111) 0x4262bf VANDPD %ZMM3,%ZMM5,%ZMM14 |
(111) 0x4262c5 VDIVPD %ZMM11,%ZMM14,%ZMM13 |
(111) 0x4262cb VMULPD %ZMM23,%ZMM1,%ZMM4 |
(111) 0x4262d1 VMULPD %ZMM23,%ZMM2,%ZMM2 |
(111) 0x4262d7 VMINPD %ZMM16,%ZMM4,%ZMM1 |
(111) 0x4262dd VMULPD %ZMM22,%ZMM1,%ZMM4 |
(111) 0x4262e3 VMINPD %ZMM0,%ZMM2,%ZMM1 |
(111) 0x4262e9 VANDPD %ZMM3,%ZMM6,%ZMM0 |
(111) 0x4262ef VDIVPD %ZMM12,%ZMM0,%ZMM15 |
(111) 0x4262f5 VSUBPD %ZMM15,%ZMM9,%ZMM0 |
(111) 0x4262fb VMULPD %ZMM30,%ZMM1,%ZMM2 |
(111) 0x426301 VSUBPD %ZMM13,%ZMM9,%ZMM1 |
(111) 0x426307 VFMADD231PD %ZMM1,%ZMM4,%ZMM7{%K7} |
(111) 0x42630d VFMADD231PD %ZMM0,%ZMM2,%ZMM10{%K6} |
(111) 0x426313 VMULPD %ZMM5,%ZMM7,%ZMM5 |
(111) 0x426319 VMULPD %ZMM6,%ZMM10,%ZMM6 |
(111) 0x42631f VMOVUPD %ZMM5,0x40(%R10,%RAX,1) |
(111) 0x426327 VMOVUPD %ZMM6,(%R10,%RAX,1) |
(111) 0x42632e SUB $-0x80,%RAX |
(111) 0x426332 CMP %RAX,%R13 |
(111) 0x426335 JNE 425f40 |
(109) 0x42633b MOV 0x190(%RSP),%R12D |
(109) 0x426343 MOV %R12D,%ESI |
(109) 0x426346 AND $-0x10,%ESI |
(109) 0x426349 ADD %ESI,0x1a4(%RSP) |
(109) 0x426350 ADD %ESI,0x1a0(%RSP) |
(109) 0x426357 AND $0xf,%R12D |
(109) 0x42635b JE 426581 |
(109) 0x426361 MOV 0x78(%RSP),%RDX |
(109) 0x426366 MOV 0x68(%RSP),%R13 |
(109) 0x42636b VXORPD %XMM12,%XMM12,%XMM12 |
(109) 0x426370 MOV 0x70(%RSP),%RBX |
(109) 0x426375 MOV 0x188(%RSP),%R10 |
(109) 0x42637d MOVSXD 0x1a0(%RSP),%RAX |
(109) 0x426385 MOV 0x180(%RSP),%RSI |
(109) 0x42638d LEA (%R10,%R13,8),%R12 |
(109) 0x426391 LEA (%RDX,%RBX,8),%RCX |
(109) 0x426395 MOV 0x60(%RSP),%RDX |
(109) 0x42639a VMOVSD 0x3c59e(%RIP),%XMM13 |
(109) 0x4263a2 MOV %R12,0x100(%RSP) |
(109) 0x4263aa MOV 0x1a4(%RSP),%R12D |
(109) 0x4263b2 MOV %RCX,0x140(%RSP) |
(109) 0x4263ba LEA (%RSI,%RDX,8),%R13 |
(109) 0x4263be SUB %EAX,%R12D |
(109) 0x4263c1 NOPL (%RAX) |
(110) 0x4263c8 MOV 0x140(%RSP),%R11 |
(110) 0x4263d0 VMOVSD (%R11,%RAX,8),%XMM8 |
(110) 0x4263d6 VCOMISD %XMM12,%XMM8 |
(110) 0x4263db JBE 4265f8 |
(110) 0x4263e1 LEA -0x1(%RAX),%RDX |
(110) 0x4263e5 LEA -0x2(%RAX),%R11D |
(110) 0x4263e9 MOV %RAX,%RSI |
(110) 0x4263ec MOVSXD %R11D,%RCX |
(110) 0x4263ef MOV %RDX,%R10 |
(110) 0x4263f2 VMOVSD (%RDI,%RAX,8),%XMM9 |
(110) 0x4263f7 MOV 0x1a8(%RSP),%RBX |
(110) 0x4263ff VANDPD 0x3c679(%RIP),%XMM8,%XMM3 |
(110) 0x426407 VMOVSD 0x3c681(%RIP),%XMM0 |
(110) 0x42640f VDIVSD (%RDI,%R10,8),%XMM9,%XMM4 |
(110) 0x426415 LEA (%R14,%RDX,1),%R10 |
(110) 0x426419 LEA (%RBX,%RDX,1),%R11 |
(110) 0x42641d MOV 0x1b0(%RSP),%RBX |
(110) 0x426425 LEA (%R15,%R10,8),%R10 |
(110) 0x426429 VMOVSD (%R10),%XMM14 |
(110) 0x42642e LEA (%RBX,%R11,8),%R11 |
(110) 0x426432 LEA (%R14,%RCX,1),%RBX |
(110) 0x426436 VDIVSD (%R11),%XMM3,%XMM2 |
(110) 0x42643b VADDSD %XMM25,%XMM2,%XMM15 |
(110) 0x426441 VMOVSD %XMM25,%XMM25,%XMM3 |
(110) 0x426447 VSUBSD %XMM2,%XMM0,%XMM5 |
(110) 0x42644b VSUBSD (%R15,%RBX,8),%XMM14,%XMM11 |
(110) 0x426451 LEA (%R14,%RSI,1),%RBX |
(110) 0x426455 VMOVSD (%R15,%RBX,8),%XMM10 |
(110) 0x42645b VMULSD %XMM15,%XMM4,%XMM7 |
(110) 0x426460 VSUBSD %XMM14,%XMM10,%XMM1 |
(110) 0x426465 VMULSD %XMM1,%XMM11,%XMM4 |
(110) 0x426469 VCMPSD $0x6,%XMM12,%XMM1,%XMM6 |
(110) 0x42646f VBLENDVPD %XMM6,%XMM3,%XMM13,%XMM9 |
(110) 0x426475 VCOMISD %XMM12,%XMM4 |
(110) 0x42647a JBE 4264b6 |
(110) 0x42647c VANDPD 0x3c5fc(%RIP),%XMM1,%XMM0 |
(110) 0x426484 VANDPD 0x3c5f4(%RIP),%XMM11,%XMM15 |
(110) 0x42648c VSUBSD %XMM2,%XMM25,%XMM2 |
(110) 0x426492 VMULSD %XMM0,%XMM5,%XMM11 |
(110) 0x426496 VMINSD %XMM15,%XMM0,%XMM1 |
(110) 0x42649b VFMADD231SD %XMM15,%XMM7,%XMM11 |
(110) 0x4264a0 VMULSD 0x3c4d8(%RIP),%XMM11,%XMM10 |
(110) 0x4264a8 VMINSD %XMM10,%XMM1,%XMM6 |
(110) 0x4264ad VMULSD %XMM2,%XMM6,%XMM3 |
(110) 0x4264b1 VFMADD231SD %XMM9,%XMM3,%XMM14 |
(110) 0x4264b6 VMULSD %XMM8,%XMM14,%XMM15 |
(110) 0x4264bb MOV 0x100(%RSP),%RBX |
(110) 0x4264c3 ADD %R9,%RDX |
(110) 0x4264c6 ADD %R9,%RSI |
(110) 0x4264c9 ADD %R9,%RCX |
(110) 0x4264cc VMOVSD %XMM25,%XMM25,%XMM14 |
(110) 0x4264d2 VMOVSD %XMM15,(%RBX,%RAX,8) |
(110) 0x4264d7 VMOVSD (%R8,%RDX,8),%XMM6 |
(110) 0x4264dd VMOVSD (%R8,%RSI,8),%XMM8 |
(110) 0x4264e3 VMOVSD (%R10),%XMM11 |
(110) 0x4264e8 VMOVSD (%R11),%XMM1 |
(110) 0x4264ed VSUBSD %XMM6,%XMM8,%XMM0 |
(110) 0x4264f1 VSUBSD (%R8,%RCX,8),%XMM6,%XMM4 |
(110) 0x4264f7 VMULSD %XMM4,%XMM0,%XMM10 |
(110) 0x4264fb VCMPSD $0x6,%XMM12,%XMM0,%XMM9 |
(110) 0x426501 VBLENDVPD %XMM9,%XMM14,%XMM13,%XMM3 |
(110) 0x426507 VCOMISD %XMM12,%XMM10 |
(110) 0x42650c JBE 426620 |
(110) 0x426512 VMULSD %XMM11,%XMM1,%XMM11 |
(110) 0x426517 VANDPD 0x3c561(%RIP),%XMM15,%XMM2 |
(110) 0x42651f VANDPD 0x3c559(%RIP),%XMM0,%XMM0 |
(110) 0x426527 VANDPD 0x3c551(%RIP),%XMM4,%XMM8 |
(110) 0x42652f MOV 0x1bc(%RSP),%R11D |
(110) 0x426537 VMULSD %XMM0,%XMM5,%XMM5 |
(110) 0x42653b VMINSD %XMM8,%XMM0,%XMM14 |
(110) 0x426540 VDIVSD %XMM11,%XMM2,%XMM1 |
(110) 0x426545 VFMADD231SD %XMM8,%XMM7,%XMM5 |
(110) 0x42654a VMULSD 0x3c42e(%RIP),%XMM5,%XMM4 |
(110) 0x426552 VMINSD %XMM4,%XMM14,%XMM10 |
(110) 0x426556 VSUBSD %XMM1,%XMM25,%XMM9 |
(110) 0x42655c VMULSD %XMM10,%XMM9,%XMM7 |
(110) 0x426561 VFMADD132SD %XMM3,%XMM6,%XMM7 |
(110) 0x426566 VMULSD %XMM7,%XMM15,%XMM15 |
(110) 0x42656a VMOVSD %XMM15,(%R13,%RAX,8) |
(110) 0x426571 INC %RAX |
(110) 0x426574 LEA (%R12,%RAX,1),%EDX |
(110) 0x426578 CMP %R11D,%EDX |
(110) 0x42657b JB 4263c8 |
(109) 0x426581 MOV 0x1bc(%RSP),%EAX |
(109) 0x426588 MOV %EAX,0x1a4(%RSP) |
(109) 0x42658f INCQ 0x198(%RSP) |
(109) 0x426597 MOV 0x48(%RSP),%R8D |
(109) 0x42659c MOV 0x40(%RSP),%R9D |
(109) 0x4265a1 SUB %R9D,%R8D |
(109) 0x4265a4 MOV 0x198(%RSP),%RDI |
(109) 0x4265ac ADD %EDI,%R8D |
(109) 0x4265af CMP %R8D,0x54(%RSP) |
(109) 0x4265b4 JLE 4265e0 |
(109) 0x4265b6 MOV 0x4c(%RSP),%ECX |
(109) 0x4265ba MOV 0x1a4(%RSP),%R12D |
(109) 0x4265c2 MOV 0x50(%RSP),%R13D |
(109) 0x4265c7 MOV 0x1b8(%RSP),%EDX |
(109) 0x4265ce SUB %R12D,%ECX |
(109) 0x4265d1 MOV %R13D,0x1a0(%RSP) |
(109) 0x4265d9 JMP 425d78 |
0x4265de XCHG %AX,%AX |
0x4265e0 VZEROUPPER |
0x4265e3 LEA -0x28(%RBP),%RSP |
0x4265e7 POP %RBX |
0x4265e8 POP %R12 |
0x4265ea POP %R13 |
0x4265ec POP %R14 |
0x4265ee POP %R15 |
0x4265f0 POP %RBP |
0x4265f1 RET |
0x4265f2 NOPW (%RAX,%RAX,1) |
(110) 0x4265f8 MOV 0x194(%RSP),%ECX |
(110) 0x4265ff LEA 0x1(%RAX),%EBX |
(110) 0x426602 LEA -0x1(%RAX),%RSI |
(110) 0x426606 MOV %RAX,%RDX |
(110) 0x426609 CMP %ECX,%EBX |
(110) 0x42660b CMOVG %ECX,%EBX |
(110) 0x42660e MOVSXD %EBX,%R10 |
(110) 0x426611 MOV %R10,%RCX |
(110) 0x426614 JMP 4263f2 |
0x426619 NOPL (%RAX) |
(110) 0x426620 VMULSD %XMM15,%XMM6,%XMM7 |
(110) 0x426625 MOV 0x1bc(%RSP),%ECX |
(110) 0x42662c VMOVSD %XMM7,(%R13,%RAX,8) |
(110) 0x426633 INC %RAX |
(110) 0x426636 LEA (%R12,%RAX,1),%ESI |
(110) 0x42663a CMP %ECX,%ESI |
(110) 0x42663c JB 4263c8 |
(109) 0x426642 JMP 426581 |
0x426647 INC %ECX |
0x426649 XOR %EDX,%EDX |
0x42664b JMP 425cb1 |
Path / |
Source file and lines | advec_cell.cpp:65-110 |
Module | exec |
nb instructions | 95 |
nb uops | 106 |
loop length | 386 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 20 |
micro-operation queue | 17.67 cycles |
front end | 17.67 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.70 | 8.00 | 7.67 | 7.67 | 13.50 | 6.80 | 6.70 | 13.50 | 13.50 | 13.50 | 6.80 | 7.67 |
cycles | 6.70 | 12.13 | 7.67 | 7.67 | 13.50 | 6.80 | 6.70 | 13.50 | 13.50 | 13.50 | 6.80 | 7.67 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 16.72-16.86 |
Stall cycles | 0.00 |
Front-end | 17.67 |
Dispatch | 13.50 |
DIV/SQRT | 12.00 |
Overall L1 | 17.67 |
all | 5% |
load | 0% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 5% |
load | 0% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 11% |
all | 11% |
load | 9% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 11% |
load | 9% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x44(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x3c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RDI),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDX,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4265e3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0+0x9c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RBX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R13D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R15D,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4265e3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0+0x9c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EDI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,0x1bc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1b8(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R14D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIVL 0x1bc(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 426647 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0+0xa27> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RSI,%RDX,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%RAX,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x1a4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8D,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4265e3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0+0x9c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX),%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DIVL 0x1b8(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV (%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EBX,0x194(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x3cd93(%RIP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVD %EBX,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %XMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA32 %ZMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R13,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R15D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%R12),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R10D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R9D,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x198(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 425cb1 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0+0x91> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Source file and lines | advec_cell.cpp:65-110 |
Module | exec |
nb instructions | 95 |
nb uops | 106 |
loop length | 386 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 1 |
nb stack references | 20 |
micro-operation queue | 17.67 cycles |
front end | 17.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.70 | 8.00 | 7.67 | 7.67 | 13.50 | 6.80 | 6.70 | 13.50 | 13.50 | 13.50 | 6.80 | 7.67 |
cycles | 6.70 | 12.13 | 7.67 | 7.67 | 13.50 | 6.80 | 6.70 | 13.50 | 13.50 | 13.50 | 6.80 | 7.67 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 16.72-16.86 |
Stall cycles | 0.00 |
Front-end | 17.67 |
Dispatch | 13.50 |
DIV/SQRT | 12.00 |
Overall L1 | 17.67 |
all | 5% |
load | 0% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 5% |
load | 0% |
store | 5% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 11% |
all | 11% |
load | 9% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 11% |
load | 9% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x44(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x3c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RDI),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDX,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4265e3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0+0x9c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RBX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R13D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R15D,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4265e3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0+0x9c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EDI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,0x1bc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1b8(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R14D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIVL 0x1bc(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 426647 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0+0xa27> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RSI,%RDX,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%RAX,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x1a4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8D,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4265e3 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0+0x9c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX),%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
DIVL 0x1b8(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV (%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EBX,0x194(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x3cd93(%RIP),%XMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVD %EBX,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %XMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA32 %ZMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R13,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R15D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%R12),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R10D,%R8 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R9D,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x198(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 425cb1 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0+0x91> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.2.lto_priv.0– | 3.47 | 2.59 |
▼Loop 109 - advec_cell.cpp:71-110 - exec– | 0 | 0 |
○Loop 111 - advec_cell.cpp:71-110 - exec | 3.46 | 2.58 |
○Loop 110 - advec_cell.cpp:71-110 - exec | 0 | 0 |