Function: clover_pack_message_right(global_variables&, int, int, int, int, clover::Buffer2D<double>& ... | Module: exec | Source: pack_kernel.cpp:120-124 [...] | Coverage: 0.03% |
---|
Function: clover_pack_message_right(global_variables&, int, int, int, int, clover::Buffer2D<double>& ... | Module: exec | Source: pack_kernel.cpp:120-124 [...] | Coverage: 0.03% |
---|
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/pack_kernel.cpp: 120 - 124 |
-------------------------------------------------------------------------------- |
120: #pragma omp parallel for simd |
121: for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { |
122: for (int j = 0; j < depth; ++j) { |
123: int index = buffer_offset + j + k * depth; |
124: right_snd[index] = field(x_max + 1 - j, k); |
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42f9e0 PUSH %RBP |
0x42f9e1 MOV %RSP,%RBP |
0x42f9e4 PUSH %R15 |
0x42f9e6 PUSH %R14 |
0x42f9e8 PUSH %R13 |
0x42f9ea PUSH %R12 |
0x42f9ec MOV %RDI,%R12 |
0x42f9ef PUSH %RBX |
0x42f9f0 SUB $0x8,%RSP |
0x42f9f4 MOV 0x1c(%RDI),%EBX |
0x42f9f7 MOV 0x14(%RDI),%R8D |
0x42f9fb SUB %EBX,%R8D |
0x42f9fe LEA 0x1(%R8),%R14D |
0x42fa02 CALL 4046c0 <omp_get_num_threads@plt> |
0x42fa07 MOV %EAX,%R13D |
0x42fa0a CALL 4045b0 <omp_get_thread_num@plt> |
0x42fa0f MOV %EAX,%ECX |
0x42fa11 MOV 0x18(%R12),%EAX |
0x42fa16 ADD 0x24(%R12),%EAX |
0x42fa1b LEA 0x2(%RBX,%RAX,1),%EAX |
0x42fa1f SUB %R14D,%EAX |
0x42fa22 CLTD |
0x42fa23 IDIV %R13D |
0x42fa26 CMP %EDX,%ECX |
0x42fa28 JL 42fbee |
0x42fa2e IMUL %EAX,%ECX |
0x42fa31 ADD %ECX,%EDX |
0x42fa33 ADD %EDX,%EAX |
0x42fa35 CMP %EAX,%EDX |
0x42fa37 JGE 42fbc4 |
0x42fa3d ADD %R14D,%EDX |
0x42fa40 MOV 0x10(%R12),%EDI |
0x42fa45 MOV %EBX,%R9D |
0x42fa48 MOVSXD 0x20(%R12),%R13 |
0x42fa4d IMUL %EDX,%R9D |
0x42fa51 MOV (%R12),%R10 |
0x42fa55 MOV 0x8(%R12),%R15 |
0x42fa5a MOVSXD %EBX,%R12 |
0x42fa5d LEA 0x1(%RDI),%R11D |
0x42fa61 LEA (%R14,%RAX,1),%R8D |
0x42fa65 MOVSXD %EDX,%RSI |
0x42fa68 ADD %R13,%R12 |
0x42fa6b MOVSXD %R11D,%R11 |
0x42fa6e XOR %R14D,%R14D |
0x42fa71 NOPL (%RAX) |
(201) 0x42fa78 TEST %EBX,%EBX |
(201) 0x42fa7a JLE 42fbb1 |
(201) 0x42fa80 MOV (%R10),%RCX |
(201) 0x42fa83 MOV 0x10(%R10),%RAX |
(201) 0x42fa87 MOV 0x8(%R15),%RDI |
(201) 0x42fa8b IMUL %RSI,%RCX |
(201) 0x42fa8f ADD %R11,%RCX |
(201) 0x42fa92 LEA (%RAX,%RCX,8),%RDX |
(201) 0x42fa96 MOVSXD %R9D,%RCX |
(201) 0x42fa99 LEA (%RCX,%R13,1),%RAX |
(201) 0x42fa9d ADD %R12,%RCX |
(201) 0x42faa0 LEA (%RDI,%RAX,8),%RAX |
(201) 0x42faa4 LEA (%RDI,%RCX,8),%RDI |
(201) 0x42faa8 MOV %RDI,%RCX |
(201) 0x42faab SUB %RAX,%RCX |
(201) 0x42faae SUB $0x8,%RCX |
(201) 0x42fab2 SHR $0x3,%RCX |
(201) 0x42fab6 INC %RCX |
(201) 0x42fab9 AND $0x7,%ECX |
(201) 0x42fabc JE 42fb55 |
(201) 0x42fac2 CMP $0x1,%RCX |
(201) 0x42fac6 JE 42fb3f |
(201) 0x42fac8 CMP $0x2,%RCX |
(201) 0x42facc JE 42fb2e |
(201) 0x42face CMP $0x3,%RCX |
(201) 0x42fad2 JE 42fb1d |
(201) 0x42fad4 CMP $0x4,%RCX |
(201) 0x42fad8 JE 42fb0c |
(201) 0x42fada CMP $0x5,%RCX |
(201) 0x42fade JE 42fafb |
(201) 0x42fae0 CMP $0x6,%RCX |
(201) 0x42fae4 JNE 42fbd8 |
(201) 0x42faea VMOVSD (%RDX),%XMM1 |
(201) 0x42faee ADD $0x8,%RAX |
(201) 0x42faf2 SUB $0x8,%RDX |
(201) 0x42faf6 VMOVSD %XMM1,-0x8(%RAX) |
(201) 0x42fafb VMOVSD (%RDX),%XMM2 |
(201) 0x42faff ADD $0x8,%RAX |
(201) 0x42fb03 SUB $0x8,%RDX |
(201) 0x42fb07 VMOVSD %XMM2,-0x8(%RAX) |
(201) 0x42fb0c VMOVSD (%RDX),%XMM3 |
(201) 0x42fb10 ADD $0x8,%RAX |
(201) 0x42fb14 SUB $0x8,%RDX |
(201) 0x42fb18 VMOVSD %XMM3,-0x8(%RAX) |
(201) 0x42fb1d VMOVSD (%RDX),%XMM4 |
(201) 0x42fb21 ADD $0x8,%RAX |
(201) 0x42fb25 SUB $0x8,%RDX |
(201) 0x42fb29 VMOVSD %XMM4,-0x8(%RAX) |
(201) 0x42fb2e VMOVSD (%RDX),%XMM5 |
(201) 0x42fb32 ADD $0x8,%RAX |
(201) 0x42fb36 SUB $0x8,%RDX |
(201) 0x42fb3a VMOVSD %XMM5,-0x8(%RAX) |
(201) 0x42fb3f VMOVSD (%RDX),%XMM6 |
(201) 0x42fb43 ADD $0x8,%RAX |
(201) 0x42fb47 SUB $0x8,%RDX |
(201) 0x42fb4b VMOVSD %XMM6,-0x8(%RAX) |
(201) 0x42fb50 CMP %RDI,%RAX |
(201) 0x42fb53 JE 42fbb1 |
(202) 0x42fb55 VMOVSD (%RDX),%XMM7 |
(202) 0x42fb59 ADD $0x40,%RAX |
(202) 0x42fb5d SUB $0x40,%RDX |
(202) 0x42fb61 VMOVSD %XMM7,-0x40(%RAX) |
(202) 0x42fb66 VMOVSD 0x38(%RDX),%XMM8 |
(202) 0x42fb6b VMOVSD %XMM8,-0x38(%RAX) |
(202) 0x42fb70 VMOVSD 0x30(%RDX),%XMM9 |
(202) 0x42fb75 VMOVSD %XMM9,-0x30(%RAX) |
(202) 0x42fb7a VMOVSD 0x28(%RDX),%XMM10 |
(202) 0x42fb7f VMOVSD %XMM10,-0x28(%RAX) |
(202) 0x42fb84 VMOVSD 0x20(%RDX),%XMM11 |
(202) 0x42fb89 VMOVSD %XMM11,-0x20(%RAX) |
(202) 0x42fb8e VMOVSD 0x18(%RDX),%XMM12 |
(202) 0x42fb93 VMOVSD %XMM12,-0x18(%RAX) |
(202) 0x42fb98 VMOVSD 0x10(%RDX),%XMM13 |
(202) 0x42fb9d VMOVSD %XMM13,-0x10(%RAX) |
(202) 0x42fba2 VMOVSD 0x8(%RDX),%XMM14 |
(202) 0x42fba7 VMOVSD %XMM14,-0x8(%RAX) |
(202) 0x42fbac CMP %RDI,%RAX |
(202) 0x42fbaf JNE 42fb55 |
(201) 0x42fbb1 INC %RSI |
(201) 0x42fbb4 ADD %EBX,%R9D |
(201) 0x42fbb7 LEA (%R14,%RSI,1),%EDX |
(201) 0x42fbbb CMP %EDX,%R8D |
(201) 0x42fbbe JG 42fa78 |
0x42fbc4 ADD $0x8,%RSP |
0x42fbc8 POP %RBX |
0x42fbc9 POP %R12 |
0x42fbcb POP %R13 |
0x42fbcd POP %R14 |
0x42fbcf POP %R15 |
0x42fbd1 POP %RBP |
0x42fbd2 RET |
0x42fbd3 NOPL (%RAX,%RAX,1) |
(201) 0x42fbd8 VMOVSD (%RDX),%XMM0 |
(201) 0x42fbdc ADD $0x8,%RAX |
(201) 0x42fbe0 SUB $0x8,%RDX |
(201) 0x42fbe4 VMOVSD %XMM0,-0x8(%RAX) |
(201) 0x42fbe9 JMP 42faea |
0x42fbee INC %EAX |
0x42fbf0 XOR %EDX,%EDX |
0x42fbf2 JMP 42fa2e |
0x42fbf7 NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | pack_kernel.cpp:120-124 |
Module | exec |
nb instructions | 58 |
nb uops | 63 |
loop length | 190 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 10.50 cycles |
front end | 10.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 6.00 | 5.00 | 5.00 | 4.00 | 5.07 | 5.00 | 4.00 | 4.00 | 4.00 | 4.93 | 5.00 |
cycles | 5.00 | 9.60 | 5.00 | 5.00 | 4.00 | 5.07 | 5.00 | 4.00 | 4.00 | 4.00 | 4.93 | 5.00 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 10.20-10.25 |
Stall cycles | 0.00 |
Front-end | 10.50 |
Dispatch | 9.60 |
DIV/SQRT | 6.00 |
Overall L1 | 10.50 |
Vectorization ratios |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
Vector efficiency ratios |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%R8),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x24(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 42fbee <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x20e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42fbc4 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x1e4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EBX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD 0x20(%R12),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EDX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA 0x1(%RDI),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%R14,%RAX,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOVSXD %EDX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
ADD %R13,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R11D,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42fa2e <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x4e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | pack_kernel.cpp:120-124 |
Module | exec |
nb instructions | 58 |
nb uops | 63 |
loop length | 190 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 10.50 cycles |
front end | 10.50 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 6.00 | 5.00 | 5.00 | 4.00 | 5.07 | 5.00 | 4.00 | 4.00 | 4.00 | 4.93 | 5.00 |
cycles | 5.00 | 9.60 | 5.00 | 5.00 | 4.00 | 5.07 | 5.00 | 4.00 | 4.00 | 4.00 | 4.93 | 5.00 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 10.20-10.25 |
Stall cycles | 0.00 |
Front-end | 10.50 |
Dispatch | 9.60 |
DIV/SQRT | 6.00 |
Overall L1 | 10.50 |
Vectorization ratios |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
Vector efficiency ratios |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x14(%RDI),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%R8),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD 0x24(%R12),%EAX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x2(%RBX,%RAX,1),%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 42fbee <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x20e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %ECX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %EAX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42fbc4 <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x1e4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%R12),%EDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EBX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD 0x20(%R12),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EDX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EBX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA 0x1(%RDI),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%R14,%RAX,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOVSXD %EDX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
ADD %R13,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %R11D,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42fa2e <_Z25clover_pack_message_rightR16global_variablesiiiiRN6clover8Buffer2DIdEERNS1_8Buffer1DIdEEiiiiiii._omp_fn.0.lto_priv.0+0x4e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼clover_pack_message_right(global_variables&, int, int, int, int, clover::Buffer2D | 0.03 | 0.01 |
▼Loop 201 - pack_kernel.cpp:122-124 - exec– | 0.03 | 0.02 |
○Loop 202 - pack_kernel.cpp:122-124 - exec | 0 | 0 |