Loop Id: 572 | Module: libkripke.so | Source: forall.hpp:59-59 [...] | Coverage: 0.01% |
---|
Loop Id: 572 | Module: libkripke.so | Source: forall.hpp:59-59 [...] | Coverage: 0.01% |
---|
0x4bbf0 MOV -0x100(%RBP),%RCX |
0x4bbf7 MOV -0xc8(%RBP),%R13 |
0x4bbfe LEA 0x1(%RCX),%RSI |
0x4bc02 CMP -0xf8(%RBP),%RCX |
0x4bc09 JGE 4bf20 |
0x4bc0f MOV %RSI,%RAX |
0x4bc12 CQTO |
0x4bc14 MOV %RSI,-0x100(%RBP) |
0x4bc1b XOR %R14D,%R14D |
0x4bc1e IDIV %R13 |
0x4bc21 MOV -0x50(%RBP),%RCX |
0x4bc25 MOV -0x38(%RBP),%R8 |
0x4bc29 ADD -0xe8(%RBP),%RDX |
0x4bc30 MOV %RDX,%RSI |
0x4bc33 IMUL -0xd8(%RBP),%RSI |
0x4bc3b MOV %RDX,%R10 |
0x4bc3e IMUL -0x70(%RBP),%R10 |
0x4bc43 IMUL -0xa0(%RBP),%RDX |
0x4bc4b ADD -0xa8(%RBP),%RDX |
0x4bc52 ADD -0xf0(%RBP),%RAX |
0x4bc59 ADD %RSI,%RCX |
0x4bc5c ADD -0x48(%RBP),%RSI |
0x4bc60 MOV %RAX,%R9 |
0x4bc63 IMUL -0xe0(%RBP),%RAX |
0x4bc6b IMUL -0xd0(%RBP),%R9 |
0x4bc73 ADD %RAX,%RSI |
0x4bc76 ADD %RAX,%RCX |
0x4bc79 MOV %R9,-0x138(%RBP) |
0x4bc80 LEA (%R8,%RSI,8),%RAX |
0x4bc84 MOV -0x30(%RBP),%RSI |
0x4bc88 LEA (%R8,%RCX,8),%R11 |
0x4bc8c MOV -0xc0(%RBP),%R8 |
0x4bc93 MOV %RCX,-0x80(%RBP) |
0x4bc97 MOV %RAX,-0x120(%RBP) |
0x4bc9e MOV %R11,-0x128(%RBP) |
0x4bca5 LEA (%RSI,%R9,1),%RAX |
0x4bca9 LEA (%R8,%R10,1),%R11 |
0x4bcad ADD -0xb8(%RBP),%R10 |
0x4bcb4 LEA 0x8(,%RAX,8),%RSI |
0x4bcbc SAL $0x3,%RAX |
0x4bcc0 MOV %RAX,-0x118(%RBP) |
0x4bcc7 LEA (,%R11,8),%RAX |
0x4bccf MOV %RSI,-0x110(%RBP) |
0x4bcd6 MOV -0xb0(%RBP),%RSI |
0x4bcdd MOV %RAX,-0x108(%RBP) |
0x4bce4 SAL $0x3,%R10 |
0x4bce8 MOV %R10,-0x130(%RBP) |
0x4bcef LEA (%RSI,%RCX,8),%RAX |
0x4bcf3 JMP 4bd1b |
(573) 0x4bd00 ADD -0x90(%RBP),%RDX |
(573) 0x4bd07 ADD -0x88(%RBP),%R11 |
(573) 0x4bd0e INC %R14 |
(573) 0x4bd11 CMP -0x58(%RBP),%R14 |
(573) 0x4bd15 JE 4bbf0 |
(573) 0x4bd1b MOV -0x30(%RBP),%RSI |
(573) 0x4bd1f LEA (%R14,%RSI,1),%R13 |
(573) 0x4bd23 ADD -0x138(%RBP),%R13 |
(573) 0x4bd2a CMP $0x4,%RDI |
(573) 0x4bd2e JAE 4bd90 |
(573) 0x4bd30 XOR %R10D,%R10D |
(573) 0x4bd33 MOV -0x38(%RBP),%R9 |
(573) 0x4bd37 LEA (%RCX,%R10,1),%RSI |
(573) 0x4bd3b MOV %RDI,%R8 |
(573) 0x4bd3e SUB %R10,%R8 |
(573) 0x4bd41 ADD %R11,%R10 |
(573) 0x4bd44 LEA (%R9,%RSI,8),%R9 |
(573) 0x4bd48 MOV -0x40(%RBP),%RSI |
(573) 0x4bd4c LEA (%RSI,%R10,8),%R10 |
(573) 0x4bd50 XOR %ESI,%ESI |
(573) 0x4bd52 NOPW %CS:(%RAX,%RAX,1) |
(571) 0x4bd60 VMOVSD (%R12,%R13,8),%XMM0 |
(571) 0x4bd66 VMOVSD (%R10,%RSI,8),%XMM1 |
(571) 0x4bd6c VFMADD213SD (%R9,%RSI,8),%XMM0,%XMM1 |
(571) 0x4bd72 VMOVSD %XMM1,(%R9,%RSI,8) |
(571) 0x4bd78 INC %RSI |
(571) 0x4bd7b CMP %RSI,%R8 |
(571) 0x4bd7e JNE 4bd60 |
(573) 0x4bd80 JMP 4bd00 |
(573) 0x4bd90 MOV -0x90(%RBP),%R8 |
(573) 0x4bd97 MOV -0x130(%RBP),%RSI |
(573) 0x4bd9e MOV -0x40(%RBP),%RCX |
(573) 0x4bda2 MOV -0x110(%RBP),%R9 |
(573) 0x4bda9 MOV -0x118(%RBP),%R10 |
(573) 0x4bdb0 MOV -0x120(%RBP),%RBX |
(573) 0x4bdb7 IMUL %R14,%R8 |
(573) 0x4bdbb LEA (%R9,%R14,8),%R9 |
(573) 0x4bdbf LEA (%R10,%R14,8),%R10 |
(573) 0x4bdc3 ADD %R12,%R9 |
(573) 0x4bdc6 ADD %R12,%R10 |
(573) 0x4bdc9 ADD %R8,%RSI |
(573) 0x4bdcc ADD -0x108(%RBP),%R8 |
(573) 0x4bdd3 ADD %RCX,%RSI |
(573) 0x4bdd6 ADD %RCX,%R8 |
(573) 0x4bdd9 MOV -0x128(%RBP),%RCX |
(573) 0x4bde0 CMP %R9,%RCX |
(573) 0x4bde3 SETB %R9B |
(573) 0x4bde7 CMP %RBX,%R10 |
(573) 0x4bdea SETB %R10B |
(573) 0x4bdee CMP %RSI,%RCX |
(573) 0x4bdf1 SETB %SIL |
(573) 0x4bdf5 CMP %RBX,%R8 |
(573) 0x4bdf8 SETB %R8B |
(573) 0x4bdfc TEST %R10B,%R9B |
(573) 0x4bdff JNE 4be21 |
(573) 0x4be01 MOV -0x80(%RBP),%RCX |
(573) 0x4be05 AND %R8B,%SIL |
(573) 0x4be08 MOV $0,%R10D |
(573) 0x4be0e JNE 4bd33 |
(573) 0x4be14 CMP $0x20,%RDI |
(573) 0x4be18 JAE 4be2a |
(573) 0x4be1a XOR %ESI,%ESI |
(573) 0x4be1c JMP 4bebf |
(573) 0x4be21 MOV -0x80(%RBP),%RCX |
(573) 0x4be25 JMP 4bd30 |
(573) 0x4be2a VBROADCASTSD (%R12,%R13,8),%ZMM0 |
(573) 0x4be31 XOR %R8D,%R8D |
(573) 0x4be34 NOPW %CS:(%RAX,%RAX,1) |
(574) 0x4be40 VMOVUPD -0xc0(%RDX,%R8,8),%ZMM1 |
(574) 0x4be48 VMOVUPD -0x80(%RDX,%R8,8),%ZMM2 |
(574) 0x4be50 VMOVUPD -0x40(%RDX,%R8,8),%ZMM3 |
(574) 0x4be58 VMOVUPD (%RDX,%R8,8),%ZMM4 |
(574) 0x4be5f VFMADD213PD -0xc0(%RAX,%R8,8),%ZMM0,%ZMM1 |
(574) 0x4be67 VFMADD213PD -0x80(%RAX,%R8,8),%ZMM0,%ZMM2 |
(574) 0x4be6f VFMADD213PD -0x40(%RAX,%R8,8),%ZMM0,%ZMM3 |
(574) 0x4be77 VFMADD213PD (%RAX,%R8,8),%ZMM0,%ZMM4 |
(574) 0x4be7e VMOVUPD %ZMM1,-0xc0(%RAX,%R8,8) |
(574) 0x4be86 VMOVUPD %ZMM2,-0x80(%RAX,%R8,8) |
(574) 0x4be8e VMOVUPD %ZMM3,-0x40(%RAX,%R8,8) |
(574) 0x4be96 VMOVUPD %ZMM4,(%RAX,%R8,8) |
(574) 0x4be9d ADD $0x20,%R8 |
(574) 0x4bea1 CMP %R8,%R15 |
(574) 0x4bea4 JNE 4be40 |
(573) 0x4bea6 CMP %R15,%RDI |
(573) 0x4bea9 JE 4bd00 |
(573) 0x4beaf MOV %R15,%RSI |
(573) 0x4beb2 MOV %R15,%R10 |
(573) 0x4beb5 TEST $0x1c,%DIL |
(573) 0x4beb9 JE 4bd33 |
(573) 0x4bebf VBROADCASTSD (%R12,%R13,8),%YMM0 |
(573) 0x4bec5 MOV -0x78(%RBP),%R8 |
(573) 0x4bec9 MOV -0x38(%RBP),%R10 |
(573) 0x4becd MOV -0x40(%RBP),%RBX |
(573) 0x4bed1 LEA (%RCX,%RSI,1),%R9 |
(573) 0x4bed5 SUB %RSI,%R8 |
(573) 0x4bed8 ADD %R11,%RSI |
(573) 0x4bedb LEA (%R10,%R9,8),%R9 |
(573) 0x4bedf LEA (%RBX,%RSI,8),%R10 |
(573) 0x4bee3 XOR %ESI,%ESI |
(573) 0x4bee5 NOPW %CS:(%RAX,%RAX,1) |
(575) 0x4bef0 VMOVUPD (%R10,%RSI,8),%YMM1 |
(575) 0x4bef6 VFMADD213PD (%R9,%RSI,8),%YMM0,%YMM1 |
(575) 0x4befc VMOVUPD %YMM1,(%R9,%RSI,8) |
(575) 0x4bf02 ADD $0x4,%RSI |
(575) 0x4bf06 CMP %RSI,%R8 |
(575) 0x4bf09 JNE 4bef0 |
(573) 0x4bf0b MOV -0x78(%RBP),%RSI |
(573) 0x4bf0f MOV %RSI,%R10 |
(573) 0x4bf12 CMP %RSI,%RDI |
(573) 0x4bf15 JE 4bd00 |
(573) 0x4bf1b JMP 4bd33 |
/beegfs/hackathon/users/eoseret/qaas_runs/170-850-6313/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/openmp/kernel/Collapse.hpp: 83 - 83 |
-------------------------------------------------------------------------------- |
83: for (i0 = 0; i0 < l0; ++i0) { |
/beegfs/hackathon/users/eoseret/qaas_runs/170-850-6313/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/loop/forall.hpp: 59 - 59 |
-------------------------------------------------------------------------------- |
59: for (decltype(distance_it) i = 0; i < distance_it; ++i) { |
/beegfs/hackathon/users/eoseret/qaas_runs/170-850-6313/intel/Kripke/build/Kripke/src/Kripke/Kernel/LPlusTimes.cpp: 57 - 57 |
-------------------------------------------------------------------------------- |
57: rhs(d,g,z) += ell_plus(d, nm) * phi_out(nm, g, z); |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 4.80 - 4.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.10 - 1.33 |
Bottlenecks | P8, P9, |
Function | .omp_outlined. |
Source | Collapse.hpp:83-83,forall.hpp:59-59 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 9.00 - 12.00 |
CQA cycles if no scalar integer | 9.00 - 12.00 |
CQA cycles if FP arith vectorized | 9.00 - 12.00 |
CQA cycles if fully vectorized | 1.88 - 3.00 |
Front-end cycles | 8.17 |
DIV/SQRT cycles | 6.50 |
P0 cycles | 6.50 |
P1 cycles | 6.50 |
P2 cycles | 6.50 |
P3 cycles | 2.00 |
P4 cycles | 9.00 |
P5 cycles | 9.00 |
P6 cycles | 9.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.00 |
P11 cycles | 0.00 |
P12 cycles | 0.00 |
P13 cycles | 7.00 - 12.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 49.00 |
Nb uops | 49.00 |
Nb loads | 18.00 |
Nb stores | 9.00 |
Nb stack references | 26.00 |
FLOP/cycle | 0.00 - 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 18.00 - 24.00 |
Bytes prefetched | 0.00 |
Bytes loaded | 144.00 |
Bytes stored | 72.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.13 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 10.42 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 4.80 - 4.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.10 - 1.33 |
Bottlenecks | P8, P9, |
Function | .omp_outlined. |
Source | Collapse.hpp:83-83,forall.hpp:59-59 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 9.00 - 12.00 |
CQA cycles if no scalar integer | 9.00 - 12.00 |
CQA cycles if FP arith vectorized | 9.00 - 12.00 |
CQA cycles if fully vectorized | 1.88 - 3.00 |
Front-end cycles | 8.17 |
DIV/SQRT cycles | 6.50 |
P0 cycles | 6.50 |
P1 cycles | 6.50 |
P2 cycles | 6.50 |
P3 cycles | 2.00 |
P4 cycles | 9.00 |
P5 cycles | 9.00 |
P6 cycles | 9.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.00 |
P11 cycles | 0.00 |
P12 cycles | 0.00 |
P13 cycles | 7.00 - 12.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 49.00 |
Nb uops | 49.00 |
Nb loads | 18.00 |
Nb stores | 9.00 |
Nb stack references | 26.00 |
FLOP/cycle | 0.00 - 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 18.00 - 24.00 |
Bytes prefetched | 0.00 |
Bytes loaded | 144.00 |
Bytes stored | 72.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.13 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 10.42 |
Path / |
Function | .omp_outlined. |
Source file and lines | forall.hpp:59-59 |
Module | libkripke.so |
nb instructions | 49 |
nb uops | 49 |
loop length | 261 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 26 |
micro-operation queue | 8.17 cycles |
front end | 8.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 6.50 | 6.50 | 2.00 | 9.00 | 9.00 | 9.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.50 | 6.50 | 6.50 | 6.50 | 2.00 | 9.00 | 9.00 | 9.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 7.00-12.00 |
Front-end | 8.17 |
Dispatch | 9.00 |
DIV/SQRT | 7.00-12.00 |
Overall L1 | 9.00-12.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x100(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0xc8(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x1(%RCX),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP -0xf8(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JGE 4bf20 <.omp_outlined.+0x540> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CQTO | |||||||||||||||||
MOV %RSI,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
IDIV %R13 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD -0xe8(%RBP),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL -0xd8(%RBP),%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL -0x70(%RBP),%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL -0xa0(%RBP),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD -0xa8(%RBP),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
ADD -0xf0(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
ADD %RSI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD -0x48(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL -0xe0(%RBP),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL -0xd0(%RBP),%R9 | 1 | 0 | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RAX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x138(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%R8,%RSI,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%R8,%RCX,8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0xc0(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RAX,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R11,-0x128(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%RSI,%R9,1),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R8,%R10,1),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD -0xb8(%RBP),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LEA 0x8(,%RAX,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x118(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (,%R11,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0xb0(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SAL $0x3,%R10 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R10,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%RSI,%RCX,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JMP 4bd1b <.omp_outlined.+0x33b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Function | .omp_outlined. |
Source file and lines | forall.hpp:59-59 |
Module | libkripke.so |
nb instructions | 49 |
nb uops | 49 |
loop length | 261 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 26 |
micro-operation queue | 8.17 cycles |
front end | 8.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 6.50 | 6.50 | 2.00 | 9.00 | 9.00 | 9.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 6.50 | 6.50 | 6.50 | 6.50 | 2.00 | 9.00 | 9.00 | 9.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 7.00-12.00 |
Front-end | 8.17 |
Dispatch | 9.00 |
DIV/SQRT | 7.00-12.00 |
Overall L1 | 9.00-12.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 10% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x100(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0xc8(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x1(%RCX),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP -0xf8(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JGE 4bf20 <.omp_outlined.+0x540> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CQTO | |||||||||||||||||
MOV %RSI,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
IDIV %R13 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD -0xe8(%RBP),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL -0xd8(%RBP),%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL -0x70(%RBP),%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL -0xa0(%RBP),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD -0xa8(%RBP),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
ADD -0xf0(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
ADD %RSI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD -0x48(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL -0xe0(%RBP),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL -0xd0(%RBP),%R9 | 1 | 0 | 1 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RAX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %RAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x138(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%R8,%RSI,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA (%R8,%RCX,8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0xc0(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RCX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RAX,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R11,-0x128(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%RSI,%R9,1),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%R8,%R10,1),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD -0xb8(%RBP),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LEA 0x8(,%RAX,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x3,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x118(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (,%R11,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0xb0(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SAL $0x3,%R10 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R10,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%RSI,%RCX,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JMP 4bd1b <.omp_outlined.+0x33b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |