Function: _ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_exe ... | Module: exec | Source: Collapse.hpp:81-81 [...] | Coverage: 0.47% |
---|
Function: _ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_exe ... | Module: exec | Source: Collapse.hpp:81-81 [...] | Coverage: 0.47% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/tpl/raja/tpl/camp/include/camp/tuple.hpp: 253 - 253 |
-------------------------------------------------------------------------------- |
253: CAMP_HOST_DEVICE constexpr tuple(tuple const& o) : base(o.base) {} |
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/Layout.hpp: 55 - 55 |
-------------------------------------------------------------------------------- |
55: return a * b; |
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/index/RangeSegment.hpp: 120 - 120 |
-------------------------------------------------------------------------------- |
120: RAJA_HOST_DEVICE RAJA_INLINE ~TypedRangeSegment() {} |
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/loop/forall.hpp: 59 - 59 |
-------------------------------------------------------------------------------- |
59: for (decltype(distance_it) i = 0; i < distance_it; ++i) { |
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/internal/Iterators.hpp: 55 - 177 |
-------------------------------------------------------------------------------- |
55: : val(rhs.val) |
[...] |
142: return val - rhs.val; |
[...] |
177: return value_type(val + rhs); |
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/openmp/kernel/Collapse.hpp: 81 - 81 |
-------------------------------------------------------------------------------- |
81: #pragma omp parallel for private(i0, i1) firstprivate(privatizer) \ |
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/pattern/detail/reduce.hpp: 74 - 262 |
-------------------------------------------------------------------------------- |
74: val = operator_type::operator()(val, v); |
[...] |
261: : parent{other.parent ? other.parent : &other}, |
262: identity{other.identity}, |
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/policy/openmp/reduce.hpp: 46 - 59 |
-------------------------------------------------------------------------------- |
46: class ReduceOMP |
[...] |
59: #pragma omp critical(ompReduceCritical) |
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/View.hpp: 79 - 79 |
-------------------------------------------------------------------------------- |
79: : layout(V.layout), data(V.data) |
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/tpl/raja/include/RAJA/util/Operators.hpp: 307 - 307 |
-------------------------------------------------------------------------------- |
307: return Ret{lhs} + rhs; |
/home/eoseret/qaas_runs_CPU_9468/171-112-3942/intel/Kripke/build/Kripke/src/Kripke/Kernel/Population.cpp: 58 - 58 |
-------------------------------------------------------------------------------- |
58: part_red += w(d) * psi(d,g,z) * volume(z); |
0x4ab420 PUSH %RBP |
0x4ab421 MOV %RDI,%RDX |
0x4ab424 MOV %RSP,%RBP |
0x4ab427 PUSH %R15 |
0x4ab429 PUSH %R14 |
0x4ab42b PUSH %R13 |
0x4ab42d PUSH %R12 |
0x4ab42f PUSH %RBX |
0x4ab430 SUB $0x58,%RSP |
0x4ab434 MOV 0x10(%RDI),%RAX |
0x4ab438 MOV (%RDX),%R13 |
0x4ab43b MOV 0x8(%RDX),%R12 |
0x4ab43f MOV 0x38(%RAX),%RDI |
0x4ab443 LEA 0x38(%RAX),%RSI |
0x4ab447 MOV (%RAX),%RBX |
0x4ab44a MOV 0x10(%RAX),%RCX |
0x4ab44e MOV 0x98(%RAX),%R8 |
0x4ab455 TEST %RDI,%RDI |
0x4ab458 MOV 0x20(%RAX),%R14 |
0x4ab45c VMOVSD 0x40(%RAX),%XMM0 |
0x4ab461 MOV %RBX,-0x38(%RBP) |
0x4ab465 CMOVE %RSI,%RDI |
0x4ab469 MOV 0x28(%RAX),%RBX |
0x4ab46d MOV 0x70(%RAX),%R9 |
0x4ab471 MOV %RCX,-0x48(%RBP) |
0x4ab475 MOV 0x90(%RAX),%R11 |
0x4ab47c MOV 0xd8(%RAX),%R10 |
0x4ab483 MOV %R8,-0x40(%RBP) |
0x4ab487 MOV %RDI,-0x50(%RBP) |
0x4ab48b MOV 0x100(%RAX),%R15 |
0x4ab492 TEST %R13,%R13 |
0x4ab495 JLE 4ab6f8 |
0x4ab49b MOV %R11,-0x78(%RBP) |
0x4ab49f MOV %R10,-0x70(%RBP) |
0x4ab4a3 MOV %R9,-0x68(%RBP) |
0x4ab4a7 MOV %R13,-0x60(%RBP) |
0x4ab4ab TEST %R12,%R12 |
0x4ab4ae JLE 4ab6f8 |
0x4ab4b4 VMOVSD %XMM0,-0x58(%RBP) |
0x4ab4b9 CALL 4034d0 <omp_get_num_threads@plt> |
0x4ab4be MOVSXD %EAX,%R13 |
0x4ab4c1 CALL 403420 <omp_get_thread_num@plt> |
0x4ab4c6 XOR %EDX,%EDX |
0x4ab4c8 VMOVSD -0x58(%RBP),%XMM0 |
0x4ab4cd MOV -0x68(%RBP),%R9 |
0x4ab4d1 MOVSXD %EAX,%RDI |
0x4ab4d4 MOV -0x60(%RBP),%RAX |
0x4ab4d8 MOV -0x70(%RBP),%R10 |
0x4ab4dc MOV -0x78(%RBP),%R11 |
0x4ab4e0 IMUL %R12,%RAX |
0x4ab4e4 DIV %R13 |
0x4ab4e7 CMP %RDX,%RDI |
0x4ab4ea MOV %RAX,%RCX |
0x4ab4ed JB 4ab73f |
0x4ab4f3 IMUL %RCX,%RDI |
0x4ab4f7 LEA (%RDI,%RDX,1),%RAX |
0x4ab4fb LEA (%RCX,%RAX,1),%RSI |
0x4ab4ff CMP %RSI,%RAX |
0x4ab502 JAE 4ab6f8 |
0x4ab508 XOR %EDX,%EDX |
0x4ab50a LEA (%R15,%R14,8),%RDI |
0x4ab50e SUB %R14,%RBX |
0x4ab511 XOR %R15D,%R15D |
0x4ab514 DIV %R12 |
0x4ab517 LEA -0x1(%RCX),%R8 |
0x4ab51b NOPL (%RAX,%RAX,1) |
(1977) 0x4ab520 TEST %RBX,%RBX |
(1977) 0x4ab523 JLE 4ab6dc |
(1977) 0x4ab529 MOV -0x38(%RBP),%RSI |
(1977) 0x4ab52d MOV -0x48(%RBP),%RCX |
(1977) 0x4ab531 MOV -0x40(%RBP),%R13 |
(1977) 0x4ab535 ADD %RAX,%RSI |
(1977) 0x4ab538 ADD %RDX,%RCX |
(1977) 0x4ab53b IMUL %R13,%RCX |
(1977) 0x4ab53f VMOVSD (%R9,%RSI,8),%XMM2 |
(1977) 0x4ab545 MOV %RBX,%R13 |
(1977) 0x4ab548 IMUL %R11,%RSI |
(1977) 0x4ab54c ADD %RSI,%RCX |
(1977) 0x4ab54f ADD %R14,%RCX |
(1977) 0x4ab552 LEA (%R10,%RCX,8),%RSI |
(1977) 0x4ab556 XOR %ECX,%ECX |
(1977) 0x4ab558 AND $0x7,%R13D |
(1977) 0x4ab55c JE 4ab62d |
(1977) 0x4ab562 CMP $0x1,%R13 |
(1977) 0x4ab566 JE 4ab60e |
(1977) 0x4ab56c CMP $0x2,%R13 |
(1977) 0x4ab570 JE 4ab5f9 |
(1977) 0x4ab576 CMP $0x3,%R13 |
(1977) 0x4ab57a JE 4ab5e4 |
(1977) 0x4ab57c CMP $0x4,%R13 |
(1977) 0x4ab580 JE 4ab5ce |
(1977) 0x4ab582 CMP $0x5,%R13 |
(1977) 0x4ab586 JE 4ab5b8 |
(1977) 0x4ab588 CMP $0x6,%R13 |
(1977) 0x4ab58c JE 4ab5a3 |
(1977) 0x4ab58e VMOVSD (%RSI),%XMM1 |
(1977) 0x4ab592 MOV $0x1,%ECX |
(1977) 0x4ab597 VMULSD (%RDI),%XMM1,%XMM3 |
(1977) 0x4ab59b VMULSD %XMM2,%XMM3,%XMM4 |
(1977) 0x4ab59f VADDSD %XMM4,%XMM0,%XMM0 |
(1977) 0x4ab5a3 VMOVSD (%RSI,%RCX,8),%XMM5 |
(1977) 0x4ab5a8 VMULSD (%RDI,%RCX,8),%XMM5,%XMM6 |
(1977) 0x4ab5ad INC %RCX |
(1977) 0x4ab5b0 VMULSD %XMM2,%XMM6,%XMM7 |
(1977) 0x4ab5b4 VADDSD %XMM7,%XMM0,%XMM0 |
(1977) 0x4ab5b8 VMOVSD (%RSI,%RCX,8),%XMM8 |
(1977) 0x4ab5bd VMULSD (%RDI,%RCX,8),%XMM8,%XMM9 |
(1977) 0x4ab5c2 INC %RCX |
(1977) 0x4ab5c5 VMULSD %XMM2,%XMM9,%XMM10 |
(1977) 0x4ab5c9 VADDSD %XMM10,%XMM0,%XMM0 |
(1977) 0x4ab5ce VMOVSD (%RSI,%RCX,8),%XMM11 |
(1977) 0x4ab5d3 VMULSD (%RDI,%RCX,8),%XMM11,%XMM12 |
(1977) 0x4ab5d8 INC %RCX |
(1977) 0x4ab5db VMULSD %XMM2,%XMM12,%XMM13 |
(1977) 0x4ab5df VADDSD %XMM13,%XMM0,%XMM0 |
(1977) 0x4ab5e4 VMOVSD (%RSI,%RCX,8),%XMM14 |
(1977) 0x4ab5e9 VMULSD (%RDI,%RCX,8),%XMM14,%XMM15 |
(1977) 0x4ab5ee INC %RCX |
(1977) 0x4ab5f1 VMULSD %XMM2,%XMM15,%XMM1 |
(1977) 0x4ab5f5 VADDSD %XMM1,%XMM0,%XMM0 |
(1977) 0x4ab5f9 VMOVSD (%RSI,%RCX,8),%XMM3 |
(1977) 0x4ab5fe VMULSD (%RDI,%RCX,8),%XMM3,%XMM4 |
(1977) 0x4ab603 INC %RCX |
(1977) 0x4ab606 VMULSD %XMM2,%XMM4,%XMM5 |
(1977) 0x4ab60a VADDSD %XMM5,%XMM0,%XMM0 |
(1977) 0x4ab60e VMOVSD (%RSI,%RCX,8),%XMM6 |
(1977) 0x4ab613 VMULSD (%RDI,%RCX,8),%XMM6,%XMM7 |
(1977) 0x4ab618 INC %RCX |
(1977) 0x4ab61b VMULSD %XMM2,%XMM7,%XMM8 |
(1977) 0x4ab61f VADDSD %XMM8,%XMM0,%XMM0 |
(1977) 0x4ab624 CMP %RBX,%RCX |
(1977) 0x4ab627 JE 4ab6dc |
(1978) 0x4ab62d VMOVSD (%RSI,%RCX,8),%XMM9 |
(1978) 0x4ab632 VMOVSD 0x8(%RSI,%RCX,8),%XMM13 |
(1978) 0x4ab638 VMOVSD 0x10(%RSI,%RCX,8),%XMM1 |
(1978) 0x4ab63e VMOVSD 0x18(%RSI,%RCX,8),%XMM7 |
(1978) 0x4ab644 VMULSD (%RDI,%RCX,8),%XMM9,%XMM10 |
(1978) 0x4ab649 VMULSD 0x8(%RDI,%RCX,8),%XMM13,%XMM14 |
(1978) 0x4ab64f VMULSD 0x10(%RDI,%RCX,8),%XMM1,%XMM4 |
(1978) 0x4ab655 VMOVSD 0x30(%RSI,%RCX,8),%XMM1 |
(1978) 0x4ab65b VMULSD 0x18(%RDI,%RCX,8),%XMM7,%XMM8 |
(1978) 0x4ab661 VMULSD %XMM2,%XMM10,%XMM11 |
(1978) 0x4ab665 VMOVSD 0x20(%RSI,%RCX,8),%XMM10 |
(1978) 0x4ab66b VMULSD %XMM2,%XMM14,%XMM15 |
(1978) 0x4ab66f VMOVSD 0x28(%RSI,%RCX,8),%XMM14 |
(1978) 0x4ab675 VMULSD %XMM2,%XMM4,%XMM5 |
(1978) 0x4ab679 VADDSD %XMM11,%XMM0,%XMM12 |
(1978) 0x4ab67e VMULSD 0x20(%RDI,%RCX,8),%XMM10,%XMM11 |
(1978) 0x4ab684 VMULSD %XMM2,%XMM8,%XMM0 |
(1978) 0x4ab688 VMOVSD 0x38(%RSI,%RCX,8),%XMM8 |
(1978) 0x4ab68e VADDSD %XMM15,%XMM12,%XMM3 |
(1978) 0x4ab693 VMULSD 0x28(%RDI,%RCX,8),%XMM14,%XMM15 |
(1978) 0x4ab699 VMULSD %XMM2,%XMM11,%XMM12 |
(1978) 0x4ab69d VADDSD %XMM5,%XMM3,%XMM6 |
(1978) 0x4ab6a1 VMULSD 0x30(%RDI,%RCX,8),%XMM1,%XMM5 |
(1978) 0x4ab6a7 VMULSD %XMM2,%XMM15,%XMM3 |
(1978) 0x4ab6ab VADDSD %XMM0,%XMM6,%XMM9 |
(1978) 0x4ab6af VMULSD 0x38(%RDI,%RCX,8),%XMM8,%XMM0 |
(1978) 0x4ab6b5 ADD $0x8,%RCX |
(1978) 0x4ab6b9 VMULSD %XMM2,%XMM5,%XMM6 |
(1978) 0x4ab6bd VADDSD %XMM12,%XMM9,%XMM13 |
(1978) 0x4ab6c2 VMULSD %XMM2,%XMM0,%XMM9 |
(1978) 0x4ab6c6 VADDSD %XMM3,%XMM13,%XMM4 |
(1978) 0x4ab6ca VADDSD %XMM6,%XMM4,%XMM7 |
(1978) 0x4ab6ce VADDSD %XMM9,%XMM7,%XMM0 |
(1978) 0x4ab6d3 CMP %RBX,%RCX |
(1978) 0x4ab6d6 JNE 4ab62d |
(1977) 0x4ab6dc CMP %R15,%R8 |
(1977) 0x4ab6df JE 4ab6f8 |
(1977) 0x4ab6e1 INC %RDX |
(1977) 0x4ab6e4 CMP %RDX,%R12 |
(1977) 0x4ab6e7 JLE 4ab738 |
(1977) 0x4ab6e9 INC %R15 |
(1977) 0x4ab6ec JMP 4ab520 |
0x4ab6f1 NOPL (%RAX) |
0x4ab6f8 MOV $0x502390,%R13 |
0x4ab6ff VMOVSD %XMM0,-0x38(%RBP) |
0x4ab704 MOV %R13,%RDI |
0x4ab707 CALL 403210 <GOMP_critical_name_start@plt> |
0x4ab70c MOV -0x50(%RBP),%RSI |
0x4ab710 VMOVSD -0x38(%RBP),%XMM2 |
0x4ab715 MOV %R13,%RDI |
0x4ab718 VADDSD 0x10(%RSI),%XMM2,%XMM10 |
0x4ab71d VMOVSD %XMM10,0x10(%RSI) |
0x4ab722 ADD $0x58,%RSP |
0x4ab726 POP %RBX |
0x4ab727 POP %R12 |
0x4ab729 POP %R13 |
0x4ab72b POP %R14 |
0x4ab72d POP %R15 |
0x4ab72f POP %RBP |
0x4ab730 JMP 4030b0 |
0x4ab735 NOPL (%RAX) |
(1977) 0x4ab738 INC %RAX |
(1977) 0x4ab73b XOR %EDX,%EDX |
(1977) 0x4ab73d JMP 4ab6e9 |
0x4ab73f INC %RCX |
0x4ab742 XOR %EDX,%EDX |
0x4ab744 JMP 4ab4f3 |
0x4ab749 NOPL (%RAX) |
Path / |
Source file and lines | Collapse.hpp:81-81 |
Module | exec |
nb instructions | 89 |
nb uops | 100 |
loop length | 344 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 9 |
micro-operation queue | 16.67 cycles |
front end | 16.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.70 | 8.00 | 9.33 | 9.33 | 10.00 | 3.87 | 3.70 | 10.00 | 10.00 | 10.00 | 3.73 | 9.33 |
cycles | 3.70 | 10.07 | 9.33 | 9.33 | 10.00 | 3.87 | 3.70 | 10.00 | 10.00 | 10.00 | 3.73 | 9.33 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.17-20.22 |
Stall cycles | 3.83-3.88 |
ROB full (events) | 4.26-4.32 |
Front-end | 16.67 |
Dispatch | 10.07 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RAX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV 0x20(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x40(%RAX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMOVE %RSI,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RAX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x100(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4ab6f8 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x2d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4ab6f8 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x2d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD %XMM0,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4034d0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %EAX,%R13 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
CALL 403420 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x58(%RBP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x70(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R12,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JB 4ab73f <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RCX,%RAX,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RSI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4ab6f8 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x2d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R15,%R14,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R14,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R12 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
LEA -0x1(%RCX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x502390,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD %XMM0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403210 <GOMP_critical_name_start@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x38(%RBP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDSD 0x10(%RSI),%XMM2,%XMM10 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM10,0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 4030b0 <GOMP_critical_name_end@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4ab4f3 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0xd3> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | Collapse.hpp:81-81 |
Module | exec |
nb instructions | 89 |
nb uops | 100 |
loop length | 344 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 9 |
micro-operation queue | 16.67 cycles |
front end | 16.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.70 | 8.00 | 9.33 | 9.33 | 10.00 | 3.87 | 3.70 | 10.00 | 10.00 | 10.00 | 3.73 | 9.33 |
cycles | 3.70 | 10.07 | 9.33 | 9.33 | 10.00 | 3.87 | 3.70 | 10.00 | 10.00 | 10.00 | 3.73 | 9.33 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.17-20.22 |
Stall cycles | 3.83-3.88 |
ROB full (events) | 4.26-4.32 |
Front-end | 16.67 |
Dispatch | 10.07 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RAX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV 0x20(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x40(%RAX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMOVE %RSI,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RAX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x100(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4ab6f8 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x2d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4ab6f8 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x2d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD %XMM0,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4034d0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %EAX,%R13 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
CALL 403420 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x58(%RBP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x70(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x78(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R12,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JB 4ab73f <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RCX,%RAX,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RSI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4ab6f8 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0x2d8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R15,%R14,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R14,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R12 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
LEA -0x1(%RCX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x502390,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD %XMM0,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403210 <GOMP_critical_name_start@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD -0x38(%RBP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VADDSD 0x10(%RSI),%XMM2,%XMM10 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM10,0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x58,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 4030b0 <GOMP_critical_name_end@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4ab4f3 <_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9Iterators16numeric_iteratorIN6Kripke9DirectionElPSR_EESR_EENSN_INSP_INSQ_5GroupElPSV_EESV_EENSN_INSP_INSQ_4ZoneElPSZ_EESZ_EEEEENSL_IJEEEJZNK14PopulationSdomclINSQ_11ArchLayoutTINSQ_12ArchT_OpenMPENSQ_11LayoutT_DGZEEEEEvT_NSQ_6SdomIdERKNSQ_4Core3SetES1G_S1G_RNS1D_5FieldIdJSR_SV_SZ_EEERNS1H_IdJSR_EEERNS1H_IdJSZ_EEEPdEUlSR_SV_SZ_E_EEEEEvOS1B_._omp_fn.0+0xd3> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN4RAJA8internal17StatementExecutorINS_9statement8CollapseINS_26omp_parallel_collapse_execEN4camp7int_seqIlJLl0ELl1EEEEJNS2_3ForILl2ENS_6policy4loop9loop_execEJNS2_6LambdaILl0EJEEEEEEEEEE4execIRNS0_8LoopDataINS5_4listIJSF_EEENS5_5tupleIJNS_4impl4SpanINS_9...– | 0.47 | 0.3 |
▼Loop 1977 - RangeSegment.hpp:120-120 - exec– | 0 | 0 |
○Loop 1978 - forall.hpp:59-59 - exec | 0.47 | 0.27 |