| Function: clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100 | Module: exec | Source: pack_kernel.f90:155-163 | Coverage: 0.04% |
|---|
| Function: clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100 | Module: exec | Source: pack_kernel.f90:155-163 | Coverage: 0.04% |
|---|
/scratch_na/users/xoserete/qaas_runs/171-214-9740/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/pack_kernel.f90: 155 - 163 |
-------------------------------------------------------------------------------- |
155: !$OMP PARALLEL DO PRIVATE(index) |
156: DO k=y_min-depth,y_max+y_inc+depth |
157: !$OMP SIMD |
158: DO j=1,depth |
159: index= buffer_offset + j+(k+depth-1)*depth |
160: right_snd_buffer(index)=field(x_max+1-j,k) |
161: ENDDO |
162: ENDDO |
163: !$OMP END PARALLEL DO |
0x43f920 PUSH %RBP |
0x43f921 MOV %RSP,%RBP |
0x43f924 PUSH %R15 |
0x43f926 PUSH %R14 |
0x43f928 PUSH %R13 |
0x43f92a PUSH %R12 |
0x43f92c PUSH %RBX |
0x43f92d SUB $0x78,%RSP |
0x43f931 MOV %R9,-0x78(%RBP) |
0x43f935 MOV %R8,-0x48(%RBP) |
0x43f939 MOV %RCX,-0x70(%RBP) |
0x43f93d MOV 0x18(%RBP),%RAX |
0x43f941 MOV %RAX,-0x68(%RBP) |
0x43f945 MOV 0x10(%RBP),%RAX |
0x43f949 MOV %RAX,-0x60(%RBP) |
0x43f94d MOV 0x28(%RBP),%EAX |
0x43f950 MOVL $0,-0x40(%RBP) |
0x43f957 TEST %EAX,%EAX |
0x43f959 JS 43f9ca |
0x43f95b MOV %RDX,%RBX |
0x43f95e MOV (%RDI),%ESI |
0x43f960 MOVL $0,-0x34(%RBP) |
0x43f967 MOV %EAX,-0x30(%RBP) |
0x43f96a MOVL $0x1,-0x3c(%RBP) |
0x43f971 SUB $0x8,%RSP |
0x43f975 LEA -0x3c(%RBP),%RAX |
0x43f979 LEA -0x40(%RBP),%RCX |
0x43f97d LEA -0x34(%RBP),%R8 |
0x43f981 LEA -0x30(%RBP),%R9 |
0x43f985 MOV $0x7388b0,%EDI |
0x43f98a MOV %ESI,-0x38(%RBP) |
0x43f98d MOV $0x22,%EDX |
0x43f992 PUSH $0x1 |
0x43f994 PUSH $0x1 |
0x43f996 PUSH %RAX |
0x43f997 CALL 404520 <__kmpc_for_static_init_4@plt> |
0x43f99c ADD $0x20,%RSP |
0x43f9a0 MOV -0x34(%RBP),%ECX |
0x43f9a3 MOV -0x30(%RBP),%R8D |
0x43f9a7 SUB %ECX,%R8D |
0x43f9aa JAE 43f9d9 |
0x43f9ac MOV $0x7388d0,%EDI |
0x43f9b1 MOV -0x38(%RBP),%ESI |
0x43f9b4 ADD $0x78,%RSP |
0x43f9b8 POP %RBX |
0x43f9b9 POP %R12 |
0x43f9bb POP %R13 |
0x43f9bd POP %R14 |
0x43f9bf POP %R15 |
0x43f9c1 POP %RBP |
0x43f9c2 VZEROUPPER |
0x43f9c5 JMP 404110 |
0x43f9ca ADD $0x78,%RSP |
0x43f9ce POP %RBX |
0x43f9cf POP %R12 |
0x43f9d1 POP %R13 |
0x43f9d3 POP %R14 |
0x43f9d5 POP %R15 |
0x43f9d7 POP %RBP |
0x43f9d8 RET |
0x43f9d9 MOV -0x48(%RBP),%RAX |
0x43f9dd MOV (%RAX),%EAX |
0x43f9df LEA (%RCX,%RBX,1),%EDX |
0x43f9e2 MOV %RDX,-0x58(%RBP) |
0x43f9e6 LEA (%RCX,%RBX,1),%EDX |
0x43f9e9 DEC %EDX |
0x43f9eb XOR %ESI,%ESI |
0x43f9ed VMOVDQA 0xbdf7b(%RIP),%XMM0 |
0x43f9f5 VPCMPEQD %YMM1,%YMM1,%YMM1 |
0x43f9f9 VMOVDQA 0xb535f(%RIP),%YMM2 |
0x43fa01 MOV %R8D,-0x2c(%RBP) |
0x43fa05 JMP 43fa23 |
0x43fa07 NOPW (%RAX,%RAX,1) |
(405) 0x43fa10 MOV %R13D,%EAX |
(405) 0x43fa13 MOV -0x50(%RBP),%RSI |
(405) 0x43fa17 LEA 0x1(%RSI),%ECX |
(405) 0x43fa1a INC %EDX |
(405) 0x43fa1c CMP %R8D,%ESI |
(405) 0x43fa1f MOV %ECX,%ESI |
(405) 0x43fa21 JE 43f9ac |
(405) 0x43fa23 TEST %EAX,%EAX |
(405) 0x43fa25 JLE 43fa17 |
(405) 0x43fa27 MOV -0x58(%RBP),%RCX |
(405) 0x43fa2b MOV %RSI,-0x50(%RBP) |
(405) 0x43fa2f ADD %ESI,%ECX |
(405) 0x43fa31 MOV -0x70(%RBP),%RSI |
(405) 0x43fa35 MOVSXD (%RSI),%R14 |
(405) 0x43fa38 MOV -0x48(%RBP),%RSI |
(405) 0x43fa3c MOV (%RSI),%R13D |
(405) 0x43fa3f MOV -0x60(%RBP),%R8 |
(405) 0x43fa43 MOV (%R8),%R9 |
(405) 0x43fa46 MOV 0x38(%R8),%R10 |
(405) 0x43fa4a MOV -0x68(%RBP),%RSI |
(405) 0x43fa4e MOV (%RSI),%R11D |
(405) 0x43fa51 MOV 0x50(%R8),%RDI |
(405) 0x43fa55 MOV -0x78(%RBP),%R8 |
(405) 0x43fa59 MOV (%R8),%RSI |
(405) 0x43fa5c MOV 0x38(%R8),%R15 |
(405) 0x43fa60 MOV %EAX,%R12D |
(405) 0x43fa63 MOV %R12,%RBX |
(405) 0x43fa66 MOVSXD %ECX,%RCX |
(405) 0x43fa69 MOV $-0x4,%EAX |
(405) 0x43fa6e AND %RAX,%RBX |
(405) 0x43fa71 MOV %R14,-0x98(%RBP) |
(405) 0x43fa78 MOV %RDI,-0x90(%RBP) |
(405) 0x43fa7f JE 43fb30 |
(405) 0x43fa85 MOV %R13,%RAX |
(405) 0x43fa88 MOV %RCX,-0x80(%RBP) |
(405) 0x43fa8c LEA 0x1(%RCX),%R13 |
(405) 0x43fa90 IMUL %RDI,%R13 |
(405) 0x43fa94 VPBROADCASTQ %R10,%YMM3 |
(405) 0x43fa9a VPBROADCASTQ %R15,%YMM4 |
(405) 0x43faa0 LEA (%RAX,%RDX,1),%ECX |
(405) 0x43faa3 MOV %RAX,-0x88(%RBP) |
(405) 0x43faaa IMUL %EAX,%ECX |
(405) 0x43faad MOVSXD %ECX,%R8 |
(405) 0x43fab0 ADD %R14,%R8 |
(405) 0x43fab3 ADD %R9,%R13 |
(405) 0x43fab6 MOV %R11D,%R14D |
(405) 0x43fab9 XOR %ECX,%ECX |
(405) 0x43fabb NOPL (%RAX,%RAX,1) |
(407) 0x43fac0 VPBROADCASTD %R14D,%XMM5 |
(407) 0x43fac6 VPADDD %XMM0,%XMM5,%XMM5 |
(407) 0x43faca VPMOVSXDQ %XMM5,%YMM5 |
(407) 0x43facf VPSUBQ %YMM1,%YMM5,%YMM5 |
(407) 0x43fad3 VPMULLQ %YMM5,%YMM3,%YMM5 |
(407) 0x43fad9 KXNORW %K0,%K0,%K1 |
(407) 0x43fadd VXORPD %XMM6,%XMM6,%XMM6 |
(407) 0x43fae1 VGATHERQPD (%R13,%YMM5,1),%YMM6{%K1} |
(407) 0x43fae9 LEA (%R8,%RCX,1),%RAX |
(407) 0x43faed VPBROADCASTQ %RAX,%YMM5 |
(407) 0x43faf3 VPADDQ %YMM2,%YMM5,%YMM5 |
(407) 0x43faf7 VPMULLQ %YMM5,%YMM4,%YMM5 |
(407) 0x43fafd KXNORW %K0,%K0,%K1 |
(407) 0x43fb01 VSCATTERQPD %YMM6,(%RSI,%YMM5,1){%K1} |
(407) 0x43fb08 ADD $0x4,%RCX |
(407) 0x43fb0c ADD $-0x4,%R14D |
(407) 0x43fb10 CMP %RBX,%RCX |
(407) 0x43fb13 JB 43fac0 |
(405) 0x43fb15 CMP %R12,%RBX |
(405) 0x43fb18 MOV -0x2c(%RBP),%R8D |
(405) 0x43fb1c MOV -0x88(%RBP),%R13 |
(405) 0x43fb23 MOV -0x80(%RBP),%RCX |
(405) 0x43fb27 JE 43fa10 |
(405) 0x43fb2d JMP 43fb36 |
0x43fb2f NOP |
(405) 0x43fb30 XOR %EBX,%EBX |
(405) 0x43fb32 MOV -0x2c(%RBP),%R8D |
(405) 0x43fb36 INC %RCX |
(405) 0x43fb39 MOV -0x90(%RBP),%RAX |
(405) 0x43fb40 IMUL %RCX,%RAX |
(405) 0x43fb44 ADD %RAX,%R9 |
(405) 0x43fb47 SUB %RBX,%R12 |
(405) 0x43fb4a SUB %EBX,%R11D |
(405) 0x43fb4d MOV -0x98(%RBP),%RCX |
(405) 0x43fb54 ADD %RBX,%RCX |
(405) 0x43fb57 LEA (%RDX,%R13,1),%EAX |
(405) 0x43fb5b IMUL %R13D,%EAX |
(405) 0x43fb5f CLTQ |
(405) 0x43fb61 ADD %RCX,%RAX |
(405) 0x43fb64 IMUL %R15,%RAX |
(405) 0x43fb68 ADD %RAX,%RSI |
(405) 0x43fb6b NOPL (%RAX,%RAX,1) |
(406) 0x43fb70 MOVSXD %R11D,%R11 |
(406) 0x43fb73 LEA 0x1(%R11),%RAX |
(406) 0x43fb77 IMUL %R10,%RAX |
(406) 0x43fb7b VMOVQ (%R9,%RAX,1),%XMM3 |
(406) 0x43fb81 VMOVQ %XMM3,(%RSI) |
(406) 0x43fb85 DEC %R11D |
(406) 0x43fb88 ADD %R15,%RSI |
(406) 0x43fb8b DEC %R12 |
(406) 0x43fb8e JNE 43fb70 |
(405) 0x43fb90 JMP 43fa10 |
0x43fb95 NOPW %CS:(%RAX,%RAX,1) |
| Path / |
| Source file and lines | pack_kernel.f90:155-163 |
| Module | exec |
| nb instructions | 75 |
| nb uops | 77 |
| loop length | 252 |
| used x86 registers | 14 |
| used mmx registers | 0 |
| used xmm registers | 1 |
| used ymm registers | 2 |
| used zmm registers | 0 |
| nb stack references | 15 |
| micro-operation queue | 12.83 cycles |
| front end | 12.83 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.50 | 2.40 | 8.00 | 8.00 | 11.00 | 2.40 | 2.30 | 11.00 | 11.00 | 11.00 | 2.40 | 8.00 |
| cycles | 2.50 | 2.40 | 8.00 | 8.00 | 11.00 | 2.40 | 2.30 | 11.00 | 11.00 | 11.00 | 2.40 | 8.00 |
| Cycles executing div or sqrt instructions | NA |
| FE+BE cycles | 12.64-12.69 |
| Stall cycles | 0.00 |
| Front-end | 12.83 |
| Dispatch | 11.00 |
| Overall L1 | 12.83 |
| all | 14% |
| load | 40% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 50% |
| all | 13% |
| load | 18% |
| store | 9% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 11% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 21% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| SUB $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV %R8,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV %RCX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV 0x28(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
| JS 43f9ca <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0xaa> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOVL $0,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV %EAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOVL $0x1,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| LEA -0x3c(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA -0x34(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA -0x30(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| MOV $0x7388b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| MOV %ESI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| CALL 404520 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
| ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| MOV -0x34(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV -0x30(%RBP),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| SUB %ECX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| JAE 43f9d9 <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0xb9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| MOV $0x7388d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| MOV -0x38(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| ADD $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| JMP 404110 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
| ADD $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
| MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV (%RAX),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| LEA (%RCX,%RBX,1),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| MOV %RDX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| LEA (%RCX,%RBX,1),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| DEC %EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VMOVDQA 0xbdf7b(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VPCMPEQD %YMM1,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMOVDQA 0xb535f(%RIP),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| MOV %R8D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| JMP 43fa23 <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0x103> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| Source file and lines | pack_kernel.f90:155-163 |
| Module | exec |
| nb instructions | 75 |
| nb uops | 77 |
| loop length | 252 |
| used x86 registers | 14 |
| used mmx registers | 0 |
| used xmm registers | 1 |
| used ymm registers | 2 |
| used zmm registers | 0 |
| nb stack references | 15 |
| micro-operation queue | 12.83 cycles |
| front end | 12.83 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 2.50 | 2.40 | 8.00 | 8.00 | 11.00 | 2.40 | 2.30 | 11.00 | 11.00 | 11.00 | 2.40 | 8.00 |
| cycles | 2.50 | 2.40 | 8.00 | 8.00 | 11.00 | 2.40 | 2.30 | 11.00 | 11.00 | 11.00 | 2.40 | 8.00 |
| Cycles executing div or sqrt instructions | NA |
| FE+BE cycles | 12.64-12.69 |
| Stall cycles | 0.00 |
| Front-end | 12.83 |
| Dispatch | 11.00 |
| Overall L1 | 12.83 |
| all | 14% |
| load | 40% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 50% |
| all | 13% |
| load | 18% |
| store | 9% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 11% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 21% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| SUB $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV %R8,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV %RCX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV 0x28(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
| JS 43f9ca <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0xaa> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOVL $0,-0x34(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV %EAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOVL $0x1,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| LEA -0x3c(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA -0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA -0x34(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA -0x30(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| MOV $0x7388b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| MOV %ESI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
| CALL 404520 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
| ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| MOV -0x34(%RBP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV -0x30(%RBP),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| SUB %ECX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| JAE 43f9d9 <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0xb9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| MOV $0x7388d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| MOV -0x38(%RBP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| ADD $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| JMP 404110 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
| ADD $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
| RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
| MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV (%RAX),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| LEA (%RCX,%RBX,1),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| MOV %RDX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| LEA (%RCX,%RBX,1),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| DEC %EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VMOVDQA 0xbdf7b(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VPCMPEQD %YMM1,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMOVDQA 0xb535f(%RIP),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| MOV %R8D,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
| JMP 43fa23 <pack_kernel_module_mp_clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100+0x103> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼clover_pack_message_right_.DIR.OMP.PARALLEL.LOOP.2.split100– | 0.04 | 0.02 |
| ▼Loop 405 - pack_kernel.f90:156-160 - exec– | 0 | 0.01 |
| ○Loop 406 - pack_kernel.f90:158-160 - exec | 0.04 | 0.03 |
| ○Loop 407 - pack_kernel.f90:158-160 - exec | 0 | 0 |
