Function: kineticEnergy._omp_fn.0 | Module: exec | Source: timestep.c:107-116 | Coverage: 0.07% |
---|
Function: kineticEnergy._omp_fn.0 | Module: exec | Source: timestep.c:107-116 | Coverage: 0.07% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-850-7424/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 107 - 116 |
-------------------------------------------------------------------------------- |
107: #pragma omp parallel for reduction(+:kenergy) |
108: for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++) |
109: { |
110: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
111: { |
112: int iSpecies = s->atoms->iSpecies[iOff]; |
113: real_t invMass = 0.5/s->species[iSpecies].mass; |
114: kenergy += ( s->atoms->p[iOff][0] * s->atoms->p[iOff][0] + |
115: s->atoms->p[iOff][1] * s->atoms->p[iOff][1] + |
116: s->atoms->p[iOff][2] * s->atoms->p[iOff][2] )*invMass; |
0x40f820 PUSH %RBP |
0x40f821 MOV %RSP,%RBP |
0x40f824 PUSH %R14 |
0x40f826 PUSH %R13 |
0x40f828 PUSH %R12 |
0x40f82a MOV %RDI,%R12 |
0x40f82d PUSH %RBX |
0x40f82e MOV (%RDI),%RBX |
0x40f831 MOV 0x18(%RBX),%R14 |
0x40f835 CALL 403060 <omp_get_num_threads@plt> |
0x40f83a MOV %EAX,%R13D |
0x40f83d CALL 403160 <omp_get_thread_num@plt> |
0x40f842 MOV %EAX,%R8D |
0x40f845 MOV 0xc(%R14),%EAX |
0x40f849 CLTD |
0x40f84a IDIV %R13D |
0x40f84d CMP %EDX,%R8D |
0x40f850 JL 40fab0 |
0x40f856 IMUL %EAX,%R8D |
0x40f85a VXORPD %XMM0,%XMM0,%XMM0 |
0x40f85e ADD %EDX,%R8D |
0x40f861 ADD %R8D,%EAX |
0x40f864 CMP %EAX,%R8D |
0x40f867 JGE 40fa88 |
0x40f86d MOVSXD %R8D,%R9 |
0x40f870 MOV 0x78(%R14),%R13 |
0x40f874 VMOVSD 0x227c(%RIP),%XMM2 |
0x40f87c SAL $0x6,%R8D |
0x40f880 LEA (%R9,%R9,2),%R11 |
0x40f884 SAL $0x9,%R11 |
0x40f888 NOPL (%RAX,%RAX,1) |
(99) 0x40f890 MOVSXD (%R13,%R9,4),%RSI |
(99) 0x40f895 TEST %ESI,%ESI |
(99) 0x40f897 JLE 40fa71 |
(99) 0x40f89d MOV 0x20(%RBX),%R14 |
(99) 0x40f8a1 MOVSXD %R8D,%RCX |
(99) 0x40f8a4 MOV 0x28(%RBX),%RDI |
(99) 0x40f8a8 MOV 0x10(%R14),%R10 |
(99) 0x40f8ac MOV 0x20(%R14),%RDX |
(99) 0x40f8b0 MOV %R9,%R14 |
(99) 0x40f8b3 SAL $0x6,%R14 |
(99) 0x40f8b7 ADD %R14,%RSI |
(99) 0x40f8ba LEA (%R10,%RCX,4),%RCX |
(99) 0x40f8be ADD %R11,%RDX |
(99) 0x40f8c1 LEA (%R10,%RSI,4),%R10 |
(99) 0x40f8c5 MOV %R10,%RSI |
(99) 0x40f8c8 SUB %RCX,%RSI |
(99) 0x40f8cb SUB $0x4,%RSI |
(99) 0x40f8cf SHR $0x2,%RSI |
(99) 0x40f8d3 INC %RSI |
(99) 0x40f8d6 AND $0x3,%ESI |
(99) 0x40f8d9 JE 40f99d |
(99) 0x40f8df CMP $0x1,%RSI |
(99) 0x40f8e3 JE 40f95b |
(99) 0x40f8e5 CMP $0x2,%RSI |
(99) 0x40f8e9 JE 40f923 |
(99) 0x40f8eb VMOVSD 0x8(%RDX),%XMM3 |
(99) 0x40f8f0 MOVSXD (%RCX),%R14 |
(99) 0x40f8f3 ADD $0x18,%RDX |
(99) 0x40f8f7 ADD $0x4,%RCX |
(99) 0x40f8fb VMOVSD -0x18(%RDX),%XMM1 |
(99) 0x40f900 VMOVSD -0x8(%RDX),%XMM5 |
(99) 0x40f905 VMULSD %XMM3,%XMM3,%XMM4 |
(99) 0x40f909 SAL $0x4,%R14 |
(99) 0x40f90d VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM6 |
(99) 0x40f914 VFMADD231SD %XMM1,%XMM1,%XMM4 |
(99) 0x40f919 VFMADD132SD %XMM5,%XMM4,%XMM5 |
(99) 0x40f91e VFMADD231SD %XMM5,%XMM6,%XMM0 |
(99) 0x40f923 VMOVSD 0x8(%RDX),%XMM8 |
(99) 0x40f928 MOVSXD (%RCX),%RSI |
(99) 0x40f92b ADD $0x18,%RDX |
(99) 0x40f92f ADD $0x4,%RCX |
(99) 0x40f933 VMOVSD -0x18(%RDX),%XMM7 |
(99) 0x40f938 VMOVSD -0x8(%RDX),%XMM10 |
(99) 0x40f93d VMULSD %XMM8,%XMM8,%XMM9 |
(99) 0x40f942 SAL $0x4,%RSI |
(99) 0x40f946 VDIVSD 0x8(%RDI,%RSI,1),%XMM2,%XMM11 |
(99) 0x40f94c VFMADD231SD %XMM7,%XMM7,%XMM9 |
(99) 0x40f951 VFMADD132SD %XMM10,%XMM9,%XMM10 |
(99) 0x40f956 VFMADD231SD %XMM10,%XMM11,%XMM0 |
(99) 0x40f95b VMOVSD 0x8(%RDX),%XMM13 |
(99) 0x40f960 MOVSXD (%RCX),%R14 |
(99) 0x40f963 ADD $0x4,%RCX |
(99) 0x40f967 ADD $0x18,%RDX |
(99) 0x40f96b VMOVSD -0x18(%RDX),%XMM12 |
(99) 0x40f970 VMOVSD -0x8(%RDX),%XMM15 |
(99) 0x40f975 VMULSD %XMM13,%XMM13,%XMM14 |
(99) 0x40f97a SAL $0x4,%R14 |
(99) 0x40f97e VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM1 |
(99) 0x40f985 VFMADD231SD %XMM12,%XMM12,%XMM14 |
(99) 0x40f98a VFMADD132SD %XMM15,%XMM14,%XMM15 |
(99) 0x40f98f VFMADD231SD %XMM15,%XMM1,%XMM0 |
(99) 0x40f994 CMP %RCX,%R10 |
(99) 0x40f997 JE 40fa71 |
(100) 0x40f99d VMOVSD 0x8(%RDX),%XMM3 |
(100) 0x40f9a2 MOVSXD (%RCX),%RSI |
(100) 0x40f9a5 ADD $0x10,%RCX |
(100) 0x40f9a9 ADD $0x60,%RDX |
(100) 0x40f9ad VMOVSD -0x60(%RDX),%XMM4 |
(100) 0x40f9b2 VMOVSD -0x50(%RDX),%XMM6 |
(100) 0x40f9b7 VMULSD %XMM3,%XMM3,%XMM5 |
(100) 0x40f9bb VMOVSD -0x40(%RDX),%XMM8 |
(100) 0x40f9c0 SAL $0x4,%RSI |
(100) 0x40f9c4 VMOVSD -0x28(%RDX),%XMM13 |
(100) 0x40f9c9 VDIVSD 0x8(%RDI,%RSI,1),%XMM2,%XMM7 |
(100) 0x40f9cf VMOVSD -0x38(%RDX),%XMM10 |
(100) 0x40f9d4 VMOVSD -0x30(%RDX),%XMM12 |
(100) 0x40f9d9 VMULSD %XMM8,%XMM8,%XMM9 |
(100) 0x40f9de VMOVSD -0x10(%RDX),%XMM1 |
(100) 0x40f9e3 MOVSXD -0xc(%RCX),%R14 |
(100) 0x40f9e7 VMULSD %XMM13,%XMM13,%XMM14 |
(100) 0x40f9ec VMOVSD -0x20(%RDX),%XMM15 |
(100) 0x40f9f1 VMOVSD -0x18(%RDX),%XMM3 |
(100) 0x40f9f6 SAL $0x4,%R14 |
(100) 0x40f9fa MOVSXD -0x8(%RCX),%RSI |
(100) 0x40f9fe VFMADD231SD %XMM4,%XMM4,%XMM5 |
(100) 0x40fa03 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM11 |
(100) 0x40fa0a MOVSXD -0x4(%RCX),%R14 |
(100) 0x40fa0e SAL $0x4,%RSI |
(100) 0x40fa12 VDIVSD 0x8(%RDI,%RSI,1),%XMM2,%XMM4 |
(100) 0x40fa18 SAL $0x4,%R14 |
(100) 0x40fa1c VFMADD231SD %XMM12,%XMM12,%XMM14 |
(100) 0x40fa21 VFMADD132SD %XMM6,%XMM5,%XMM6 |
(100) 0x40fa26 VMULSD %XMM1,%XMM1,%XMM5 |
(100) 0x40fa2a VFMADD132SD %XMM15,%XMM14,%XMM15 |
(100) 0x40fa2f VFMADD132SD %XMM6,%XMM0,%XMM7 |
(100) 0x40fa34 VMOVSD -0x48(%RDX),%XMM0 |
(100) 0x40fa39 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM6 |
(100) 0x40fa40 VFMADD231SD %XMM3,%XMM3,%XMM5 |
(100) 0x40fa45 VFMADD231SD %XMM0,%XMM0,%XMM9 |
(100) 0x40fa4a VMOVSD -0x8(%RDX),%XMM0 |
(100) 0x40fa4f VFMADD132SD %XMM0,%XMM5,%XMM0 |
(100) 0x40fa54 VFMADD132SD %XMM10,%XMM9,%XMM10 |
(100) 0x40fa59 VFMADD132SD %XMM10,%XMM7,%XMM11 |
(100) 0x40fa5e VFMADD132SD %XMM15,%XMM11,%XMM4 |
(100) 0x40fa63 VFMADD132SD %XMM6,%XMM4,%XMM0 |
(100) 0x40fa68 CMP %RCX,%R10 |
(100) 0x40fa6b JNE 40f99d |
(99) 0x40fa71 INC %R9 |
(99) 0x40fa74 ADD $0x40,%R8D |
(99) 0x40fa78 ADD $0x600,%R11 |
(99) 0x40fa7f CMP %R9D,%EAX |
(99) 0x40fa82 JG 40f890 |
0x40fa88 MOV 0x8(%R12),%RAX |
0x40fa8d LEA 0x8(%R12),%RBX |
(98) 0x40fa92 VMOVQ %RAX,%XMM2 |
(98) 0x40fa97 VADDSD %XMM2,%XMM0,%XMM7 |
(98) 0x40fa9b VMOVQ %XMM7,%R12 |
(98) 0x40faa0 LOCK CMPXCHG %R12,(%RBX) |
(98) 0x40faa5 JNE 40fa92 |
0x40faa7 POP %RBX |
0x40faa8 POP %R12 |
0x40faaa POP %R13 |
0x40faac POP %R14 |
0x40faae POP %RBP |
0x40faaf RET |
0x40fab0 INC %EAX |
0x40fab2 XOR %EDX,%EDX |
0x40fab4 JMP 40f856 |
0x40fab9 NOPL (%RAX) |
Path / |
Source file and lines | timestep.c:107-116 |
Module | exec |
nb instructions | 43 |
nb uops | 43 |
loop length | 147 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 7.17 cycles |
front end | 7.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.00 | 3.75 | 3.75 | 3.50 | 4.00 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 4.00 | 3.75 | 3.75 | 3.50 | 4.00 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 6.00 |
Front-end | 7.17 |
Dispatch | 4.00 |
DIV/SQRT | 6.00 |
Overall L1 | 7.17 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 6% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 18% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 403060 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CLTD | |||||||||||||||||
IDIV %R13D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-14 | 6 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 40fab0 <kineticEnergy._omp_fn.0+0x290> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %EAX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R8D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EAX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 40fa88 <kineticEnergy._omp_fn.0+0x268> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOVSXD %R8D,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x78(%R14),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD 0x227c(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x6,%R8D | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R9,%R9,2),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x9,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV 0x8(%R12),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x8(%R12),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INC %EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40f856 <kineticEnergy._omp_fn.0+0x36> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | timestep.c:107-116 |
Module | exec |
nb instructions | 43 |
nb uops | 43 |
loop length | 147 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 7.17 cycles |
front end | 7.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.00 | 3.75 | 3.75 | 3.50 | 4.00 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 4.00 | 3.75 | 3.75 | 3.50 | 4.00 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 6.00 |
Front-end | 7.17 |
Dispatch | 4.00 |
DIV/SQRT | 6.00 |
Overall L1 | 7.17 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 6% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 18% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 403060 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CLTD | |||||||||||||||||
IDIV %R13D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-14 | 6 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 40fab0 <kineticEnergy._omp_fn.0+0x290> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %EAX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %R8D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EAX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 40fa88 <kineticEnergy._omp_fn.0+0x268> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOVSXD %R8D,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x78(%R14),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVSD 0x227c(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SAL $0x6,%R8D | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R9,%R9,2),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x9,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV 0x8(%R12),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x8(%R12),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INC %EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40f856 <kineticEnergy._omp_fn.0+0x36> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼kineticEnergy._omp_fn.0– | 0.07 | 0.01 |
▼Loop 99 - timestep.c:110-116 - exec– | 0.02 | 0 |
○Loop 100 - timestep.c:110-116 - exec | 0.05 | 0 |
○Loop 98 - timestep.c:107-107 - exec | 0 | 0 |