Function: advanceVelocity._omp_fn.0 | Module: exec | Source: timestep.c:71-78 | Coverage: 1.82% |
---|
Function: advanceVelocity._omp_fn.0 | Module: exec | Source: timestep.c:71-78 | Coverage: 1.82% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-850-7424/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 71 - 78 |
-------------------------------------------------------------------------------- |
71: #pragma omp parallel for |
72: for (int iBox=0; iBox<nBoxes; iBox++) |
73: { |
74: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
75: { |
76: s->atoms->p[iOff][0] += dt*s->atoms->f[iOff][0]; |
77: s->atoms->p[iOff][1] += dt*s->atoms->f[iOff][1]; |
78: s->atoms->p[iOff][2] += dt*s->atoms->f[iOff][2]; |
0x40f3c0 PUSH %RBP |
0x40f3c1 MOV %RSP,%RBP |
0x40f3c4 PUSH %R12 |
0x40f3c6 PUSH %RBX |
0x40f3c7 MOV %RDI,%RBX |
0x40f3ca CALL 403060 <omp_get_num_threads@plt> |
0x40f3cf MOV %EAX,%R12D |
0x40f3d2 CALL 403160 <omp_get_thread_num@plt> |
0x40f3d7 MOV %EAX,%ESI |
0x40f3d9 MOV 0x10(%RBX),%EAX |
0x40f3dc CLTD |
0x40f3dd IDIV %R12D |
0x40f3e0 CMP %EDX,%ESI |
0x40f3e2 JL 40f80a |
0x40f3e8 IMUL %EAX,%ESI |
0x40f3eb ADD %EDX,%ESI |
0x40f3ed ADD %ESI,%EAX |
0x40f3ef CMP %EAX,%ESI |
0x40f3f1 JGE 40f805 |
0x40f3f7 MOV (%RBX),%R10 |
0x40f3fa MOVSXD %ESI,%R12 |
0x40f3fd VMOVSD 0x8(%RBX),%XMM0 |
0x40f402 LEA (%R12,%R12,2),%R8 |
0x40f406 MOV 0x18(%R10),%RCX |
0x40f40a SAL $0x9,%R8 |
0x40f40e MOV 0x78(%RCX),%R9 |
0x40f412 NOPW (%RAX,%RAX,1) |
(96) 0x40f418 MOVSXD (%R9,%R12,4),%RDI |
(96) 0x40f41c TEST %EDI,%EDI |
(96) 0x40f41e JLE 40f7f2 |
(96) 0x40f424 MOV 0x20(%R10),%R11 |
(96) 0x40f428 LEA (%RDI,%RDI,2),%RBX |
(96) 0x40f42c LEA -0x18(,%RBX,8),%RDI |
(96) 0x40f434 MOV 0x20(%R11),%RSI |
(96) 0x40f438 MOV 0x28(%R11),%RCX |
(96) 0x40f43c SHR $0x3,%RDI |
(96) 0x40f440 MOV $0xaaaaaaaaaaaaaab,%R11 |
(96) 0x40f44a IMUL %R11,%RDI |
(96) 0x40f44e ADD %R8,%RSI |
(96) 0x40f451 ADD %R8,%RCX |
(96) 0x40f454 LEA (%RSI,%RBX,8),%RDX |
(96) 0x40f458 INC %RDI |
(96) 0x40f45b AND $0x7,%EDI |
(96) 0x40f45e JE 40f61b |
(96) 0x40f464 CMP $0x1,%RDI |
(96) 0x40f468 JE 40f5dc |
(96) 0x40f46e CMP $0x2,%RDI |
(96) 0x40f472 JE 40f5a6 |
(96) 0x40f478 CMP $0x3,%RDI |
(96) 0x40f47c JE 40f570 |
(96) 0x40f482 CMP $0x4,%RDI |
(96) 0x40f486 JE 40f53a |
(96) 0x40f48c CMP $0x5,%RDI |
(96) 0x40f490 JE 40f504 |
(96) 0x40f492 CMP $0x6,%RDI |
(96) 0x40f496 JE 40f4ce |
(96) 0x40f498 VMOVSD (%RCX),%XMM1 |
(96) 0x40f49c VFMADD213SD (%RSI),%XMM0,%XMM1 |
(96) 0x40f4a1 ADD $0x18,%RCX |
(96) 0x40f4a5 ADD $0x18,%RSI |
(96) 0x40f4a9 VMOVSD %XMM1,-0x18(%RSI) |
(96) 0x40f4ae VMOVSD -0x10(%RCX),%XMM2 |
(96) 0x40f4b3 VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 |
(96) 0x40f4b9 VMOVSD %XMM2,-0x10(%RSI) |
(96) 0x40f4be VMOVSD -0x8(%RCX),%XMM3 |
(96) 0x40f4c3 VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 |
(96) 0x40f4c9 VMOVSD %XMM3,-0x8(%RSI) |
(96) 0x40f4ce VMOVSD (%RCX),%XMM4 |
(96) 0x40f4d2 VFMADD213SD (%RSI),%XMM0,%XMM4 |
(96) 0x40f4d7 ADD $0x18,%RCX |
(96) 0x40f4db ADD $0x18,%RSI |
(96) 0x40f4df VMOVSD %XMM4,-0x18(%RSI) |
(96) 0x40f4e4 VMOVSD -0x10(%RCX),%XMM5 |
(96) 0x40f4e9 VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 |
(96) 0x40f4ef VMOVSD %XMM5,-0x10(%RSI) |
(96) 0x40f4f4 VMOVSD -0x8(%RCX),%XMM6 |
(96) 0x40f4f9 VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 |
(96) 0x40f4ff VMOVSD %XMM6,-0x8(%RSI) |
(96) 0x40f504 VMOVSD (%RCX),%XMM7 |
(96) 0x40f508 VFMADD213SD (%RSI),%XMM0,%XMM7 |
(96) 0x40f50d ADD $0x18,%RCX |
(96) 0x40f511 ADD $0x18,%RSI |
(96) 0x40f515 VMOVSD %XMM7,-0x18(%RSI) |
(96) 0x40f51a VMOVSD -0x10(%RCX),%XMM8 |
(96) 0x40f51f VFMADD213SD -0x10(%RSI),%XMM0,%XMM8 |
(96) 0x40f525 VMOVSD %XMM8,-0x10(%RSI) |
(96) 0x40f52a VMOVSD -0x8(%RCX),%XMM9 |
(96) 0x40f52f VFMADD213SD -0x8(%RSI),%XMM0,%XMM9 |
(96) 0x40f535 VMOVSD %XMM9,-0x8(%RSI) |
(96) 0x40f53a VMOVSD (%RCX),%XMM10 |
(96) 0x40f53e VFMADD213SD (%RSI),%XMM0,%XMM10 |
(96) 0x40f543 ADD $0x18,%RCX |
(96) 0x40f547 ADD $0x18,%RSI |
(96) 0x40f54b VMOVSD %XMM10,-0x18(%RSI) |
(96) 0x40f550 VMOVSD -0x10(%RCX),%XMM11 |
(96) 0x40f555 VFMADD213SD -0x10(%RSI),%XMM0,%XMM11 |
(96) 0x40f55b VMOVSD %XMM11,-0x10(%RSI) |
(96) 0x40f560 VMOVSD -0x8(%RCX),%XMM12 |
(96) 0x40f565 VFMADD213SD -0x8(%RSI),%XMM0,%XMM12 |
(96) 0x40f56b VMOVSD %XMM12,-0x8(%RSI) |
(96) 0x40f570 VMOVSD (%RCX),%XMM13 |
(96) 0x40f574 VFMADD213SD (%RSI),%XMM0,%XMM13 |
(96) 0x40f579 ADD $0x18,%RCX |
(96) 0x40f57d ADD $0x18,%RSI |
(96) 0x40f581 VMOVSD %XMM13,-0x18(%RSI) |
(96) 0x40f586 VMOVSD -0x10(%RCX),%XMM14 |
(96) 0x40f58b VFMADD213SD -0x10(%RSI),%XMM0,%XMM14 |
(96) 0x40f591 VMOVSD %XMM14,-0x10(%RSI) |
(96) 0x40f596 VMOVSD -0x8(%RCX),%XMM15 |
(96) 0x40f59b VFMADD213SD -0x8(%RSI),%XMM0,%XMM15 |
(96) 0x40f5a1 VMOVSD %XMM15,-0x8(%RSI) |
(96) 0x40f5a6 VMOVSD (%RCX),%XMM1 |
(96) 0x40f5aa VFMADD213SD (%RSI),%XMM0,%XMM1 |
(96) 0x40f5af ADD $0x18,%RCX |
(96) 0x40f5b3 ADD $0x18,%RSI |
(96) 0x40f5b7 VMOVSD %XMM1,-0x18(%RSI) |
(96) 0x40f5bc VMOVSD -0x10(%RCX),%XMM2 |
(96) 0x40f5c1 VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 |
(96) 0x40f5c7 VMOVSD %XMM2,-0x10(%RSI) |
(96) 0x40f5cc VMOVSD -0x8(%RCX),%XMM3 |
(96) 0x40f5d1 VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 |
(96) 0x40f5d7 VMOVSD %XMM3,-0x8(%RSI) |
(96) 0x40f5dc VMOVSD (%RCX),%XMM4 |
(96) 0x40f5e0 VFMADD213SD (%RSI),%XMM0,%XMM4 |
(96) 0x40f5e5 ADD $0x18,%RSI |
(96) 0x40f5e9 ADD $0x18,%RCX |
(96) 0x40f5ed VMOVSD %XMM4,-0x18(%RSI) |
(96) 0x40f5f2 VMOVSD -0x10(%RCX),%XMM5 |
(96) 0x40f5f7 VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 |
(96) 0x40f5fd VMOVSD %XMM5,-0x10(%RSI) |
(96) 0x40f602 VMOVSD -0x8(%RCX),%XMM6 |
(96) 0x40f607 VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 |
(96) 0x40f60d VMOVSD %XMM6,-0x8(%RSI) |
(96) 0x40f612 CMP %RSI,%RDX |
(96) 0x40f615 JE 40f7f2 |
(97) 0x40f61b VMOVSD (%RCX),%XMM7 |
(97) 0x40f61f VFMADD213SD (%RSI),%XMM0,%XMM7 |
(97) 0x40f624 ADD $0xc0,%RSI |
(97) 0x40f62b ADD $0xc0,%RCX |
(97) 0x40f632 VMOVSD %XMM7,-0xc0(%RSI) |
(97) 0x40f63a VMOVSD -0xb8(%RCX),%XMM8 |
(97) 0x40f642 VFMADD213SD -0xb8(%RSI),%XMM0,%XMM8 |
(97) 0x40f64b VMOVSD %XMM8,-0xb8(%RSI) |
(97) 0x40f653 VMOVSD -0xb0(%RCX),%XMM9 |
(97) 0x40f65b VFMADD213SD -0xb0(%RSI),%XMM0,%XMM9 |
(97) 0x40f664 VMOVSD %XMM9,-0xb0(%RSI) |
(97) 0x40f66c VMOVSD -0xa8(%RCX),%XMM10 |
(97) 0x40f674 VFMADD213SD -0xa8(%RSI),%XMM0,%XMM10 |
(97) 0x40f67d VMOVSD %XMM10,-0xa8(%RSI) |
(97) 0x40f685 VMOVSD -0xa0(%RCX),%XMM11 |
(97) 0x40f68d VFMADD213SD -0xa0(%RSI),%XMM0,%XMM11 |
(97) 0x40f696 VMOVSD %XMM11,-0xa0(%RSI) |
(97) 0x40f69e VMOVSD -0x98(%RCX),%XMM12 |
(97) 0x40f6a6 VFMADD213SD -0x98(%RSI),%XMM0,%XMM12 |
(97) 0x40f6af VMOVSD %XMM12,-0x98(%RSI) |
(97) 0x40f6b7 VMOVSD -0x90(%RCX),%XMM13 |
(97) 0x40f6bf VFMADD213SD -0x90(%RSI),%XMM0,%XMM13 |
(97) 0x40f6c8 VMOVSD %XMM13,-0x90(%RSI) |
(97) 0x40f6d0 VMOVSD -0x88(%RCX),%XMM14 |
(97) 0x40f6d8 VFMADD213SD -0x88(%RSI),%XMM0,%XMM14 |
(97) 0x40f6e1 VMOVSD %XMM14,-0x88(%RSI) |
(97) 0x40f6e9 VMOVSD -0x80(%RCX),%XMM15 |
(97) 0x40f6ee VFMADD213SD -0x80(%RSI),%XMM0,%XMM15 |
(97) 0x40f6f4 VMOVSD %XMM15,-0x80(%RSI) |
(97) 0x40f6f9 VMOVSD -0x78(%RCX),%XMM1 |
(97) 0x40f6fe VFMADD213SD -0x78(%RSI),%XMM0,%XMM1 |
(97) 0x40f704 VMOVSD %XMM1,-0x78(%RSI) |
(97) 0x40f709 VMOVSD -0x70(%RCX),%XMM2 |
(97) 0x40f70e VFMADD213SD -0x70(%RSI),%XMM0,%XMM2 |
(97) 0x40f714 VMOVSD %XMM2,-0x70(%RSI) |
(97) 0x40f719 VMOVSD -0x68(%RCX),%XMM3 |
(97) 0x40f71e VFMADD213SD -0x68(%RSI),%XMM0,%XMM3 |
(97) 0x40f724 VMOVSD %XMM3,-0x68(%RSI) |
(97) 0x40f729 VMOVSD -0x60(%RCX),%XMM4 |
(97) 0x40f72e VFMADD213SD -0x60(%RSI),%XMM0,%XMM4 |
(97) 0x40f734 VMOVSD %XMM4,-0x60(%RSI) |
(97) 0x40f739 VMOVSD -0x58(%RCX),%XMM5 |
(97) 0x40f73e VFMADD213SD -0x58(%RSI),%XMM0,%XMM5 |
(97) 0x40f744 VMOVSD %XMM5,-0x58(%RSI) |
(97) 0x40f749 VMOVSD -0x50(%RCX),%XMM6 |
(97) 0x40f74e VFMADD213SD -0x50(%RSI),%XMM0,%XMM6 |
(97) 0x40f754 VMOVSD %XMM6,-0x50(%RSI) |
(97) 0x40f759 VMOVSD -0x48(%RCX),%XMM7 |
(97) 0x40f75e VFMADD213SD -0x48(%RSI),%XMM0,%XMM7 |
(97) 0x40f764 VMOVSD %XMM7,-0x48(%RSI) |
(97) 0x40f769 VMOVSD -0x40(%RCX),%XMM8 |
(97) 0x40f76e VFMADD213SD -0x40(%RSI),%XMM0,%XMM8 |
(97) 0x40f774 VMOVSD %XMM8,-0x40(%RSI) |
(97) 0x40f779 VMOVSD -0x38(%RCX),%XMM9 |
(97) 0x40f77e VFMADD213SD -0x38(%RSI),%XMM0,%XMM9 |
(97) 0x40f784 VMOVSD %XMM9,-0x38(%RSI) |
(97) 0x40f789 VMOVSD -0x30(%RCX),%XMM10 |
(97) 0x40f78e VFMADD213SD -0x30(%RSI),%XMM0,%XMM10 |
(97) 0x40f794 VMOVSD %XMM10,-0x30(%RSI) |
(97) 0x40f799 VMOVSD -0x28(%RCX),%XMM11 |
(97) 0x40f79e VFMADD213SD -0x28(%RSI),%XMM0,%XMM11 |
(97) 0x40f7a4 VMOVSD %XMM11,-0x28(%RSI) |
(97) 0x40f7a9 VMOVSD -0x20(%RCX),%XMM12 |
(97) 0x40f7ae VFMADD213SD -0x20(%RSI),%XMM0,%XMM12 |
(97) 0x40f7b4 VMOVSD %XMM12,-0x20(%RSI) |
(97) 0x40f7b9 VMOVSD -0x18(%RCX),%XMM13 |
(97) 0x40f7be VFMADD213SD -0x18(%RSI),%XMM0,%XMM13 |
(97) 0x40f7c4 VMOVSD %XMM13,-0x18(%RSI) |
(97) 0x40f7c9 VMOVSD -0x10(%RCX),%XMM14 |
(97) 0x40f7ce VFMADD213SD -0x10(%RSI),%XMM0,%XMM14 |
(97) 0x40f7d4 VMOVSD %XMM14,-0x10(%RSI) |
(97) 0x40f7d9 VMOVSD -0x8(%RCX),%XMM15 |
(97) 0x40f7de VFMADD213SD -0x8(%RSI),%XMM0,%XMM15 |
(97) 0x40f7e4 VMOVSD %XMM15,-0x8(%RSI) |
(97) 0x40f7e9 CMP %RSI,%RDX |
(97) 0x40f7ec JNE 40f61b |
(96) 0x40f7f2 INC %R12 |
(96) 0x40f7f5 ADD $0x600,%R8 |
(96) 0x40f7fc CMP %R12D,%EAX |
(96) 0x40f7ff JG 40f418 |
0x40f805 POP %RBX |
0x40f806 POP %R12 |
0x40f808 POP %RBP |
0x40f809 RET |
0x40f80a INC %EAX |
0x40f80c XOR %EDX,%EDX |
0x40f80e JMP 40f3e8 |
0x40f813 NOPW %CS:(%RAX,%RAX,1) |
0x40f81e XCHG %AX,%AX |
Path / |
Source file and lines | timestep.c:71-78 |
Module | exec |
nb instructions | 36 |
nb uops | 35 |
loop length | 115 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 5.83 cycles |
front end | 5.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.00 | 3.00 | 3.00 | 3.00 | 4.00 | 2.33 | 2.33 | 2.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 4.00 | 3.00 | 3.00 | 3.00 | 4.00 | 2.33 | 2.33 | 2.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 6.00 |
Front-end | 5.83 |
Dispatch | 4.00 |
DIV/SQRT | 6.00 |
Overall L1 | 6.00 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 403060 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x10(%RBX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CLTD | |||||||||||||||||
IDIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-14 | 6 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 40f80a <advanceVelocity._omp_fn.0+0x44a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %EAX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %ESI,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EAX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 40f805 <advanceVelocity._omp_fn.0+0x445> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD %ESI,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMOVSD 0x8(%RBX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R12,%R12,2),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x18(%R10),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SAL $0x9,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RCX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INC %EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40f3e8 <advanceVelocity._omp_fn.0+0x28> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | timestep.c:71-78 |
Module | exec |
nb instructions | 36 |
nb uops | 35 |
loop length | 115 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 5.83 cycles |
front end | 5.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.00 | 3.00 | 3.00 | 3.00 | 4.00 | 2.33 | 2.33 | 2.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 4.00 | 3.00 | 3.00 | 3.00 | 4.00 | 2.33 | 2.33 | 2.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 6.00 |
Front-end | 5.83 |
Dispatch | 4.00 |
DIV/SQRT | 6.00 |
Overall L1 | 6.00 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 6% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 403060 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x10(%RBX),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CLTD | |||||||||||||||||
IDIV %R12D | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-14 | 6 |
CMP %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 40f80a <advanceVelocity._omp_fn.0+0x44a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %EAX,%ESI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD %ESI,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EAX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 40f805 <advanceVelocity._omp_fn.0+0x445> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVSXD %ESI,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VMOVSD 0x8(%RBX),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%R12,%R12,2),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x18(%R10),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SAL $0x9,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RCX),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
INC %EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 40f3e8 <advanceVelocity._omp_fn.0+0x28> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advanceVelocity._omp_fn.0– | 1.82 | 0.18 |
▼Loop 96 - timestep.c:74-78 - exec– | 0.97 | 0.05 |
○Loop 97 - timestep.c:74-78 - exec | 0.86 | 0.04 |