Function: updateLinkCells | Module: exec | Source: linkCells.c:211-385 [...] | Coverage: 0.71% |
---|
Function: updateLinkCells | Module: exec | Source: linkCells.c:211-385 [...] | Coverage: 0.71% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-850-7424/intel/CoMD/build/CoMD/CoMD/src-openmp/linkCells.c: 211 - 385 |
-------------------------------------------------------------------------------- |
211: iBox = boxes->nLocalBoxes + 2*gridSize[2]*gridSize[1] + 2*gridSize[2]*(gridSize[0]+2) + |
212: (gridSize[0]+2)*(gridSize[1]+2) + (gridSize[0]+2)*(iy+1) + (ix+1); |
213: } |
214: // Halo in Z- |
215: else if (iz == -1) |
216: { |
217: iBox = boxes->nLocalBoxes + 2*gridSize[2]*gridSize[1] + 2*gridSize[2]*(gridSize[0]+2) + |
218: (gridSize[0]+2)*(iy+1) + (ix+1); |
219: } |
220: // Halo in Y+ |
221: else if (iy == gridSize[1]) |
222: { |
223: iBox = boxes->nLocalBoxes + 2*gridSize[2]*gridSize[1] + gridSize[2]*(gridSize[0]+2) + |
224: (gridSize[0]+2)*iz + (ix+1); |
225: } |
226: // Halo in Y- |
227: else if (iy == -1) |
228: { |
229: iBox = boxes->nLocalBoxes + 2*gridSize[2]*gridSize[1] + iz*(gridSize[0]+2) + (ix+1); |
230: } |
231: // Halo in X+ |
232: else if (ix == gridSize[0]) |
233: { |
234: iBox = boxes->nLocalBoxes + gridSize[1]*gridSize[2] + iz*gridSize[1] + iy; |
235: } |
236: // Halo in X- |
237: else if (ix == -1) |
238: { |
239: iBox = boxes->nLocalBoxes + iz*gridSize[1] + iy; |
240: } |
241: // local link cell. |
242: else |
243: { |
244: iBox = ix + gridSize[0]*iy + gridSize[0]*gridSize[1]*iz; |
245: } |
246: assert(iBox >= 0); |
247: assert(iBox < boxes->nTotalBoxes); |
[...] |
288: { |
289: emptyHaloCells(boxes); |
290: |
291: for (int iBox=0; iBox<boxes->nLocalBoxes; ++iBox) |
292: { |
293: int iOff = iBox*MAXATOMS; |
294: int ii=0; |
295: while (ii < boxes->nAtoms[iBox]) |
296: { |
297: int jBox = getBoxFromCoord(boxes, atoms->r[iOff+ii]); |
298: if (jBox != iBox) |
299: moveAtom(boxes, atoms, ii, iBox, jBox); |
300: else |
301: ++ii; |
302: } |
303: } |
304: } |
[...] |
352: int ix = (int)(floor((rr[0] - localMin[0])*boxes->invBoxSize[0])); |
353: int iy = (int)(floor((rr[1] - localMin[1])*boxes->invBoxSize[1])); |
354: int iz = (int)(floor((rr[2] - localMin[2])*boxes->invBoxSize[2])); |
355: |
356: |
357: // For each axis, if we are inside the local domain, make sure we get |
358: // a local link cell. Otherwise, make sure we get a halo link cell. |
359: if (rr[0] < localMax[0]) |
360: { |
361: if (ix == gridSize[0]) ix = gridSize[0] - 1; |
362: } |
363: else |
364: ix = gridSize[0]; // assign to halo cell |
365: if (rr[1] < localMax[1]) |
366: { |
367: if (iy == gridSize[1]) iy = gridSize[1] - 1; |
368: } |
369: else |
370: iy = gridSize[1]; |
371: if (rr[2] < localMax[2]) |
372: { |
373: if (iz == gridSize[2]) iz = gridSize[2] - 1; |
[...] |
384: for (int ii=boxes->nLocalBoxes; ii<boxes->nTotalBoxes; ++ii) |
385: boxes->nAtoms[ii] = 0; |
0x40b180 PUSH %RBP |
0x40b181 MOV %RSP,%RBP |
0x40b184 PUSH %R15 |
0x40b186 PUSH %R14 |
0x40b188 PUSH %R13 |
0x40b18a PUSH %R12 |
0x40b18c PUSH %RBX |
0x40b18d MOV %RDI,%RBX |
0x40b190 SUB $0x28,%RSP |
0x40b194 MOV %RSI,-0x40(%RBP) |
0x40b198 MOVSXD 0xc(%RDI),%R14 |
0x40b19c CMP 0x14(%RDI),%R14D |
0x40b1a0 JGE 40b1c5 |
0x40b1a2 MOV 0x78(%RDI),%RDX |
0x40b1a6 NOPW %CS:(%RAX,%RAX,1) |
(80) 0x40b1b0 MOVL $0,(%RDX,%R14,4) |
(80) 0x40b1b8 INC %R14 |
(80) 0x40b1bb CMP %R14D,0x14(%RBX) |
(80) 0x40b1bf JG 40b1b0 |
0x40b1c1 MOV 0xc(%RBX),%R14D |
0x40b1c5 TEST %R14D,%R14D |
0x40b1c8 JLE 40b354 |
0x40b1ce MOV 0x78(%RBX),%R12 |
0x40b1d2 XOR %R13D,%R13D |
0x40b1d5 NOPL (%RAX) |
(77) 0x40b1d8 MOV %R13D,%R15D |
(77) 0x40b1db MOV (%R12,%R13,4),%ECX |
(77) 0x40b1df LEA (,%R13,4),%RSI |
(77) 0x40b1e7 MOV %R13D,-0x44(%RBP) |
(77) 0x40b1eb SAL $0x6,%R15D |
(77) 0x40b1ef MOV %RSI,-0x38(%RBP) |
(77) 0x40b1f3 MOV %R15D,-0x48(%RBP) |
(77) 0x40b1f7 TEST %ECX,%ECX |
(77) 0x40b1f9 JLE 40b348 |
(77) 0x40b1ff XOR %R14D,%R14D |
(79) 0x40b202 MOV -0x40(%RBP),%RAX |
(79) 0x40b206 VMOVSD 0x18(%RBX),%XMM8 |
(79) 0x40b20b VMOVSD 0x60(%RBX),%XMM7 |
(79) 0x40b210 VMOVSD 0x20(%RBX),%XMM6 |
(79) 0x40b215 MOV 0x18(%RAX),%R10 |
(79) 0x40b219 VMOVSD 0x68(%RBX),%XMM5 |
(79) 0x40b21e VMOVSD 0x28(%RBX),%XMM4 |
(79) 0x40b223 VMOVSD 0x70(%RBX),%XMM3 |
(79) 0x40b228 MOV (%RBX),%EDI |
(79) 0x40b22a MOV 0x4(%RBX),%ECX |
(79) 0x40b22d MOV 0x8(%RBX),%ESI |
(78) 0x40b230 MOVSXD %R15D,%R8 |
(78) 0x40b233 MOV %EDI,%EAX |
(78) 0x40b235 LEA (%R8,%R8,2),%R9 |
(78) 0x40b239 LEA (%R10,%R9,8),%R11 |
(78) 0x40b23d VMOVSD (%R11),%XMM2 |
(78) 0x40b242 VCOMISD 0x30(%RBX),%XMM2 |
(78) 0x40b247 VMOVSD 0x8(%R11),%XMM1 |
(78) 0x40b24d VMOVSD 0x10(%R11),%XMM0 |
(78) 0x40b253 JAE 40b272 |
(78) 0x40b255 VSUBSD %XMM8,%XMM2,%XMM9 |
(78) 0x40b25a XOR %EDX,%EDX |
(78) 0x40b25c VMULSD %XMM7,%XMM9,%XMM10 |
(78) 0x40b260 VROUNDSD $0x9,%XMM10,%XMM10,%XMM10 |
(78) 0x40b266 VCVTTSD2SI %XMM10,%EAX |
(78) 0x40b26b CMP %EDI,%EAX |
(78) 0x40b26d SETE %DL |
(78) 0x40b270 SUB %EDX,%EAX |
(78) 0x40b272 VCOMISD 0x38(%RBX),%XMM1 |
(78) 0x40b277 MOV %ECX,%R8D |
(78) 0x40b27a JAE 40b29c |
(78) 0x40b27c VSUBSD %XMM6,%XMM1,%XMM11 |
(78) 0x40b280 XOR %R9D,%R9D |
(78) 0x40b283 VMULSD %XMM5,%XMM11,%XMM12 |
(78) 0x40b287 VROUNDSD $0x9,%XMM12,%XMM12,%XMM12 |
(78) 0x40b28d VCVTTSD2SI %XMM12,%R8D |
(78) 0x40b292 CMP %ECX,%R8D |
(78) 0x40b295 SETE %R9B |
(78) 0x40b299 SUB %R9D,%R8D |
(78) 0x40b29c VCOMISD 0x40(%RBX),%XMM0 |
(78) 0x40b2a1 JAE 40b388 |
(78) 0x40b2a7 VSUBSD %XMM4,%XMM0,%XMM13 |
(78) 0x40b2ab XOR %R9D,%R9D |
(78) 0x40b2ae VMULSD %XMM3,%XMM13,%XMM14 |
(78) 0x40b2b2 VROUNDSD $0x9,%XMM14,%XMM14,%XMM14 |
(78) 0x40b2b8 VCVTTSD2SI %XMM14,%EDX |
(78) 0x40b2bd CMP %ESI,%EDX |
(78) 0x40b2bf SETE %R9B |
(78) 0x40b2c3 SUB %R9D,%EDX |
(78) 0x40b2c6 CMP $-0x1,%EDX |
(78) 0x40b2c9 JE 40b3c8 |
(78) 0x40b2cf CMP %ECX,%R8D |
(78) 0x40b2d2 JE 40b400 |
(78) 0x40b2d8 CMP $-0x1,%R8D |
(78) 0x40b2dc JE 40b430 |
(78) 0x40b2e2 CMP %EDI,%EAX |
(78) 0x40b2e4 JE 40b458 |
(78) 0x40b2ea CMP $-0x1,%EAX |
(78) 0x40b2ed JE 40b478 |
(78) 0x40b2f3 IMUL %EDI,%R8D |
(78) 0x40b2f7 ADD %EAX,%R8D |
(78) 0x40b2fa MOV %ECX,%EAX |
(78) 0x40b2fc IMUL %EDI,%EAX |
(78) 0x40b2ff IMUL %EDX,%EAX |
(78) 0x40b302 ADD %EAX,%R8D |
(78) 0x40b305 NOPL (%RAX) |
(78) 0x40b308 TEST %R8D,%R8D |
(78) 0x40b30b JS 40b486 |
(78) 0x40b311 CMP 0x14(%RBX),%R8D |
(78) 0x40b315 JGE 40b49f |
(78) 0x40b31b CMP %R13D,%R8D |
(78) 0x40b31e JE 40b368 |
(79) 0x40b320 MOV -0x44(%RBP),%ECX |
(79) 0x40b323 MOV -0x40(%RBP),%RSI |
(79) 0x40b327 MOV %R14D,%EDX |
(79) 0x40b32a MOV %RBX,%RDI |
(79) 0x40b32d CALL 40afd0 <moveAtom> |
(79) 0x40b332 MOV 0x78(%RBX),%R12 |
(79) 0x40b336 MOV -0x38(%RBP),%R10 |
(79) 0x40b33a CMP %R14D,(%R12,%R10,1) |
(79) 0x40b33e JG 40b202 |
(77) 0x40b344 MOV 0xc(%RBX),%R14D |
(77) 0x40b348 INC %R13 |
(77) 0x40b34b CMP %R13D,%R14D |
(77) 0x40b34e JG 40b1d8 |
0x40b354 ADD $0x28,%RSP |
0x40b358 POP %RBX |
0x40b359 POP %R12 |
0x40b35b POP %R13 |
0x40b35d POP %R14 |
0x40b35f POP %R15 |
0x40b361 POP %RBP |
0x40b362 RET |
0x40b363 NOPL (%RAX,%RAX,1) |
(78) 0x40b368 MOV -0x38(%RBP),%R15 |
(78) 0x40b36c INC %R14D |
(78) 0x40b36f CMP %R14D,(%R12,%R15,1) |
(78) 0x40b373 JLE 40b344 |
(78) 0x40b375 MOV -0x48(%RBP),%EAX |
(78) 0x40b378 LEA (%RAX,%R14,1),%R15D |
(78) 0x40b37c JMP 40b230 |
0x40b381 NOPL (%RAX) |
(78) 0x40b388 MOV %ESI,%R11D |
(78) 0x40b38b MOV 0xc(%RBX),%EDX |
(78) 0x40b38e LEA 0x2(%RDI),%R9D |
(78) 0x40b392 INC %R8D |
(78) 0x40b395 IMUL %ECX,%R11D |
(78) 0x40b399 IMUL %R9D,%R8D |
(78) 0x40b39d LEA (%RDX,%R11,2),%EDX |
(78) 0x40b3a1 MOV %R9D,%R11D |
(78) 0x40b3a4 IMUL %ESI,%R11D |
(78) 0x40b3a8 LEA (%RDX,%R11,2),%EDX |
(78) 0x40b3ac LEA 0x2(%RCX),%R11D |
(78) 0x40b3b0 IMUL %R9D,%R11D |
(78) 0x40b3b4 ADD %R11D,%EDX |
(78) 0x40b3b7 ADD %EDX,%R8D |
(78) 0x40b3ba LEA 0x1(%R8,%RAX,1),%R8D |
(78) 0x40b3bf JMP 40b308 |
0x40b3c4 NOPL (%RAX) |
(78) 0x40b3c8 MOV %ESI,%R11D |
(78) 0x40b3cb MOV 0xc(%RBX),%EDX |
(78) 0x40b3ce LEA 0x2(%RDI),%R9D |
(78) 0x40b3d2 INC %R8D |
(78) 0x40b3d5 IMUL %ECX,%R11D |
(78) 0x40b3d9 IMUL %R9D,%R8D |
(78) 0x40b3dd LEA (%RDX,%R11,2),%EDX |
(78) 0x40b3e1 MOV %R9D,%R11D |
(78) 0x40b3e4 IMUL %ESI,%R11D |
(78) 0x40b3e8 LEA (%RDX,%R11,2),%EDX |
(78) 0x40b3ec ADD %EDX,%R8D |
(78) 0x40b3ef LEA 0x1(%R8,%RAX,1),%R8D |
(78) 0x40b3f4 JMP 40b308 |
0x40b3f9 NOPL (%RAX) |
(78) 0x40b400 MOV %ESI,%R11D |
(78) 0x40b403 MOV 0xc(%RBX),%R8D |
(78) 0x40b407 LEA 0x2(%RDI),%R9D |
(78) 0x40b40b IMUL %ECX,%R11D |
(78) 0x40b40f LEA (%R8,%R11,2),%R8D |
(78) 0x40b413 MOV %R9D,%R11D |
(78) 0x40b416 IMUL %ESI,%R11D |
(78) 0x40b41a IMUL %EDX,%R9D |
(78) 0x40b41e ADD %R11D,%R8D |
(78) 0x40b421 LEA (%R8,%R9,1),%EDX |
(78) 0x40b425 LEA 0x1(%RDX,%RAX,1),%R8D |
(78) 0x40b42a JMP 40b308 |
0x40b42f NOP |
(78) 0x40b430 MOV %ESI,%R9D |
(78) 0x40b433 MOV 0xc(%RBX),%R8D |
(78) 0x40b437 IMUL %ECX,%R9D |
(78) 0x40b43b LEA (%R8,%R9,2),%R11D |
(78) 0x40b43f LEA 0x2(%RDI),%R9D |
(78) 0x40b443 IMUL %R9D,%EDX |
(78) 0x40b447 ADD %R11D,%EDX |
(78) 0x40b44a LEA 0x1(%RDX,%RAX,1),%R8D |
(78) 0x40b44f JMP 40b308 |
0x40b454 NOPL (%RAX) |
(78) 0x40b458 IMUL %ECX,%EDX |
(78) 0x40b45b MOV %ESI,%R11D |
(78) 0x40b45e IMUL %ECX,%R11D |
(78) 0x40b462 ADD 0xc(%RBX),%R11D |
(78) 0x40b466 ADD %R11D,%EDX |
(78) 0x40b469 ADD %EDX,%R8D |
(78) 0x40b46c JMP 40b308 |
0x40b471 NOPL (%RAX) |
(78) 0x40b478 IMUL %ECX,%EDX |
(78) 0x40b47b ADD 0xc(%RBX),%EDX |
(78) 0x40b47e ADD %EDX,%R8D |
(78) 0x40b481 JMP 40b308 |
0x40b486 MOV $0x411f10,%ECX |
0x40b48b MOV $0xf6,%EDX |
0x40b490 MOV $0x411de8,%ESI |
0x40b495 MOV $0x411ec7,%EDI |
0x40b49a CALL 4030c0 <__assert_fail@plt> |
0x40b49f MOV $0x411f10,%ECX |
0x40b4a4 MOV $0xf7,%EDX |
0x40b4a9 MOV $0x411de8,%ESI |
0x40b4ae MOV $0x411ed1,%EDI |
0x40b4b3 CALL 4030c0 <__assert_fail@plt> |
0x40b4b8 NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | linkCells.c:211-385 |
Module | exec |
nb instructions | 47 |
nb uops | 39 |
loop length | 179 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 6.50 cycles |
front end | 6.50 cycles |
Metric | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.00 | 4.00 | 3.75 | 3.75 | 2.50 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 4.00 | 4.00 | 3.75 | 3.75 | 2.50 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 6.50 |
Dispatch | 4.00 |
Overall L1 | 6.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 8% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD 0xc(%RDI),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CMP 0x14(%RDI),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JGE 40b1c5 <updateLinkCells+0x45> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x78(%RDI),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV 0xc(%RBX),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %R14D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 40b354 <updateLinkCells+0x1d4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x78(%RBX),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV $0x411f10,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0xf6,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x411de8,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x411ec7,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 4030c0 <__assert_fail@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x411f10,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0xf7,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x411de8,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x411ed1,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 4030c0 <__assert_fail@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | linkCells.c:211-385 |
Module | exec |
nb instructions | 47 |
nb uops | 39 |
loop length | 179 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 1 |
micro-operation queue | 6.50 cycles |
front end | 6.50 cycles |
Metric | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.00 | 4.00 | 3.75 | 3.75 | 2.50 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 4.00 | 4.00 | 3.75 | 3.75 | 2.50 | 2.67 | 2.67 | 2.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 6.50 |
Dispatch | 4.00 |
Overall L1 | 6.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 8% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 6% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RSI,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSXD 0xc(%RDI),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
CMP 0x14(%RDI),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JGE 40b1c5 <updateLinkCells+0x45> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x78(%RDI),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV 0xc(%RBX),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
TEST %R14D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 40b354 <updateLinkCells+0x1d4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x78(%RBX),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
ADD $0x28,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV $0x411f10,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0xf6,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x411de8,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x411ec7,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 4030c0 <__assert_fail@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x411f10,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0xf7,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x411de8,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x411ed1,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 4030c0 <__assert_fail@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼updateLinkCells– | 0.71 | 0.07 |
▼Loop 77 - linkCells.c:211-373 - exec– | 0 | 0.01 |
○Loop 78 - linkCells.c:211-373 - exec | 0.71 | 3.46 |
○Loop 79 - linkCells.c:294-299 - exec | 0 | 0.01 |
○Loop 80 - linkCells.c:384-385 - exec | 0 | 0 |