Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:234-257 | Coverage: 1.45% |
---|
Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:234-257 | Coverage: 1.45% |
---|
/home/hbollore/qaas-runs/171-284-6744/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 234 - 257 |
-------------------------------------------------------------------------------- |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
0x4559c0 SUBS W17, W6, W5 |
0x4559c4 B.LE 455c18 |
0x4559c8 LDP D0, D1, [X1] |
0x4559cc LDR D2, [X1, #16] |
0x4559d0 LDR X8, [X2, #24] |
0x4559d4 CNTD X14, ALL |
0x4559d8 SUB W15, W6, #1 |
0x4559dc CMP W17, W14 |
0x4559e0 LDR X10, [X2, #8] |
0x4559e4 LDR X11, [X4, #24] |
0x4559e8 LDR X13, [X4, #8] |
0x4559ec CCMP W15, W5, #8, #2 |
0x4559f0 ADD X9, X8, X10,LSL #3 |
0x4559f4 ADD X10, X8, X10,LSL #4 |
0x4559f8 ADD X12, X11, X13,LSL #3 |
0x4559fc ADD X13, X11, X13,LSL #4 |
0x455a00 B.GE 455a0c |
0x455a04 ORR W15, WZR, WZR |
0x455a08 B 455b38 |
0x455a0c UDIV W15, W17, W14 |
0x455a10 ORR W16, WZR, WZR |
0x455a14 DUP Z3.D, Z0.D[0] |
0x455a18 DUP Z4.D, Z1.D[0] |
0x455a1c DUP Z5.D, Z2.D[0] |
0x455a20 PTRUE P0.D, ALL |
0x455a24 MADD W15, W15, W14, WZR |
0x455a28 SUB W17, W17, W15 |
(1551) 0x455a2c ADD W18, W5, W16 |
(1551) 0x455a30 LD1RD {Z17.D}, P0/Z, [X0, #9] |
(1551) 0x455a34 LD1RD {Z18.D}, P0/Z, [X0, #10] |
(1551) 0x455a38 LD1RD {Z19.D}, P0/Z, [X0, #12] |
(1551) 0x455a3c ADD W16, W16, W14 |
(1551) 0x455a40 SBFM X18, X18, #0, #31 |
(1551) 0x455a44 CMP W15, W16 |
(1551) 0x455a48 LD1D {Z6.D}, P0/Z, [X8, X18,LSL #3] |
(1551) 0x455a4c LD1D {Z7.D}, P0/Z, [X9, X18,LSL #3] |
(1551) 0x455a50 LD1D {Z16.D}, P0/Z, [X10, X18,LSL #3] |
(1551) 0x455a54 FSUB Z6.D, Z6.D, Z3.D |
(1551) 0x455a58 FSUB Z7.D, Z7.D, Z4.D |
(1551) 0x455a5c FSUB Z16.D, Z16.D, Z5.D |
(1551) 0x455a60 FMUL Z17.D, Z17.D, Z6.D |
(1551) 0x455a64 FMUL Z19.D, Z19.D, Z6.D |
(1551) 0x455a68 FMLA Z17.D, P0/M, Z18.D, Z7.D |
(1551) 0x455a6c LD1RD {Z18.D}, P0/Z, [X0, #11] |
(1551) 0x455a70 FMLA Z17.D, P0/M, Z18.D, Z16.D |
(1551) 0x455a74 LD1RD {Z18.D}, P0/Z, [X0, #13] |
(1551) 0x455a78 FMAD Z18.D, P0/M, Z7.D, Z19.D |
(1551) 0x455a7c LD1RD {Z19.D}, P0/Z, [X0, #14] |
(1551) 0x455a80 FMLA Z18.D, P0/M, Z19.D, Z16.D |
(1551) 0x455a84 LD1RD {Z19.D}, P0/Z, [X0, #15] |
(1551) 0x455a88 FMUL Z6.D, Z19.D, Z6.D |
(1551) 0x455a8c LD1RD {Z19.D}, P0/Z, [X0, #16] |
(1551) 0x455a90 FMLA Z6.D, P0/M, Z19.D, Z7.D |
(1551) 0x455a94 LD1RD {Z7.D}, P0/Z, [X0, #17] |
(1551) 0x455a98 FMLA Z6.D, P0/M, Z7.D, Z16.D |
(1551) 0x455a9c MOVPRFX Z7, Z17 |
(1551) 0x455aa0 FRINTA Z7.D, P0/M, Z17.D |
(1551) 0x455aa4 FSUB Z7.D, Z17.D, Z7.D |
(1551) 0x455aa8 MOVPRFX Z17, Z6 |
(1551) 0x455aac FRINTA Z17.D, P0/M, Z6.D |
(1551) 0x455ab0 MOVPRFX Z16, Z18 |
(1551) 0x455ab4 FRINTA Z16.D, P0/M, Z18.D |
(1551) 0x455ab8 FSUB Z16.D, Z18.D, Z16.D |
(1551) 0x455abc LD1RD {Z18.D}, P0/Z, [X0, #1] |
(1551) 0x455ac0 FSUB Z6.D, Z6.D, Z17.D |
(1551) 0x455ac4 LD1RD {Z17.D}, P0/Z, [X0] |
(1551) 0x455ac8 FMUL Z17.D, Z17.D, Z7.D |
(1551) 0x455acc FMLA Z17.D, P0/M, Z18.D, Z16.D |
(1551) 0x455ad0 LD1RD {Z18.D}, P0/Z, [X0, #2] |
(1551) 0x455ad4 FMLA Z17.D, P0/M, Z6.D, Z18.D |
(1551) 0x455ad8 ST1D {Z17.D}, P0, [X11, X18,LSL #3] |
(1551) 0x455adc LD1RD {Z17.D}, P0/Z, [X0, #3] |
(1551) 0x455ae0 LD1RD {Z18.D}, P0/Z, [X0, #4] |
(1551) 0x455ae4 FMUL Z17.D, Z17.D, Z7.D |
(1551) 0x455ae8 FMLA Z17.D, P0/M, Z18.D, Z16.D |
(1551) 0x455aec LD1RD {Z18.D}, P0/Z, [X0, #5] |
(1551) 0x455af0 FMLA Z17.D, P0/M, Z18.D, Z6.D |
(1551) 0x455af4 ST1D {Z17.D}, P0, [X12, X18,LSL #3] |
(1551) 0x455af8 LD1RD {Z17.D}, P0/Z, [X0, #6] |
(1551) 0x455afc FMUL Z7.D, Z17.D, Z7.D |
(1551) 0x455b00 LD1RD {Z17.D}, P0/Z, [X0, #7] |
(1551) 0x455b04 FMLA Z7.D, P0/M, Z17.D, Z16.D |
(1551) 0x455b08 LD1RD {Z16.D}, P0/Z, [X0, #8] |
(1551) 0x455b0c FMAD Z6.D, P0/M, Z16.D, Z7.D |
(1551) 0x455b10 ST1D {Z6.D}, P0, [X13, X18,LSL #3] |
(1551) 0x455b14 LD1D {Z7.D}, P0/Z, [X11, X18,LSL #3] |
(1551) 0x455b18 LD1D {Z16.D}, P0/Z, [X12, X18,LSL #3] |
(1551) 0x455b1c FMUL Z7.D, Z7.D, Z7.D |
(1551) 0x455b20 FMAD Z6.D, P0/M, Z6.D, Z7.D |
(1551) 0x455b24 FMLA Z6.D, P0/M, Z16.D, Z16.D |
(1551) 0x455b28 FSQRT Z6.D, P0/M, Z6.D |
(1551) 0x455b2c ST1D {Z6.D}, P0, [X3, X18,LSL #3] |
(1551) 0x455b30 B.NE 455a2c |
0x455b34 CBZ W17, 455c18 |
0x455b38 ADD W14, W15, W5 |
0x455b3c HINT #0 |
(1552) 0x455b40 LDR D3, [X8, X14,SXTW #3] |
(1552) 0x455b44 LDP D6, D7, [X0, #72] |
(1552) 0x455b48 LDR D4, [X9, X14,SXTW #3] |
(1552) 0x455b4c FSUB D3, D3, S0 |
(1552) 0x455b50 FSUB D4, D4, S1 |
(1552) 0x455b54 LDR D5, [X10, X14,SXTW #3] |
(1552) 0x455b58 FSUB D5, D5, S2 |
(1552) 0x455b5c FMUL D6, D6, D3 |
(1552) 0x455b60 FMADD D6, D7, D4, D6 |
(1552) 0x455b64 LDP D7, D16, [X0, #88] |
(1552) 0x455b68 FMADD D6, D7, D5, D6 |
(1552) 0x455b6c FMUL D7, D16, D3 |
(1552) 0x455b70 LDP D16, D17, [X0, #104] |
(1552) 0x455b74 FMADD D7, D16, D4, D7 |
(1552) 0x455b78 FMADD D7, D17, D5, D7 |
(1552) 0x455b7c LDP D16, D17, [X0, #120] |
(1552) 0x455b80 FMUL D3, D16, D3 |
(1552) 0x455b84 FMADD D3, D17, D4, D3 |
(1552) 0x455b88 LDR D4, [X0, #136] |
(1552) 0x455b8c FMADD D3, D4, D5, D3 |
(1552) 0x455b90 FRINTA D4, D6 |
(1552) 0x455b94 FRINTA D5, D7 |
(1552) 0x455b98 FSUB D4, D6, S4 |
(1552) 0x455b9c FSUB D5, D7, S5 |
(1552) 0x455ba0 FRINTA D6, D3 |
(1552) 0x455ba4 FSUB D3, D3, S6 |
(1552) 0x455ba8 LDP D6, D7, [X0] |
(1552) 0x455bac FMUL D6, D6, D4 |
(1552) 0x455bb0 FMADD D6, D7, D5, D6 |
(1552) 0x455bb4 LDR D7, [X0, #16] |
(1552) 0x455bb8 FMADD D6, D3, D7, D6 |
(1552) 0x455bbc STR D6, [X11, X14,SXTW #3] |
(1552) 0x455bc0 LDP D6, D7, [X0, #24] |
(1552) 0x455bc4 FMUL D6, D6, D4 |
(1552) 0x455bc8 FMADD D6, D7, D5, D6 |
(1552) 0x455bcc LDR D7, [X0, #40] |
(1552) 0x455bd0 FMADD D6, D7, D3, D6 |
(1552) 0x455bd4 STR D6, [X12, X14,SXTW #3] |
(1552) 0x455bd8 LDP D6, D7, [X0, #48] |
(1552) 0x455bdc FMUL D4, D6, D4 |
(1552) 0x455be0 FMADD D4, D7, D5, D4 |
(1552) 0x455be4 LDR D5, [X0, #64] |
(1552) 0x455be8 FMADD D3, D5, D3, D4 |
(1552) 0x455bec STR D3, [X13, X14,SXTW #3] |
(1552) 0x455bf0 FMUL D3, D3, D3 |
(1552) 0x455bf4 LDR D4, [X11, X14,SXTW #3] |
(1552) 0x455bf8 LDR D5, [X12, X14,SXTW #3] |
(1552) 0x455bfc FMADD D3, D4, D4, D3 |
(1552) 0x455c00 FMADD D3, D5, D5, D3 |
(1552) 0x455c04 FSQRT D3, D3 |
(1552) 0x455c08 STR D3, [X3, X14,SXTW #3] |
(1552) 0x455c0c ADD W14, W14, #1 |
(1552) 0x455c10 CMP W6, W14 |
(1552) 0x455c14 B.NE 455b40 |
0x455c18 RET |
0x455c1c HINT #0 |
Coverage (%) | Name | Source Location | Module |
---|
Path / |
Source file and lines | ParticleBConds3DSoa.h:234-257 |
Module | exec |
nb instructions | 32 |
loop length | 128 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.00 | 4.00 | 4.00 | 4.00 | 1.50 | 1.50 | 0.00 | 0.00 | 2.00 | 2.00 | 2.00 | 0.00 | 0.00 |
cycles | 2.50 | 2.50 | 4.00 | 4.00 | 4.00 | 4.00 | 1.50 | 1.50 | 0.00 | 0.00 | 2.00 | 2.00 | 2.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 5.00-12.50 |
Front-end | 3.75 |
Overall L1 | 5.00-12.50 |
all | 26% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 42% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUBS W17, W6, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 455c18 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x258> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP D0, D1, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDR D2, [X1, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDR X8, [X2, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CNTD X14, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB W15, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W17, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDR X10, [X2, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X11, [X4, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X13, [X4, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CCMP W15, W5, #8, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X9, X8, X10,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X10, X8, X10,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X11, X13,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X13, X11, X13,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.GE 455a0c <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W15, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 455b38 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x178> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W15, W17, W14 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 |
ORR W16, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z3.D, Z0.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z4.D, Z1.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z5.D, Z2.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
PTRUE P0.D, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MADD W15, W15, W14, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB W17, W17, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CBZ W17, 455c18 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x258> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W14, W15, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 |
Source file and lines | ParticleBConds3DSoa.h:234-257 |
Module | exec |
nb instructions | 32 |
loop length | 128 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.00 | 4.00 | 4.00 | 4.00 | 1.50 | 1.50 | 0.00 | 0.00 | 2.00 | 2.00 | 2.00 | 0.00 | 0.00 |
cycles | 2.50 | 2.50 | 4.00 | 4.00 | 4.00 | 4.00 | 1.50 | 1.50 | 0.00 | 0.00 | 2.00 | 2.00 | 2.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 5.00-12.50 |
Front-end | 3.75 |
Overall L1 | 5.00-12.50 |
all | 26% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 42% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUBS W17, W6, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 455c18 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x258> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP D0, D1, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDR D2, [X1, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDR X8, [X2, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CNTD X14, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB W15, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W17, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDR X10, [X2, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X11, [X4, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X13, [X4, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CCMP W15, W5, #8, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X9, X8, X10,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X10, X8, X10,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X11, X13,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X13, X11, X13,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.GE 455a0c <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W15, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 455b38 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x178> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W15, W17, W14 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 |
ORR W16, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z3.D, Z0.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z4.D, Z1.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z5.D, Z2.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
PTRUE P0.D, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MADD W15, W15, W14, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB W17, W17, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CBZ W17, 455c18 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x258> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W14, W15, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼void qmcplusplus::DTD_BConds | 1.45 | 2.28 |
○Loop 1551 - ParticleBConds3DSoa.h:234-255 - exec | 1.44 | 1.97 |
○Loop 1552 - ParticleBConds3DSoa.h:234-255 - exec | 0 | 0.01 |