Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:234-257 | Coverage: 1.39% |
---|
Function: void qmcplusplus::DTD_BConds<double, 3u, 40>::computeDistances<qmcplusplus::TinyVector<dou ... | Module: exec | Source: ParticleBConds3DSoa.h:234-257 | Coverage: 1.39% |
---|
/home/hbollore/qaas-runs/171-284-6744/intel/miniqmc/build/miniqmc/src/Particle/Lattice/ParticleBConds3DSoa.h: 234 - 257 |
-------------------------------------------------------------------------------- |
234: #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT) |
235: for (int iat = first; iat < last; ++iat) |
236: { |
237: T displ_0 = px[iat] - x0; |
238: T displ_1 = py[iat] - y0; |
239: T displ_2 = pz[iat] - z0; |
240: |
241: T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20; |
242: T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21; |
243: T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22; |
244: |
245: //put them in the box |
246: ar_0 -= round(ar_0); |
247: ar_1 -= round(ar_1); |
248: ar_2 -= round(ar_2); |
249: |
250: //unit2cart |
251: dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20; |
252: dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21; |
253: dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22; |
254: |
255: temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]); |
256: } |
257: } |
0x455a80 SUBS W17, W6, W5 |
0x455a84 B.LE 455cd8 |
0x455a88 LDP D0, D1, [X1] |
0x455a8c LDR D2, [X1, #16] |
0x455a90 LDR X8, [X2, #24] |
0x455a94 CNTD X14, ALL |
0x455a98 SUB W15, W6, #1 |
0x455a9c CMP W17, W14 |
0x455aa0 LDR X10, [X2, #8] |
0x455aa4 LDR X11, [X4, #24] |
0x455aa8 LDR X13, [X4, #8] |
0x455aac CCMP W15, W5, #8, #2 |
0x455ab0 ADD X9, X8, X10,LSL #3 |
0x455ab4 ADD X10, X8, X10,LSL #4 |
0x455ab8 ADD X12, X11, X13,LSL #3 |
0x455abc ADD X13, X11, X13,LSL #4 |
0x455ac0 B.GE 455acc |
0x455ac4 ORR W15, WZR, WZR |
0x455ac8 B 455bf8 |
0x455acc UDIV W15, W17, W14 |
0x455ad0 ORR W16, WZR, WZR |
0x455ad4 DUP Z3.D, Z0.D[0] |
0x455ad8 DUP Z4.D, Z1.D[0] |
0x455adc DUP Z5.D, Z2.D[0] |
0x455ae0 PTRUE P0.D, ALL |
0x455ae4 MADD W15, W15, W14, WZR |
0x455ae8 SUB W17, W17, W15 |
(1551) 0x455aec ADD W18, W5, W16 |
(1551) 0x455af0 LD1RD {Z17.D}, P0/Z, [X0, #9] |
(1551) 0x455af4 LD1RD {Z18.D}, P0/Z, [X0, #10] |
(1551) 0x455af8 LD1RD {Z19.D}, P0/Z, [X0, #12] |
(1551) 0x455afc ADD W16, W16, W14 |
(1551) 0x455b00 SBFM X18, X18, #0, #31 |
(1551) 0x455b04 CMP W15, W16 |
(1551) 0x455b08 LD1D {Z6.D}, P0/Z, [X8, X18,LSL #3] |
(1551) 0x455b0c LD1D {Z7.D}, P0/Z, [X9, X18,LSL #3] |
(1551) 0x455b10 LD1D {Z16.D}, P0/Z, [X10, X18,LSL #3] |
(1551) 0x455b14 FSUB Z6.D, Z6.D, Z3.D |
(1551) 0x455b18 FSUB Z7.D, Z7.D, Z4.D |
(1551) 0x455b1c FSUB Z16.D, Z16.D, Z5.D |
(1551) 0x455b20 FMUL Z17.D, Z17.D, Z6.D |
(1551) 0x455b24 FMUL Z19.D, Z19.D, Z6.D |
(1551) 0x455b28 FMLA Z17.D, P0/M, Z18.D, Z7.D |
(1551) 0x455b2c LD1RD {Z18.D}, P0/Z, [X0, #11] |
(1551) 0x455b30 FMLA Z17.D, P0/M, Z18.D, Z16.D |
(1551) 0x455b34 LD1RD {Z18.D}, P0/Z, [X0, #13] |
(1551) 0x455b38 FMAD Z18.D, P0/M, Z7.D, Z19.D |
(1551) 0x455b3c LD1RD {Z19.D}, P0/Z, [X0, #14] |
(1551) 0x455b40 FMLA Z18.D, P0/M, Z19.D, Z16.D |
(1551) 0x455b44 LD1RD {Z19.D}, P0/Z, [X0, #15] |
(1551) 0x455b48 FMUL Z6.D, Z19.D, Z6.D |
(1551) 0x455b4c LD1RD {Z19.D}, P0/Z, [X0, #16] |
(1551) 0x455b50 FMLA Z6.D, P0/M, Z19.D, Z7.D |
(1551) 0x455b54 LD1RD {Z7.D}, P0/Z, [X0, #17] |
(1551) 0x455b58 FMLA Z6.D, P0/M, Z7.D, Z16.D |
(1551) 0x455b5c MOVPRFX Z7, Z17 |
(1551) 0x455b60 FRINTA Z7.D, P0/M, Z17.D |
(1551) 0x455b64 FSUB Z7.D, Z17.D, Z7.D |
(1551) 0x455b68 MOVPRFX Z17, Z6 |
(1551) 0x455b6c FRINTA Z17.D, P0/M, Z6.D |
(1551) 0x455b70 MOVPRFX Z16, Z18 |
(1551) 0x455b74 FRINTA Z16.D, P0/M, Z18.D |
(1551) 0x455b78 FSUB Z16.D, Z18.D, Z16.D |
(1551) 0x455b7c LD1RD {Z18.D}, P0/Z, [X0, #1] |
(1551) 0x455b80 FSUB Z6.D, Z6.D, Z17.D |
(1551) 0x455b84 LD1RD {Z17.D}, P0/Z, [X0] |
(1551) 0x455b88 FMUL Z17.D, Z17.D, Z7.D |
(1551) 0x455b8c FMLA Z17.D, P0/M, Z18.D, Z16.D |
(1551) 0x455b90 LD1RD {Z18.D}, P0/Z, [X0, #2] |
(1551) 0x455b94 FMLA Z17.D, P0/M, Z6.D, Z18.D |
(1551) 0x455b98 ST1D {Z17.D}, P0, [X11, X18,LSL #3] |
(1551) 0x455b9c LD1RD {Z17.D}, P0/Z, [X0, #3] |
(1551) 0x455ba0 LD1RD {Z18.D}, P0/Z, [X0, #4] |
(1551) 0x455ba4 FMUL Z17.D, Z17.D, Z7.D |
(1551) 0x455ba8 FMLA Z17.D, P0/M, Z18.D, Z16.D |
(1551) 0x455bac LD1RD {Z18.D}, P0/Z, [X0, #5] |
(1551) 0x455bb0 FMLA Z17.D, P0/M, Z18.D, Z6.D |
(1551) 0x455bb4 ST1D {Z17.D}, P0, [X12, X18,LSL #3] |
(1551) 0x455bb8 LD1RD {Z17.D}, P0/Z, [X0, #6] |
(1551) 0x455bbc FMUL Z7.D, Z17.D, Z7.D |
(1551) 0x455bc0 LD1RD {Z17.D}, P0/Z, [X0, #7] |
(1551) 0x455bc4 FMLA Z7.D, P0/M, Z17.D, Z16.D |
(1551) 0x455bc8 LD1RD {Z16.D}, P0/Z, [X0, #8] |
(1551) 0x455bcc FMAD Z6.D, P0/M, Z16.D, Z7.D |
(1551) 0x455bd0 ST1D {Z6.D}, P0, [X13, X18,LSL #3] |
(1551) 0x455bd4 LD1D {Z7.D}, P0/Z, [X11, X18,LSL #3] |
(1551) 0x455bd8 LD1D {Z16.D}, P0/Z, [X12, X18,LSL #3] |
(1551) 0x455bdc FMUL Z7.D, Z7.D, Z7.D |
(1551) 0x455be0 FMAD Z6.D, P0/M, Z6.D, Z7.D |
(1551) 0x455be4 FMLA Z6.D, P0/M, Z16.D, Z16.D |
(1551) 0x455be8 FSQRT Z6.D, P0/M, Z6.D |
(1551) 0x455bec ST1D {Z6.D}, P0, [X3, X18,LSL #3] |
(1551) 0x455bf0 B.NE 455aec |
0x455bf4 CBZ W17, 455cd8 |
0x455bf8 ADD W14, W15, W5 |
0x455bfc HINT #0 |
(1552) 0x455c00 LDR D3, [X8, X14,SXTW #3] |
(1552) 0x455c04 LDP D6, D7, [X0, #72] |
(1552) 0x455c08 LDR D4, [X9, X14,SXTW #3] |
(1552) 0x455c0c FSUB D3, D3, D0 |
(1552) 0x455c10 FSUB D4, D4, D1 |
(1552) 0x455c14 LDR D5, [X10, X14,SXTW #3] |
(1552) 0x455c18 FSUB D5, D5, D2 |
(1552) 0x455c1c FMUL D6, D6, D3 |
(1552) 0x455c20 FMADD D6, D7, D4, D6 |
(1552) 0x455c24 LDP D7, D16, [X0, #88] |
(1552) 0x455c28 FMADD D6, D7, D5, D6 |
(1552) 0x455c2c FMUL D7, D16, D3 |
(1552) 0x455c30 LDP D16, D17, [X0, #104] |
(1552) 0x455c34 FMADD D7, D16, D4, D7 |
(1552) 0x455c38 FMADD D7, D17, D5, D7 |
(1552) 0x455c3c LDP D16, D17, [X0, #120] |
(1552) 0x455c40 FMUL D3, D16, D3 |
(1552) 0x455c44 FMADD D3, D17, D4, D3 |
(1552) 0x455c48 LDR D4, [X0, #136] |
(1552) 0x455c4c FMADD D3, D4, D5, D3 |
(1552) 0x455c50 FRINTA D4, D6 |
(1552) 0x455c54 FRINTA D5, D7 |
(1552) 0x455c58 FSUB D4, D6, D4 |
(1552) 0x455c5c FSUB D5, D7, D5 |
(1552) 0x455c60 FRINTA D6, D3 |
(1552) 0x455c64 FSUB D3, D3, D6 |
(1552) 0x455c68 LDP D6, D7, [X0] |
(1552) 0x455c6c FMUL D6, D6, D4 |
(1552) 0x455c70 FMADD D6, D7, D5, D6 |
(1552) 0x455c74 LDR D7, [X0, #16] |
(1552) 0x455c78 FMADD D6, D3, D7, D6 |
(1552) 0x455c7c STR D6, [X11, X14,SXTW #3] |
(1552) 0x455c80 LDP D6, D7, [X0, #24] |
(1552) 0x455c84 FMUL D6, D6, D4 |
(1552) 0x455c88 FMADD D6, D7, D5, D6 |
(1552) 0x455c8c LDR D7, [X0, #40] |
(1552) 0x455c90 FMADD D6, D7, D3, D6 |
(1552) 0x455c94 STR D6, [X12, X14,SXTW #3] |
(1552) 0x455c98 LDP D6, D7, [X0, #48] |
(1552) 0x455c9c FMUL D4, D6, D4 |
(1552) 0x455ca0 FMADD D4, D7, D5, D4 |
(1552) 0x455ca4 LDR D5, [X0, #64] |
(1552) 0x455ca8 FMADD D3, D5, D3, D4 |
(1552) 0x455cac STR D3, [X13, X14,SXTW #3] |
(1552) 0x455cb0 FMUL D3, D3, D3 |
(1552) 0x455cb4 LDR D4, [X11, X14,SXTW #3] |
(1552) 0x455cb8 LDR D5, [X12, X14,SXTW #3] |
(1552) 0x455cbc FMADD D3, D4, D4, D3 |
(1552) 0x455cc0 FMADD D3, D5, D5, D3 |
(1552) 0x455cc4 FSQRT D3, D3 |
(1552) 0x455cc8 STR D3, [X3, X14,SXTW #3] |
(1552) 0x455ccc ADD W14, W14, #1 |
(1552) 0x455cd0 CMP W6, W14 |
(1552) 0x455cd4 B.NE 455c00 |
0x455cd8 RET |
0x455cdc HINT #0 |
Coverage (%) | Name | Source Location | Module |
---|
Path / |
Source file and lines | ParticleBConds3DSoa.h:234-257 |
Module | exec |
nb instructions | 32 |
loop length | 128 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.00 | 4.00 | 4.00 | 4.00 | 1.50 | 1.50 | 0.00 | 0.00 | 2.00 | 2.00 | 2.00 | 0.00 | 0.00 |
cycles | 2.50 | 2.50 | 4.00 | 4.00 | 4.00 | 4.00 | 1.50 | 1.50 | 0.00 | 0.00 | 2.00 | 2.00 | 2.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 5.00-12.50 |
Front-end | 3.75 |
Overall L1 | 5.00-12.50 |
all | 26% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 42% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUBS W17, W6, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 455cd8 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x258> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP D0, D1, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDR D2, [X1, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDR X8, [X2, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CNTD X14, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB W15, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W17, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDR X10, [X2, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X11, [X4, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X13, [X4, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CCMP W15, W5, #8, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X9, X8, X10,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X10, X8, X10,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X11, X13,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X13, X11, X13,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.GE 455acc <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W15, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 455bf8 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x178> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W15, W17, W14 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 |
ORR W16, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z3.D, Z0.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z4.D, Z1.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z5.D, Z2.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
PTRUE P0.D, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MADD W15, W15, W14, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB W17, W17, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CBZ W17, 455cd8 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x258> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W14, W15, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 |
Source file and lines | ParticleBConds3DSoa.h:234-257 |
Module | exec |
nb instructions | 32 |
loop length | 128 |
nb stack references | 0 |
front end | 3.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 4.00 | 4.00 | 4.00 | 4.00 | 1.50 | 1.50 | 0.00 | 0.00 | 2.00 | 2.00 | 2.00 | 0.00 | 0.00 |
cycles | 2.50 | 2.50 | 4.00 | 4.00 | 4.00 | 4.00 | 1.50 | 1.50 | 0.00 | 0.00 | 2.00 | 2.00 | 2.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 5.00-12.50 |
Front-end | 3.75 |
Overall L1 | 5.00-12.50 |
all | 26% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 42% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SUBS W17, W6, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LE 455cd8 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x258> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LDP D0, D1, [X1] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDR D2, [X1, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
LDR X8, [X2, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CNTD X14, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB W15, W6, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP W17, W14 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDR X10, [X2, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X11, [X4, #24] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
LDR X13, [X4, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
CCMP W15, W5, #8, #2 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X9, X8, X10,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X10, X8, X10,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X11, X13,LSL #3 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X13, X11, X13,LSL #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.GE 455acc <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x4c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR W15, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 455bf8 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x178> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV W15, W17, W14 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-12 | 5-12.50 |
ORR W16, WZR, WZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DUP Z3.D, Z0.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z4.D, Z1.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
DUP Z5.D, Z2.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
PTRUE P0.D, ALL | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MADD W15, W15, W14, WZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB W17, W17, W15 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CBZ W17, 455cd8 <_ZNK11qmcplusplus10DTD_BCondsIdLj3ELi40EE16computeDistancesINS_10TinyVectorIdLj3EEENS_18VectorSoAContainerIdLj3ENS_10MallocatorIdLm32EEEEES8_EEvRKT_RKT0_PdRT1_iii+0x258> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD W14, W15, W5 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
RET | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼void qmcplusplus::DTD_BConds | 1.39 | 2.22 |
○Loop 1551 - ParticleBConds3DSoa.h:234-255 - exec | 1.39 | 1.93 |
○Loop 1552 - ParticleBConds3DSoa.h:234-255 - exec | 0 | 0 |