Loop Id: 964 | Module: exec | Source: MultiBsplineRef.hpp:63-71 | Coverage: 0.01% |
---|
Loop Id: 964 | Module: exec | Source: MultiBsplineRef.hpp:63-71 | Coverage: 0.01% |
---|
0x4324ac ADD X8, X8, #1 |
0x4324b0 ADD X11, X11, X10 |
0x4324b4 ADD X15, X15, X10 |
0x4324b8 CMP X8, #4 |
0x4324bc ADD X14, X14, X10 |
0x4324c0 ADD X12, X12, X10 |
0x4324c4 ADD X13, X13, X10 |
0x4324c8 ADD X0, X0, X10 |
0x4324cc ADD X16, X16, X10 |
0x4324d0 ADD X17, X17, X10 |
0x4324d4 ADD X18, X18, X10 |
0x4324d8 ADD X4, X4, X10 |
0x4324dc ADD X1, X1, X10 |
0x4324e0 ADD X2, X2, X10 |
0x4324e4 ADD X3, X3, X10 |
0x4324e8 ADD X21, X21, X10 |
0x4324ec ADD X5, X5, X10 |
0x4324f0 ADD X6, X6, X10 |
0x4324f4 ADD X7, X7, X10 |
0x4324f8 B.EQ 432944 |
0x4324fc ADD X22, SP, #0 |
0x432500 CMP X9, X19 |
0x432504 LDR D20, [X22, X8,LSL #3] |
0x432508 FMUL D21, D5, D20 |
0x43250c B.LS 432520 |
0x432510 ORR X23, XZR, XZR |
0x432514 B 4325b0 |
0x432520 UDIV X22, X19, X9 |
0x432524 ORR X24, XZR, XZR |
0x432528 ADDVL X26, X15, #1 |
0x43252c DUP Z22.D, Z21.D[0] |
0x432530 ADDVL X27, X14, #1 |
0x432534 ADDVL X28, X12, #1 |
0x432538 ADDVL X30, X13, #1 |
0x43253c MADD X23, X22, X9, XZR |
0x432540 ADDVL X22, X20, #1 |
0x432544 SUB X25, X19, X23 |
(960) 0x432548 LD1D {Z23.D}, P0/Z, [X11, X24,LSL #3] |
(960) 0x43254c LD1D {Z24.D}, P0/Z, [X26, X24,LSL #3] |
(960) 0x432550 FMUL Z23.D, Z23.D, Z18.D |
(960) 0x432554 LD1D {Z25.D}, P0/Z, [X14, X24,LSL #3] |
(960) 0x432558 LD1D {Z26.D}, P0/Z, [X27, X24,LSL #3] |
(960) 0x43255c FMUL Z24.D, Z24.D, Z18.D |
(960) 0x432560 FMLA Z23.D, P0/M, Z25.D, Z17.D |
(960) 0x432564 FMLA Z24.D, P0/M, Z26.D, Z17.D |
(960) 0x432568 LD1D {Z25.D}, P0/Z, [X12, X24,LSL #3] |
(960) 0x43256c LD1D {Z26.D}, P0/Z, [X28, X24,LSL #3] |
(960) 0x432570 FMLA Z23.D, P0/M, Z25.D, Z19.D |
(960) 0x432574 FMLA Z24.D, P0/M, Z26.D, Z19.D |
(960) 0x432578 LD1D {Z25.D}, P0/Z, [X13, X24,LSL #3] |
(960) 0x43257c LD1D {Z26.D}, P0/Z, [X30, X24,LSL #3] |
(960) 0x432580 FMLA Z23.D, P0/M, Z2.D, Z25.D |
(960) 0x432584 FMLA Z24.D, P0/M, Z2.D, Z26.D |
(960) 0x432588 LD1D {Z25.D}, P0/Z, [X20, X24,LSL #3] |
(960) 0x43258c LD1D {Z26.D}, P0/Z, [X22, X24,LSL #3] |
(960) 0x432590 FMAD Z23.D, P0/M, Z22.D, Z25.D |
(960) 0x432594 FMAD Z24.D, P0/M, Z22.D, Z26.D |
(960) 0x432598 ST1D {Z23.D}, P0, [X20, X24,LSL #3] |
(960) 0x43259c ST1D {Z24.D}, P0, [X22, X24,LSL #3] |
(960) 0x4325a0 ADD X24, X24, X9 |
(960) 0x4325a4 CMP X23, X24 |
(960) 0x4325a8 B.NE 432548 |
0x4325ac CBZ X25, 432618 |
0x4325b0 UBFM X28, X23, #61, #60 |
0x4325b4 ORR X24, XZR, XZR |
0x4325b8 SUB X23, X19, X23 |
0x4325bc ADD X22, X13, X28 |
0x4325c0 ADD X25, X12, X28 |
0x4325c4 ADD X26, X14, X28 |
0x4325c8 ADD X27, X20, X28 |
0x4325cc ADD X28, X15, X28 |
0x4325d0 HINT #0 |
0x4325d4 HINT #0 |
0x4325d8 HINT #0 |
0x4325dc HINT #0 |
(966) 0x4325e0 LDR D22, [X28, X24,LSL #3] |
(966) 0x4325e4 LDR D23, [X26, X24,LSL #3] |
(966) 0x4325e8 FMUL D22, D22, D7 |
(966) 0x4325ec FMADD D22, D23, D4, D22 |
(966) 0x4325f0 LDR D23, [X25, X24,LSL #3] |
(966) 0x4325f4 FMADD D22, D23, D16, D22 |
(966) 0x4325f8 LDR D23, [X22, X24,LSL #3] |
(966) 0x4325fc FMADD D22, D1, D23, D22 |
(966) 0x432600 LDR D23, [X27, X24,LSL #3] |
(966) 0x432604 FMADD D22, D21, D22, D23 |
(966) 0x432608 STR D22, [X27, X24,LSL #3] |
(966) 0x43260c ADD X24, X24, #1 |
(966) 0x432610 CMP X23, X24 |
(966) 0x432614 B.NE 4325e0 |
0x432618 FMUL D21, D3, D20 |
0x43261c CMP X9, X19 |
0x432620 B.LS 43262c |
0x432624 ORR X23, XZR, XZR |
0x432628 B 4326c8 |
0x43262c UDIV X22, X19, X9 |
0x432630 ORR X24, XZR, XZR |
0x432634 ADDVL X26, X0, #1 |
0x432638 DUP Z22.D, Z21.D[0] |
0x43263c ADDVL X27, X16, #1 |
0x432640 ADDVL X28, X17, #1 |
0x432644 ADDVL X30, X18, #1 |
0x432648 MADD X23, X22, X9, XZR |
0x43264c ADDVL X22, X20, #1 |
0x432650 SUB X25, X19, X23 |
0x432654 HINT #0 |
0x432658 HINT #0 |
0x43265c HINT #0 |
(961) 0x432660 LD1D {Z23.D}, P0/Z, [X0, X24,LSL #3] |
(961) 0x432664 LD1D {Z24.D}, P0/Z, [X26, X24,LSL #3] |
(961) 0x432668 FMUL Z23.D, Z23.D, Z18.D |
(961) 0x43266c LD1D {Z25.D}, P0/Z, [X16, X24,LSL #3] |
(961) 0x432670 LD1D {Z26.D}, P0/Z, [X27, X24,LSL #3] |
(961) 0x432674 FMUL Z24.D, Z24.D, Z18.D |
(961) 0x432678 FMLA Z23.D, P0/M, Z25.D, Z17.D |
(961) 0x43267c FMLA Z24.D, P0/M, Z26.D, Z17.D |
(961) 0x432680 LD1D {Z25.D}, P0/Z, [X17, X24,LSL #3] |
(961) 0x432684 LD1D {Z26.D}, P0/Z, [X28, X24,LSL #3] |
(961) 0x432688 FMLA Z23.D, P0/M, Z25.D, Z19.D |
(961) 0x43268c FMLA Z24.D, P0/M, Z26.D, Z19.D |
(961) 0x432690 LD1D {Z25.D}, P0/Z, [X18, X24,LSL #3] |
(961) 0x432694 LD1D {Z26.D}, P0/Z, [X30, X24,LSL #3] |
(961) 0x432698 FMLA Z23.D, P0/M, Z2.D, Z25.D |
(961) 0x43269c FMLA Z24.D, P0/M, Z2.D, Z26.D |
(961) 0x4326a0 LD1D {Z25.D}, P0/Z, [X20, X24,LSL #3] |
(961) 0x4326a4 LD1D {Z26.D}, P0/Z, [X22, X24,LSL #3] |
(961) 0x4326a8 FMAD Z23.D, P0/M, Z22.D, Z25.D |
(961) 0x4326ac FMAD Z24.D, P0/M, Z22.D, Z26.D |
(961) 0x4326b0 ST1D {Z23.D}, P0, [X20, X24,LSL #3] |
(961) 0x4326b4 ST1D {Z24.D}, P0, [X22, X24,LSL #3] |
(961) 0x4326b8 ADD X24, X24, X9 |
(961) 0x4326bc CMP X23, X24 |
(961) 0x4326c0 B.NE 432660 |
0x4326c4 CBZ X25, 432720 |
0x4326c8 UBFM X28, X23, #61, #60 |
0x4326cc ORR X24, XZR, XZR |
0x4326d0 SUB X22, X19, X23 |
0x4326d4 ADD X23, X20, X28 |
0x4326d8 ADD X25, X18, X28 |
0x4326dc ADD X26, X17, X28 |
0x4326e0 ADD X27, X16, X28 |
0x4326e4 ADD X28, X0, X28 |
(965) 0x4326e8 LDR D22, [X28, X24,LSL #3] |
(965) 0x4326ec LDR D23, [X27, X24,LSL #3] |
(965) 0x4326f0 FMUL D22, D22, D7 |
(965) 0x4326f4 FMADD D22, D23, D4, D22 |
(965) 0x4326f8 LDR D23, [X26, X24,LSL #3] |
(965) 0x4326fc FMADD D22, D23, D16, D22 |
(965) 0x432700 LDR D23, [X25, X24,LSL #3] |
(965) 0x432704 FMADD D22, D1, D23, D22 |
(965) 0x432708 LDR D23, [X23, X24,LSL #3] |
(965) 0x43270c FMADD D22, D21, D22, D23 |
(965) 0x432710 STR D22, [X23, X24,LSL #3] |
(965) 0x432714 ADD X24, X24, #1 |
(965) 0x432718 CMP X22, X24 |
(965) 0x43271c B.NE 4326e8 |
0x432720 FMUL D21, D6, D20 |
0x432724 CMP X9, X19 |
0x432728 B.LS 432740 |
0x43272c ORR X23, XZR, XZR |
0x432730 B 4327d0 |
0x4327d0 UBFM X28, X23, #61, #60 |
0x4327d4 ORR X24, XZR, XZR |
0x4327d8 SUB X22, X19, X23 |
0x4327dc ADD X23, X20, X28 |
0x4327e0 ADD X25, X3, X28 |
0x4327e4 ADD X26, X2, X28 |
0x4327e8 ADD X27, X1, X28 |
0x4327ec ADD X28, X4, X28 |
0x4327f0 HINT #0 |
0x4327f4 HINT #0 |
0x4327f8 HINT #0 |
0x4327fc HINT #0 |
(963) 0x432800 LDR D22, [X28, X24,LSL #3] |
(963) 0x432804 LDR D23, [X27, X24,LSL #3] |
(963) 0x432808 FMUL D22, D22, D7 |
(963) 0x43280c FMADD D22, D23, D4, D22 |
(963) 0x432810 LDR D23, [X26, X24,LSL #3] |
(963) 0x432814 FMADD D22, D23, D16, D22 |
(963) 0x432818 LDR D23, [X25, X24,LSL #3] |
(963) 0x43281c FMADD D22, D1, D23, D22 |
(963) 0x432820 LDR D23, [X23, X24,LSL #3] |
(963) 0x432824 FMADD D22, D21, D22, D23 |
(963) 0x432828 STR D22, [X23, X24,LSL #3] |
(963) 0x43282c ADD X24, X24, #1 |
(963) 0x432830 CMP X22, X24 |
(963) 0x432834 B.NE 432800 |
0x432838 FMUL D20, D0, D20 |
0x43283c CMP X9, X19 |
0x432840 B.LS 43284c |
0x432844 ORR X23, XZR, XZR |
0x432848 B 4328e8 |
0x43284c UDIV X22, X19, X9 |
0x432850 ORR X24, XZR, XZR |
0x432854 ADDVL X26, X21, #1 |
0x432858 DUP Z21.D, Z20.D[0] |
0x43285c ADDVL X27, X5, #1 |
0x432860 ADDVL X28, X6, #1 |
0x432864 ADDVL X30, X7, #1 |
0x432868 MADD X23, X22, X9, XZR |
0x43286c ADDVL X22, X20, #1 |
0x432870 SUB X25, X19, X23 |
0x432874 HINT #0 |
0x432878 HINT #0 |
0x43287c HINT #0 |
(959) 0x432880 LD1D {Z22.D}, P0/Z, [X21, X24,LSL #3] |
(959) 0x432884 LD1D {Z23.D}, P0/Z, [X26, X24,LSL #3] |
(959) 0x432888 FMUL Z22.D, Z22.D, Z18.D |
(959) 0x43288c LD1D {Z24.D}, P0/Z, [X5, X24,LSL #3] |
(959) 0x432890 LD1D {Z25.D}, P0/Z, [X27, X24,LSL #3] |
(959) 0x432894 FMUL Z23.D, Z23.D, Z18.D |
(959) 0x432898 FMLA Z22.D, P0/M, Z24.D, Z17.D |
(959) 0x43289c FMLA Z23.D, P0/M, Z25.D, Z17.D |
(959) 0x4328a0 LD1D {Z24.D}, P0/Z, [X6, X24,LSL #3] |
(959) 0x4328a4 LD1D {Z25.D}, P0/Z, [X28, X24,LSL #3] |
(959) 0x4328a8 FMLA Z22.D, P0/M, Z24.D, Z19.D |
(959) 0x4328ac FMLA Z23.D, P0/M, Z25.D, Z19.D |
(959) 0x4328b0 LD1D {Z24.D}, P0/Z, [X7, X24,LSL #3] |
(959) 0x4328b4 LD1D {Z25.D}, P0/Z, [X30, X24,LSL #3] |
(959) 0x4328b8 FMLA Z22.D, P0/M, Z2.D, Z24.D |
(959) 0x4328bc FMLA Z23.D, P0/M, Z2.D, Z25.D |
(959) 0x4328c0 LD1D {Z24.D}, P0/Z, [X20, X24,LSL #3] |
(959) 0x4328c4 LD1D {Z25.D}, P0/Z, [X22, X24,LSL #3] |
(959) 0x4328c8 FMAD Z22.D, P0/M, Z21.D, Z24.D |
(959) 0x4328cc FMAD Z23.D, P0/M, Z21.D, Z25.D |
(959) 0x4328d0 ST1D {Z22.D}, P0, [X20, X24,LSL #3] |
(959) 0x4328d4 ST1D {Z23.D}, P0, [X22, X24,LSL #3] |
(959) 0x4328d8 ADD X24, X24, X9 |
(959) 0x4328dc CMP X23, X24 |
(959) 0x4328e0 B.NE 432880 |
0x4328e4 CBZ X25, 4324ac |
0x4328e8 UBFM X28, X23, #61, #60 |
0x4328ec ORR X24, XZR, XZR |
0x4328f0 SUB X22, X19, X23 |
0x4328f4 ADD X23, X20, X28 |
0x4328f8 ADD X25, X7, X28 |
0x4328fc ADD X26, X6, X28 |
0x432900 ADD X27, X5, X28 |
0x432904 ADD X28, X21, X28 |
(967) 0x432908 LDR D21, [X28, X24,LSL #3] |
(967) 0x43290c LDR D22, [X27, X24,LSL #3] |
(967) 0x432910 FMUL D21, D21, D7 |
(967) 0x432914 FMADD D21, D22, D4, D21 |
(967) 0x432918 LDR D22, [X26, X24,LSL #3] |
(967) 0x43291c FMADD D21, D22, D16, D21 |
(967) 0x432920 LDR D22, [X25, X24,LSL #3] |
(967) 0x432924 FMADD D21, D1, D22, D21 |
(967) 0x432928 LDR D22, [X23, X24,LSL #3] |
(967) 0x43292c FMADD D21, D20, D21, D22 |
(967) 0x432930 STR D21, [X23, X24,LSL #3] |
(967) 0x432934 ADD X24, X24, #1 |
(967) 0x432938 CMP X22, X24 |
(967) 0x43293c B.NE 432908 |
0x432940 B 4324ac |
/home/hbollore/qaas-runs/171-284-6744/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 63 - 71 |
-------------------------------------------------------------------------------- |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
Coverage (%) | Name | Source Location | Module |
---|
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 7.25 - 20.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.45 - 1.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 2.76 |
Bottlenecks | P6, P8, |
Function | void miniqmcreference::MultiBsplineEvalRef::evaluate_v |
Source | MultiBsplineRef.hpp:63-63,MultiBsplineRef.hpp:66-68 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 21.75 - 60.00 |
CQA cycles if no scalar integer | 3.00 |
CQA cycles if FP arith vectorized | 21.75 - 60.00 |
CQA cycles if fully vectorized | 15.00 - 60.00 |
Front-end cycles | 13.50 |
DIV/SQRT cycles | 6.50 |
P0 cycles | 6.50 |
P1 cycles | 21.75 |
P2 cycles | 21.75 |
P3 cycles | 21.75 |
P4 cycles | 21.75 |
P5 cycles | 1.75 |
P6 cycles | 1.75 |
P7 cycles | 1.75 |
P8 cycles | 1.75 |
P9 cycles | 0.33 |
P10 cycles | 0.33 |
P11 cycles | 0.33 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
P14 cycles | 15.00 - 60.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 122.00 |
Nb uops | 108.00 |
Nb loads | NA |
Nb stores | 0.00 |
Nb stack references | 0.00 |
FLOP/cycle | 0.18 - 0.07 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 4.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 0.13 - 0.37 |
Bytes prefetched | 0.00 |
Bytes loaded | 8.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 4.48 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 15.79 |
Vector-efficiency ratio all | 28.36 |
Vector-efficiency ratio load | 25.00 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 25.00 |
Vector-efficiency ratio fma | 25.00 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 36.84 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 7.25 - 20.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.45 - 1.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 2.76 |
Bottlenecks | P6, P8, |
Function | void miniqmcreference::MultiBsplineEvalRef::evaluate_v |
Source | MultiBsplineRef.hpp:63-63,MultiBsplineRef.hpp:66-68 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 21.75 - 60.00 |
CQA cycles if no scalar integer | 3.00 |
CQA cycles if FP arith vectorized | 21.75 - 60.00 |
CQA cycles if fully vectorized | 15.00 - 60.00 |
Front-end cycles | 13.50 |
DIV/SQRT cycles | 6.50 |
P0 cycles | 6.50 |
P1 cycles | 21.75 |
P2 cycles | 21.75 |
P3 cycles | 21.75 |
P4 cycles | 21.75 |
P5 cycles | 1.75 |
P6 cycles | 1.75 |
P7 cycles | 1.75 |
P8 cycles | 1.75 |
P9 cycles | 0.33 |
P10 cycles | 0.33 |
P11 cycles | 0.33 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
P14 cycles | 15.00 - 60.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 122.00 |
Nb uops | 108.00 |
Nb loads | NA |
Nb stores | 0.00 |
Nb stack references | 0.00 |
FLOP/cycle | 0.18 - 0.07 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 4.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 0.13 - 0.37 |
Bytes prefetched | 0.00 |
Bytes loaded | 8.00 |
Bytes stored | 0.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 4.48 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | NA |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 15.79 |
Vector-efficiency ratio all | 28.36 |
Vector-efficiency ratio load | 25.00 |
Vector-efficiency ratio store | NA |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 25.00 |
Vector-efficiency ratio fma | 25.00 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 36.84 |
Path / |
Function | void miniqmcreference::MultiBsplineEvalRef::evaluate_v |
Source file and lines | MultiBsplineRef.hpp:63-71 |
Module | exec |
nb instructions | 122 |
loop length | 488 |
nb stack references | 0 |
front end | 13.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 21.75 | 21.75 | 21.75 | 21.75 | 1.75 | 1.75 | 1.75 | 1.75 | 0.33 | 0.33 | 0.33 | 0.00 | 0.00 |
cycles | 6.50 | 6.50 | 21.75 | 21.75 | 21.75 | 21.75 | 1.75 | 1.75 | 1.75 | 1.75 | 0.33 | 0.33 | 0.33 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 15.00-60.00 |
Front-end | 13.50 |
Overall L1 | 21.75-60.00 |
all | 4% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ADD X8, X8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X11, X11, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X15, X15, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X8, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
ADD X14, X14, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X12, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X13, X13, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X16, X16, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X17, X17, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X18, X18, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X4, X4, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X1, X1, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X2, X2, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X3, X3, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X21, X21, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X5, X5, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X6, X6, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X7, X7, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.EQ 432944 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x804> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD X22, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X9, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDR D20, [X22, X8,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
FMUL D21, D5, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
B.LS 432520 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x3e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X23, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4325b0 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x470> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV X22, X19, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 5-20 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADDVL X26, X15, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
DUP Z22.D, Z21.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADDVL X27, X14, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X28, X12, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X30, X13, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MADD X23, X22, X9, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X22, X20, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB X25, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CBZ X25, 432618 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x4d8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X28, X23, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X23, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X22, X13, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X25, X12, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X26, X14, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X27, X20, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X28, X15, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
FMUL D21, D3, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
CMP X9, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LS 43262c <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x4ec> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X23, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4326c8 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x588> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV X22, X19, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 5-20 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADDVL X26, X0, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
DUP Z22.D, Z21.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADDVL X27, X16, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X28, X17, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X30, X18, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MADD X23, X22, X9, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X22, X20, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB X25, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
CBZ X25, 432720 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x5e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X28, X23, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X22, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X23, X20, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X25, X18, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X26, X17, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X27, X16, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X28, X0, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
FMUL D21, D6, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
CMP X9, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LS 432740 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x600> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X23, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4327d0 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x690> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X28, X23, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X22, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X23, X20, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X25, X3, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X26, X2, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X27, X1, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X28, X4, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
FMUL D20, D0, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
CMP X9, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LS 43284c <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x70c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X23, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4328e8 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x7a8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV X22, X19, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 5-20 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADDVL X26, X21, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
DUP Z21.D, Z20.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADDVL X27, X5, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X28, X6, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X30, X7, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MADD X23, X22, X9, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X22, X20, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB X25, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
CBZ X25, 4324ac <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x36c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X28, X23, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X22, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X23, X20, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X25, X7, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X26, X6, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X27, X5, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X28, X21, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4324ac <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x36c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Function | void miniqmcreference::MultiBsplineEvalRef::evaluate_v |
Source file and lines | MultiBsplineRef.hpp:63-71 |
Module | exec |
nb instructions | 122 |
loop length | 488 |
nb stack references | 0 |
front end | 13.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.50 | 6.50 | 21.75 | 21.75 | 21.75 | 21.75 | 1.75 | 1.75 | 1.75 | 1.75 | 0.33 | 0.33 | 0.33 | 0.00 | 0.00 |
cycles | 6.50 | 6.50 | 21.75 | 21.75 | 21.75 | 21.75 | 1.75 | 1.75 | 1.75 | 1.75 | 0.33 | 0.33 | 0.33 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 15.00-60.00 |
Front-end | 13.50 |
Overall L1 | 21.75-60.00 |
all | 4% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ADD X8, X8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X11, X11, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X15, X15, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X8, #4 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
ADD X14, X14, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X12, X12, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X13, X13, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X0, X0, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X16, X16, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X17, X17, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X18, X18, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X4, X4, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X1, X1, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X2, X2, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X3, X3, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X21, X21, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X5, X5, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X6, X6, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X7, X7, X10 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B.EQ 432944 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x804> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD X22, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP X9, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LDR D20, [X22, X8,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 |
FMUL D21, D5, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
B.LS 432520 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x3e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X23, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4325b0 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x470> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV X22, X19, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 5-20 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADDVL X26, X15, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
DUP Z22.D, Z21.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADDVL X27, X14, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X28, X12, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X30, X13, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MADD X23, X22, X9, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X22, X20, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB X25, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CBZ X25, 432618 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x4d8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X28, X23, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X23, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X22, X13, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X25, X12, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X26, X14, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X27, X20, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X28, X15, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
FMUL D21, D3, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
CMP X9, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LS 43262c <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x4ec> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X23, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4326c8 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x588> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV X22, X19, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 5-20 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADDVL X26, X0, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
DUP Z22.D, Z21.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADDVL X27, X16, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X28, X17, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X30, X18, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MADD X23, X22, X9, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X22, X20, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB X25, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
CBZ X25, 432720 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x5e0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X28, X23, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X22, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X23, X20, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X25, X18, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X26, X17, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X27, X16, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X28, X0, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
FMUL D21, D6, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
CMP X9, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LS 432740 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x600> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X23, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4327d0 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x690> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X28, X23, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X22, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X23, X20, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X25, X3, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X26, X2, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X27, X1, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X28, X4, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
FMUL D20, D0, D20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 3 | 0.25 |
CMP X9, X19 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LS 43284c <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x70c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ORR X23, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4328e8 <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x7a8> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UDIV X22, X19, X9 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 5-20 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADDVL X26, X21, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
DUP Z21.D, Z20.D[0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 |
ADDVL X27, X5, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X28, X6, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X30, X7, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
MADD X23, X22, X9, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADDVL X22, X20, #1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB X25, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
HINT #0 | ||||||||||||||||||
CBZ X25, 4324ac <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x36c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
UBFM X28, X23, #61, #60 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ORR X24, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB X22, X19, X23 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X23, X20, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X25, X7, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X26, X6, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X27, X5, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X28, X21, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
B 4324ac <_ZN16miniqmcreference19MultiBsplineEvalRef10evaluate_vIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_m+0x36c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |