Loop Id: 374 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
Loop Id: 374 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
0x4207e0 VXORPD %XMM9,%XMM9,%XMM9 |
0x4207e5 VMOVSD 0x38(%RSP),%XMM0 |
0x4207eb VSUBSD %XMM9,%XMM0,%XMM0 |
0x4207f0 VZEROUPPER |
0x4207f3 CALL 4d5d50 <exp> |
0x4207f8 MOV 0x30(%RSP),%RCX |
0x4207fd MOV (%RCX),%RAX |
0x420800 MOV 0x40(%RSP),%R15 |
0x420805 VMOVSD %XMM0,(%RAX,%R15,8) |
0x42080b INC %R15 |
0x42080e MOV 0x8(%RCX),%RCX |
0x420812 SUB %RAX,%RCX |
0x420815 SAR $0x3,%RCX |
0x420819 CMP %R15,%RCX |
0x42081c MOV 0x28(%RSP),%RCX |
0x420821 MOV 0x20(%RSP),%R14 |
0x420826 JBE 42101d |
0x42082c CMPB $0,0x298(%RCX) |
0x420833 JE 42102c |
0x420839 MOVSXD 0x2a0(%RCX),%R12 |
0x420840 MOV 0xd8(%R14),%RAX |
0x420847 VMOVSD (%RAX,%R12,8),%XMM0 |
0x42084d VMOVSD %XMM0,0x38(%RSP) |
0x420853 MOV 0x290(%RCX),%RBX |
0x42085a MOV 0x248(%R14),%ESI |
0x420861 MOV %RCX,%RDI |
0x420864 CALL 461820 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> |
0x420869 MOV 0xa0(%R14),%RCX |
0x420870 TEST %RCX,%RCX |
0x420873 MOV %R15,0x40(%RSP) |
0x420878 JE 4207e0 |
0x42087e MOV 0x48(%RAX),%RAX |
0x420882 LEA (%R15,%R15,4),%RDX |
0x420886 MOV 0x18(%RAX,%RDX,8),%RSI |
0x42088b MOV 0x18(%RBX),%RAX |
0x42088f MOV (%RAX,%R12,4),%EAX |
0x420893 IMUL %ECX,%EAX |
0x420896 MOV 0x1d0(%R14),%RDX |
0x42089d MOV 0x200(%R14),%RDI |
0x4208a4 MOV %RDI,0x68(%RSP) |
0x4208a9 MOVSXD %EAX,%RDI |
0x4208ac MOV 0x268(%RBX),%RAX |
0x4208b3 DEC %RCX |
0x4208b6 MOV 0x18(%RAX),%R8 |
0x4208ba MOV %RSI,0x58(%RSP) |
0x4208bf LEA 0x38(%RSI),%RAX |
0x4208c3 MOV %RAX,0x50(%RSP) |
0x4208c8 MOV %R12,0x60(%RSP) |
0x4208cd NEG %R12D |
0x4208d0 MOV %R12,0x48(%RSP) |
0x4208d5 VXORPD %XMM9,%XMM9,%XMM9 |
0x4208da XOR %R11D,%R11D |
0x4208dd JMP 4208f1 |
(375) 0x4208e0 VADDSD %XMM0,%XMM9,%XMM9 |
(375) 0x4208e4 CMP %RCX,%R11 |
(375) 0x4208e7 LEA 0x1(%R11),%R11 |
(375) 0x4208eb JE 4207e5 |
(375) 0x4208f1 MOVSXD (%R8,%R11,4),%R9 |
(375) 0x4208f5 MOV 0x4(%R8,%R11,4),%R10D |
(375) 0x4208fa SUB %R9D,%R10D |
(375) 0x4208fd VXORPD %XMM0,%XMM0,%XMM0 |
(375) 0x420901 TEST %R10D,%R10D |
(375) 0x420904 JLE 4208e0 |
(375) 0x420906 LEA (%R11,%RDI,1),%RAX |
(375) 0x42090a MOV 0x68(%RSP),%RSI |
(375) 0x42090f MOV (%RSI,%RAX,8),%R13 |
(375) 0x420913 VMOVSD 0x8(%R13),%XMM1 |
(375) 0x420919 MOV %R10D,%EAX |
(375) 0x42091c XOR %R14D,%R14D |
(375) 0x42091f CMP $0x8,%R10D |
(375) 0x420923 JAE 420e10 |
(375) 0x420929 MOV %EAX,%R10D |
(375) 0x42092c AND $-0x8,%R10D |
(375) 0x420930 CMP %RAX,%R10 |
(375) 0x420933 JAE 420980 |
(375) 0x420935 MOV 0x60(%RSP),%RBX |
(375) 0x42093a SUB %R9D,%EBX |
(375) 0x42093d MOV 0x58(%RSP),%RSI |
(375) 0x420942 LEA (%RSI,%R9,8),%R9 |
(375) 0x420946 JMP 420958 |
(378) 0x420950 INC %R10 |
(378) 0x420953 CMP %R10,%RAX |
(378) 0x420956 JE 420980 |
(378) 0x420958 VMOVSD (%R9,%R10,8),%XMM0 |
(378) 0x42095e VUCOMISD %XMM0,%XMM1 |
(378) 0x420962 JBE 420950 |
(378) 0x420964 CMP %R10D,%EBX |
(378) 0x420967 JE 420950 |
(378) 0x420969 MOVSXD %R14D,%R14 |
(378) 0x42096c VMOVSD %XMM0,(%RDX,%R14,8) |
(378) 0x420972 INC %R14D |
(378) 0x420975 JMP 420950 |
(375) 0x420980 VXORPD %XMM0,%XMM0,%XMM0 |
(375) 0x420984 TEST %R14D,%R14D |
(375) 0x420987 JLE 4208e0 |
(375) 0x42098d VMOVSD %XMM9,0x18(%RSP) |
(375) 0x420993 VMOVSD 0x238(%R13),%XMM10 |
(375) 0x42099c MOV 0x218(%R13),%RAX |
(375) 0x4209a3 VMOVSD 0x18(%R13),%XMM2 |
(375) 0x4209a9 VMOVSD 0x20(%R13),%XMM3 |
(375) 0x4209af VMOVSD 0x28(%R13),%XMM4 |
(375) 0x4209b5 VMOVSD 0x38(%R13),%XMM6 |
(375) 0x4209bb VMOVSD 0x40(%R13),%XMM13 |
(375) 0x4209c1 VMOVSD 0x48(%R13),%XMM5 |
(375) 0x4209c7 VMOVSD 0x58(%R13),%XMM15 |
(375) 0x4209cd VMOVSD 0x60(%R13),%XMM14 |
(375) 0x4209d3 VMOVSD 0x68(%R13),%XMM8 |
(375) 0x4209d9 VMOVSD 0x78(%R13),%XMM0 |
(375) 0x4209df VMOVUPS %XMM0,0x80(%RSP) |
(375) 0x4209e8 VMOVSD 0x80(%R13),%XMM9 |
(375) 0x4209f1 VMOVSD 0x88(%R13),%XMM1 |
(375) 0x4209fa VMOVSD 0x30(%R13),%XMM7 |
(375) 0x420a00 VMOVSD 0x50(%R13),%XMM11 |
(375) 0x420a06 VMOVSD 0x70(%R13),%XMM12 |
(375) 0x420a0c MOV %R14D,%R14D |
(375) 0x420a0f MOV %R14,%R9 |
(375) 0x420a12 VMOVSD 0x90(%R13),%XMM0 |
(375) 0x420a1b VMOVUPD %XMM0,0x70(%RSP) |
(375) 0x420a21 MOV $-0x4,%ESI |
(375) 0x420a26 AND %RSI,%R9 |
(375) 0x420a29 VMOVUPD %XMM1,0x170(%RSP) |
(375) 0x420a32 VMOVUPD %XMM9,0x160(%RSP) |
(375) 0x420a3b JE 420f57 |
(375) 0x420a41 VMOVUPD %XMM10,0x150(%RSP) |
(375) 0x420a4a VBROADCASTSD %XMM10,%YMM10 |
(375) 0x420a4f VMOVUPD %YMM10,0x2c0(%RSP) |
(375) 0x420a58 VMOVUPD %XMM2,0x100(%RSP) |
(375) 0x420a61 VBROADCASTSD %XMM2,%YMM2 |
(375) 0x420a66 VMOVUPD %YMM2,0x2a0(%RSP) |
(375) 0x420a6f VMOVUPD %XMM3,0x130(%RSP) |
(375) 0x420a78 VBROADCASTSD %XMM3,%YMM2 |
(375) 0x420a7d VMOVUPD %YMM2,0x280(%RSP) |
(375) 0x420a86 VMOVUPD %XMM4,0x140(%RSP) |
(375) 0x420a8f VBROADCASTSD %XMM4,%YMM2 |
(375) 0x420a94 VMOVUPD %YMM2,0x260(%RSP) |
(375) 0x420a9d VMOVUPD %XMM7,0xf0(%RSP) |
(375) 0x420aa6 VBROADCASTSD %XMM7,%YMM2 |
(375) 0x420aab VMOVUPD %YMM2,0x240(%RSP) |
(375) 0x420ab4 VMOVUPD %XMM6,0x110(%RSP) |
(375) 0x420abd VBROADCASTSD %XMM6,%YMM2 |
(375) 0x420ac2 VMOVUPD %YMM2,0x220(%RSP) |
(375) 0x420acb VMOVUPD %XMM13,0xb0(%RSP) |
(375) 0x420ad4 VBROADCASTSD %XMM13,%YMM2 |
(375) 0x420ad9 VMOVUPD %YMM2,0x200(%RSP) |
(375) 0x420ae2 VMOVUPD %XMM5,0x120(%RSP) |
(375) 0x420aeb VBROADCASTSD %XMM5,%YMM2 |
(375) 0x420af0 VMOVUPD %YMM2,0x1e0(%RSP) |
(375) 0x420af9 VMOVUPD %XMM11,0xd0(%RSP) |
(375) 0x420b02 VBROADCASTSD %XMM11,%YMM2 |
(375) 0x420b07 VMOVUPD %YMM2,0x1c0(%RSP) |
(375) 0x420b10 VMOVUPD %XMM15,0x90(%RSP) |
(375) 0x420b19 VBROADCASTSD %XMM15,%YMM2 |
(375) 0x420b1e VMOVUPD %YMM2,0x1a0(%RSP) |
(375) 0x420b27 VMOVUPD %XMM14,0xa0(%RSP) |
(375) 0x420b30 VBROADCASTSD %XMM14,%YMM2 |
(375) 0x420b35 VMOVUPD %YMM2,0x180(%RSP) |
(375) 0x420b3e VMOVUPD %XMM8,0xe0(%RSP) |
(375) 0x420b47 VBROADCASTSD %XMM8,%YMM15 |
(375) 0x420b4c VMOVUPD %XMM12,0xc0(%RSP) |
(375) 0x420b55 VBROADCASTSD %XMM12,%YMM2 |
(375) 0x420b5a VBROADCASTSD 0x80(%RSP),%YMM3 |
(375) 0x420b64 VBROADCASTSD %XMM9,%YMM0 |
(375) 0x420b69 VBROADCASTSD %XMM1,%YMM1 |
(375) 0x420b6e XOR %R10D,%R10D |
(375) 0x420b71 VBROADCASTSD 0x70(%RSP),%YMM4 |
(375) 0x420b78 VXORPD %XMM8,%XMM8,%XMM8 |
(375) 0x420b7d VPBROADCASTQ 0xd573a(%RIP),%YMM5 |
(375) 0x420b86 VPBROADCASTQ 0xd5739(%RIP),%YMM6 |
(375) 0x420b8f VPBROADCASTQ 0xd5738(%RIP),%YMM7 |
(375) 0x420b98 NOPL (%RAX,%RAX,1) |
(377) 0x420ba0 VMOVUPD 0x2c0(%RSP),%YMM9 |
(377) 0x420ba9 VMULPD (%RDX,%R10,8),%YMM9,%YMM9 |
(377) 0x420baf VCVTTPD2DQ %YMM9,%XMM10 |
(377) 0x420bb4 VROUNDPD $0xb,%YMM9,%YMM11 |
(377) 0x420bba VSUBPD %YMM11,%YMM9,%YMM9 |
(377) 0x420bbf VPMOVSXDQ %XMM10,%YMM10 |
(377) 0x420bc4 VPSLLQ $0x3,%YMM10,%YMM10 |
(377) 0x420bca VMOVQ %RAX,%XMM11 |
(377) 0x420bcf VPBROADCASTQ %XMM11,%YMM11 |
(377) 0x420bd4 VPADDQ %YMM10,%YMM11,%YMM11 |
(377) 0x420bd9 VMOVQ %XMM11,%RSI |
(377) 0x420bde VEXTRACTI128 $0x1,%YMM11,%XMM10 |
(377) 0x420be4 VMOVQ %XMM10,%RBX |
(377) 0x420be9 VPEXTRQ $0x1,%XMM11,%R15 |
(377) 0x420bef VPEXTRQ $0x1,%XMM10,%R12 |
(377) 0x420bf5 VMOVSD (%RSI),%XMM10 |
(377) 0x420bf9 VMOVSD (%RBX),%XMM12 |
(377) 0x420bfd VPADDQ %YMM7,%YMM11,%YMM13 |
(377) 0x420c01 VMOVQ %XMM13,%RSI |
(377) 0x420c06 VMOVHPD (%R15),%XMM10,%XMM10 |
(377) 0x420c0b VPEXTRQ $0x1,%XMM13,%RBX |
(377) 0x420c11 VMOVHPD (%R12),%XMM12,%XMM12 |
(377) 0x420c17 VEXTRACTI128 $0x1,%YMM13,%XMM13 |
(377) 0x420c1d VPEXTRQ $0x1,%XMM13,%R15 |
(377) 0x420c23 VINSERTF128 $0x1,%XMM12,%YMM10,%YMM10 |
(377) 0x420c29 VMOVQ %XMM13,%R12 |
(377) 0x420c2e VMOVSD (%RSI),%XMM12 |
(377) 0x420c32 VPADDQ %YMM6,%YMM11,%YMM13 |
(377) 0x420c36 VMOVSD (%R12),%XMM14 |
(377) 0x420c3c VPEXTRQ $0x1,%XMM13,%RSI |
(377) 0x420c42 VMOVHPD (%RBX),%XMM12,%XMM12 |
(377) 0x420c46 VMOVQ %XMM13,%RBX |
(377) 0x420c4b VEXTRACTI128 $0x1,%YMM13,%XMM13 |
(377) 0x420c51 VMOVHPD (%R15),%XMM14,%XMM14 |
(377) 0x420c56 VMOVQ %XMM13,%R15 |
(377) 0x420c5b VPEXTRQ $0x1,%XMM13,%R12 |
(377) 0x420c61 VINSERTF128 $0x1,%XMM14,%YMM12,%YMM13 |
(377) 0x420c67 VMOVSD (%R15),%XMM12 |
(377) 0x420c6c VMOVSD (%RBX),%XMM14 |
(377) 0x420c70 VMOVHPD (%R12),%XMM12,%XMM12 |
(377) 0x420c76 VMOVHPD (%RSI),%XMM14,%XMM14 |
(377) 0x420c7a VINSERTF128 $0x1,%XMM12,%YMM14,%YMM12 |
(377) 0x420c80 VPADDQ %YMM5,%YMM11,%YMM11 |
(377) 0x420c84 VMOVQ %XMM11,%RSI |
(377) 0x420c89 VPEXTRQ $0x1,%XMM11,%RBX |
(377) 0x420c8f VEXTRACTI128 $0x1,%YMM11,%XMM11 |
(377) 0x420c95 VMOVQ %XMM11,%R15 |
(377) 0x420c9a VPEXTRQ $0x1,%XMM11,%R12 |
(377) 0x420ca0 VMOVAPD %YMM9,%YMM11 |
(377) 0x420ca5 VMOVUPD 0x280(%RSP),%YMM14 |
(377) 0x420cae VFMADD132PD 0x2a0(%RSP),%YMM14,%YMM11 |
(377) 0x420cb8 VFMADD213PD 0x260(%RSP),%YMM9,%YMM11 |
(377) 0x420cc2 VFMADD213PD 0x240(%RSP),%YMM9,%YMM11 |
(377) 0x420ccc VFMADD213PD %YMM8,%YMM10,%YMM11 |
(377) 0x420cd1 VMOVAPD %YMM9,%YMM8 |
(377) 0x420cd6 VMOVSD (%R15),%XMM10 |
(377) 0x420cdb VMOVUPD 0x200(%RSP),%YMM14 |
(377) 0x420ce4 VFMADD132PD 0x220(%RSP),%YMM14,%YMM8 |
(377) 0x420cee VFMADD213PD 0x1e0(%RSP),%YMM9,%YMM8 |
(377) 0x420cf8 VFMADD213PD 0x1c0(%RSP),%YMM9,%YMM8 |
(377) 0x420d02 VFMADD213PD %YMM11,%YMM13,%YMM8 |
(377) 0x420d07 VMOVSD (%RSI),%XMM11 |
(377) 0x420d0b VMOVAPD %YMM9,%YMM13 |
(377) 0x420d10 VMOVUPD 0x180(%RSP),%YMM14 |
(377) 0x420d19 VFMADD132PD 0x1a0(%RSP),%YMM14,%YMM13 |
(377) 0x420d23 VMOVHPD (%RBX),%XMM11,%XMM11 |
(377) 0x420d27 VFMADD213PD %YMM15,%YMM9,%YMM13 |
(377) 0x420d2c VFMADD213PD %YMM2,%YMM9,%YMM13 |
(377) 0x420d31 VMOVHPD (%R12),%XMM10,%XMM10 |
(377) 0x420d37 VFMADD213PD %YMM8,%YMM12,%YMM13 |
(377) 0x420d3c VINSERTF128 $0x1,%XMM10,%YMM11,%YMM10 |
(377) 0x420d42 VMOVAPD %YMM9,%YMM8 |
(377) 0x420d47 VFMADD213PD %YMM0,%YMM3,%YMM8 |
(377) 0x420d4c VFMADD213PD %YMM1,%YMM9,%YMM8 |
(377) 0x420d51 VFMADD213PD %YMM4,%YMM9,%YMM8 |
(377) 0x420d56 VFMADD213PD %YMM13,%YMM10,%YMM8 |
(377) 0x420d5b ADD $0x4,%R10 |
(377) 0x420d5f CMP %R9,%R10 |
(377) 0x420d62 JB 420ba0 |
(375) 0x420d68 VEXTRACTF128 $0x1,%YMM8,%XMM0 |
(375) 0x420d6e VADDPD %XMM0,%XMM8,%XMM0 |
(375) 0x420d72 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
(375) 0x420d77 VADDSD %XMM1,%XMM0,%XMM0 |
(375) 0x420d7b CMP %R14,%R9 |
(375) 0x420d7e VMOVSD 0x18(%RSP),%XMM9 |
(375) 0x420d84 VMOVUPD 0x150(%RSP),%XMM10 |
(375) 0x420d8d VMOVUPD 0x140(%RSP),%XMM4 |
(375) 0x420d96 VMOVUPD 0x130(%RSP),%XMM3 |
(375) 0x420d9f VMOVUPD 0x120(%RSP),%XMM5 |
(375) 0x420da8 VMOVUPD 0x110(%RSP),%XMM6 |
(375) 0x420db1 VMOVUPD 0x100(%RSP),%XMM2 |
(375) 0x420dba VMOVUPD 0xf0(%RSP),%XMM7 |
(375) 0x420dc3 VMOVUPD 0xe0(%RSP),%XMM8 |
(375) 0x420dcc VMOVUPD 0xd0(%RSP),%XMM11 |
(375) 0x420dd5 VMOVUPD 0xc0(%RSP),%XMM12 |
(375) 0x420dde VMOVUPD 0xb0(%RSP),%XMM13 |
(375) 0x420de7 VMOVUPD 0xa0(%RSP),%XMM14 |
(375) 0x420df0 VMOVUPD 0x90(%RSP),%XMM15 |
(375) 0x420df9 JE 4208e0 |
(375) 0x420dff JMP 420f64 |
(375) 0x420e10 MOV %EAX,%R15D |
(375) 0x420e13 AND $-0x8,%R15D |
(375) 0x420e17 MOV 0x50(%RSP),%RSI |
(375) 0x420e1c LEA (%RSI,%R9,8),%R12 |
(375) 0x420e20 MOV 0x48(%RSP),%RSI |
(375) 0x420e25 LEA (%RSI,%R9,1),%R10D |
(375) 0x420e29 XOR %EBX,%EBX |
(375) 0x420e2b XOR %R14D,%R14D |
(375) 0x420e2e JMP 420e3d |
(379) 0x420e30 ADD $0x8,%RBX |
(379) 0x420e34 CMP %RBX,%R15 |
(379) 0x420e37 JE 420929 |
(379) 0x420e3d VMOVSD -0x38(%R12,%RBX,8),%XMM0 |
(379) 0x420e44 VUCOMISD %XMM0,%XMM1 |
(379) 0x420e48 JBE 420e5d |
(379) 0x420e4a MOV %R10D,%ESI |
(379) 0x420e4d ADD %EBX,%ESI |
(379) 0x420e4f JE 420e5d |
(379) 0x420e51 MOVSXD %R14D,%R14 |
(379) 0x420e54 VMOVSD %XMM0,(%RDX,%R14,8) |
(379) 0x420e5a INC %R14D |
(379) 0x420e5d VMOVSD -0x30(%R12,%RBX,8),%XMM0 |
(379) 0x420e64 VUCOMISD %XMM0,%XMM1 |
(379) 0x420e68 JBE 420e7f |
(379) 0x420e6a LEA (%R10,%RBX,1),%ESI |
(379) 0x420e6e CMP $-0x1,%ESI |
(379) 0x420e71 JE 420e7f |
(379) 0x420e73 MOVSXD %R14D,%R14 |
(379) 0x420e76 VMOVSD %XMM0,(%RDX,%R14,8) |
(379) 0x420e7c INC %R14D |
(379) 0x420e7f VMOVSD -0x28(%R12,%RBX,8),%XMM0 |
(379) 0x420e86 VUCOMISD %XMM0,%XMM1 |
(379) 0x420e8a JBE 420ea1 |
(379) 0x420e8c LEA (%R10,%RBX,1),%ESI |
(379) 0x420e90 CMP $-0x2,%ESI |
(379) 0x420e93 JE 420ea1 |
(379) 0x420e95 MOVSXD %R14D,%R14 |
(379) 0x420e98 VMOVSD %XMM0,(%RDX,%R14,8) |
(379) 0x420e9e INC %R14D |
(379) 0x420ea1 VMOVSD -0x20(%R12,%RBX,8),%XMM0 |
(379) 0x420ea8 VUCOMISD %XMM0,%XMM1 |
(379) 0x420eac JBE 420ec3 |
(379) 0x420eae LEA (%R10,%RBX,1),%ESI |
(379) 0x420eb2 CMP $-0x3,%ESI |
(379) 0x420eb5 JE 420ec3 |
(379) 0x420eb7 MOVSXD %R14D,%R14 |
(379) 0x420eba VMOVSD %XMM0,(%RDX,%R14,8) |
(379) 0x420ec0 INC %R14D |
(379) 0x420ec3 VMOVSD -0x18(%R12,%RBX,8),%XMM0 |
(379) 0x420eca VUCOMISD %XMM0,%XMM1 |
(379) 0x420ece JBE 420ee5 |
(379) 0x420ed0 LEA (%R10,%RBX,1),%ESI |
(379) 0x420ed4 CMP $-0x4,%ESI |
(379) 0x420ed7 JE 420ee5 |
(379) 0x420ed9 MOVSXD %R14D,%R14 |
(379) 0x420edc VMOVSD %XMM0,(%RDX,%R14,8) |
(379) 0x420ee2 INC %R14D |
(379) 0x420ee5 VMOVSD -0x10(%R12,%RBX,8),%XMM0 |
(379) 0x420eec VUCOMISD %XMM0,%XMM1 |
(379) 0x420ef0 JBE 420f07 |
(379) 0x420ef2 LEA (%R10,%RBX,1),%ESI |
(379) 0x420ef6 CMP $-0x5,%ESI |
(379) 0x420ef9 JE 420f07 |
(379) 0x420efb MOVSXD %R14D,%R14 |
(379) 0x420efe VMOVSD %XMM0,(%RDX,%R14,8) |
(379) 0x420f04 INC %R14D |
(379) 0x420f07 VMOVSD -0x8(%R12,%RBX,8),%XMM0 |
(379) 0x420f0e VUCOMISD %XMM0,%XMM1 |
(379) 0x420f12 JBE 420f29 |
(379) 0x420f14 LEA (%R10,%RBX,1),%ESI |
(379) 0x420f18 CMP $-0x6,%ESI |
(379) 0x420f1b JE 420f29 |
(379) 0x420f1d MOVSXD %R14D,%R14 |
(379) 0x420f20 VMOVSD %XMM0,(%RDX,%R14,8) |
(379) 0x420f26 INC %R14D |
(379) 0x420f29 VMOVSD (%R12,%RBX,8),%XMM0 |
(379) 0x420f2f VUCOMISD %XMM0,%XMM1 |
(379) 0x420f33 JBE 420e30 |
(379) 0x420f39 LEA (%R10,%RBX,1),%ESI |
(379) 0x420f3d CMP $-0x7,%ESI |
(379) 0x420f40 JE 420e30 |
(379) 0x420f46 MOVSXD %R14D,%R14 |
(379) 0x420f49 VMOVSD %XMM0,(%RDX,%R14,8) |
(379) 0x420f4f INC %R14D |
(379) 0x420f52 JMP 420e30 |
(375) 0x420f57 VXORPD %XMM0,%XMM0,%XMM0 |
(375) 0x420f5b XOR %R9D,%R9D |
(375) 0x420f5e VMOVSD 0x18(%RSP),%XMM9 |
(375) 0x420f64 VUNPCKLPD 0x160(%RSP),%XMM15,%XMM1 |
(375) 0x420f6d VUNPCKLPD %XMM13,%XMM2,%XMM2 |
(375) 0x420f72 VINSERTF128 $0x1,%XMM1,%YMM2,%YMM1 |
(375) 0x420f78 VUNPCKLPD 0x80(%RSP),%XMM14,%XMM2 |
(375) 0x420f81 VUNPCKLPD %XMM6,%XMM3,%XMM3 |
(375) 0x420f85 VINSERTF128 $0x1,%XMM2,%YMM3,%YMM2 |
(375) 0x420f8b VUNPCKLPD 0x170(%RSP),%XMM8,%XMM3 |
(375) 0x420f94 VUNPCKLPD %XMM5,%XMM4,%XMM4 |
(375) 0x420f98 VINSERTF128 $0x1,%XMM3,%YMM4,%YMM3 |
(375) 0x420f9e VUNPCKLPD 0x70(%RSP),%XMM12,%XMM4 |
(375) 0x420fa4 VUNPCKLPD %XMM11,%XMM7,%XMM5 |
(375) 0x420fa9 VINSERTF128 $0x1,%XMM4,%YMM5,%YMM4 |
(375) 0x420faf NOP |
(376) 0x420fb0 VMULSD (%RDX,%R9,8),%XMM10,%XMM5 |
(376) 0x420fb6 VROUNDSD $0xb,%XMM5,%XMM5,%XMM6 |
(376) 0x420fbc VSUBSD %XMM6,%XMM5,%XMM6 |
(376) 0x420fc0 VMULSD %XMM6,%XMM6,%XMM7 |
(376) 0x420fc4 VMULSD %XMM6,%XMM7,%XMM8 |
(376) 0x420fc8 VUNPCKLPD %XMM8,%XMM7,%XMM7 |
(376) 0x420fcd VPERMPD $0x44,%YMM7,%YMM8 |
(376) 0x420fd3 VCVTTSD2SI %XMM5,%ESI |
(376) 0x420fd7 VFMADD213PD %YMM4,%YMM2,%YMM8 |
(376) 0x420fdc VPERMPD $0x11,%YMM7,%YMM5 |
(376) 0x420fe2 VFMADD213PD %YMM8,%YMM1,%YMM5 |
(376) 0x420fe7 MOVSXD %ESI,%RSI |
(376) 0x420fea VBROADCASTSD %XMM6,%YMM6 |
(376) 0x420fef VFMADD213PD %YMM5,%YMM3,%YMM6 |
(376) 0x420ff4 VMULPD (%RAX,%RSI,8),%YMM6,%YMM5 |
(376) 0x420ff9 VEXTRACTF128 $0x1,%YMM5,%XMM6 |
(376) 0x420fff VADDPD %XMM6,%XMM5,%XMM5 |
(376) 0x421003 VSHUFPD $0x1,%XMM5,%XMM5,%XMM6 |
(376) 0x421008 VADDSD %XMM6,%XMM5,%XMM5 |
(376) 0x42100c VADDSD %XMM5,%XMM0,%XMM0 |
(376) 0x421010 INC %R9 |
(376) 0x421013 CMP %R9,%R14 |
(376) 0x421016 JNE 420fb0 |
(375) 0x421018 JMP 4208e0 |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/optional: 433 - 950 |
-------------------------------------------------------------------------------- |
433: { return static_cast<const _Dp*>(this)->_M_payload._M_engaged; } |
[...] |
950: if (this->_M_is_engaged()) |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 107 - 132 |
-------------------------------------------------------------------------------- |
107: for (int k = 0; k < ratios.size(); ++k) |
108: ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.getRefPS(), VP.refPtcl, VP.getDistTableAB(myTableID).getDistRow(k).data())); |
[...] |
126: const int igt = P.GroupID[iat] * NumGroups; |
127: for (int jg = 0; jg < NumGroups; ++jg) |
128: { |
129: const FuncType& f2(*F[igt + jg]); |
130: int iStart = P.first(jg); |
131: int iEnd = P.last(jg); |
132: curUat += f2.evaluateV(iat, iStart, iEnd, dist, DistCompressed.data()); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/shared_ptr_base.h: 1296 - 1296 |
-------------------------------------------------------------------------------- |
1296: { return _M_ptr; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 919 - 1169 |
-------------------------------------------------------------------------------- |
919: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1064: return *(this->_M_impl._M_start + __n); |
[...] |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/refwrap.h: 338 - 338 |
-------------------------------------------------------------------------------- |
338: { return *_M_data; } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 249 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
[...] |
249: inline const_pointer data() const { return X; } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 236 - 260 |
-------------------------------------------------------------------------------- |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.67 |
CQA speedup if FP arith vectorized | 2.26 |
CQA speedup if fully vectorized | 18.10 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.27 |
Bottlenecks | micro-operation queue, |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | optional:433-433,optional:950-950,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127,shared_ptr_base.h:1296-1296,stl_vector.h:919-919,stl_vector.h:1046-1046,stl_vector.h:1064-1064,stl_vector.h:1169-1169,refwrap.h:338-338,OhmmsVector.h:223-223,OhmmsVector.h:229-229,OhmmsVector.h:249-249 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 9.33 |
CQA cycles if no scalar integer | 3.50 |
CQA cycles if FP arith vectorized | 4.13 |
CQA cycles if fully vectorized | 0.52 |
Front-end cycles | 9.33 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 2.40 |
P1 cycles | 2.40 |
P2 cycles | 7.33 |
P3 cycles | 7.33 |
P4 cycles | 5.00 |
P5 cycles | 2.40 |
P6 cycles | 2.40 |
P7 cycles | 5.00 |
P8 cycles | 5.00 |
P9 cycles | 5.00 |
P10 cycles | 2.40 |
P11 cycles | 7.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 10.38 |
Stall cycles (UFS) | 1.34 |
Nb insns | 53.00 |
Nb uops | 56.00 |
Nb loads | 22.00 |
Nb stores | 8.00 |
Nb stack references | 10.00 |
FLOP/cycle | 0.11 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 23.68 |
Bytes prefetched | 0.00 |
Bytes loaded | 157.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 13.64 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 33.33 |
Vector-efficiency ratio all | 13.42 |
Vector-efficiency ratio load | 10.31 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 14.76 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.67 |
CQA speedup if FP arith vectorized | 2.26 |
CQA speedup if fully vectorized | 18.10 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.27 |
Bottlenecks | micro-operation queue, |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source | optional:433-433,optional:950-950,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127,shared_ptr_base.h:1296-1296,stl_vector.h:919-919,stl_vector.h:1046-1046,stl_vector.h:1064-1064,stl_vector.h:1169-1169,refwrap.h:338-338,OhmmsVector.h:223-223,OhmmsVector.h:229-229,OhmmsVector.h:249-249 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 9.33 |
CQA cycles if no scalar integer | 3.50 |
CQA cycles if FP arith vectorized | 4.13 |
CQA cycles if fully vectorized | 0.52 |
Front-end cycles | 9.33 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 2.40 |
P1 cycles | 2.40 |
P2 cycles | 7.33 |
P3 cycles | 7.33 |
P4 cycles | 5.00 |
P5 cycles | 2.40 |
P6 cycles | 2.40 |
P7 cycles | 5.00 |
P8 cycles | 5.00 |
P9 cycles | 5.00 |
P10 cycles | 2.40 |
P11 cycles | 7.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 10.38 |
Stall cycles (UFS) | 1.34 |
Nb insns | 53.00 |
Nb uops | 56.00 |
Nb loads | 22.00 |
Nb stores | 8.00 |
Nb stack references | 10.00 |
FLOP/cycle | 0.11 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 23.68 |
Bytes prefetched | 0.00 |
Bytes loaded | 157.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 13.64 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 33.33 |
Vector-efficiency ratio all | 13.42 |
Vector-efficiency ratio load | 10.31 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 14.76 |
Path / |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 53 |
nb uops | 56 |
loop length | 255 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 9.33 cycles |
front end | 9.33 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.40 | 2.40 | 7.33 | 7.33 | 5.00 | 2.40 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 7.33 |
cycles | 2.40 | 2.40 | 7.33 | 7.33 | 5.00 | 2.40 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 7.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.38 |
Stall cycles | 1.34 |
LM full (events) | 2.98 |
Front-end | 9.33 |
Dispatch | 7.33 |
Overall L1 | 9.33 |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 13% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 12% |
load | 8% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x38(%RSP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM9,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4d5d50 <exp> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RAX,%R15,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
CMP %R15,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JBE 42101d <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x87d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPB $0,0x298(%RCX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 42102c <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x88c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2a0(%RCX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%R12,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x290(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x248(%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 461820 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0xa0(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV %R15,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 4207e0 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R15,%R15,4),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R12,4),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %ECX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x1d0(%R14),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x200(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %EAX,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x268(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x38(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4208f1 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x151> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
Function | _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 53 |
nb uops | 56 |
loop length | 255 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 9.33 cycles |
front end | 9.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.40 | 2.40 | 7.33 | 7.33 | 5.00 | 2.40 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 7.33 |
cycles | 2.40 | 2.40 | 7.33 | 7.33 | 5.00 | 2.40 | 2.40 | 5.00 | 5.00 | 5.00 | 2.40 | 7.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.38 |
Stall cycles | 1.34 |
LM full (events) | 2.98 |
Front-end | 9.33 |
Dispatch | 7.33 |
Overall L1 | 9.33 |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 13% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 33% |
all | 12% |
load | 8% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x38(%RSP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM9,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4d5d50 <exp> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RAX,%R15,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
CMP %R15,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JBE 42101d <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x87d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPB $0,0x298(%RCX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 42102c <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x88c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2a0(%RCX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%R12,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x290(%RCX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x248(%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 461820 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0xa0(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV %R15,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 4207e0 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x40> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R15,%R15,4),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R12,4),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %ECX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x1d0(%R14),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x200(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %EAX,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0x268(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x38(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 4208f1 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x151> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |