Loop Id: 372 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
Loop Id: 372 | Module: exec | Source: TwoBodyJastrowRef.h:107-132 [...] | Coverage: 0.01% |
---|
0x420010 VXORPD %XMM9,%XMM9,%XMM9 |
0x420015 VMOVSD 0x48(%RSP),%XMM0 |
0x42001b VSUBSD %XMM9,%XMM0,%XMM0 |
0x420020 VZEROUPPER |
0x420023 CALL 4d6610 <exp> |
0x420028 MOV 0x40(%RSP),%RCX |
0x42002d MOV (%RCX),%RAX |
0x420030 MOV 0x50(%RSP),%R15 |
0x420035 VMOVSD %XMM0,(%RAX,%R15,8) |
0x42003b INC %R15 |
0x42003e MOV 0x8(%RCX),%RCX |
0x420042 SUB %RAX,%RCX |
0x420045 SAR $0x3,%RCX |
0x420049 CMP %R15,%RCX |
0x42004c MOV 0x38(%RSP),%RDI |
0x420051 MOV 0x30(%RSP),%R14 |
0x420056 JBE 42088d |
0x42005c CMPB $0,0x298(%RDI) |
0x420063 JE 42089c |
0x420069 MOVSXD 0x2a0(%RDI),%R12 |
0x420070 MOV 0xd8(%R14),%RAX |
0x420077 VMOVSD (%RAX,%R12,8),%XMM0 |
0x42007d VMOVSD %XMM0,0x48(%RSP) |
0x420083 MOV 0x290(%RDI),%RBX |
0x42008a MOV 0x248(%R14),%ESI |
0x420091 VZEROUPPER |
0x420094 CALL 460d00 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> |
0x420099 MOV 0xa0(%R14),%RCX |
0x4200a0 TEST %RCX,%RCX |
0x4200a3 MOV %R15,0x50(%RSP) |
0x4200a8 JE 420010 |
0x4200ae MOV 0x48(%RAX),%RAX |
0x4200b2 LEA (%R15,%R15,4),%RDX |
0x4200b6 MOV 0x18(%RAX,%RDX,8),%RSI |
0x4200bb MOV 0x18(%RBX),%RAX |
0x4200bf MOV (%RAX,%R12,4),%EAX |
0x4200c3 IMUL %ECX,%EAX |
0x4200c6 MOV 0x1d0(%R14),%RDX |
0x4200cd MOV 0x200(%R14),%RDI |
0x4200d4 MOV %RDI,0x80(%RSP) |
0x4200dc CLTQ |
0x4200de MOV %RAX,0x78(%RSP) |
0x4200e3 MOV 0x268(%RBX),%RAX |
0x4200ea DEC %RCX |
0x4200ed MOV 0x18(%RAX),%RAX |
0x4200f1 MOV %RAX,0x88(%RSP) |
0x4200f9 MOV (%RAX),%R10D |
0x4200fc MOV %RSI,0x68(%RSP) |
0x420101 LEA 0x38(%RSI),%RAX |
0x420105 MOV %RAX,0x60(%RSP) |
0x42010a MOV %R12,0x70(%RSP) |
0x42010f NEG %R12D |
0x420112 MOV %R12,0x58(%RSP) |
0x420117 VXORPD %XMM9,%XMM9,%XMM9 |
0x42011c XOR %R13D,%R13D |
0x42011f JMP 420141 |
(373) 0x420130 VADDSD %XMM0,%XMM9,%XMM9 |
(373) 0x420134 CMP %RCX,%R13 |
(373) 0x420137 LEA 0x1(%R13),%R13 |
(373) 0x42013b JE 420015 |
(373) 0x420141 MOV %R10D,%EAX |
(373) 0x420144 MOV 0x88(%RSP),%RSI |
(373) 0x42014c MOV 0x4(%RSI,%R13,4),%R10D |
(373) 0x420151 MOV %R10D,%ESI |
(373) 0x420154 SUB %EAX,%ESI |
(373) 0x420156 VXORPD %XMM0,%XMM0,%XMM0 |
(373) 0x42015a TEST %ESI,%ESI |
(373) 0x42015c JLE 420130 |
(373) 0x42015e MOV 0x78(%RSP),%RDI |
(373) 0x420163 ADD %R13,%RDI |
(373) 0x420166 MOV 0x80(%RSP),%R8 |
(373) 0x42016e MOV (%R8,%RDI,8),%R14 |
(373) 0x420172 VMOVSD 0x8(%R14),%XMM1 |
(373) 0x420178 MOV %ESI,%EBX |
(373) 0x42017a XOR %R15D,%R15D |
(373) 0x42017d MOVSXD %EAX,%R12 |
(373) 0x420180 CMP $0x8,%ESI |
(373) 0x420183 JAE 420610 |
(373) 0x420189 MOV %EBX,%ESI |
(373) 0x42018b AND $-0x8,%ESI |
(373) 0x42018e CMP %RBX,%RSI |
(373) 0x420191 JNE 420760 |
(373) 0x420197 VXORPD %XMM0,%XMM0,%XMM0 |
(373) 0x42019b TEST %R15D,%R15D |
(373) 0x42019e JLE 420130 |
(373) 0x4201a0 VMOVSD %XMM9,0x8(%RSP) |
(373) 0x4201a6 VMOVSD 0x238(%R14),%XMM10 |
(373) 0x4201af MOV 0x218(%R14),%RAX |
(373) 0x4201b6 VMOVSD 0x18(%R14),%XMM2 |
(373) 0x4201bc VMOVSD 0x20(%R14),%XMM3 |
(373) 0x4201c2 VMOVSD 0x28(%R14),%XMM4 |
(373) 0x4201c8 VMOVSD 0x30(%R14),%XMM0 |
(373) 0x4201ce VMOVUPS %XMM0,0x20(%RSP) |
(373) 0x4201d4 VMOVSD 0x38(%R14),%XMM8 |
(373) 0x4201da VMOVSD 0x40(%R14),%XMM13 |
(373) 0x4201e0 VMOVSD 0x48(%R14),%XMM7 |
(373) 0x4201e6 VMOVSD 0x50(%R14),%XMM6 |
(373) 0x4201ec VMOVSD 0x58(%R14),%XMM0 |
(373) 0x4201f2 VMOVSD 0x60(%R14),%XMM14 |
(373) 0x4201f8 VMOVSD 0x68(%R14),%XMM12 |
(373) 0x4201fe VMOVSD 0x70(%R14),%XMM11 |
(373) 0x420204 VMOVSD 0x78(%R14),%XMM9 |
(373) 0x42020a VMOVSD 0x80(%R14),%XMM5 |
(373) 0x420213 VMOVSD 0x88(%R14),%XMM1 |
(373) 0x42021c MOV %R15D,%R9D |
(373) 0x42021f MOV %R9,%R11 |
(373) 0x420222 VMOVSD 0x90(%R14),%XMM15 |
(373) 0x42022b VMOVUPS %XMM15,0x10(%RSP) |
(373) 0x420231 MOV $-0x4,%ESI |
(373) 0x420236 AND %RSI,%R11 |
(373) 0x420239 VMOVUPD %XMM1,0x170(%RSP) |
(373) 0x420242 VMOVUPD %XMM0,0x160(%RSP) |
(373) 0x42024b VMOVUPD %XMM9,0x150(%RSP) |
(373) 0x420254 VMOVUPD %XMM5,0x140(%RSP) |
(373) 0x42025d JE 4207a9 |
(373) 0x420263 VMOVUPD %XMM10,0x130(%RSP) |
(373) 0x42026c VBROADCASTSD %XMM10,%YMM10 |
(373) 0x420271 VMOVUPD %YMM10,0x320(%RSP) |
(373) 0x42027a VMOVUPD %XMM2,0xe0(%RSP) |
(373) 0x420283 VBROADCASTSD %XMM2,%YMM2 |
(373) 0x420288 VMOVUPD %YMM2,0x300(%RSP) |
(373) 0x420291 VMOVUPD %XMM3,0x100(%RSP) |
(373) 0x42029a VBROADCASTSD %XMM3,%YMM2 |
(373) 0x42029f VMOVUPD %YMM2,0x2e0(%RSP) |
(373) 0x4202a8 VMOVUPD %XMM4,0x120(%RSP) |
(373) 0x4202b1 VBROADCASTSD %XMM4,%YMM2 |
(373) 0x4202b6 VMOVUPD %YMM2,0x2c0(%RSP) |
(373) 0x4202bf VBROADCASTSD 0x20(%RSP),%YMM2 |
(373) 0x4202c6 VMOVUPS %YMM2,0x2a0(%RSP) |
(373) 0x4202cf VMOVUPD %XMM8,0xd0(%RSP) |
(373) 0x4202d8 VBROADCASTSD %XMM8,%YMM2 |
(373) 0x4202dd VMOVUPD %YMM2,0x280(%RSP) |
(373) 0x4202e6 VMOVUPD %XMM13,0xa0(%RSP) |
(373) 0x4202ef VBROADCASTSD %XMM13,%YMM2 |
(373) 0x4202f4 VMOVUPD %YMM2,0x260(%RSP) |
(373) 0x4202fd VMOVUPD %XMM7,0xf0(%RSP) |
(373) 0x420306 VBROADCASTSD %XMM7,%YMM2 |
(373) 0x42030b VMOVUPD %YMM2,0x240(%RSP) |
(373) 0x420314 VMOVUPD %XMM6,0x110(%RSP) |
(373) 0x42031d VBROADCASTSD %XMM6,%YMM2 |
(373) 0x420322 VMOVUPD %YMM2,0x220(%RSP) |
(373) 0x42032b VBROADCASTSD %XMM0,%YMM0 |
(373) 0x420330 VMOVUPD %YMM0,0x200(%RSP) |
(373) 0x420339 VMOVUPD %XMM14,0x90(%RSP) |
(373) 0x420342 VBROADCASTSD %XMM14,%YMM0 |
(373) 0x420347 VMOVUPD %YMM0,0x1e0(%RSP) |
(373) 0x420350 VMOVUPD %XMM12,0xb0(%RSP) |
(373) 0x420359 VBROADCASTSD %XMM12,%YMM2 |
(373) 0x42035e VMOVUPD %XMM11,0xc0(%RSP) |
(373) 0x420367 VBROADCASTSD %XMM11,%YMM15 |
(373) 0x42036c VBROADCASTSD %XMM9,%YMM3 |
(373) 0x420371 VBROADCASTSD %XMM5,%YMM0 |
(373) 0x420376 VBROADCASTSD %XMM1,%YMM1 |
(373) 0x42037b XOR %ESI,%ESI |
(373) 0x42037d VBROADCASTSD 0x10(%RSP),%YMM4 |
(373) 0x420384 VXORPD %XMM5,%XMM5,%XMM5 |
(373) 0x420388 VMOVDQU 0x1c0(%RSP),%YMM12 |
(373) 0x420391 VMOVDQU 0x1a0(%RSP),%YMM13 |
(373) 0x42039a VMOVDQU 0x180(%RSP),%YMM14 |
(373) 0x4203a3 NOPW %CS:(%RAX,%RAX,1) |
(375) 0x4203b0 VMOVUPD 0x320(%RSP),%YMM6 |
(375) 0x4203b9 VMULPD (%RDX,%RSI,8),%YMM6,%YMM6 |
(375) 0x4203be VCVTTPD2DQ %YMM6,%XMM7 |
(375) 0x4203c2 VROUNDPD $0xb,%YMM6,%YMM8 |
(375) 0x4203c8 VSUBPD %YMM8,%YMM6,%YMM6 |
(375) 0x4203cd VPMOVSXDQ %XMM7,%YMM7 |
(375) 0x4203d2 VPSLLQ $0x3,%YMM7,%YMM7 |
(375) 0x4203d7 VMOVQ %RAX,%XMM8 |
(375) 0x4203dc VPBROADCASTQ %XMM8,%YMM8 |
(375) 0x4203e1 VPADDQ %YMM7,%YMM8,%YMM7 |
(375) 0x4203e5 VMOVQ %XMM7,%RDI |
(375) 0x4203ea VEXTRACTI128 $0x1,%YMM7,%XMM8 |
(375) 0x4203f0 VMOVQ %XMM8,%R8 |
(375) 0x4203f5 VPEXTRQ $0x1,%XMM7,%RBX |
(375) 0x4203fb VPEXTRQ $0x1,%XMM8,%R14 |
(375) 0x420401 VMOVSD (%RDI),%XMM8 |
(375) 0x420405 VMOVSD (%R8),%XMM9 |
(375) 0x42040a VPADDQ %YMM7,%YMM14,%YMM10 |
(375) 0x42040e VMOVQ %XMM10,%RDI |
(375) 0x420413 VMOVHPD (%RBX),%XMM8,%XMM8 |
(375) 0x420417 VPEXTRQ $0x1,%XMM10,%R8 |
(375) 0x42041d VMOVHPD (%R14),%XMM9,%XMM9 |
(375) 0x420422 VEXTRACTI128 $0x1,%YMM10,%XMM10 |
(375) 0x420428 VPEXTRQ $0x1,%XMM10,%RBX |
(375) 0x42042e VINSERTF128 $0x1,%XMM9,%YMM8,%YMM8 |
(375) 0x420434 VMOVQ %XMM10,%R14 |
(375) 0x420439 VMOVSD (%RDI),%XMM9 |
(375) 0x42043d VPADDQ %YMM7,%YMM13,%YMM10 |
(375) 0x420441 VMOVSD (%R14),%XMM11 |
(375) 0x420446 VPEXTRQ $0x1,%XMM10,%RDI |
(375) 0x42044c VMOVHPD (%R8),%XMM9,%XMM9 |
(375) 0x420451 VMOVQ %XMM10,%R8 |
(375) 0x420456 VEXTRACTI128 $0x1,%YMM10,%XMM10 |
(375) 0x42045c VMOVHPD (%RBX),%XMM11,%XMM11 |
(375) 0x420460 VMOVQ %XMM10,%RBX |
(375) 0x420465 VPEXTRQ $0x1,%XMM10,%R14 |
(375) 0x42046b VINSERTF128 $0x1,%XMM11,%YMM9,%YMM10 |
(375) 0x420471 VMOVSD (%RBX),%XMM9 |
(375) 0x420475 VMOVSD (%R8),%XMM11 |
(375) 0x42047a VMOVHPD (%R14),%XMM9,%XMM9 |
(375) 0x42047f VMOVHPD (%RDI),%XMM11,%XMM11 |
(375) 0x420483 VINSERTF128 $0x1,%XMM9,%YMM11,%YMM9 |
(375) 0x420489 VPADDQ %YMM7,%YMM12,%YMM7 |
(375) 0x42048d VMOVQ %XMM7,%R8 |
(375) 0x420492 VPEXTRQ $0x1,%XMM7,%RBX |
(375) 0x420498 VEXTRACTI128 $0x1,%YMM7,%XMM7 |
(375) 0x42049e VMOVQ %XMM7,%R14 |
(375) 0x4204a3 VPEXTRQ $0x1,%XMM7,%RDI |
(375) 0x4204a9 VMOVAPD %YMM6,%YMM7 |
(375) 0x4204ad VMOVUPD 0x2e0(%RSP),%YMM11 |
(375) 0x4204b6 VFMADD132PD 0x300(%RSP),%YMM11,%YMM7 |
(375) 0x4204c0 VFMADD213PD 0x2c0(%RSP),%YMM6,%YMM7 |
(375) 0x4204ca VFMADD213PD 0x2a0(%RSP),%YMM6,%YMM7 |
(375) 0x4204d4 VFMADD213PD %YMM5,%YMM8,%YMM7 |
(375) 0x4204d9 VMOVAPD %YMM6,%YMM5 |
(375) 0x4204dd VMOVSD (%R14),%XMM8 |
(375) 0x4204e2 VMOVUPD 0x260(%RSP),%YMM11 |
(375) 0x4204eb VFMADD132PD 0x280(%RSP),%YMM11,%YMM5 |
(375) 0x4204f5 VFMADD213PD 0x240(%RSP),%YMM6,%YMM5 |
(375) 0x4204ff VFMADD213PD 0x220(%RSP),%YMM6,%YMM5 |
(375) 0x420509 VFMADD213PD %YMM7,%YMM10,%YMM5 |
(375) 0x42050e VMOVSD (%R8),%XMM7 |
(375) 0x420513 VMOVAPD %YMM6,%YMM10 |
(375) 0x420517 VMOVUPD 0x1e0(%RSP),%YMM11 |
(375) 0x420520 VFMADD132PD 0x200(%RSP),%YMM11,%YMM10 |
(375) 0x42052a VMOVHPD (%RBX),%XMM7,%XMM7 |
(375) 0x42052e VFMADD213PD %YMM2,%YMM6,%YMM10 |
(375) 0x420533 VFMADD213PD %YMM15,%YMM6,%YMM10 |
(375) 0x420538 VMOVHPD (%RDI),%XMM8,%XMM8 |
(375) 0x42053c VFMADD213PD %YMM5,%YMM9,%YMM10 |
(375) 0x420541 VINSERTF128 $0x1,%XMM8,%YMM7,%YMM7 |
(375) 0x420547 VMOVAPD %YMM6,%YMM5 |
(375) 0x42054b VFMADD213PD %YMM0,%YMM3,%YMM5 |
(375) 0x420550 VFMADD213PD %YMM1,%YMM6,%YMM5 |
(375) 0x420555 VFMADD213PD %YMM4,%YMM6,%YMM5 |
(375) 0x42055a VFMADD213PD %YMM10,%YMM7,%YMM5 |
(375) 0x42055f ADD $0x4,%RSI |
(375) 0x420563 CMP %R11,%RSI |
(375) 0x420566 JB 4203b0 |
(373) 0x42056c VEXTRACTF128 $0x1,%YMM5,%XMM0 |
(373) 0x420572 VADDPD %XMM0,%XMM5,%XMM0 |
(373) 0x420576 VSHUFPD $0x1,%XMM0,%XMM0,%XMM1 |
(373) 0x42057b VADDSD %XMM1,%XMM0,%XMM0 |
(373) 0x42057f CMP %R9,%R11 |
(373) 0x420582 VMOVSD 0x8(%RSP),%XMM9 |
(373) 0x420588 VMOVUPD 0x130(%RSP),%XMM10 |
(373) 0x420591 VMOVUPD 0x20(%RSP),%XMM15 |
(373) 0x420597 VMOVUPD 0x120(%RSP),%XMM4 |
(373) 0x4205a0 VMOVUPD 0x110(%RSP),%XMM6 |
(373) 0x4205a9 VMOVUPD 0x100(%RSP),%XMM3 |
(373) 0x4205b2 VMOVUPD 0xf0(%RSP),%XMM7 |
(373) 0x4205bb VMOVUPD 0xe0(%RSP),%XMM2 |
(373) 0x4205c4 VMOVUPD 0xd0(%RSP),%XMM8 |
(373) 0x4205cd VMOVUPD 0xc0(%RSP),%XMM11 |
(373) 0x4205d6 VMOVUPD 0xb0(%RSP),%XMM12 |
(373) 0x4205df VMOVUPD 0xa0(%RSP),%XMM13 |
(373) 0x4205e8 VMOVUPD 0x90(%RSP),%XMM14 |
(373) 0x4205f1 VMOVUPD 0x10(%RSP),%XMM5 |
(373) 0x4205f7 JE 420130 |
(373) 0x4205fd JMP 4207c2 |
(373) 0x420610 MOV 0x60(%RSP),%RSI |
(373) 0x420615 LEA (%RSI,%R12,8),%R11 |
(373) 0x420619 MOV %EBX,%R9D |
(373) 0x42061c AND $-0x8,%R9D |
(373) 0x420620 MOV 0x58(%RSP),%RSI |
(373) 0x420625 ADD %EAX,%ESI |
(373) 0x420627 XOR %EDI,%EDI |
(373) 0x420629 XOR %R15D,%R15D |
(373) 0x42062c JMP 42063d |
(377) 0x420630 ADD $0x8,%RDI |
(377) 0x420634 CMP %RDI,%R9 |
(377) 0x420637 JE 420189 |
(377) 0x42063d VMOVSD -0x38(%R11,%RDI,8),%XMM0 |
(377) 0x420644 VUCOMISD %XMM0,%XMM1 |
(377) 0x420648 JBE 42065e |
(377) 0x42064a MOV %ESI,%R8D |
(377) 0x42064d ADD %EDI,%R8D |
(377) 0x420650 JE 42065e |
(377) 0x420652 MOVSXD %R15D,%R15 |
(377) 0x420655 VMOVSD %XMM0,(%RDX,%R15,8) |
(377) 0x42065b INC %R15D |
(377) 0x42065e VMOVSD -0x30(%R11,%RDI,8),%XMM0 |
(377) 0x420665 VUCOMISD %XMM0,%XMM1 |
(377) 0x420669 JBE 420681 |
(377) 0x42066b LEA (%RSI,%RDI,1),%R8D |
(377) 0x42066f CMP $-0x1,%R8D |
(377) 0x420673 JE 420681 |
(377) 0x420675 MOVSXD %R15D,%R15 |
(377) 0x420678 VMOVSD %XMM0,(%RDX,%R15,8) |
(377) 0x42067e INC %R15D |
(377) 0x420681 VMOVSD -0x28(%R11,%RDI,8),%XMM0 |
(377) 0x420688 VUCOMISD %XMM0,%XMM1 |
(377) 0x42068c JBE 4206a4 |
(377) 0x42068e LEA (%RSI,%RDI,1),%R8D |
(377) 0x420692 CMP $-0x2,%R8D |
(377) 0x420696 JE 4206a4 |
(377) 0x420698 MOVSXD %R15D,%R15 |
(377) 0x42069b VMOVSD %XMM0,(%RDX,%R15,8) |
(377) 0x4206a1 INC %R15D |
(377) 0x4206a4 VMOVSD -0x20(%R11,%RDI,8),%XMM0 |
(377) 0x4206ab VUCOMISD %XMM0,%XMM1 |
(377) 0x4206af JBE 4206c7 |
(377) 0x4206b1 LEA (%RSI,%RDI,1),%R8D |
(377) 0x4206b5 CMP $-0x3,%R8D |
(377) 0x4206b9 JE 4206c7 |
(377) 0x4206bb MOVSXD %R15D,%R15 |
(377) 0x4206be VMOVSD %XMM0,(%RDX,%R15,8) |
(377) 0x4206c4 INC %R15D |
(377) 0x4206c7 VMOVSD -0x18(%R11,%RDI,8),%XMM0 |
(377) 0x4206ce VUCOMISD %XMM0,%XMM1 |
(377) 0x4206d2 JBE 4206ea |
(377) 0x4206d4 LEA (%RSI,%RDI,1),%R8D |
(377) 0x4206d8 CMP $-0x4,%R8D |
(377) 0x4206dc JE 4206ea |
(377) 0x4206de MOVSXD %R15D,%R15 |
(377) 0x4206e1 VMOVSD %XMM0,(%RDX,%R15,8) |
(377) 0x4206e7 INC %R15D |
(377) 0x4206ea VMOVSD -0x10(%R11,%RDI,8),%XMM0 |
(377) 0x4206f1 VUCOMISD %XMM0,%XMM1 |
(377) 0x4206f5 JBE 42070d |
(377) 0x4206f7 LEA (%RSI,%RDI,1),%R8D |
(377) 0x4206fb CMP $-0x5,%R8D |
(377) 0x4206ff JE 42070d |
(377) 0x420701 MOVSXD %R15D,%R15 |
(377) 0x420704 VMOVSD %XMM0,(%RDX,%R15,8) |
(377) 0x42070a INC %R15D |
(377) 0x42070d VMOVSD -0x8(%R11,%RDI,8),%XMM0 |
(377) 0x420714 VUCOMISD %XMM0,%XMM1 |
(377) 0x420718 JBE 420730 |
(377) 0x42071a LEA (%RSI,%RDI,1),%R8D |
(377) 0x42071e CMP $-0x6,%R8D |
(377) 0x420722 JE 420730 |
(377) 0x420724 MOVSXD %R15D,%R15 |
(377) 0x420727 VMOVSD %XMM0,(%RDX,%R15,8) |
(377) 0x42072d INC %R15D |
(377) 0x420730 VMOVSD (%R11,%RDI,8),%XMM0 |
(377) 0x420736 VUCOMISD %XMM0,%XMM1 |
(377) 0x42073a JBE 420630 |
(377) 0x420740 LEA (%RSI,%RDI,1),%R8D |
(377) 0x420744 CMP $-0x7,%R8D |
(377) 0x420748 JE 420630 |
(377) 0x42074e MOVSXD %R15D,%R15 |
(377) 0x420751 VMOVSD %XMM0,(%RDX,%R15,8) |
(377) 0x420757 INC %R15D |
(377) 0x42075a JMP 420630 |
(373) 0x420760 MOV 0x70(%RSP),%RDI |
(373) 0x420765 SUB %EAX,%EDI |
(373) 0x420767 MOV 0x68(%RSP),%RAX |
(373) 0x42076c LEA (%RAX,%R12,8),%RAX |
(373) 0x420770 JMP 42078c |
(376) 0x420780 INC %RSI |
(376) 0x420783 CMP %RSI,%RBX |
(376) 0x420786 JE 420197 |
(376) 0x42078c VMOVSD (%RAX,%RSI,8),%XMM0 |
(376) 0x420791 VUCOMISD %XMM0,%XMM1 |
(376) 0x420795 JBE 420780 |
(376) 0x420797 CMP %ESI,%EDI |
(376) 0x420799 JE 420780 |
(376) 0x42079b MOVSXD %R15D,%R15 |
(376) 0x42079e VMOVSD %XMM0,(%RDX,%R15,8) |
(376) 0x4207a4 INC %R15D |
(376) 0x4207a7 JMP 420780 |
(373) 0x4207a9 VXORPD %XMM0,%XMM0,%XMM0 |
(373) 0x4207ad XOR %R11D,%R11D |
(373) 0x4207b0 VMOVSD 0x8(%RSP),%XMM9 |
(373) 0x4207b6 VMOVUPD 0x10(%RSP),%XMM5 |
(373) 0x4207bc VMOVUPD 0x20(%RSP),%XMM15 |
(373) 0x4207c2 VMOVUPD 0x160(%RSP),%XMM1 |
(373) 0x4207cb VUNPCKLPD 0x140(%RSP),%XMM1,%XMM1 |
(373) 0x4207d4 VUNPCKLPD %XMM13,%XMM2,%XMM2 |
(373) 0x4207d9 VINSERTF128 $0x1,%XMM1,%YMM2,%YMM1 |
(373) 0x4207df VUNPCKLPD 0x150(%RSP),%XMM14,%XMM2 |
(373) 0x4207e8 VUNPCKLPD %XMM8,%XMM3,%XMM3 |
(373) 0x4207ed VINSERTF128 $0x1,%XMM2,%YMM3,%YMM2 |
(373) 0x4207f3 VUNPCKLPD 0x170(%RSP),%XMM12,%XMM3 |
(373) 0x4207fc VUNPCKLPD %XMM7,%XMM4,%XMM4 |
(373) 0x420800 VINSERTF128 $0x1,%XMM3,%YMM4,%YMM3 |
(373) 0x420806 VUNPCKLPD %XMM5,%XMM11,%XMM4 |
(373) 0x42080a VUNPCKLPD %XMM6,%XMM15,%XMM5 |
(373) 0x42080e VINSERTF128 $0x1,%XMM4,%YMM5,%YMM4 |
(373) 0x420814 NOPW %CS:(%RAX,%RAX,1) |
(374) 0x420820 VMULSD (%RDX,%R11,8),%XMM10,%XMM5 |
(374) 0x420826 VROUNDSD $0xb,%XMM5,%XMM5,%XMM6 |
(374) 0x42082c VSUBSD %XMM6,%XMM5,%XMM6 |
(374) 0x420830 VMULSD %XMM6,%XMM6,%XMM7 |
(374) 0x420834 VMULSD %XMM6,%XMM7,%XMM8 |
(374) 0x420838 VUNPCKLPD %XMM8,%XMM7,%XMM7 |
(374) 0x42083d VPERMPD $0x44,%YMM7,%YMM8 |
(374) 0x420843 VCVTTSD2SI %XMM5,%ESI |
(374) 0x420847 VFMADD213PD %YMM4,%YMM2,%YMM8 |
(374) 0x42084c VPERMPD $0x11,%YMM7,%YMM5 |
(374) 0x420852 VFMADD213PD %YMM8,%YMM1,%YMM5 |
(374) 0x420857 MOVSXD %ESI,%RSI |
(374) 0x42085a VBROADCASTSD %XMM6,%YMM6 |
(374) 0x42085f VFMADD213PD %YMM5,%YMM3,%YMM6 |
(374) 0x420864 VMULPD (%RAX,%RSI,8),%YMM6,%YMM5 |
(374) 0x420869 VEXTRACTF128 $0x1,%YMM5,%XMM6 |
(374) 0x42086f VADDPD %XMM6,%XMM5,%XMM5 |
(374) 0x420873 VSHUFPD $0x1,%XMM5,%XMM5,%XMM6 |
(374) 0x420878 VADDSD %XMM6,%XMM5,%XMM5 |
(374) 0x42087c VADDSD %XMM5,%XMM0,%XMM0 |
(374) 0x420880 INC %R11 |
(374) 0x420883 CMP %R11,%R9 |
(374) 0x420886 JNE 420820 |
(373) 0x420888 JMP 420130 |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/refwrap.h: 313 - 313 |
-------------------------------------------------------------------------------- |
313: { return *_M_data; } |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/shared_ptr_base.h: 1308 - 1308 |
-------------------------------------------------------------------------------- |
1308: { return _M_ptr; } |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 249 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
[...] |
249: inline const_pointer data() const { return X; } |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/optional: 897 - 1213 |
-------------------------------------------------------------------------------- |
897: { return this->_M_payload._M_engaged; } |
[...] |
1213: return this->_M_is_engaged() |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 236 - 260 |
-------------------------------------------------------------------------------- |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 107 - 132 |
-------------------------------------------------------------------------------- |
107: for (int k = 0; k < ratios.size(); ++k) |
108: ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.getRefPS(), VP.refPtcl, VP.getDistTableAB(myTableID).getDistRow(k).data())); |
[...] |
126: const int igt = P.GroupID[iat] * NumGroups; |
127: for (int jg = 0; jg < NumGroups; ++jg) |
128: { |
129: const FuncType& f2(*F[igt + jg]); |
130: int iStart = P.first(jg); |
131: int iEnd = P.last(jg); |
132: curUat += f2.evaluateV(iat, iStart, iEnd, dist, DistCompressed.data()); |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_vector.h: 806 - 1056 |
-------------------------------------------------------------------------------- |
806: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
933: return *(this->_M_impl._M_start + __n); |
[...] |
951: return *(this->_M_impl._M_start + __n); |
[...] |
1056: { return _M_data_ptr(this->_M_impl._M_start); } |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | qmcplusplus::WaveFunction::eva[...] | stl_vector.h:806 | exec |
○ | qmcplusplus::NonLocalPP<double[...] | NonLocalPP.hpp:135 | exec |
○ | main.extracted.110 | NewTimer.h:249 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.86 |
CQA speedup if FP arith vectorized | 1.95 |
CQA speedup if fully vectorized | 15.61 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.30 |
Bottlenecks | micro-operation queue |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | refwrap.h:313-313,shared_ptr_base.h:1308-1308,OhmmsVector.h:223-223,OhmmsVector.h:229-229,OhmmsVector.h:249-249,optional:897-897,optional:1213-1213,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127,stl_vector.h:806-806,stl_vector.h:933-933,stl_vector.h:951-951,stl_vector.h:1056-1056,ParticleSet.h:313-313 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 10.00 |
CQA cycles if no scalar integer | 3.50 |
CQA cycles if FP arith vectorized | 5.13 |
CQA cycles if fully vectorized | 0.64 |
Front-end cycles | 10.00 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 2.40 |
P1 cycles | 2.50 |
P2 cycles | 7.67 |
P3 cycles | 7.67 |
P4 cycles | 6.00 |
P5 cycles | 2.30 |
P6 cycles | 2.40 |
P7 cycles | 6.00 |
P8 cycles | 6.00 |
P9 cycles | 6.00 |
P10 cycles | 2.40 |
P11 cycles | 7.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 10.83 |
Stall cycles (UFS) | 1.27 |
Nb insns | 56.00 |
Nb uops | 60.00 |
Nb loads | 23.00 |
Nb stores | 10.00 |
Nb stack references | 12.00 |
FLOP/cycle | 0.10 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 24.10 |
Bytes prefetched | 0.00 |
Bytes loaded | 161.00 |
Bytes stored | 80.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 17.39 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 44.44 |
Vector-efficiency ratio all | 13.65 |
Vector-efficiency ratio load | 8.20 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 16.15 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.86 |
CQA speedup if FP arith vectorized | 1.95 |
CQA speedup if fully vectorized | 15.61 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.30 |
Bottlenecks | micro-operation queue |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | refwrap.h:313-313,shared_ptr_base.h:1308-1308,OhmmsVector.h:223-223,OhmmsVector.h:229-229,OhmmsVector.h:249-249,optional:897-897,optional:1213-1213,TwoBodyJastrowRef.h:107-108,TwoBodyJastrowRef.h:126-127,stl_vector.h:806-806,stl_vector.h:933-933,stl_vector.h:951-951,stl_vector.h:1056-1056,ParticleSet.h:313-313 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 10.00 |
CQA cycles if no scalar integer | 3.50 |
CQA cycles if FP arith vectorized | 5.13 |
CQA cycles if fully vectorized | 0.64 |
Front-end cycles | 10.00 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 2.40 |
P1 cycles | 2.50 |
P2 cycles | 7.67 |
P3 cycles | 7.67 |
P4 cycles | 6.00 |
P5 cycles | 2.30 |
P6 cycles | 2.40 |
P7 cycles | 6.00 |
P8 cycles | 6.00 |
P9 cycles | 6.00 |
P10 cycles | 2.40 |
P11 cycles | 7.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 10.83 |
Stall cycles (UFS) | 1.27 |
Nb insns | 56.00 |
Nb uops | 60.00 |
Nb loads | 23.00 |
Nb stores | 10.00 |
Nb stack references | 12.00 |
FLOP/cycle | 0.10 |
Nb FLOP add-sub | 1.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 24.10 |
Bytes prefetched | 0.00 |
Bytes loaded | 161.00 |
Bytes stored | 80.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 17.39 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 44.44 |
Vector-efficiency ratio all | 13.65 |
Vector-efficiency ratio load | 8.20 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 16.15 |
Path / |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 56 |
nb uops | 60 |
loop length | 273 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 10.00 cycles |
front end | 10.00 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.40 | 2.50 | 7.67 | 7.67 | 6.00 | 2.30 | 2.40 | 6.00 | 6.00 | 6.00 | 2.40 | 7.67 |
cycles | 2.40 | 2.50 | 7.67 | 7.67 | 6.00 | 2.30 | 2.40 | 6.00 | 6.00 | 6.00 | 2.40 | 7.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.83 |
Stall cycles | 1.27 |
LM full (events) | 3.09 |
Front-end | 10.00 |
Dispatch | 7.67 |
Overall L1 | 10.00 |
all | 12% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 28% |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 17% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 44% |
all | 12% |
load | 3% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 8% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x48(%RSP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM9,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4d6610 <exp> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x40(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RAX,%R15,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
CMP %R15,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JBE 42088d <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x8fd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPB $0,0x298(%RDI) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 42089c <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x90c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2a0(%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%R12,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x290(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x248(%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 460d00 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0xa0(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV %R15,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 420010 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R15,%R15,4),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R12,4),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %ECX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x1d0(%R14),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x200(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x268(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x38(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 420141 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1b1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | TwoBodyJastrowRef.h:107-132 |
Module | exec |
nb instructions | 56 |
nb uops | 60 |
loop length | 273 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 10.00 cycles |
front end | 10.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.40 | 2.50 | 7.67 | 7.67 | 6.00 | 2.30 | 2.40 | 6.00 | 6.00 | 6.00 | 2.40 | 7.67 |
cycles | 2.40 | 2.50 | 7.67 | 7.67 | 6.00 | 2.30 | 2.40 | 6.00 | 6.00 | 6.00 | 2.40 | 7.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.83 |
Stall cycles | 1.27 |
LM full (events) | 3.09 |
Front-end | 10.00 |
Dispatch | 7.67 |
Overall L1 | 10.00 |
all | 12% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 28% |
all | 28% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 17% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 44% |
all | 12% |
load | 3% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 16% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 13% |
load | 8% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x48(%RSP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSUBSD %XMM9,%XMM0,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4d6610 <exp> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x40(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,(%RAX,%R15,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
CMP %R15,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JBE 42088d <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x8fd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMPB $0,0x298(%RDI) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 42089c <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x90c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2a0(%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd8(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%RAX,%R12,8),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM0,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x290(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x248(%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 460d00 <_ZNK11qmcplusplus11ParticleSet14getDistTableABEi> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0xa0(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV %R15,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 420010 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x48(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R15,%R15,4),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R12,4),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %ECX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x1d0(%R14),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x200(%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x268(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x18(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x38(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 420141 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1b1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |