Function: _ZN6Kripke12ParallelComm12testRecievesEv | Module: exec | Source: ParallelComm.cpp:214-251 [...] | Coverage: 0.01% |
---|
Function: _ZN6Kripke12ParallelComm12testRecievesEv | Module: exec | Source: ParallelComm.cpp:214-251 [...] | Coverage: 0.01% |
---|
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_construct.h: 119 - 119 |
-------------------------------------------------------------------------------- |
119: ::new((void*)__p) _Tp(std::forward<_Args>(__args)...); |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/ext/new_allocator.h: 127 - 145 |
-------------------------------------------------------------------------------- |
127: return static_cast<_Tp*>(::operator new(__n * sizeof(_Tp))); |
[...] |
145: ::operator delete(__p |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 919 - 1046 |
-------------------------------------------------------------------------------- |
919: { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } |
[...] |
1046: return *(this->_M_impl._M_start + __n); |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_algobase.h: 430 - 1115 |
-------------------------------------------------------------------------------- |
430: if (_Num) |
431: __builtin_memmove(__result, __first, sizeof(_Tp) * _Num); |
[...] |
911: for (; __first != __last; ++__first) |
912: *__first = __value; |
[...] |
1115: if (__n <= 0) |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_iterator.h: 1028 - 1182 |
-------------------------------------------------------------------------------- |
1028: : _M_current(__i) { } |
[...] |
1091: { return __normal_iterator(_M_current + __n); } |
[...] |
1182: { return __lhs.base() != __rhs.base(); } |
/home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/build/Kripke/src/Kripke/ParallelComm.cpp: 214 - 251 |
-------------------------------------------------------------------------------- |
214: void ParallelComm::testRecieves(void){ |
215: #ifdef KRIPKE_USE_MPI |
216: // Check for any recv requests that have completed |
217: int num_requests = recv_requests.size(); |
218: bool done = false; |
219: while(!done && num_requests > 0){ |
220: // Create array of status variables |
221: std::vector<MPI_Status> recv_status(num_requests); |
222: |
223: // Ask if either one or none of the recvs have completed? |
224: int index; // this will be the index of request that completed |
225: int complete_flag; // this is set to TRUE if somthing completed |
226: MPI_Testany(num_requests, &recv_requests[0], &index, &complete_flag, &recv_status[0]); |
227: |
228: if(complete_flag != 0){ |
229: |
230: // get subdomain that this completed for |
231: int sdom_id = recv_subdomains[index]; |
232: |
233: // remove the request from the list |
234: recv_requests.erase(recv_requests.begin()+index); |
235: recv_subdomains.erase(recv_subdomains.begin()+index); |
236: num_requests --; |
237: |
238: // decrement the dependency count for that subdomain |
239: for(size_t i = 0;i < queue_sdom_ids.size();++ i){ |
240: if(queue_sdom_ids[i] == sdom_id){ |
241: queue_depends[i] --; |
[...] |
251: } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/vector.tcc: 174 - 176 |
-------------------------------------------------------------------------------- |
174: if (__position + 1 != end()) |
175: _GLIBCXX_MOVE3(__position + 1, end(), __position); |
176: --this->_M_impl._M_finish; |
0x4987b0 PUSH %RBP |
0x4987b1 MOV %RSP,%RBP |
0x4987b4 PUSH %R15 |
0x4987b6 PUSH %R14 |
0x4987b8 PUSH %R13 |
0x4987ba PUSH %R12 |
0x4987bc PUSH %RBX |
0x4987bd SUB $0x18,%RSP |
0x4987c1 MOV 0x30(%RDI),%R12 |
0x4987c5 SUB 0x28(%RDI),%R12 |
0x4987c9 SHR $0x2,%R12 |
0x4987cd TEST %R12D,%R12D |
0x4987d0 JLE 498b4f |
0x4987d6 MOV %RDI,%RBX |
0x4987d9 XOR %R14D,%R14D |
0x4987dc NOPL (%RAX) |
(1451) 0x4987e0 MOV %R14,-0x30(%RBP) |
(1451) 0x4987e4 MOV %R12D,%R14D |
(1451) 0x4987e7 LEA (,%R14,4),%RAX |
(1451) 0x4987ef LEA (%RAX,%RAX,4),%R15 |
(1451) 0x4987f3 MOV %R15,%RDI |
(1451) 0x4987f6 CALL 403260 <_Znwm@plt> |
(1451) 0x4987fb MOV %RAX,%R13 |
(1451) 0x4987fe VPXOR %XMM0,%XMM0,%XMM0 |
(1451) 0x498802 VMOVDQU %XMM0,(%RAX) |
(1451) 0x498806 MOVL $0,0x10(%RAX) |
(1451) 0x49880d CMP $0x1,%R12D |
(1451) 0x498811 JE 49894a |
(1451) 0x498817 DEC %R14 |
(1451) 0x49881a CMP $0x8,%R14 |
(1451) 0x49881e JB 4988d8 |
(1451) 0x498824 MOV %R14,%RAX |
(1451) 0x498827 SHR $0x3,%RAX |
(1451) 0x49882b LEA 0x14(%R13),%RCX |
(1451) 0x49882f NOP |
(1457) 0x498830 MOV 0x10(%R13),%EDX |
(1457) 0x498834 MOV %EDX,0x10(%RCX) |
(1457) 0x498837 VMOVUPS (%R13),%XMM0 |
(1457) 0x49883d VMOVUPS %XMM0,(%RCX) |
(1457) 0x498841 MOV 0x10(%R13),%EDX |
(1457) 0x498845 MOV %EDX,0x24(%RCX) |
(1457) 0x498848 VMOVUPS (%R13),%XMM0 |
(1457) 0x49884e VMOVUPS %XMM0,0x14(%RCX) |
(1457) 0x498853 MOV 0x10(%R13),%EDX |
(1457) 0x498857 MOV %EDX,0x38(%RCX) |
(1457) 0x49885a VMOVUPS (%R13),%XMM0 |
(1457) 0x498860 VMOVUPS %XMM0,0x28(%RCX) |
(1457) 0x498865 MOV 0x10(%R13),%EDX |
(1457) 0x498869 MOV %EDX,0x4c(%RCX) |
(1457) 0x49886c VMOVUPS (%R13),%XMM0 |
(1457) 0x498872 VMOVUPS %XMM0,0x3c(%RCX) |
(1457) 0x498877 MOV 0x10(%R13),%EDX |
(1457) 0x49887b MOV %EDX,0x60(%RCX) |
(1457) 0x49887e VMOVUPS (%R13),%XMM0 |
(1457) 0x498884 VMOVUPS %XMM0,0x50(%RCX) |
(1457) 0x498889 VMOVUPS (%R13),%XMM0 |
(1457) 0x49888f VMOVUPS %XMM0,0x64(%RCX) |
(1457) 0x498894 MOV 0x10(%R13),%EDX |
(1457) 0x498898 MOV %EDX,0x74(%RCX) |
(1457) 0x49889b VMOVUPS (%R13),%XMM0 |
(1457) 0x4988a1 VMOVUPS %XMM0,0x78(%RCX) |
(1457) 0x4988a6 MOV 0x10(%R13),%EDX |
(1457) 0x4988aa MOV %EDX,0x88(%RCX) |
(1457) 0x4988b0 MOV 0x10(%R13),%EDX |
(1457) 0x4988b4 MOV %EDX,0x9c(%RCX) |
(1457) 0x4988ba VMOVDQU (%R13),%XMM0 |
(1457) 0x4988c0 VMOVDQU %XMM0,0x8c(%RCX) |
(1457) 0x4988c8 ADD $0xa0,%RCX |
(1457) 0x4988cf DEC %RAX |
(1457) 0x4988d2 JNE 498830 |
(1451) 0x4988d8 MOV %R14,%RCX |
(1451) 0x4988db AND $-0x8,%RCX |
(1451) 0x4988df LEA -0x14(%R15),%RDX |
(1451) 0x4988e3 MOV $-0x3333333333333333,%RAX |
(1451) 0x4988ed MULX %RAX,%RAX,%RAX |
(1451) 0x4988f2 SHR $0x4,%RAX |
(1451) 0x4988f6 CMP %RAX,%RCX |
(1451) 0x4988f9 JAE 49894a |
(1451) 0x4988fb ADD $-0x28,%R15 |
(1451) 0x4988ff MOV %R15,%RDX |
(1451) 0x498902 MOV $-0x3333333333333333,%RAX |
(1451) 0x49890c MULX %RAX,%RAX,%RAX |
(1451) 0x498911 SHR $0x4,%RAX |
(1451) 0x498915 SUB %RCX,%RAX |
(1451) 0x498918 INC %RAX |
(1451) 0x49891b SHR $0x3,%R14 |
(1451) 0x49891f LEA (%R14,%R14,4),%RCX |
(1451) 0x498923 SAL $0x5,%RCX |
(1451) 0x498927 LEA 0x14(%R13,%RCX,1),%RCX |
(1451) 0x49892c NOPL (%RAX) |
(1456) 0x498930 MOV 0x10(%R13),%EDX |
(1456) 0x498934 MOV %EDX,0x10(%RCX) |
(1456) 0x498937 VMOVDQU (%R13),%XMM0 |
(1456) 0x49893d VMOVDQU %XMM0,(%RCX) |
(1456) 0x498941 ADD $0x14,%RCX |
(1456) 0x498945 DEC %RAX |
(1456) 0x498948 JNE 498930 |
(1451) 0x49894a MOV 0x28(%RBX),%RSI |
(1451) 0x49894e MOV %R12D,%EDI |
(1451) 0x498951 LEA -0x34(%RBP),%RDX |
(1451) 0x498955 LEA -0x38(%RBP),%RCX |
(1451) 0x498959 MOV %R13,%R8 |
(1451) 0x49895c CALL 4033c0 <MPI_Testany@plt> |
(1451) 0x498961 MOV $0x1,%EAX |
(1451) 0x498966 CMPL $0,-0x38(%RBP) |
(1451) 0x49896a JE 4989b0 |
(1451) 0x49896c MOVSXD -0x34(%RBP),%RAX |
(1451) 0x498970 MOV 0x40(%RBX),%RCX |
(1451) 0x498974 MOV (%RCX,%RAX,4),%R15D |
(1451) 0x498978 MOV 0x28(%RBX),%R8 |
(1451) 0x49897c MOV 0x30(%RBX),%RDI |
(1451) 0x498980 LEA 0x4(%R8,%RAX,4),%RSI |
(1451) 0x498985 CMP %RDI,%RSI |
(1451) 0x498988 JE 4989bb |
(1451) 0x49898a MOV %RDI,%RDX |
(1451) 0x49898d SUB %RSI,%RDX |
(1451) 0x498990 JE 4989b8 |
(1451) 0x498992 LEA (%R8,%RAX,4),%RDI |
(1451) 0x498996 CALL 4034a0 <memmove@plt> |
(1451) 0x49899b MOV 0x30(%RBX),%RSI |
(1451) 0x49899f MOV 0x40(%RBX),%RCX |
(1451) 0x4989a3 MOVSXD -0x34(%RBP),%RAX |
(1451) 0x4989a7 JMP 4989bb |
0x4989a9 NOPL (%RAX) |
(1451) 0x4989b0 MOV %EAX,%R14D |
(1451) 0x4989b3 JMP 498b35 |
(1451) 0x4989b8 MOV %RDI,%RSI |
(1451) 0x4989bb ADD $-0x4,%RSI |
(1451) 0x4989bf MOV %RSI,0x30(%RBX) |
(1451) 0x4989c3 LEA 0x4(%RCX,%RAX,4),%RSI |
(1451) 0x4989c8 MOV 0x48(%RBX),%RDI |
(1451) 0x4989cc CMP %RDI,%RSI |
(1451) 0x4989cf JE 4989f0 |
(1451) 0x4989d1 MOV %RDI,%RDX |
(1451) 0x4989d4 SUB %RSI,%RDX |
(1451) 0x4989d7 JE 4989f3 |
(1451) 0x4989d9 LEA (%RCX,%RAX,4),%RDI |
(1451) 0x4989dd CALL 4034a0 <memmove@plt> |
(1451) 0x4989e2 MOV 0x48(%RBX),%RDI |
(1451) 0x4989e6 JMP 4989f3 |
0x4989e8 NOPL (%RAX,%RAX,1) |
(1451) 0x4989f0 MOV %RSI,%RDI |
(1451) 0x4989f3 ADD $-0x4,%RDI |
(1451) 0x4989f7 MOV %RDI,0x48(%RBX) |
(1451) 0x4989fb DEC %R12D |
(1451) 0x4989fe MOV 0x58(%RBX),%RAX |
(1451) 0x498a02 MOV 0x60(%RBX),%RCX |
(1451) 0x498a06 SUB %RAX,%RCX |
(1451) 0x498a09 JE 498b31 |
(1451) 0x498a0f SAR $0x2,%RCX |
(1451) 0x498a13 CMP $0x1,%RCX |
(1451) 0x498a17 ADC $0,%RCX |
(1451) 0x498a1b MOV %EAX,%EDX |
(1451) 0x498a1d AND $0x3f,%EDX |
(1451) 0x498a20 MOV $0x40,%EDI |
(1451) 0x498a25 SUB %EDX,%EDI |
(1451) 0x498a27 SHR $0x2,%EDI |
(1451) 0x498a2a CMP %RDI,%RCX |
(1451) 0x498a2d MOV %RDI,%RDX |
(1451) 0x498a30 CMOVB %RCX,%RDX |
(1451) 0x498a34 TEST %RDX,%RDX |
(1451) 0x498a37 JE 498aa0 |
(1451) 0x498a39 CMP $0x1,%RDX |
(1451) 0x498a3d JE 498a6e |
(1451) 0x498a3f MOV %RDX,%R8 |
(1451) 0x498a42 SHR $0x1,%R8 |
(1451) 0x498a45 MOV $0x1,%ESI |
(1451) 0x498a4a NOPW (%RAX,%RAX,1) |
(1455) 0x498a50 CMP %R15D,-0x4(%RAX,%RSI,4) |
(1455) 0x498a55 JE 498b10 |
(1455) 0x498a5b CMP %R15D,(%RAX,%RSI,4) |
(1455) 0x498a5f JE 498b2a |
(1455) 0x498a65 ADD $0x2,%RSI |
(1455) 0x498a69 DEC %R8 |
(1455) 0x498a6c JNE 498a50 |
(1451) 0x498a6e MOV %EDX,%ESI |
(1451) 0x498a70 AND $0x1e,%ESI |
(1451) 0x498a73 CMP %RDX,%RSI |
(1451) 0x498a76 JAE 498aa0 |
(1451) 0x498a78 NOPL (%RAX,%RAX,1) |
(1454) 0x498a80 CMP %R15D,(%RAX,%RSI,4) |
(1454) 0x498a84 JE 498b2a |
(1454) 0x498a8a INC %RSI |
(1454) 0x498a8d CMP %RSI,%RDX |
(1454) 0x498a90 JNE 498a80 |
(1451) 0x498a92 NOPW %CS:(%RAX,%RAX,1) |
(1451) 0x498aa0 CMP %RDI,%RCX |
(1451) 0x498aa3 JBE 498b31 |
(1451) 0x498aa9 MOV %RCX,%RDI |
(1451) 0x498aac SUB %RDX,%RDI |
(1451) 0x498aaf MOV %RDI,%RSI |
(1451) 0x498ab2 AND $-0x10,%RSI |
(1451) 0x498ab6 JE 498aee |
(1451) 0x498ab8 LEA -0x1(%RSI),%R9 |
(1451) 0x498abc VPBROADCASTD %R15D,%YMM0 |
(1451) 0x498ac2 LEA (%RAX,%RDX,4),%R10 |
(1451) 0x498ac6 XOR %R8D,%R8D |
(1451) 0x498ac9 NOPL (%RAX) |
(1453) 0x498ad0 VPCMPEQD (%R10,%R8,4),%YMM0,%K0 |
(1453) 0x498ad7 VPCMPEQD 0x20(%R10,%R8,4),%YMM0,%K1 |
(1453) 0x498adf KORTESTB %K1,%K0 |
(1453) 0x498ae3 JNE 498b15 |
(1453) 0x498ae5 ADD $0x10,%R8 |
(1453) 0x498ae9 CMP %R9,%R8 |
(1453) 0x498aec JBE 498ad0 |
(1451) 0x498aee CMP %RDI,%RSI |
(1451) 0x498af1 JAE 498b31 |
(1451) 0x498af3 ADD %RSI,%RDX |
(1451) 0x498af6 MOV %RDX,%RSI |
(1451) 0x498af9 NOPL (%RAX) |
(1452) 0x498b00 CMP %R15D,(%RAX,%RSI,4) |
(1452) 0x498b04 JE 498b2a |
(1452) 0x498b06 INC %RSI |
(1452) 0x498b09 CMP %RSI,%RCX |
(1452) 0x498b0c JNE 498b00 |
(1451) 0x498b0e JMP 498b31 |
(1451) 0x498b10 DEC %RSI |
(1451) 0x498b13 JMP 498b2a |
(1451) 0x498b15 KUNPCKBW %K0,%K1,%K0 |
(1451) 0x498b19 KMOVD %K0,%EAX |
(1451) 0x498b1d TZCNT %EAX,%EAX |
(1451) 0x498b21 ADD %R8,%RDX |
(1451) 0x498b24 ADD %RAX,%RDX |
(1451) 0x498b27 MOV %RDX,%RSI |
(1451) 0x498b2a MOV 0x70(%RBX),%RAX |
(1451) 0x498b2e DECL (%RAX,%RSI,4) |
(1451) 0x498b31 MOV -0x30(%RBP),%R14 |
(1451) 0x498b35 MOV %R13,%RDI |
(1451) 0x498b38 VZEROUPPER |
(1451) 0x498b3b CALL 403250 <_ZdlPv@plt> |
(1451) 0x498b40 TEST $0x1,%R14B |
(1451) 0x498b44 JNE 498b4f |
(1451) 0x498b46 TEST %R12D,%R12D |
(1451) 0x498b49 JG 4987e0 |
0x498b4f ADD $0x18,%RSP |
0x498b53 POP %RBX |
0x498b54 POP %R12 |
0x498b56 POP %R13 |
0x498b58 POP %R14 |
0x498b5a POP %R15 |
0x498b5c POP %RBP |
0x498b5d RET |
0x498b5e MOV %RAX,%RBX |
0x498b61 MOV %R13,%RDI |
0x498b64 CALL 403250 <_ZdlPv@plt> |
0x498b69 MOV %RBX,%RDI |
0x498b6c CALL 4034f0 <_Unwind_Resume@plt> |
0x498b71 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | ParallelComm.cpp:214-251 |
Module | exec |
nb instructions | 32 |
nb uops | 34 |
loop length | 112 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 5.67 cycles |
front end | 5.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 0.80 | 3.00 | 3.00 | 4.00 | 0.60 | 1.50 | 4.00 | 4.00 | 4.00 | 0.60 | 3.00 |
cycles | 1.50 | 0.80 | 3.00 | 3.00 | 4.00 | 0.60 | 1.50 | 4.00 | 4.00 | 4.00 | 0.60 | 3.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.43-5.44 |
Stall cycles | 0.00 |
Front-end | 5.67 |
Dispatch | 4.00 |
Overall L1 | 5.67 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x18,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB 0x28(%RDI),%R12 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
SHR $0x2,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
TEST %R12D,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 498b4f <_ZN6Kripke12ParallelComm12testRecievesEv+0x39f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x18,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403250 <_ZdlPv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4034f0 <_Unwind_Resume@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | ParallelComm.cpp:214-251 |
Module | exec |
nb instructions | 32 |
nb uops | 34 |
loop length | 112 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 5.67 cycles |
front end | 5.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 0.80 | 3.00 | 3.00 | 4.00 | 0.60 | 1.50 | 4.00 | 4.00 | 4.00 | 0.60 | 3.00 |
cycles | 1.50 | 0.80 | 3.00 | 3.00 | 4.00 | 0.60 | 1.50 | 4.00 | 4.00 | 4.00 | 0.60 | 3.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.43-5.44 |
Stall cycles | 0.00 |
Front-end | 5.67 |
Dispatch | 4.00 |
Overall L1 | 5.67 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x18,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB 0x28(%RDI),%R12 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
SHR $0x2,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
TEST %R12D,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 498b4f <_ZN6Kripke12ParallelComm12testRecievesEv+0x39f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x18,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403250 <_ZdlPv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4034f0 <_Unwind_Resume@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Run 2x1 | Number processes: 2; Number nodes: 1; Number processes per node: 2; Run Command: <executable> --groups 1024 --procs 2,1,1; MPI Command: mpirun -np <number_processes>; Dataset: (none given); Run Directory: /home/eoseret/qaas_runs_CPU_9468/171-147-9160/intel/Kripke/run/oneview_runs/compilers/icx_5/oneview_run_1711486959; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads; OMP_PROC_BIND: spread; OMP_NUM_THREADS: 1 |
---|---|
Run 2x2 | OMP_NUM_THREADS: 2; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads; OMP_PROC_BIND: spread |
Run 2x4 | OMP_NUM_THREADS: 4; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads; OMP_PROC_BIND: spread |
Run 2x8 | OMP_NUM_THREADS: 8; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads; OMP_PROC_BIND: spread |
Run 2x16 | OMP_NUM_THREADS: 16; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads; OMP_PROC_BIND: spread |
Run 2x32 | OMP_NUM_THREADS: 32; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads; OMP_PROC_BIND: spread |
Run 2x48 | OMP_NUM_THREADS: 48; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads; OMP_PROC_BIND: spread |
(2x1) Efficiency | (2x1) Potential Speed-Up (%) | (2x2) Efficiency | (2x2) Potential Speed-Up (%) | (2x4) Efficiency | (2x4) Potential Speed-Up (%) | (2x8) Efficiency | (2x8) Potential Speed-Up (%) | (2x16) Efficiency | (2x16) Potential Speed-Up (%) | (2x32) Efficiency | (2x32) Potential Speed-Up (%) | (2x48) Efficiency | (2x48) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 1.67 | -0 | 2.5 | -0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
2x1 | 1 | 1 | 1 | 1 | 0.2 | 0.01 |
2x2 | 1 | 1.67 | 3.33 | 2 | 0.12 | 0 |
2x4 | 1 | 2.5 | 10 | 4 | 0.07 | 0 |
2x8 | 1 | 1 | 1 | 8 | 0.06 | 0 |
2x16 | 1 | 1 | 1 | 16 | 0.01 | 0 |
2x32 | 2 | 1 | 1 | 32 | 0.01 | 0 |
2x48 | 1 | 1 | 1 | 48 | 0.07 | 0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN6Kripke12ParallelComm12testRecievesEv– | 0.01 | 0.1 |
▼Loop 1451 - ParallelComm.cpp:219-241 - exec– | 0 | 0.05 |
○Loop 1453 - ParallelComm.cpp:239-240 - exec | 0 | 0 |
○Loop 1454 - ParallelComm.cpp:239-240 - exec | 0 | 0 |
○Loop 1452 - ParallelComm.cpp:239-240 - exec | 0 | 0 |
○Loop 1457 - stl_algobase.h:911-912 - exec | 0 | 0 |
○Loop 1456 - stl_algobase.h:911-912 - exec | 0 | 0.1 |
○Loop 1455 - ParallelComm.cpp:239-240 - exec | 0 | 0 |