Function: generate_chunk(int, global_variables&) [clone ._omp_fn.1] [clone .lto_priv.0] | Module: exec | Source: generate_chunk.cpp:85-123 [...] | Coverage: 0.02% |
---|
Function: generate_chunk(int, global_variables&) [clone ._omp_fn.1] [clone .lto_priv.0] | Module: exec | Source: generate_chunk.cpp:85-123 [...] | Coverage: 0.02% |
---|
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/generate_chunk.cpp: 85 - 123 |
-------------------------------------------------------------------------------- |
85: #pragma omp parallel for simd collapse(2) |
86: for (int j = (0); j < (yrange); j++) { |
87: for (int i = (0); i < (xrange); i++) { |
88: double x_cent = state_xmin[state]; |
89: double y_cent = state_ymin[state]; |
90: if (state_geometry[state] == g_rect) { |
91: if (field.vertexx[i + 1] >= state_xmin[state] && field.vertexx[i] < state_xmax[state]) { |
92: if (field.vertexy[j + 1] >= state_ymin[state] && field.vertexy[j] < state_ymax[state]) { |
93: field.energy0(i, j) = state_energy[state]; |
94: field.density0(i, j) = state_density[state]; |
95: for (int kt = j; kt <= j + 1; ++kt) { |
96: for (int jt = i; jt <= i + 1; ++jt) { |
97: field.xvel0(jt, kt) = state_xvel[state]; |
98: field.yvel0(jt, kt) = state_yvel[state]; |
99: } |
100: } |
101: } |
102: } |
103: } else if (state_geometry[state] == g_circ) { |
104: double radius = |
105: std::sqrt((field.cellx[i] - x_cent) * (field.cellx[i] - x_cent) + (field.celly[j] - y_cent) * (field.celly[j] - y_cent)); |
106: if (radius <= state_radius[state]) { |
107: field.energy0(i, j) = state_energy[state]; |
108: field.density0(i, j) = state_density[state]; |
109: for (int kt = j; kt <= j + 1; ++kt) { |
110: for (int jt = i; jt <= i + 1; ++jt) { |
111: field.xvel0(jt, kt) = state_xvel[state]; |
112: field.yvel0(jt, kt) = state_yvel[state]; |
113: } |
114: } |
115: } |
116: } else if (state_geometry[state] == g_point) { |
117: if (field.vertexx[i] == x_cent && field.vertexy[j] == y_cent) { |
118: field.energy0(i, j) = state_energy[state]; |
119: field.density0(i, j) = state_density[state]; |
120: for (int kt = j; kt <= j + 1; ++kt) { |
121: for (int jt = i; jt <= i + 1; ++jt) { |
122: field.xvel0(jt, kt) = state_xvel[state]; |
123: field.yvel0(jt, kt) = state_yvel[state]; |
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x444200 PUSH %RBP |
0x444201 MOV %RSP,%RBP |
0x444204 PUSH %R15 |
0x444206 PUSH %R14 |
0x444208 PUSH %R13 |
0x44420a PUSH %R12 |
0x44420c PUSH %RBX |
0x44420d SUB $0xa8,%RSP |
0x444214 MOV %RDI,-0x38(%RBP) |
0x444218 MOV 0x5c(%RDI),%R13D |
0x44421c MOV 0x58(%RDI),%R12D |
0x444220 TEST %R13D,%R13D |
0x444223 JLE 4443f0 |
0x444229 TEST %R12D,%R12D |
0x44422c JLE 4443f0 |
0x444232 CALL 4046c0 <omp_get_num_threads@plt> |
0x444237 MOV %EAX,%EBX |
0x444239 CALL 4045b0 <omp_get_thread_num@plt> |
0x44423e XOR %EDX,%EDX |
0x444240 MOV -0x38(%RBP),%R9 |
0x444244 MOV %EAX,%ECX |
0x444246 MOV %R13D,%EAX |
0x444249 IMUL %R12D,%EAX |
0x44424d DIV %EBX |
0x44424f CMP %EDX,%ECX |
0x444251 MOV %EAX,%EDI |
0x444253 JB 444937 |
0x444259 IMUL %EDI,%ECX |
0x44425c LEA (%RCX,%RDX,1),%ESI |
0x44425f LEA (%RDI,%RSI,1),%R14D |
0x444263 CMP %R14D,%ESI |
0x444266 JAE 4443f0 |
0x44426c MOV %ESI,%EAX |
0x44426e XOR %EDX,%EDX |
0x444270 MOVSXD 0x60(%R9),%R10 |
0x444274 MOV 0x20(%R9),%R8 |
0x444278 DIV %R12D |
0x44427b MOV 0x30(%R9),%R15 |
0x44427f MOV 0x48(%R9),%RBX |
0x444283 MOV %R12D,-0x7c(%RBP) |
0x444287 MOV %R8,-0x38(%RBP) |
0x44428b MOV %R15,-0x50(%RBP) |
0x44428f MOV 0x50(%R9),%R15 |
0x444293 MOV %R13D,-0x40(%RBP) |
0x444297 MOV %R9,-0x78(%RBP) |
0x44429b MOV %R14D,-0x80(%RBP) |
0x44429f LEA (,%R10,8),%R11 |
0x4442a7 LEA (,%R10,4),%R8 |
0x4442af MOV 0x18(%R9),%R10 |
0x4442b3 MOV %R12D,%ECX |
0x4442b6 MOV %R11,%R13 |
0x4442b9 MOV -0x38(%RBP),%R11 |
0x4442bd MOV %R10,-0xc0(%RBP) |
0x4442c4 MOV 0x28(%R9),%R10 |
0x4442c8 MOV %R10,-0x60(%RBP) |
0x4442cc MOV %EAX,-0x3c(%RBP) |
0x4442cf MOV 0x10(%R9),%RAX |
0x4442d3 SUB %EDX,%ECX |
0x4442d5 MOV %RAX,-0xb8(%RBP) |
0x4442dc MOV 0x40(%R9),%RAX |
0x4442e0 MOV -0x50(%RBP),%R9 |
0x4442e4 MOV %RAX,-0x48(%RBP) |
0x4442e8 MOVSXD -0x3c(%RBP),%RAX |
0x4442ec MOV %RAX,%R14 |
0x4442ef NOP |
(458) 0x4442f0 CMP %ECX,%EDI |
(458) 0x4442f2 CMOVBE %EDI,%ECX |
(458) 0x4442f5 INCL -0x3c(%RBP) |
(458) 0x4442f8 LEA (%RSI,%RCX,1),%R10D |
(458) 0x4442fc CMP %R10D,%ESI |
(458) 0x4442ff JAE 4443d3 |
(458) 0x444305 MOV 0x8(%RBX),%RSI |
(458) 0x444309 MOV 0x8(%R11),%RDI |
(458) 0x44430d LEA (,%R14,8),%RAX |
(458) 0x444315 MOVSXD %EDX,%RDX |
(458) 0x444318 MOV 0x8(%R9),%R12 |
(458) 0x44431c MOV %RAX,-0x38(%RBP) |
(458) 0x444320 DEC %ECX |
(458) 0x444322 LEA 0x1(%RDX),%RAX |
(458) 0x444326 MOV (%RSI,%R8,1),%ESI |
(458) 0x44432a ADD %R13,%RDI |
(458) 0x44432d ADD %RAX,%RCX |
(458) 0x444330 ADD %R13,%R12 |
(458) 0x444333 MOV %RDI,-0x50(%RBP) |
(458) 0x444337 VMOVSD (%RDI),%XMM15 |
(458) 0x44433b MOV %R12,-0x58(%RBP) |
(458) 0x44433f VMOVSD (%R12),%XMM1 |
(458) 0x444345 CMP $0x1,%ESI |
(458) 0x444348 JNE 444408 |
(458) 0x44434e MOV %R9,-0x68(%RBP) |
(458) 0x444352 MOV 0x258(%R15),%RSI |
(458) 0x444359 MOV %R8,-0x70(%RBP) |
(458) 0x44435d JMP 444363 |
0x44435f NOP |
(460) 0x444360 INC %RAX |
(460) 0x444363 VCOMISD 0x8(%RSI,%RDX,8),%XMM15 |
(460) 0x444369 JA 4443c0 |
(460) 0x44436b MOV -0x60(%RBP),%R8 |
(460) 0x44436f MOV 0x8(%R8),%R12 |
(460) 0x444373 VMOVSD (%R12,%R13,1),%XMM9 |
(460) 0x444379 VCOMISD (%RSI,%RDX,8),%XMM9 |
(460) 0x44437e JBE 4443c0 |
(460) 0x444380 MOV 0x278(%R15),%RDI |
(460) 0x444387 MOV -0x38(%RBP),%R9 |
(460) 0x44438b VCOMISD 0x8(%RDI,%R9,1),%XMM1 |
(460) 0x444392 JA 4443c0 |
(460) 0x444394 MOV -0x78(%RBP),%R8 |
(460) 0x444398 MOV 0x38(%R8),%R12 |
(460) 0x44439c MOV -0x38(%RBP),%R8 |
(460) 0x4443a0 MOV 0x8(%R12),%R9 |
(460) 0x4443a5 VMOVSD (%R9,%R13,1),%XMM10 |
(460) 0x4443ab VCOMISD (%RDI,%R8,1),%XMM10 |
(460) 0x4443b1 JA 444770 |
(460) 0x4443b7 NOPW (%RAX,%RAX,1) |
(460) 0x4443c0 MOV %RAX,%RDX |
(460) 0x4443c3 CMP %RCX,%RAX |
(460) 0x4443c6 JNE 444360 |
(458) 0x4443c8 MOV -0x68(%RBP),%R9 |
(458) 0x4443cc MOV -0x70(%RBP),%R8 |
(458) 0x4443d0 MOV %R10D,%ESI |
(458) 0x4443d3 MOV -0x3c(%RBP),%EAX |
(458) 0x4443d6 INC %R14 |
(458) 0x4443d9 CMP %EAX,-0x40(%RBP) |
(458) 0x4443dc JLE 4443f0 |
(458) 0x4443de MOV -0x80(%RBP),%EDI |
(458) 0x4443e1 MOV -0x7c(%RBP),%ECX |
(458) 0x4443e4 XOR %EDX,%EDX |
(458) 0x4443e6 SUB %ESI,%EDI |
(458) 0x4443e8 JMP 4442f0 |
0x4443ed NOPL (%RAX) |
0x4443f0 ADD $0xa8,%RSP |
0x4443f7 POP %RBX |
0x4443f8 POP %R12 |
0x4443fa POP %R13 |
0x4443fc POP %R14 |
0x4443fe POP %R15 |
0x444400 POP %RBP |
0x444401 RET |
0x444402 NOPW (%RAX,%RAX,1) |
(458) 0x444408 MOVSXD -0x3c(%RBP),%RDI |
(458) 0x44440c MOV %R8,-0xa8(%RBP) |
(458) 0x444413 MOV %R14,%R12 |
(458) 0x444416 MOV %RCX,%R8 |
(458) 0x444419 MOV %R10D,-0x88(%RBP) |
(458) 0x444420 MOV %RDI,-0xb0(%RBP) |
(458) 0x444427 MOV %R11,-0x90(%RBP) |
(458) 0x44442e MOV %R9,-0x98(%RBP) |
(458) 0x444435 MOV %RBX,-0xa0(%RBP) |
(458) 0x44443c JMP 444458 |
0x44443e XCHG %AX,%AX |
(459) 0x444440 CMP $0x3,%ESI |
(459) 0x444443 JE 4445f0 |
(459) 0x444449 MOV %RAX,%RDX |
(459) 0x44444c CMP %R8,%RAX |
(459) 0x44444f JE 444740 |
(459) 0x444455 INC %RAX |
(459) 0x444458 CMP $0x2,%ESI |
(459) 0x44445b JNE 444440 |
(459) 0x44445d MOV 0x238(%R15),%RDI |
(459) 0x444464 MOV -0x38(%RBP),%RBX |
(459) 0x444468 MOV 0x218(%R15),%R9 |
(459) 0x44446f MOV -0x48(%RBP),%RCX |
(459) 0x444473 VMOVSD (%RDI,%RBX,1),%XMM12 |
(459) 0x444478 VMOVSD (%R9,%RDX,8),%XMM10 |
(459) 0x44447e MOV 0x8(%RCX),%R14 |
(459) 0x444482 VSUBSD %XMM1,%XMM12,%XMM13 |
(459) 0x444486 VSUBSD %XMM15,%XMM10,%XMM11 |
(459) 0x44448b VMULSD %XMM13,%XMM13,%XMM14 |
(459) 0x444490 VFMADD132SD %XMM11,%XMM14,%XMM11 |
(459) 0x444495 VSQRTSD %XMM11,%XMM11,%XMM11 |
(459) 0x44449a VCOMISD (%R14,%R13,1),%XMM11 |
(459) 0x4444a0 JA 444449 |
(459) 0x4444a2 MOV -0x78(%RBP),%R11 |
(459) 0x4444a6 MOV 0x30(%R15),%RBX |
(459) 0x4444aa MOV 0x40(%R15),%RDI |
(459) 0x4444ae MOV 0x8(%R11),%R10 |
(459) 0x4444b2 IMUL %R12,%RBX |
(459) 0x4444b6 MOV (%R11),%RCX |
(459) 0x4444b9 MOV (%R15),%R11 |
(459) 0x4444bc MOV 0x8(%R10),%R9 |
(459) 0x4444c0 MOV 0x8(%RCX),%R14 |
(459) 0x4444c4 ADD %RDX,%RBX |
(459) 0x4444c7 IMUL %R12,%R11 |
(459) 0x4444cb MOV 0x10(%R15),%R10 |
(459) 0x4444cf MOV 0xd8(%R15),%RCX |
(459) 0x4444d6 VMOVSD (%R9,%R13,1),%XMM15 |
(459) 0x4444dc MOV -0xb8(%RBP),%R9 |
(459) 0x4444e3 VMOVSD %XMM15,(%RDI,%RBX,8) |
(459) 0x4444e8 MOV 0xa8(%R15),%RDI |
(459) 0x4444ef MOV -0xc0(%RBP),%RBX |
(459) 0x4444f6 ADD %RDX,%R11 |
(459) 0x4444f9 VMOVSD (%R14,%R13,1),%XMM0 |
(459) 0x4444ff MOV 0xe8(%R15),%R14 |
(459) 0x444506 VMOVSD %XMM0,(%R10,%R11,8) |
(459) 0x44450c MOV 0x8(%R9),%R11 |
(459) 0x444510 MOV 0x8(%RBX),%R9 |
(459) 0x444514 MOV %RDI,%RBX |
(459) 0x444517 IMUL %R12,%RBX |
(459) 0x44451b MOV 0xb8(%R15),%R10 |
(459) 0x444522 ADD %R13,%R11 |
(459) 0x444525 ADD %R13,%R9 |
(459) 0x444528 VMOVSD (%R11),%XMM1 |
(459) 0x44452d MOV %RBX,-0x70(%RBP) |
(459) 0x444531 MOV %RCX,%RBX |
(459) 0x444534 IMUL %R12,%RBX |
(459) 0x444538 MOV %RBX,-0x68(%RBP) |
(459) 0x44453c MOV -0x70(%RBP),%RBX |
(459) 0x444540 ADD %RDX,%RBX |
(459) 0x444543 VMOVSD %XMM1,(%R10,%RBX,8) |
(459) 0x444549 MOV -0x68(%RBP),%RBX |
(459) 0x44454d VMOVSD (%R9),%XMM2 |
(459) 0x444552 ADD %RDX,%RBX |
(459) 0x444555 VMOVSD %XMM2,(%R14,%RBX,8) |
(459) 0x44455b MOV -0x70(%RBP),%RBX |
(459) 0x44455f VMOVSD (%R11),%XMM3 |
(459) 0x444564 ADD %RAX,%RBX |
(459) 0x444567 VMOVSD %XMM3,(%R10,%RBX,8) |
(459) 0x44456d VMOVSD (%R9),%XMM4 |
(459) 0x444572 MOV -0x68(%RBP),%RBX |
(459) 0x444576 ADD %RAX,%RBX |
(459) 0x444579 VMOVSD %XMM4,(%R14,%RBX,8) |
(459) 0x44457f MOV -0xb0(%RBP),%RBX |
(459) 0x444586 VMOVSD (%R11),%XMM5 |
(459) 0x44458b IMUL %RBX,%RDI |
(459) 0x44458f IMUL %RBX,%RCX |
(459) 0x444593 LEA (%RDI,%RDX,1),%RBX |
(459) 0x444597 ADD %RAX,%RDI |
(459) 0x44459a VMOVSD %XMM5,(%R10,%RBX,8) |
(459) 0x4445a0 ADD %RCX,%RDX |
(459) 0x4445a3 ADD %RAX,%RCX |
(459) 0x4445a6 VMOVSD (%R9),%XMM6 |
(459) 0x4445ab VMOVSD %XMM6,(%R14,%RDX,8) |
(459) 0x4445b1 MOV %RAX,%RDX |
(459) 0x4445b4 VMOVSD (%R11),%XMM7 |
(459) 0x4445b9 VMOVSD %XMM7,(%R10,%RDI,8) |
(459) 0x4445bf VMOVSD (%R9),%XMM8 |
(459) 0x4445c4 VMOVSD %XMM8,(%R14,%RCX,8) |
(459) 0x4445ca CMP %R8,%RAX |
(459) 0x4445cd JE 444740 |
(459) 0x4445d3 MOV -0x50(%RBP),%R11 |
(459) 0x4445d7 MOV -0x58(%RBP),%R10 |
(459) 0x4445db VMOVSD (%R11),%XMM15 |
(459) 0x4445e0 VMOVSD (%R10),%XMM1 |
(459) 0x4445e5 JMP 444455 |
0x4445ea NOPW (%RAX,%RAX,1) |
(459) 0x4445f0 MOV 0x258(%R15),%RBX |
(459) 0x4445f7 VCOMISD (%RBX,%RDX,8),%XMM15 |
(459) 0x4445fc JNE 444449 |
(459) 0x444602 MOV 0x278(%R15),%RCX |
(459) 0x444609 MOV -0x38(%RBP),%R9 |
(459) 0x44460d VCOMISD (%RCX,%R9,1),%XMM1 |
(459) 0x444613 JNE 444449 |
(459) 0x444619 MOV -0x78(%RBP),%R14 |
(459) 0x44461d MOV 0x30(%R15),%RBX |
(459) 0x444621 MOV 0x40(%R15),%RDI |
(459) 0x444625 MOV 0x8(%R14),%R11 |
(459) 0x444629 IMUL %R12,%RBX |
(459) 0x44462d MOV (%R14),%RCX |
(459) 0x444630 MOV (%R15),%R14 |
(459) 0x444633 MOV 0x8(%R11),%R10 |
(459) 0x444637 MOV 0x8(%RCX),%R9 |
(459) 0x44463b ADD %RDX,%RBX |
(459) 0x44463e IMUL %R12,%R14 |
(459) 0x444642 MOV 0x10(%R15),%R11 |
(459) 0x444646 MOV 0xe8(%R15),%RCX |
(459) 0x44464d VMOVSD (%R10,%R13,1),%XMM0 |
(459) 0x444653 MOV -0xb8(%RBP),%R10 |
(459) 0x44465a VMOVSD %XMM0,(%RDI,%RBX,8) |
(459) 0x44465f ADD %RDX,%R14 |
(459) 0x444662 MOV -0xc0(%RBP),%RDI |
(459) 0x444669 MOV 0xb8(%R15),%RBX |
(459) 0x444670 VMOVSD (%R9,%R13,1),%XMM1 |
(459) 0x444676 MOV 0x8(%R10),%R9 |
(459) 0x44467a MOV 0x8(%RDI),%RDI |
(459) 0x44467e VMOVSD %XMM1,(%R11,%R14,8) |
(459) 0x444684 MOV 0xd8(%R15),%R14 |
(459) 0x44468b MOV 0xa8(%R15),%R11 |
(459) 0x444692 ADD %R13,%R9 |
(459) 0x444695 VMOVSD (%R9),%XMM2 |
(459) 0x44469a ADD %R13,%RDI |
(459) 0x44469d MOV %R14,%R10 |
(459) 0x4446a0 MOV %R11,-0x70(%RBP) |
(459) 0x4446a4 IMUL %R12,%R11 |
(459) 0x4446a8 IMUL %R12,%R10 |
(459) 0x4446ac MOV %R10,-0x68(%RBP) |
(459) 0x4446b0 LEA (%R11,%RDX,1),%R10 |
(459) 0x4446b4 ADD %RAX,%R11 |
(459) 0x4446b7 VMOVSD %XMM2,(%RBX,%R10,8) |
(459) 0x4446bd VMOVSD (%RDI),%XMM3 |
(459) 0x4446c1 MOV -0x68(%RBP),%R10 |
(459) 0x4446c5 ADD %RDX,%R10 |
(459) 0x4446c8 VMOVSD %XMM3,(%RCX,%R10,8) |
(459) 0x4446ce VMOVSD (%R9),%XMM4 |
(459) 0x4446d3 VMOVSD %XMM4,(%RBX,%R11,8) |
(459) 0x4446d9 MOV -0x68(%RBP),%R11 |
(459) 0x4446dd VMOVSD (%RDI),%XMM5 |
(459) 0x4446e1 ADD %RAX,%R11 |
(459) 0x4446e4 VMOVSD %XMM5,(%RCX,%R11,8) |
(459) 0x4446ea MOVSXD -0x3c(%RBP),%R10 |
(459) 0x4446ee MOV -0x70(%RBP),%R11 |
(459) 0x4446f2 VMOVSD (%R9),%XMM6 |
(459) 0x4446f7 IMUL %R10,%R11 |
(459) 0x4446fb IMUL %R10,%R14 |
(459) 0x4446ff LEA (%R11,%RDX,1),%R10 |
(459) 0x444703 ADD %RAX,%R11 |
(459) 0x444706 VMOVSD %XMM6,(%RBX,%R10,8) |
(459) 0x44470c ADD %R14,%RDX |
(459) 0x44470f ADD %RAX,%R14 |
(459) 0x444712 VMOVSD (%RDI),%XMM7 |
(459) 0x444716 VMOVSD %XMM7,(%RCX,%RDX,8) |
(459) 0x44471b MOV %RAX,%RDX |
(459) 0x44471e VMOVSD (%R9),%XMM8 |
(459) 0x444723 VMOVSD %XMM8,(%RBX,%R11,8) |
(459) 0x444729 VMOVSD (%RDI),%XMM9 |
(459) 0x44472d VMOVSD %XMM9,(%RCX,%R14,8) |
(459) 0x444733 CMP %R8,%RAX |
(459) 0x444736 JNE 4445d3 |
(458) 0x44473c NOPL (%RAX) |
(458) 0x444740 MOV -0x88(%RBP),%R10D |
(458) 0x444747 MOV -0x90(%RBP),%R11 |
(458) 0x44474e MOV %R12,%R14 |
(458) 0x444751 MOV -0x98(%RBP),%R9 |
(458) 0x444758 MOV -0xa0(%RBP),%RBX |
(458) 0x44475f MOV -0xa8(%RBP),%R8 |
(458) 0x444766 JMP 4443d0 |
0x44476b NOPL (%RAX,%RAX,1) |
(460) 0x444770 MOV -0x78(%RBP),%R9 |
(460) 0x444774 MOV 0x30(%R15),%R8 |
(460) 0x444778 MOV 0x8(%R9),%RDI |
(460) 0x44477c IMUL %R14,%R8 |
(460) 0x444780 MOV (%R9),%R9 |
(460) 0x444783 MOV 0x8(%RDI),%R12 |
(460) 0x444787 MOV 0x40(%R15),%RDI |
(460) 0x44478b ADD %RDX,%R8 |
(460) 0x44478e VMOVSD (%R12,%R13,1),%XMM11 |
(460) 0x444794 MOV 0x8(%R9),%R12 |
(460) 0x444798 MOV -0xb8(%RBP),%R9 |
(460) 0x44479f VMOVSD %XMM11,(%RDI,%R8,8) |
(460) 0x4447a5 MOV (%R15),%R8 |
(460) 0x4447a8 MOV 0x10(%R15),%RDI |
(460) 0x4447ac VMOVSD (%R12,%R13,1),%XMM12 |
(460) 0x4447b2 MOV 0x8(%R9),%R12 |
(460) 0x4447b6 IMUL %R14,%R8 |
(460) 0x4447ba MOV -0xc0(%RBP),%R9 |
(460) 0x4447c1 ADD %R13,%R12 |
(460) 0x4447c4 MOV %R12,-0xa0(%RBP) |
(460) 0x4447cb ADD %RDX,%R8 |
(460) 0x4447ce VMOVSD %XMM12,(%RDI,%R8,8) |
(460) 0x4447d4 MOV 0xb8(%R15),%R8 |
(460) 0x4447db MOV 0xa8(%R15),%RDI |
(460) 0x4447e2 VMOVSD (%R12),%XMM13 |
(460) 0x4447e8 MOV %R8,-0x90(%RBP) |
(460) 0x4447ef MOV 0x8(%R9),%R8 |
(460) 0x4447f3 MOV 0xe8(%R15),%R9 |
(460) 0x4447fa MOV %RDI,-0xc8(%RBP) |
(460) 0x444801 IMUL %R14,%RDI |
(460) 0x444805 ADD %R13,%R8 |
(460) 0x444808 MOV %R8,-0x88(%RBP) |
(460) 0x44480f MOV -0x90(%RBP),%R8 |
(460) 0x444816 MOV %R9,-0x98(%RBP) |
(460) 0x44481d MOV 0xd8(%R15),%R9 |
(460) 0x444824 LEA (%RDI,%RDX,1),%R12 |
(460) 0x444828 ADD %RAX,%RDI |
(460) 0x44482b VMOVSD %XMM13,(%R8,%R12,8) |
(460) 0x444831 MOV %R9,-0xb0(%RBP) |
(460) 0x444838 IMUL %R14,%R9 |
(460) 0x44483c MOV -0x88(%RBP),%R8 |
(460) 0x444843 VMOVSD (%R8),%XMM14 |
(460) 0x444848 LEA (%R9,%RDX,1),%R12 |
(460) 0x44484c MOV %R9,-0xa8(%RBP) |
(460) 0x444853 MOV -0x98(%RBP),%R9 |
(460) 0x44485a VMOVSD %XMM14,(%R9,%R12,8) |
(460) 0x444860 MOV -0xa0(%RBP),%R12 |
(460) 0x444867 VMOVSD (%R12),%XMM0 |
(460) 0x44486d MOV -0x90(%RBP),%R12 |
(460) 0x444874 VMOVSD %XMM0,(%R12,%RDI,8) |
(460) 0x44487a MOV -0xa8(%RBP),%RDI |
(460) 0x444881 VMOVSD (%R8),%XMM2 |
(460) 0x444886 MOV -0xa0(%RBP),%R8 |
(460) 0x44488d LEA (%RDI,%RAX,1),%R12 |
(460) 0x444891 MOV -0xc8(%RBP),%RDI |
(460) 0x444898 VMOVSD %XMM2,(%R9,%R12,8) |
(460) 0x44489e MOVSXD -0x3c(%RBP),%R12 |
(460) 0x4448a2 MOV -0xb0(%RBP),%R9 |
(460) 0x4448a9 VMOVSD (%R8),%XMM3 |
(460) 0x4448ae MOV -0x90(%RBP),%R8 |
(460) 0x4448b5 IMUL %R12,%RDI |
(460) 0x4448b9 IMUL %R9,%R12 |
(460) 0x4448bd LEA (%RDI,%RDX,1),%R9 |
(460) 0x4448c1 ADD %RAX,%RDI |
(460) 0x4448c4 VMOVSD %XMM3,(%R8,%R9,8) |
(460) 0x4448ca MOV -0x88(%RBP),%R9 |
(460) 0x4448d1 ADD %R12,%RDX |
(460) 0x4448d4 MOV -0x90(%RBP),%R8 |
(460) 0x4448db LEA (%R12,%RAX,1),%R12 |
(460) 0x4448df VMOVSD (%R9),%XMM4 |
(460) 0x4448e4 MOV -0x98(%RBP),%R9 |
(460) 0x4448eb VMOVSD %XMM4,(%R9,%RDX,8) |
(460) 0x4448f1 MOV -0xa0(%RBP),%RDX |
(460) 0x4448f8 VMOVSD (%RDX),%XMM5 |
(460) 0x4448fc MOV %RAX,%RDX |
(460) 0x4448ff VMOVSD %XMM5,(%R8,%RDI,8) |
(460) 0x444905 MOV -0x88(%RBP),%RDI |
(460) 0x44490c VMOVSD (%RDI),%XMM6 |
(460) 0x444910 VMOVSD %XMM6,(%R9,%R12,8) |
(460) 0x444916 CMP %RCX,%RAX |
(460) 0x444919 JE 4443c8 |
(460) 0x44491f MOV -0x50(%RBP),%R12 |
(460) 0x444923 MOV -0x58(%RBP),%R9 |
(460) 0x444927 VMOVSD (%R12),%XMM15 |
(460) 0x44492d VMOVSD (%R9),%XMM1 |
(460) 0x444932 JMP 444360 |
0x444937 INC %EDI |
0x444939 XOR %EDX,%EDX |
0x44493b JMP 444259 |
Path / |
Source file and lines | generate_chunk.cpp:85-123 |
Module | exec |
nb instructions | 82 |
nb uops | 90 |
loop length | 290 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.70 | 8.00 | 7.33 | 7.33 | 10.00 | 3.80 | 3.70 | 10.00 | 10.00 | 10.00 | 3.80 | 7.33 |
cycles | 3.70 | 10.00 | 7.33 | 7.33 | 10.00 | 3.80 | 3.70 | 10.00 | 10.00 | 10.00 | 3.80 | 7.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.48-14.54 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 10.00 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
Vectorization ratios |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
Vector efficiency ratios |
all | 9% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xa8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x5c(%RDI),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R13D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4443f0 <_Z14generate_chunkiR16global_variables._omp_fn.1.lto_priv.0+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R12D,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4443f0 <_Z14generate_chunkiR16global_variables._omp_fn.1.lto_priv.0+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %EBX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JB 444937 <_Z14generate_chunkiR16global_variables._omp_fn.1.lto_priv.0+0x737> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RCX,%RDX,1),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%RSI,1),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4443f0 <_Z14generate_chunkiR16global_variables._omp_fn.1.lto_priv.0+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD 0x60(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R9),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0x30(%R9),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%R9),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%R9),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13D,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R10,4),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R11,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R9),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%R9),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD -0x3c(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0xa8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 444259 <_Z14generate_chunkiR16global_variables._omp_fn.1.lto_priv.0+0x59> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Source file and lines | generate_chunk.cpp:85-123 |
Module | exec |
nb instructions | 82 |
nb uops | 90 |
loop length | 290 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.70 | 8.00 | 7.33 | 7.33 | 10.00 | 3.80 | 3.70 | 10.00 | 10.00 | 10.00 | 3.80 | 7.33 |
cycles | 3.70 | 10.00 | 7.33 | 7.33 | 10.00 | 3.80 | 3.70 | 10.00 | 10.00 | 10.00 | 3.80 | 7.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.48-14.54 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 10.00 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
Vectorization ratios |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
Vector efficiency ratios |
all | 9% |
load | 10% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xa8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x5c(%RDI),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R13D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4443f0 <_Z14generate_chunkiR16global_variables._omp_fn.1.lto_priv.0+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R12D,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4443f0 <_Z14generate_chunkiR16global_variables._omp_fn.1.lto_priv.0+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %EBX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JB 444937 <_Z14generate_chunkiR16global_variables._omp_fn.1.lto_priv.0+0x737> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RCX,%RDX,1),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%RSI,1),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 4443f0 <_Z14generate_chunkiR16global_variables._omp_fn.1.lto_priv.0+0x1f0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD 0x60(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%R9),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0x30(%R9),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%R9),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%R9),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13D,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14D,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R10,4),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R11,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%R9),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R9),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%R9),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD -0x3c(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0xa8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 444259 <_Z14generate_chunkiR16global_variables._omp_fn.1.lto_priv.0+0x59> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼generate_chunk(int, global_variables&) [clone ._omp_fn.1] [clone .lto_priv.0]– | 0.02 | 0.01 |
▼Loop 458 - generate_chunk.cpp:85-123 - exec– | 0 | 0 |
○Loop 460 - context.h:46-69 - exec | 0.02 | 0.01 |
○Loop 459 - generate_chunk.cpp:88-123 - exec | 0 | 0 |