| | | | | | | requested parallelism | walltime sum (s) | nb instances | any sync average per thread time (s) | any wait average per thread time (s) | parallelism overhead (%) | local speedup if perfectly balanced | global speedup if perfectly balanced |
start addr | function name | source location | level | ancestor thread num | invoker | parallel or teams | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 | m1o1 | m1o2 | m1o4 | m1o8 | m1o16 | m1o26 | m1o52 |
libqmckl.so.0.0.0:0x45a0d | qmckl_compute_dtmp_c_hpc | qmckl_jastrow_champ.c:4158 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 23.962 | 12.047 | 6.954 | 3.576 | 1.885 | 1.380 | 1.292 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 0.0 | 4.85 E-3 | 0.672 | 0.264 | 0.163 | 0.108 | 0.131 | 0.0 | 4.83 E-3 | 0.672 | 0.264 | 0.163 | 0.108 | 0.131 | 0 | 0.04 | 9.66 | 7.39 | 8.67 | 7.86 | 10.2 | 1.000 | 1.000 | 1.107 | 1.080 | 1.095 | 1.085 | 1.113 | 1.000 | 1.000 | 1.029 | 1.017 | 1.014 | 1.011 | 1.013 |
libqmckl.so.0.0.0:0x4522a | qmckl_compute_tmp_c_hpc | qmckl_jastrow_champ.c:4088 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 11.532 | 5.788 | 3.033 | 1.569 | 0.804 | 0.548 | 0.444 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 7.48 E-3 | 98.8 E-3 | 65.4 E-3 | 42.5 E-3 | 29.3 E-3 | 49.7 E-3 | 0.0 | 7.46 E-3 | 98.8 E-3 | 65.4 E-3 | 42.5 E-3 | 29.3 E-3 | 49.7 E-3 | 0 | 0.13 | 3.26 | 4.17 | 5.28 | 5.35 | 11.2 | 1.000 | 1.001 | 1.034 | 1.043 | 1.056 | 1.056 | 1.126 | 1.000 | 1.000 | 1.004 | 1.004 | 1.004 | 1.003 | 1.005 |
libqmckl.so.0.0.0:0x42bb5 | qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc | qmckl_jastrow_champ.c:3220 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 8.385 | 4.232 | 3.366 | 1.805 | 1.131 | 0.891 | 0.811 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 0.0 | 10.5 E-3 | 0.669 | 0.239 | 0.175 | 0.119 | 0.144 | 0.0 | 10.5 E-3 | 0.669 | 0.239 | 0.175 | 0.119 | 0.144 | 0 | 0.25 | 19.9 | 13.2 | 15.5 | 13.3 | 17.8 | 1.000 | 1.002 | 1.248 | 1.153 | 1.183 | 1.153 | 1.216 | 1.000 | 1.000 | 1.029 | 1.016 | 1.015 | 1.012 | 1.014 |
libqmckl.so.0.0.0:0x46b17 | qmckl_compute_jastrow_champ_factor_een_gl_hpc | qmckl_jastrow_champ.c:4540 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 6.093 | 3.102 | 1.865 | 1.029 | 0.737 | 0.658 | 0.777 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 0.0 | 4.00 E-3 | 0.205 | 0.121 | 0.115 | 54.7 E-3 | 57.5 E-3 | 0.0 | 3.97 E-3 | 0.205 | 0.121 | 0.115 | 54.7 E-3 | 57.5 E-3 | 0 | 0.13 | 11.0 | 11.7 | 15.7 | 8.31 | 7.40 | 1.000 | 1.001 | 1.123 | 1.133 | 1.186 | 1.091 | 1.080 | 1.000 | 1.000 | 1.009 | 1.008 | 1.010 | 1.005 | 1.006 |
libqmckl.so.0.0.0:0x3d2d9 | qmckl_compute_jastrow_champ_factor_ee_gl_hpc | qmckl_jastrow_champ.c:1652 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 3.955 | 1.982 | 0.992 | 0.517 | 0.280 | 0.162 | 83.7 E-3 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 0.0 | 1.63 E-3 | 1.94 E-3 | 21.2 E-3 | 32.0 E-3 | 8.47 E-3 | 5.34 E-3 | 0.0 | 1.62 E-3 | 1.93 E-3 | 21.2 E-3 | 32.0 E-3 | 8.46 E-3 | 5.33 E-3 | 0 | 0.08 | 0.20 | 4.10 | 11.4 | 5.23 | 6.38 | 1.000 | 1.001 | 1.002 | 1.043 | 1.129 | 1.055 | 1.068 | 1.000 | 1.000 | 1.000 | 1.001 | 1.003 | 1.001 | 1.001 |
libqmckl.so.0.0.0:0x4118b | qmckl_compute_een_rescaled_e_hpc | qmckl_jastrow_champ.c:2991 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 3.289 | 1.688 | 0.894 | 0.501 | 0.315 | 0.264 | 0.226 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 108 E-6 | 7.86 E-3 | 43.1 E-3 | 53.5 E-3 | 47.9 E-3 | 42.2 E-3 | 37.3 E-3 | 25.6 E-6 | 7.79 E-3 | 43.1 E-3 | 53.4 E-3 | 47.8 E-3 | 42.1 E-3 | 37.3 E-3 | 0.00 | 0.47 | 4.83 | 10.7 | 15.2 | 16.0 | 16.5 | 1.000 | 1.005 | 1.051 | 1.120 | 1.179 | 1.190 | 1.198 | 1.000 | 1.000 | 1.002 | 1.003 | 1.004 | 1.004 | 1.004 |
libqmckl.so.0.0.0:0x3cd61 | qmckl_compute_ee_distance_rescaled_gl_hpc | qmckl_jastrow_champ.c:1994 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 1.262 | 0.633 | 0.337 | 0.182 | 0.111 | 88.4 E-3 | 79.3 E-3 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 55.4 E-6 | 566 E-6 | 15.0 E-3 | 13.6 E-3 | 15.6 E-3 | 7.72 E-3 | 8.82 E-3 | 11.3 E-6 | 524 E-6 | 14.9 E-3 | 13.6 E-3 | 15.6 E-3 | 7.68 E-3 | 8.74 E-3 | 0.00 | 0.09 | 4.45 | 7.49 | 14.1 | 8.73 | 11.1 | 1.000 | 1.001 | 1.047 | 1.081 | 1.164 | 1.096 | 1.125 | 1.000 | 1.000 | 1.001 | 1.001 | 1.001 | 1.001 | 1.001 |
libqmckl.so.0.0.0:0x400b9 | qmckl_compute_jastrow_champ_factor_en_gl_hpc | qmckl_jastrow_champ.c:2456 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 0.993 | 0.498 | 0.250 | 0.131 | 70.5 E-3 | 42.0 E-3 | 22.1 E-3 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 50.0 | 0.0 | 463 E-6 | 939 E-6 | 5.82 E-3 | 7.31 E-3 | 2.81 E-3 | 2.10 E-3 | 0.0 | 453 E-6 | 928 E-6 | 5.81 E-3 | 7.30 E-3 | 2.80 E-3 | 2.09 E-3 | 0 | 0.09 | 0.38 | 4.43 | 10.4 | 6.69 | 9.49 | 1.000 | 1.001 | 1.004 | 1.046 | 1.116 | 1.072 | 1.105 | 1.000 | 1.000 | 1.000 | 1.000 | 1.001 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x3ed28 | qmckl_compute_jastrow_champ_factor_en_hpc | qmckl_jastrow_champ.c:2287 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 0.581 | 0.296 | 0.152 | 80.1 E-3 | 43.4 E-3 | 27.2 E-3 | 16.9 E-3 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 1.10 E-3 | 2.62 E-3 | 4.90 E-3 | 5.52 E-3 | 3.14 E-3 | 3.09 E-3 | 0.0 | 1.08 E-3 | 2.60 E-3 | 4.88 E-3 | 5.51 E-3 | 3.12 E-3 | 3.07 E-3 | 0 | 0.37 | 1.73 | 6.11 | 12.7 | 11.5 | 18.3 | 1.000 | 1.004 | 1.018 | 1.065 | 1.146 | 1.131 | 1.224 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x3b875 | qmckl_compute_jastrow_champ_factor_ee_hpc | qmckl_jastrow_champ.c:1432 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 0.341 | 0.348 | 0.348 | 0.343 | 0.358 | 0.368 | 0.422 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 264 E-6 | 716 E-6 | 545 E-6 | 1.79 E-3 | 5.11 E-3 | 10.6 E-3 | 0.0 | 223 E-6 | 689 E-6 | 515 E-6 | 1.76 E-3 | 5.08 E-3 | 10.5 E-3 | 0 | 0.08 | 0.21 | 0.16 | 0.50 | 1.39 | 2.51 | 1.000 | 1.001 | 1.002 | 1.002 | 1.005 | 1.014 | 1.026 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.001 | 1.001 |
libqmckl.so.0.0.0:0x3b3f9 | qmckl_compute_ee_distance_rescaled_hpc | qmckl_jastrow_champ.c:1861 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 11.7 E-3 | 6.07 E-3 | 3.59 E-3 | 2.71 E-3 | 2.72 E-3 | 4.23 E-3 | 32.0 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 2.78 E-6 | 58.4 E-6 | 281 E-6 | 124 E-6 | 172 E-6 | 175 E-6 | 22.5 E-3 | 114 E-9 | 52.7 E-6 | 273 E-6 | 116 E-6 | 164 E-6 | 166 E-6 | 22.5 E-3 | 0.02 | 0.96 | 7.83 | 4.56 | 6.34 | 4.13 | 70.4 | 1.000 | 1.010 | 1.085 | 1.048 | 1.068 | 1.043 | 3.379 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.002 |
libqmckl.so.0.0.0:0x3f88e | qmckl_compute_en_distance_rescaled_gl_hpc | qmckl_jastrow_champ.c:2819 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 5.98 E-3 | 3.04 E-3 | 1.60 E-3 | 910 E-6 | 554 E-6 | 472 E-6 | 517 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 820 E-9 | 30.0 E-6 | 54.6 E-6 | 85.0 E-6 | 83.8 E-6 | 65.3 E-6 | 70.4 E-6 | 199 E-9 | 29.0 E-6 | 53.6 E-6 | 84.0 E-6 | 82.9 E-6 | 64.6 E-6 | 69.5 E-6 | 0.01 | 0.99 | 3.42 | 9.34 | 15.1 | 13.9 | 13.6 | 1.000 | 1.010 | 1.035 | 1.103 | 1.178 | 1.161 | 1.158 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x3e605 | qmckl_compute_en_distance_rescaled_hpc | qmckl_jastrow_champ.c:2663 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 2.89 E-3 | 1.47 E-3 | 758 E-6 | 414 E-6 | 250 E-6 | 183 E-6 | 479 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 806 E-9 | 9.14 E-6 | 12.5 E-6 | 22.4 E-6 | 32.9 E-6 | 18.7 E-6 | 332 E-6 | 64.0 E-9 | 8.59 E-6 | 11.8 E-6 | 21.7 E-6 | 32.3 E-6 | 18.0 E-6 | 332 E-6 | 0.03 | 0.62 | 1.65 | 5.41 | 13.2 | 10.2 | 69.3 | 1.000 | 1.006 | 1.017 | 1.057 | 1.152 | 1.113 | 3.263 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |