| | | | | | | requested parallelism | walltime sum (s) | nb instances | any sync average per thread time (s) | any wait average per thread time (s) | parallelism overhead (%) | local speedup if perfectly balanced | global speedup if perfectly balanced |
start addr | function name | source location | level | ancestor thread num | invoker | parallel or teams | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x48 |
exec:0x40d25b | ljForce | ljForce.c:172 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 247.925 | 124.069 | 62.167 | 31.388 | 15.848 | 9.999 | 7.920 | 101 | 101 | 101 | 101 | 101 | 101 | 101 | 81.1 E-6 | 0.117 | 0.135 | 0.348 | 0.298 | 0.923 | 0.937 | 30.1 E-6 | 0.117 | 0.135 | 0.348 | 0.298 | 0.923 | 0.937 | 0.00 | 0.09 | 0.22 | 1.11 | 1.88 | 9.23 | 11.8 | 1.000 | 1.001 | 1.002 | 1.011 | 1.019 | 1.102 | 1.134 | 1.000 | 1.001 | 1.002 | 1.009 | 1.014 | 1.062 | 1.069 |
exec:0x4110be | timestep | timestep.c:47 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 4.994 | 2.574 | 1.292 | 0.707 | 0.503 | 0.495 | 0.686 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 45.7 E-3 | 45.6 E-3 | 33.6 E-3 | 50.4 E-3 | 0.129 | 0.197 | 0.0 | 45.7 E-3 | 45.6 E-3 | 33.6 E-3 | 50.4 E-3 | 0.129 | 0.197 | 0 | 1.77 | 3.53 | 4.75 | 10.0 | 26.0 | 28.6 | 1.000 | 1.018 | 1.037 | 1.050 | 1.111 | 1.351 | 1.401 | 1.000 | 1.000 | 1.001 | 1.001 | 1.002 | 1.008 | 1.014 |
exec:0x40d1cf | ljForce | ljForce.c:157 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 3.638 | 2.090 | 1.060 | 0.558 | 0.333 | 0.242 | 0.249 | 101 | 101 | 101 | 101 | 101 | 101 | 101 | 0.0 | 0.112 | 49.3 E-3 | 42.3 E-3 | 44.5 E-3 | 19.2 E-3 | 39.2 E-3 | 0.0 | 0.112 | 49.3 E-3 | 42.3 E-3 | 44.5 E-3 | 19.2 E-3 | 39.1 E-3 | 0 | 5.37 | 4.65 | 7.57 | 13.3 | 7.96 | 15.7 | 1.000 | 1.057 | 1.049 | 1.082 | 1.154 | 1.087 | 1.186 | 1.000 | 1.001 | 1.001 | 1.001 | 1.002 | 1.001 | 1.003 |
exec:0x411051 | timestep | timestep.c:43 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 1.723 | 0.874 | 0.422 | 0.251 | 0.221 | 0.223 | 0.343 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 20.5 E-3 | 10.8 E-3 | 13.2 E-3 | 15.9 E-3 | 49.1 E-3 | 0.112 | 0.0 | 20.5 E-3 | 10.8 E-3 | 13.2 E-3 | 15.9 E-3 | 49.1 E-3 | 0.112 | 0 | 2.34 | 2.55 | 5.25 | 6.82 | 21.9 | 32.5 | 1.000 | 1.024 | 1.026 | 1.055 | 1.073 | 1.280 | 1.483 | 1.000 | 1.000 | 1.000 | 1.000 | 1.001 | 1.003 | 1.008 |
exec:0x411124 | timestep | timestep.c:71 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 1.493 | 0.777 | 0.398 | 0.230 | 0.174 | 0.187 | 0.194 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 15.6 E-3 | 11.1 E-3 | 8.92 E-3 | 13.4 E-3 | 33.6 E-3 | 46.7 E-3 | 0.0 | 15.5 E-3 | 11.1 E-3 | 8.91 E-3 | 13.4 E-3 | 33.6 E-3 | 46.7 E-3 | 0 | 2.00 | 2.79 | 3.88 | 7.73 | 18.0 | 24.0 | 1.000 | 1.020 | 1.029 | 1.040 | 1.084 | 1.220 | 1.316 | 1.000 | 1.000 | 1.000 | 1.000 | 1.001 | 1.002 | 1.003 |
exec:0x41100d | timestep | timestep.c:39 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 1.490 | 0.764 | 0.376 | 0.223 | 0.168 | 0.183 | 0.182 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 11.9 E-3 | 5.41 E-3 | 6.86 E-3 | 12.4 E-3 | 35.2 E-3 | 46.1 E-3 | 0.0 | 11.9 E-3 | 5.40 E-3 | 6.85 E-3 | 12.4 E-3 | 35.1 E-3 | 46.1 E-3 | 0 | 1.56 | 1.44 | 3.08 | 7.36 | 19.2 | 25.3 | 1.000 | 1.016 | 1.015 | 1.032 | 1.079 | 1.238 | 1.339 | 1.000 | 1.000 | 1.000 | 1.000 | 1.001 | 1.002 | 1.003 |
exec:0x40a653 | setTemperature | initAtoms.c:151 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 0.180 | 90.5 E-3 | 45.5 E-3 | 23.6 E-3 | 18.7 E-3 | 10.4 E-3 | 9.56 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 242 E-6 | 205 E-6 | 335 E-6 | 4.16 E-3 | 811 E-6 | 705 E-6 | 0.0 | 242 E-6 | 205 E-6 | 335 E-6 | 4.16 E-3 | 811 E-6 | 705 E-6 | 0 | 0.27 | 0.45 | 1.42 | 22.3 | 7.79 | 7.37 | 1.000 | 1.003 | 1.005 | 1.014 | 1.287 | 1.084 | 1.080 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x411186 | timestep | timestep.c:120 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 93.0 E-3 | 48.5 E-3 | 24.6 E-3 | 13.5 E-3 | 9.11 E-3 | 11.1 E-3 | 10.9 E-3 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 5.60 E-6 | 696 E-6 | 679 E-6 | 800 E-6 | 1.11 E-3 | 2.18 E-3 | 3.14 E-3 | 2.24 E-6 | 674 E-6 | 645 E-6 | 790 E-6 | 1.10 E-3 | 2.17 E-3 | 3.13 E-3 | 0.01 | 1.44 | 2.76 | 5.87 | 11.9 | 19.5 | 28.8 | 1.000 | 1.015 | 1.028 | 1.062 | 1.135 | 1.242 | 1.404 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40a74a | randomDisplacements | initAtoms.c:205 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 63.7 E-3 | 36.1 E-3 | 18.4 E-3 | 9.53 E-3 | 5.13 E-3 | 4.44 E-3 | 4.87 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 2.09 E-3 | 748 E-6 | 410 E-6 | 306 E-6 | 348 E-6 | 1.03 E-3 | 0.0 | 2.09 E-3 | 748 E-6 | 410 E-6 | 306 E-6 | 348 E-6 | 1.03 E-3 | 0 | 5.79 | 4.07 | 4.30 | 5.95 | 7.85 | 21.1 | 1.000 | 1.061 | 1.042 | 1.045 | 1.063 | 1.085 | 1.268 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x404712 | main | CoMD.c:207 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 49.3 E-3 | 26.8 E-3 | 14.9 E-3 | 9.89 E-3 | 8.58 E-3 | 9.62 E-3 | 10.4 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 415 E-6 | 426 E-6 | 290 E-6 | 184 E-6 | 1.72 E-3 | 2.24 E-3 | 0.0 | 415 E-6 | 426 E-6 | 290 E-6 | 184 E-6 | 1.72 E-3 | 2.24 E-3 | 0 | 1.54 | 2.85 | 2.93 | 2.15 | 17.9 | 21.4 | 1.000 | 1.016 | 1.029 | 1.030 | 1.022 | 1.217 | 1.273 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x4112f2 | kineticEnergy | timestep.c:120 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 26.4 E-3 | 16.7 E-3 | 9.74 E-3 | 7.21 E-3 | 7.10 E-3 | 9.11 E-3 | 10.1 E-3 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 | 2.17 E-6 | 1.43 E-3 | 683 E-6 | 349 E-6 | 191 E-6 | 1.52 E-3 | 2.13 E-3 | 746 E-9 | 1.43 E-3 | 682 E-6 | 348 E-6 | 190 E-6 | 1.51 E-3 | 2.13 E-3 | 0.01 | 8.56 | 7.01 | 4.84 | 2.69 | 16.6 | 21.1 | 1.000 | 1.094 | 1.075 | 1.051 | 1.028 | 1.200 | 1.267 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40a5f8 | setVcm | initAtoms.c:123 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 11.9 E-3 | 7.14 E-3 | 4.40 E-3 | 3.17 E-3 | 2.77 E-3 | 3.31 E-3 | 3.85 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 397 E-6 | 230 E-6 | 114 E-6 | 53.9 E-6 | 732 E-6 | 1.07 E-3 | 0.0 | 396 E-6 | 230 E-6 | 114 E-6 | 53.8 E-6 | 732 E-6 | 1.07 E-3 | 0 | 5.55 | 5.23 | 3.61 | 1.94 | 22.1 | 27.7 | 1.000 | 1.059 | 1.055 | 1.037 | 1.020 | 1.283 | 1.383 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40a4f1 | setVcm | initAtoms.c:218 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 8.95 E-3 | 5.82 E-3 | 3.43 E-3 | 2.56 E-3 | 2.50 E-3 | 3.03 E-3 | 3.09 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.66 E-6 | 518 E-6 | 253 E-6 | 120 E-6 | 75.1 E-6 | 480 E-6 | 679 E-6 | 354 E-9 | 517 E-6 | 253 E-6 | 120 E-6 | 74.2 E-6 | 479 E-6 | 678 E-6 | 0.02 | 8.91 | 7.38 | 4.71 | 3.01 | 15.8 | 22.0 | 1.000 | 1.098 | 1.080 | 1.049 | 1.031 | 1.188 | 1.282 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40a6fa | setTemperature | initAtoms.c:184 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 48 | 4.22 E-3 | 2.63 E-3 | 1.93 E-3 | 1.58 E-3 | 1.40 E-3 | 1.46 E-3 | 1.92 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 63.0 E-6 | 49.3 E-6 | 32.8 E-6 | 219 E-6 | 385 E-6 | 693 E-6 | 0.0 | 63.0 E-6 | 49.2 E-6 | 32.7 E-6 | 219 E-6 | 385 E-6 | 692 E-6 | 0 | 2.39 | 2.55 | 2.08 | 15.7 | 26.4 | 36.1 | 1.000 | 1.025 | 1.026 | 1.021 | 1.186 | 1.359 | 1.566 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |