Time (%) | Total Time (ns) | Num Calls | Avg (ns) | Med (ns) | Min (ns) | Max (ns) | StdDev (ns) | Name |
--- | --- | --- | --- | --- | --- | --- | --- | --- |
97.8 | 97235404319 | 972950 | 99938.7 | 1527 | 1050 | 78600813 | 599605.7 | cuStreamSynchronize |
1.6 | 1628778317 | 402800 | 4043.6 | 3097 | 2613 | 43751705 | 170543.1 | cuLaunchKernel |
0.2 | 241164615 | 21064 | 11449.1 | 2574 | 1978 | 34920005 | 377042.9 | cuMemcpyHtoDAsync_v2 |
0.1 | 146186826 | 6238 | 23434.9 | 10101 | 8700 | 7546408 | 198513.3 | cuMemcpyDtoHAsync_v2 |
0.1 | 143228362 | 1 | 143228362 | 143228362 | 143228362 | 143228362 | 0 | cudaGetFuncBySymbol_v11000 |
0 | 28124400 | 515 | 54610.5 | 15909 | 1499 | 390842 | 65588.4 | cuMemAlloc_v2 |
0 | 26850317 | 11834 | 2268.9 | 1882 | 1418 | 40483 | 1337.9 | cuMemsetD32Async |
0 | 20899408 | 1 | 20899408 | 20899408 | 20899408 | 20899408 | 0 | cuMemHostAlloc |
0 | 776370 | 1 | 776370 | 776370 | 776370 | 776370 | 0 | cuMemAllocHost_v2 |
0 | 98734 | 2 | 49367 | 49367 | 27694 | 71040 | 30650.3 | cuMemGetInfo_v2 |
0 | 86055 | 405 | 212.5 | 168 | 98 | 5923 | 361.8 | cuGetProcAddress_v2 |
0 | 2472 | 2 | 1236 | 1236 | 1120 | 1352 | 164 | cuInit |
0 | 1775 | 4 | 443.8 | 352 | 199 | 872 | 302 | cuCtxSetCurrent |
0 | 555 | 1 | 555 | 555 | 555 | 555 | 0 | cuFuncGetModule |
0 | 149 | 1 | 149 | 149 | 149 | 149 | 0 | cuModuleGetLoadingMode |