2050 lines
228 KiB
Plaintext
2050 lines
228 KiB
Plaintext
==PROF== Connected to process 100431 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v2/bitonicCUDA)
|
|
==PROF== Profiling "prephase" - 1: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 6 passes
|
|
==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 6 passes
|
|
==PROF== Disconnected from process 100431
|
|
[100431] bitonicCUDA@127.0.0.1
|
|
void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum msecond 2.56
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 237,568
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 237,568
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 237,568
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 3,801,088
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 209,070.94
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 209,334
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 208,875
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 3,345,135
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 1,692,604.61
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 16,926.05
|
|
smsp__inst_executed.avg inst 1,953,951.83
|
|
smsp__inst_executed.max inst 1,954,175
|
|
smsp__inst_executed.min inst 1,953,723
|
|
smsp__inst_executed.sum inst 125,052,917
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 15.35
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.15
|
|
smsp__cycles_active.avg cycle 3,559,774.03
|
|
smsp__cycles_active.sum cycle 227,825,538
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.90
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.03
|
|
smsp__inst_executed.max inst 12,569
|
|
smsp__inst_executed.min inst 11,654
|
|
smsp__inst_executed.sum inst 787,778
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,062.50
|
|
smsp__cycles_active.sum cycle 4,676,000
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 435.49
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 31,913.38
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 32,394
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 31,370
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 510,614
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 353,960.85
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 3,539.61
|
|
smsp__inst_executed.avg inst 313,131.16
|
|
smsp__inst_executed.max inst 313,277
|
|
smsp__inst_executed.min inst 312,868
|
|
smsp__inst_executed.sum inst 20,040,394
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 19.30
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.19
|
|
smsp__cycles_active.avg cycle 598,137.94
|
|
smsp__cycles_active.sum cycle 38,280,828
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.24
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.58
|
|
smsp__inst_executed.max inst 12,573
|
|
smsp__inst_executed.min inst 12,056
|
|
smsp__inst_executed.sum inst 787,109
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,585.64
|
|
smsp__cycles_active.sum cycle 4,517,481
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.94
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.23
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.78
|
|
smsp__inst_executed.max inst 12,547
|
|
smsp__inst_executed.min inst 12,011
|
|
smsp__inst_executed.sum inst 787,762
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,957.47
|
|
smsp__cycles_active.sum cycle 4,669,278
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 434.34
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 31,894.31
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 32,021
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 31,779
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 510,309
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 351,861.41
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 3,518.61
|
|
smsp__inst_executed.avg inst 313,124.58
|
|
smsp__inst_executed.max inst 313,358
|
|
smsp__inst_executed.min inst 312,849
|
|
smsp__inst_executed.sum inst 20,039,973
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 19.19
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.19
|
|
smsp__cycles_active.avg cycle 597,902.02
|
|
smsp__cycles_active.sum cycle 38,265,729
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.30
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.72
|
|
smsp__inst_executed.max inst 12,585
|
|
smsp__inst_executed.min inst 11,832
|
|
smsp__inst_executed.sum inst 786,798
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,044.09
|
|
smsp__cycles_active.sum cycle 4,546,822
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:33, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.11
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,297.98
|
|
smsp__inst_executed.max inst 12,581
|
|
smsp__inst_executed.min inst 11,984
|
|
smsp__inst_executed.sum inst 787,071
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,504.80
|
|
smsp__cycles_active.sum cycle 4,512,307
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.74
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.89
|
|
smsp__inst_executed.max inst 12,884
|
|
smsp__inst_executed.min inst 11,903
|
|
smsp__inst_executed.sum inst 787,833
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,146.27
|
|
smsp__cycles_active.sum cycle 4,681,361
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 434.91
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 31,897.94
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 32,370
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 31,387
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 510,367
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 355,060.44
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 3,550.60
|
|
smsp__inst_executed.avg inst 313,126.61
|
|
smsp__inst_executed.max inst 313,456
|
|
smsp__inst_executed.min inst 312,810
|
|
smsp__inst_executed.sum inst 20,040,103
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 19.38
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.19
|
|
smsp__cycles_active.avg cycle 597,783.91
|
|
smsp__cycles_active.sum cycle 38,258,170
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.30
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.58
|
|
smsp__inst_executed.max inst 12,562
|
|
smsp__inst_executed.min inst 11,884
|
|
smsp__inst_executed.sum inst 786,597
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,651.47
|
|
smsp__cycles_active.sum cycle 4,521,694
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.34
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.42
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.06
|
|
smsp__inst_executed.max inst 12,557
|
|
smsp__inst_executed.min inst 11,928
|
|
smsp__inst_executed.sum inst 786,756
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,246.70
|
|
smsp__cycles_active.sum cycle 4,559,789
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.18
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.75
|
|
smsp__inst_executed.max inst 12,670
|
|
smsp__inst_executed.min inst 11,749
|
|
smsp__inst_executed.sum inst 787,120
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,653.20
|
|
smsp__cycles_active.sum cycle 4,521,805
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.78
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.36
|
|
smsp__inst_executed.max inst 12,811
|
|
smsp__inst_executed.min inst 11,692
|
|
smsp__inst_executed.sum inst 787,799
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,714.31
|
|
smsp__cycles_active.sum cycle 4,653,716
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 434.11
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 31,905.69
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 31,983
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 31,807
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 510,491
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 354,022.84
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 3,540.23
|
|
smsp__inst_executed.avg inst 313,135.20
|
|
smsp__inst_executed.max inst 318,231
|
|
smsp__inst_executed.min inst 308,095
|
|
smsp__inst_executed.sum inst 20,040,653
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 19.31
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.19
|
|
smsp__cycles_active.avg cycle 597,940.59
|
|
smsp__cycles_active.sum cycle 38,268,198
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.92
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.25
|
|
smsp__inst_executed.max inst 12,640
|
|
smsp__inst_executed.min inst 11,916
|
|
smsp__inst_executed.sum inst 786,512
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,480.17
|
|
smsp__cycles_active.sum cycle 4,510,731
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.43
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.56
|
|
smsp__inst_executed.max inst 12,505
|
|
smsp__inst_executed.min inst 12,076
|
|
smsp__inst_executed.sum inst 786,596
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,671.28
|
|
smsp__cycles_active.sum cycle 4,522,962
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.59
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,292.92
|
|
smsp__inst_executed.max inst 12,757
|
|
smsp__inst_executed.min inst 11,856
|
|
smsp__inst_executed.sum inst 786,747
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,114.81
|
|
smsp__cycles_active.sum cycle 4,551,348
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:34, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.98
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.41
|
|
smsp__inst_executed.max inst 12,687
|
|
smsp__inst_executed.min inst 11,920
|
|
smsp__inst_executed.sum inst 787,098
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,416.27
|
|
smsp__cycles_active.sum cycle 4,506,641
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.68
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.94
|
|
smsp__inst_executed.max inst 12,697
|
|
smsp__inst_executed.min inst 11,640
|
|
smsp__inst_executed.sum inst 787,772
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,201.34
|
|
smsp__cycles_active.sum cycle 4,684,886
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 433.86
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 31,913.81
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 31,996
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 31,782
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 510,621
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 354,697.47
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 3,546.97
|
|
smsp__inst_executed.avg inst 313,117.39
|
|
smsp__inst_executed.max inst 318,197
|
|
smsp__inst_executed.min inst 308,095
|
|
smsp__inst_executed.sum inst 20,039,513
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 19.33
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.19
|
|
smsp__cycles_active.avg cycle 598,408.55
|
|
smsp__cycles_active.sum cycle 38,298,147
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.03
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.61
|
|
smsp__inst_executed.max inst 12,676
|
|
smsp__inst_executed.min inst 11,864
|
|
smsp__inst_executed.sum inst 786,471
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,508.12
|
|
smsp__cycles_active.sum cycle 4,704,520
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.21
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.09
|
|
smsp__inst_executed.max inst 12,660
|
|
smsp__inst_executed.min inst 12,078
|
|
smsp__inst_executed.sum inst 786,566
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,077.06
|
|
smsp__cycles_active.sum cycle 4,548,932
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.24
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.83
|
|
smsp__inst_executed.max inst 12,628
|
|
smsp__inst_executed.min inst 11,908
|
|
smsp__inst_executed.sum inst 786,549
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,525.67
|
|
smsp__cycles_active.sum cycle 4,513,643
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.66
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.44
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.34
|
|
smsp__inst_executed.max inst 12,521
|
|
smsp__inst_executed.min inst 11,630
|
|
smsp__inst_executed.sum inst 786,774
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,924.95
|
|
smsp__cycles_active.sum cycle 4,539,197
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.92
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,297.78
|
|
smsp__inst_executed.max inst 12,697
|
|
smsp__inst_executed.min inst 12,067
|
|
smsp__inst_executed.sum inst 787,058
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,093.91
|
|
smsp__cycles_active.sum cycle 4,550,010
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.62
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.45
|
|
smsp__inst_executed.max inst 12,732
|
|
smsp__inst_executed.min inst 11,792
|
|
smsp__inst_executed.sum inst 787,741
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,599.72
|
|
smsp__cycles_active.sum cycle 4,646,382
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 434.43
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 31,869.75
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 32,359
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 31,446
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 509,916
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 352,782.28
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 3,527.82
|
|
smsp__inst_executed.avg inst 313,121.81
|
|
smsp__inst_executed.max inst 313,218
|
|
smsp__inst_executed.min inst 312,990
|
|
smsp__inst_executed.sum inst 20,039,796
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 19.20
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.19
|
|
smsp__cycles_active.avg cycle 599,438.23
|
|
smsp__cycles_active.sum cycle 38,364,047
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:35, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.07
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.05
|
|
smsp__inst_executed.max inst 12,492
|
|
smsp__inst_executed.min inst 11,906
|
|
smsp__inst_executed.sum inst 786,435
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,595.95
|
|
smsp__cycles_active.sum cycle 4,582,141
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.22
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.66
|
|
smsp__inst_executed.max inst 12,512
|
|
smsp__inst_executed.min inst 11,704
|
|
smsp__inst_executed.sum inst 786,474
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,743.53
|
|
smsp__cycles_active.sum cycle 4,655,586
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.18
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.08
|
|
smsp__inst_executed.max inst 12,463
|
|
smsp__inst_executed.min inst 11,886
|
|
smsp__inst_executed.sum inst 786,501
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,541.78
|
|
smsp__cycles_active.sum cycle 4,514,674
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.18
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.47
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.53
|
|
smsp__inst_executed.max inst 12,514
|
|
smsp__inst_executed.min inst 12,088
|
|
smsp__inst_executed.sum inst 786,594
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,334.75
|
|
smsp__cycles_active.sum cycle 4,501,424
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.62
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.53
|
|
smsp__inst_executed.max inst 12,555
|
|
smsp__inst_executed.min inst 11,987
|
|
smsp__inst_executed.sum inst 786,786
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,984.45
|
|
smsp__cycles_active.sum cycle 4,543,005
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.82
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.36
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.67
|
|
smsp__inst_executed.max inst 12,569
|
|
smsp__inst_executed.min inst 11,918
|
|
smsp__inst_executed.sum inst 787,115
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,573.64
|
|
smsp__cycles_active.sum cycle 4,516,713
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.30
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.23
|
|
smsp__inst_executed.max inst 12,720
|
|
smsp__inst_executed.min inst 11,718
|
|
smsp__inst_executed.sum inst 787,727
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,732.94
|
|
smsp__cycles_active.sum cycle 4,654,908
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 434.11
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 31,883.31
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 32,395
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 31,412
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 510,133
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 354,700.81
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 3,547.01
|
|
smsp__inst_executed.avg inst 313,122.11
|
|
smsp__inst_executed.max inst 318,197
|
|
smsp__inst_executed.min inst 308,040
|
|
smsp__inst_executed.sum inst 20,039,815
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 19.35
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.19
|
|
smsp__cycles_active.avg cycle 597,850.22
|
|
smsp__cycles_active.sum cycle 38,262,414
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.23
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.16
|
|
smsp__inst_executed.max inst 12,688
|
|
smsp__inst_executed.min inst 11,888
|
|
smsp__inst_executed.sum inst 786,378
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,841.34
|
|
smsp__cycles_active.sum cycle 4,597,846
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.75
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.83
|
|
smsp__inst_executed.max inst 12,660
|
|
smsp__inst_executed.min inst 11,928
|
|
smsp__inst_executed.sum inst 786,485
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,607.83
|
|
smsp__cycles_active.sum cycle 4,582,901
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.87
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.42
|
|
smsp__inst_executed.max inst 12,700
|
|
smsp__inst_executed.min inst 11,680
|
|
smsp__inst_executed.sum inst 786,459
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,464.98
|
|
smsp__cycles_active.sum cycle 4,701,759
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:36, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.21
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.50
|
|
smsp__inst_executed.max inst 12,656
|
|
smsp__inst_executed.min inst 11,732
|
|
smsp__inst_executed.sum inst 786,528
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,751.97
|
|
smsp__cycles_active.sum cycle 4,528,126
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.05
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.84
|
|
smsp__inst_executed.max inst 12,520
|
|
smsp__inst_executed.min inst 12,034
|
|
smsp__inst_executed.sum inst 786,614
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,176.25
|
|
smsp__cycles_active.sum cycle 4,555,280
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.56
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,292.58
|
|
smsp__inst_executed.max inst 12,547
|
|
smsp__inst_executed.min inst 11,776
|
|
smsp__inst_executed.sum inst 786,725
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,971.50
|
|
smsp__cycles_active.sum cycle 4,542,176
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.14
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.37
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.96
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.45
|
|
smsp__inst_executed.max inst 12,555
|
|
smsp__inst_executed.min inst 12,073
|
|
smsp__inst_executed.sum inst 787,101
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,221.45
|
|
smsp__cycles_active.sum cycle 4,494,173
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.36
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.89
|
|
smsp__inst_executed.max inst 12,584
|
|
smsp__inst_executed.min inst 11,684
|
|
smsp__inst_executed.sum inst 787,769
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,809.75
|
|
smsp__cycles_active.sum cycle 4,659,824
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 435.17
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 31,881.75
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 32,405
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 31,298
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 510,108
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 351,267.26
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 3,512.67
|
|
smsp__inst_executed.avg inst 313,146.62
|
|
smsp__inst_executed.max inst 318,289
|
|
smsp__inst_executed.min inst 308,129
|
|
smsp__inst_executed.sum inst 20,041,384
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 19.17
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.19
|
|
smsp__cycles_active.avg cycle 597,515.77
|
|
smsp__cycles_active.sum cycle 38,241,009
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.70
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.25
|
|
smsp__inst_executed.max inst 12,664
|
|
smsp__inst_executed.min inst 11,904
|
|
smsp__inst_executed.sum inst 786,448
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 74,531.66
|
|
smsp__cycles_active.sum cycle 4,770,026
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.82
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.38
|
|
smsp__inst_executed.max inst 12,490
|
|
smsp__inst_executed.min inst 12,092
|
|
smsp__inst_executed.sum inst 786,456
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,103.58
|
|
smsp__cycles_active.sum cycle 4,614,629
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.69
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.58
|
|
smsp__inst_executed.max inst 12,486
|
|
smsp__inst_executed.min inst 11,896
|
|
smsp__inst_executed.sum inst 786,469
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,726.84
|
|
smsp__cycles_active.sum cycle 4,590,518
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.86
|
|
smsp__inst_executed.max inst 12,664
|
|
smsp__inst_executed.min inst 11,716
|
|
smsp__inst_executed.sum inst 786,487
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,256.52
|
|
smsp__cycles_active.sum cycle 4,688,417
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.18
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.66
|
|
smsp__inst_executed.max inst 12,852
|
|
smsp__inst_executed.min inst 11,702
|
|
smsp__inst_executed.sum inst 786,538
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,927.86
|
|
smsp__cycles_active.sum cycle 4,539,383
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:37, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.37
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,291.56
|
|
smsp__inst_executed.max inst 12,652
|
|
smsp__inst_executed.min inst 11,912
|
|
smsp__inst_executed.sum inst 786,660
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,620.88
|
|
smsp__cycles_active.sum cycle 4,519,736
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.01
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.44
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,294.31
|
|
smsp__inst_executed.max inst 12,766
|
|
smsp__inst_executed.min inst 11,720
|
|
smsp__inst_executed.sum inst 786,836
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,559.72
|
|
smsp__cycles_active.sum cycle 4,579,822
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.18
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,301.61
|
|
smsp__inst_executed.max inst 12,512
|
|
smsp__inst_executed.min inst 12,076
|
|
smsp__inst_executed.sum inst 787,303
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,958.73
|
|
smsp__cycles_active.sum cycle 4,541,359
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.42
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.21
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,314.73
|
|
smsp__inst_executed.max inst 12,723
|
|
smsp__inst_executed.min inst 11,867
|
|
smsp__inst_executed.sum inst 788,143
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,355.83
|
|
smsp__cycles_active.sum cycle 4,630,773
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 433.79
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 31,889.94
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 32,543
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 31,317
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 510,239
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 353,670.79
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 3,536.71
|
|
smsp__inst_executed.avg inst 313,134.42
|
|
smsp__inst_executed.max inst 318,339
|
|
smsp__inst_executed.min inst 308,068
|
|
smsp__inst_executed.sum inst 20,040,603
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 19.31
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.19
|
|
smsp__cycles_active.avg cycle 597,595.56
|
|
smsp__cycles_active.sum cycle 38,246,116
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 56.96
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.81
|
|
smsp__inst_executed.max inst 12,680
|
|
smsp__inst_executed.min inst 11,900
|
|
smsp__inst_executed.sum inst 786,420
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 68,992.86
|
|
smsp__cycles_active.sum cycle 4,415,543
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.80
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.44
|
|
smsp__inst_executed.max inst 12,672
|
|
smsp__inst_executed.min inst 11,908
|
|
smsp__inst_executed.sum inst 786,460
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 74,819.06
|
|
smsp__cycles_active.sum cycle 4,788,420
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.14
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.36
|
|
smsp__inst_executed.max inst 12,484
|
|
smsp__inst_executed.min inst 12,088
|
|
smsp__inst_executed.sum inst 786,391
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,808.34
|
|
smsp__cycles_active.sum cycle 4,595,734
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.94
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.64
|
|
smsp__inst_executed.max inst 12,656
|
|
smsp__inst_executed.min inst 12,092
|
|
smsp__inst_executed.sum inst 786,473
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,451.16
|
|
smsp__cycles_active.sum cycle 4,636,874
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 60.54
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.49
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4.00
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.56
|
|
smsp__inst_executed.max inst 12,677
|
|
smsp__inst_executed.min inst 11,720
|
|
smsp__inst_executed.sum inst 786,468
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,851.78
|
|
smsp__cycles_active.sum cycle 4,662,514
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.98
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.48
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.55
|
|
smsp__inst_executed.max inst 12,492
|
|
smsp__inst_executed.min inst 11,924
|
|
smsp__inst_executed.sum inst 786,467
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,639.48
|
|
smsp__cycles_active.sum cycle 4,520,927
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:38, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.11
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.46
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.99
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.67
|
|
smsp__inst_executed.max inst 12,540
|
|
smsp__inst_executed.min inst 12,048
|
|
smsp__inst_executed.sum inst 786,603
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,134.56
|
|
smsp__cycles_active.sum cycle 4,552,612
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:39, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.56
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.43
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.98
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,292.61
|
|
smsp__inst_executed.max inst 12,727
|
|
smsp__inst_executed.min inst 11,881
|
|
smsp__inst_executed.sum inst 786,727
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,327.62
|
|
smsp__cycles_active.sum cycle 4,564,968
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:39, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.30
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.35
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.95
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.61
|
|
smsp__inst_executed.max inst 12,810
|
|
smsp__inst_executed.min inst 11,926
|
|
smsp__inst_executed.sum inst 787,111
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,631.98
|
|
smsp__cycles_active.sum cycle 4,520,447
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:39, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.52
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.06
|
|
smsp__inst_executed.max inst 12,561
|
|
smsp__inst_executed.min inst 12,043
|
|
smsp__inst_executed.sum inst 787,780
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,662.08
|
|
smsp__cycles_active.sum cycle 4,650,373
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 21:59:39, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 436.35
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 31,827.38
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 32,385
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 31,390
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 509,238
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 358,185.65
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 3,581.86
|
|
smsp__inst_executed.avg inst 312,880.55
|
|
smsp__inst_executed.max inst 317,962
|
|
smsp__inst_executed.min inst 307,889
|
|
smsp__inst_executed.sum inst 20,024,355
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 19.51
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.20
|
|
smsp__cycles_active.avg cycle 598,707.28
|
|
smsp__cycles_active.sum cycle 38,317,266
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|