PDS/homework_3/reportv1.3

1918 lines
212 KiB
Groff

==PROF== Connected to process 19677 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v1/bitonicCUDA)
==PROF== Profiling "prephase" - 1: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 5 passes
==PROF== Disconnected from process 19677
[19677] bitonicCUDA@127.0.0.1
void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum msecond 1.06
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.22
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.91
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 1,054,215.71
smsp__average_warp_latency_issue_stalled_barrier.ratio 10,542.16
smsp__inst_executed.avg inst 770,278.16
smsp__inst_executed.max inst 770,517
smsp__inst_executed.min inst 770,078
smsp__inst_executed.sum inst 49,297,802
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 23.29
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.23
smsp__cycles_active.avg cycle 1,464,763.30
smsp__cycles_active.sum cycle 93,744,851
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.59
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,434.70
smsp__inst_executed.max inst 12,627
smsp__inst_executed.min inst 12,202
smsp__inst_executed.sum inst 795,821
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,275.05
smsp__cycles_active.sum cycle 4,625,603
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 185.54
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 160,167.14
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,601.67
smsp__inst_executed.avg inst 132,203.41
smsp__inst_executed.max inst 134,386
smsp__inst_executed.min inst 130,079
smsp__inst_executed.sum inst 8,461,018
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.86
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
smsp__cycles_active.avg cycle 253,150.12
smsp__cycles_active.sum cycle 16,201,608
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.34
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,300.38
smsp__inst_executed.max inst 12,564
smsp__inst_executed.min inst 12,036
smsp__inst_executed.sum inst 787,224
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,561.48
smsp__cycles_active.sum cycle 4,579,935
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.62
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,308.33
smsp__inst_executed.max inst 12,555
smsp__inst_executed.min inst 12,038
smsp__inst_executed.sum inst 787,733
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,921.98
smsp__cycles_active.sum cycle 4,667,007
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 183.49
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 160,010.27
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,600.10
smsp__inst_executed.avg inst 132,209.20
smsp__inst_executed.max inst 134,250
smsp__inst_executed.min inst 130,144
smsp__inst_executed.sum inst 8,461,389
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.92
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
smsp__cycles_active.avg cycle 252,547.31
smsp__cycles_active.sum cycle 16,163,028
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.59
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,294.86
smsp__inst_executed.max inst 12,694
smsp__inst_executed.min inst 12,054
smsp__inst_executed.sum inst 786,871
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,596.58
smsp__cycles_active.sum cycle 4,582,181
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.98
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,299
smsp__inst_executed.max inst 12,638
smsp__inst_executed.min inst 11,881
smsp__inst_executed.sum inst 787,136
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,894.47
smsp__cycles_active.sum cycle 4,601,246
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.53
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,309.91
smsp__inst_executed.max inst 12,636
smsp__inst_executed.min inst 11,910
smsp__inst_executed.sum inst 787,834
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,313.89
smsp__cycles_active.sum cycle 4,564,089
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 184.90
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 158,555.84
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,585.56
smsp__inst_executed.avg inst 132,207.33
smsp__inst_executed.max inst 134,301
smsp__inst_executed.min inst 130,116
smsp__inst_executed.sum inst 8,461,269
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.73
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
smsp__cycles_active.avg cycle 252,473.81
smsp__cycles_active.sum cycle 16,158,324
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.98
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,291.09
smsp__inst_executed.max inst 12,593
smsp__inst_executed.min inst 11,856
smsp__inst_executed.sum inst 786,630
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,576.33
smsp__cycles_active.sum cycle 4,516,885
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.18
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,293.56
smsp__inst_executed.max inst 12,684
smsp__inst_executed.min inst 11,908
smsp__inst_executed.sum inst 786,788
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,507.47
smsp__cycles_active.sum cycle 4,576,478
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.73
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,298.97
smsp__inst_executed.max inst 12,689
smsp__inst_executed.min inst 11,912
smsp__inst_executed.sum inst 787,134
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,018.25
smsp__cycles_active.sum cycle 4,545,168
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.72
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,308.84
smsp__inst_executed.max inst 12,686
smsp__inst_executed.min inst 12,079
smsp__inst_executed.sum inst 787,766
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,024.20
smsp__cycles_active.sum cycle 4,609,549
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 185.95
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 157,276.34
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,572.76
smsp__inst_executed.avg inst 132,205.28
smsp__inst_executed.max inst 134,358
smsp__inst_executed.min inst 130,024
smsp__inst_executed.sum inst 8,461,138
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.55
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
smsp__cycles_active.avg cycle 252,593.19
smsp__cycles_active.sum cycle 16,165,964
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.30
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.47
smsp__inst_executed.max inst 12,560
smsp__inst_executed.min inst 12,088
smsp__inst_executed.sum inst 786,526
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,795.55
smsp__cycles_active.sum cycle 4,530,915
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.76
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,290.78
smsp__inst_executed.max inst 12,745
smsp__inst_executed.min inst 11,874
smsp__inst_executed.sum inst 786,610
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,441.03
smsp__cycles_active.sum cycle 4,508,226
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.56
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,293.70
smsp__inst_executed.max inst 12,566
smsp__inst_executed.min inst 12,056
smsp__inst_executed.sum inst 786,797
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,597.62
smsp__cycles_active.sum cycle 4,582,248
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.95
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,299.03
smsp__inst_executed.max inst 12,648
smsp__inst_executed.min inst 11,910
smsp__inst_executed.sum inst 787,138
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,625.34
smsp__cycles_active.sum cycle 4,520,022
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.53
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,308.55
smsp__inst_executed.max inst 12,690
smsp__inst_executed.min inst 12,090
smsp__inst_executed.sum inst 787,747
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,911.34
smsp__cycles_active.sum cycle 4,602,326
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 184.93
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 159,654.44
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,596.54
smsp__inst_executed.avg inst 132,204.97
smsp__inst_executed.max inst 134,424
smsp__inst_executed.min inst 129,985
smsp__inst_executed.sum inst 8,461,118
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.86
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
smsp__cycles_active.avg cycle 252,486.12
smsp__cycles_active.sum cycle 16,159,112
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.24
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.47
smsp__inst_executed.max inst 12,834
smsp__inst_executed.min inst 11,932
smsp__inst_executed.sum inst 786,526
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,577.20
smsp__cycles_active.sum cycle 4,516,941
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.24
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.45
smsp__inst_executed.max inst 12,702
smsp__inst_executed.min inst 11,912
smsp__inst_executed.sum inst 786,525
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,559.42
smsp__cycles_active.sum cycle 4,579,803
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.02
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,291.50
smsp__inst_executed.max inst 12,638
smsp__inst_executed.min inst 12,088
smsp__inst_executed.sum inst 786,656
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,387.86
smsp__cycles_active.sum cycle 4,568,823
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.56
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,293.50
smsp__inst_executed.max inst 12,785
smsp__inst_executed.min inst 11,630
smsp__inst_executed.sum inst 786,784
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,530.47
smsp__cycles_active.sum cycle 4,577,950
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.98
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,298.41
smsp__inst_executed.max inst 12,716
smsp__inst_executed.min inst 11,883
smsp__inst_executed.sum inst 787,098
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,755.27
smsp__cycles_active.sum cycle 4,528,337
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.53
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,310.11
smsp__inst_executed.max inst 12,496
smsp__inst_executed.min inst 11,901
smsp__inst_executed.sum inst 787,847
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,085.02
smsp__cycles_active.sum cycle 4,613,441
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 185.02
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 158,201.12
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,582.01
smsp__inst_executed.avg inst 132,195.78
smsp__inst_executed.max inst 134,319
smsp__inst_executed.min inst 130,101
smsp__inst_executed.sum inst 8,460,530
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.71
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
smsp__cycles_active.avg cycle 251,923.50
smsp__cycles_active.sum cycle 16,123,104
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.66
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.02
smsp__inst_executed.max inst 12,668
smsp__inst_executed.min inst 11,912
smsp__inst_executed.sum inst 786,497
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,635.66
smsp__cycles_active.sum cycle 4,648,682
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.37
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.31
smsp__inst_executed.max inst 12,508
smsp__inst_executed.min inst 11,924
smsp__inst_executed.sum inst 786,452
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,858
smsp__cycles_active.sum cycle 4,534,912
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.30
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.89
smsp__inst_executed.max inst 12,659
smsp__inst_executed.min inst 11,942
smsp__inst_executed.sum inst 786,489
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,747.28
smsp__cycles_active.sum cycle 4,527,826
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.98
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,291.03
smsp__inst_executed.max inst 12,683
smsp__inst_executed.min inst 11,982
smsp__inst_executed.sum inst 786,626
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,881.03
smsp__cycles_active.sum cycle 4,536,386
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.43
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,293.31
smsp__inst_executed.max inst 12,752
smsp__inst_executed.min inst 11,612
smsp__inst_executed.sum inst 786,772
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,166.55
smsp__cycles_active.sum cycle 4,554,659
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.05
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,298.28
smsp__inst_executed.max inst 12,667
smsp__inst_executed.min inst 11,870
smsp__inst_executed.sum inst 787,090
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,893.25
smsp__cycles_active.sum cycle 4,537,168
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.43
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,311.05
smsp__inst_executed.max inst 12,751
smsp__inst_executed.min inst 12,075
smsp__inst_executed.sum inst 787,907
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,743.69
smsp__cycles_active.sum cycle 4,591,596
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 185.66
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 161,553.58
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,615.54
smsp__inst_executed.avg inst 132,193.28
smsp__inst_executed.max inst 134,294
smsp__inst_executed.min inst 130,087
smsp__inst_executed.sum inst 8,460,370
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.09
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
smsp__cycles_active.avg cycle 252,649.62
smsp__cycles_active.sum cycle 16,169,576
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.17
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.12
smsp__inst_executed.max inst 12,484
smsp__inst_executed.min inst 12,084
smsp__inst_executed.sum inst 786,504
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,022.83
smsp__cycles_active.sum cycle 4,609,461
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.82
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.45
smsp__inst_executed.max inst 12,672
smsp__inst_executed.min inst 11,900
smsp__inst_executed.sum inst 786,461
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,587.06
smsp__cycles_active.sum cycle 4,581,572
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.53
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.42
smsp__inst_executed.max inst 12,632
smsp__inst_executed.min inst 12,096
smsp__inst_executed.sum inst 786,459
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,582.89
smsp__cycles_active.sum cycle 4,517,305
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.02
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.80
smsp__inst_executed.max inst 12,500
smsp__inst_executed.min inst 11,924
smsp__inst_executed.sum inst 786,483
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,332.38
smsp__cycles_active.sum cycle 4,565,272
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.92
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,290.08
smsp__inst_executed.max inst 12,636
smsp__inst_executed.min inst 11,868
smsp__inst_executed.sum inst 786,565
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,497.30
smsp__cycles_active.sum cycle 4,575,827
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.43
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,292.33
smsp__inst_executed.max inst 12,709
smsp__inst_executed.min inst 11,780
smsp__inst_executed.sum inst 786,709
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,223.28
smsp__cycles_active.sum cycle 4,622,290
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.08
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,296.56
smsp__inst_executed.max inst 12,676
smsp__inst_executed.min inst 11,885
smsp__inst_executed.sum inst 786,980
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,705.17
smsp__cycles_active.sum cycle 4,525,131
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.72
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,311.58
smsp__inst_executed.max inst 12,710
smsp__inst_executed.min inst 11,851
smsp__inst_executed.sum inst 787,941
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,827.20
smsp__cycles_active.sum cycle 4,596,941
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 185.02
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 159,138.76
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,591.39
smsp__inst_executed.avg inst 132,167.45
smsp__inst_executed.max inst 134,248
smsp__inst_executed.min inst 130,050
smsp__inst_executed.sum inst 8,458,717
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.84
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
smsp__cycles_active.avg cycle 251,922.81
smsp__cycles_active.sum cycle 16,123,060
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.58
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,287.83
smsp__inst_executed.max inst 12,872
smsp__inst_executed.min inst 11,524
smsp__inst_executed.sum inst 786,421
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 73,310.11
smsp__cycles_active.sum cycle 4,691,847
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.10
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.67
smsp__inst_executed.max inst 12,488
smsp__inst_executed.min inst 12,092
smsp__inst_executed.sum inst 786,475
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,120.91
smsp__cycles_active.sum cycle 4,615,738
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.85
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.33
smsp__inst_executed.max inst 12,500
smsp__inst_executed.min inst 11,728
smsp__inst_executed.sum inst 786,453
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,726.75
smsp__cycles_active.sum cycle 4,590,512
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.62
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.86
smsp__inst_executed.max inst 12,522
smsp__inst_executed.min inst 11,924
smsp__inst_executed.sum inst 786,487
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,432.41
smsp__cycles_active.sum cycle 4,571,674
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.18
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.25
smsp__inst_executed.max inst 12,672
smsp__inst_executed.min inst 11,898
smsp__inst_executed.sum inst 786,512
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,698.97
smsp__cycles_active.sum cycle 4,588,734
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.95
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,290.50
smsp__inst_executed.max inst 12,484
smsp__inst_executed.min inst 12,008
smsp__inst_executed.sum inst 786,592
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,943.89
smsp__cycles_active.sum cycle 4,604,409
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.40
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,293.12
smsp__inst_executed.max inst 12,713
smsp__inst_executed.min inst 11,621
smsp__inst_executed.sum inst 786,760
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,649.19
smsp__cycles_active.sum cycle 4,585,548
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.05
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,298.70
smsp__inst_executed.max inst 12,725
smsp__inst_executed.min inst 11,966
smsp__inst_executed.sum inst 787,117
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,102.67
smsp__cycles_active.sum cycle 4,550,571
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.50
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,309.14
smsp__inst_executed.max inst 12,737
smsp__inst_executed.min inst 12,018
smsp__inst_executed.sum inst 787,785
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,724.20
smsp__cycles_active.sum cycle 4,590,349
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 185.76
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 159,061.95
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,590.62
smsp__inst_executed.avg inst 132,213.64
smsp__inst_executed.max inst 134,321
smsp__inst_executed.min inst 130,119
smsp__inst_executed.sum inst 8,461,673
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.80
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
smsp__cycles_active.avg cycle 252,290.12
smsp__cycles_active.sum cycle 16,146,568
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 56.96
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.03
smsp__inst_executed.max inst 12,684
smsp__inst_executed.min inst 12,072
smsp__inst_executed.sum inst 786,434
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 68,948.02
smsp__cycles_active.sum cycle 4,412,673
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.71
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.23
smsp__inst_executed.max inst 12,712
smsp__inst_executed.min inst 11,696
smsp__inst_executed.sum inst 786,447
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,574.20
smsp__cycles_active.sum cycle 4,644,749
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.01
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.03
smsp__inst_executed.max inst 12,668
smsp__inst_executed.min inst 12,068
smsp__inst_executed.sum inst 786,434
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,415.67
smsp__cycles_active.sum cycle 4,634,603
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.14
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.58
smsp__inst_executed.max inst 12,676
smsp__inst_executed.min inst 11,938
smsp__inst_executed.sum inst 786,469
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,550.84
smsp__cycles_active.sum cycle 4,579,254
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.34
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.86
smsp__inst_executed.max inst 12,476
smsp__inst_executed.min inst 12,078
smsp__inst_executed.sum inst 786,487
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,923.06
smsp__cycles_active.sum cycle 4,539,076
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.14
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.56
smsp__inst_executed.max inst 12,503
smsp__inst_executed.min inst 11,928
smsp__inst_executed.sum inst 786,532
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,782.67
smsp__cycles_active.sum cycle 4,530,091
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.79
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,290.62
smsp__inst_executed.max inst 12,556
smsp__inst_executed.min inst 12,068
smsp__inst_executed.sum inst 786,600
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,737.78
smsp__cycles_active.sum cycle 4,527,218
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.75
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,294.31
smsp__inst_executed.max inst 12,661
smsp__inst_executed.min inst 11,903
smsp__inst_executed.sum inst 786,836
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,611.56
smsp__cycles_active.sum cycle 4,583,140
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.89
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,300.94
smsp__inst_executed.max inst 12,703
smsp__inst_executed.min inst 11,887
smsp__inst_executed.sum inst 787,260
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,911.81
smsp__cycles_active.sum cycle 4,538,356
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.11
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,305.08
smsp__inst_executed.max inst 12,731
smsp__inst_executed.min inst 11,780
smsp__inst_executed.sum inst 787,525
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,733.38
smsp__cycles_active.sum cycle 4,590,936
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 184.58
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 161,368.32
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,613.68
smsp__inst_executed.avg inst 131,997.05
smsp__inst_executed.max inst 134,093
smsp__inst_executed.min inst 129,868
smsp__inst_executed.sum inst 8,447,811
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.12
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
smsp__cycles_active.avg cycle 251,939.98
smsp__cycles_active.sum cycle 16,124,159
---------------------------------------------------------------------- --------------- ------------------------------