==PROF== Connected to process 19677 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v1/bitonicCUDA) ==PROF== Profiling "prephase" - 1: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 5 passes ==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 5 passes ==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 5 passes ==PROF== Disconnected from process 19677 [19677] bitonicCUDA@127.0.0.1 void prephase(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum msecond 1.06 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.22 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.91 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 1,054,215.71 smsp__average_warp_latency_issue_stalled_barrier.ratio 10,542.16 smsp__inst_executed.avg inst 770,278.16 smsp__inst_executed.max inst 770,517 smsp__inst_executed.min inst 770,078 smsp__inst_executed.sum inst 49,297,802 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 23.29 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.23 smsp__cycles_active.avg cycle 1,464,763.30 smsp__cycles_active.sum cycle 93,744,851 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.59 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,434.70 smsp__inst_executed.max inst 12,627 smsp__inst_executed.min inst 12,202 smsp__inst_executed.sum inst 795,821 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,275.05 smsp__cycles_active.sum cycle 4,625,603 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 185.54 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 160,167.14 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,601.67 smsp__inst_executed.avg inst 132,203.41 smsp__inst_executed.max inst 134,386 smsp__inst_executed.min inst 130,079 smsp__inst_executed.sum inst 8,461,018 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.86 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 smsp__cycles_active.avg cycle 253,150.12 smsp__cycles_active.sum cycle 16,201,608 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.34 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,300.38 smsp__inst_executed.max inst 12,564 smsp__inst_executed.min inst 12,036 smsp__inst_executed.sum inst 787,224 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,561.48 smsp__cycles_active.sum cycle 4,579,935 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.62 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,308.33 smsp__inst_executed.max inst 12,555 smsp__inst_executed.min inst 12,038 smsp__inst_executed.sum inst 787,733 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,921.98 smsp__cycles_active.sum cycle 4,667,007 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 183.49 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 160,010.27 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,600.10 smsp__inst_executed.avg inst 132,209.20 smsp__inst_executed.max inst 134,250 smsp__inst_executed.min inst 130,144 smsp__inst_executed.sum inst 8,461,389 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.92 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 smsp__cycles_active.avg cycle 252,547.31 smsp__cycles_active.sum cycle 16,163,028 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.59 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,294.86 smsp__inst_executed.max inst 12,694 smsp__inst_executed.min inst 12,054 smsp__inst_executed.sum inst 786,871 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,596.58 smsp__cycles_active.sum cycle 4,582,181 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.98 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,299 smsp__inst_executed.max inst 12,638 smsp__inst_executed.min inst 11,881 smsp__inst_executed.sum inst 787,136 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,894.47 smsp__cycles_active.sum cycle 4,601,246 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.53 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,309.91 smsp__inst_executed.max inst 12,636 smsp__inst_executed.min inst 11,910 smsp__inst_executed.sum inst 787,834 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,313.89 smsp__cycles_active.sum cycle 4,564,089 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 184.90 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 158,555.84 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,585.56 smsp__inst_executed.avg inst 132,207.33 smsp__inst_executed.max inst 134,301 smsp__inst_executed.min inst 130,116 smsp__inst_executed.sum inst 8,461,269 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.73 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 smsp__cycles_active.avg cycle 252,473.81 smsp__cycles_active.sum cycle 16,158,324 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.98 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,291.09 smsp__inst_executed.max inst 12,593 smsp__inst_executed.min inst 11,856 smsp__inst_executed.sum inst 786,630 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,576.33 smsp__cycles_active.sum cycle 4,516,885 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.18 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,293.56 smsp__inst_executed.max inst 12,684 smsp__inst_executed.min inst 11,908 smsp__inst_executed.sum inst 786,788 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,507.47 smsp__cycles_active.sum cycle 4,576,478 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.73 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.97 smsp__inst_executed.max inst 12,689 smsp__inst_executed.min inst 11,912 smsp__inst_executed.sum inst 787,134 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,018.25 smsp__cycles_active.sum cycle 4,545,168 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.72 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,308.84 smsp__inst_executed.max inst 12,686 smsp__inst_executed.min inst 12,079 smsp__inst_executed.sum inst 787,766 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,024.20 smsp__cycles_active.sum cycle 4,609,549 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 185.95 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 157,276.34 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,572.76 smsp__inst_executed.avg inst 132,205.28 smsp__inst_executed.max inst 134,358 smsp__inst_executed.min inst 130,024 smsp__inst_executed.sum inst 8,461,138 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.55 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 smsp__cycles_active.avg cycle 252,593.19 smsp__cycles_active.sum cycle 16,165,964 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.30 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.47 smsp__inst_executed.max inst 12,560 smsp__inst_executed.min inst 12,088 smsp__inst_executed.sum inst 786,526 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,795.55 smsp__cycles_active.sum cycle 4,530,915 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.76 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,290.78 smsp__inst_executed.max inst 12,745 smsp__inst_executed.min inst 11,874 smsp__inst_executed.sum inst 786,610 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,441.03 smsp__cycles_active.sum cycle 4,508,226 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.56 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,293.70 smsp__inst_executed.max inst 12,566 smsp__inst_executed.min inst 12,056 smsp__inst_executed.sum inst 786,797 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,597.62 smsp__cycles_active.sum cycle 4,582,248 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.95 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,299.03 smsp__inst_executed.max inst 12,648 smsp__inst_executed.min inst 11,910 smsp__inst_executed.sum inst 787,138 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,625.34 smsp__cycles_active.sum cycle 4,520,022 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.53 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,308.55 smsp__inst_executed.max inst 12,690 smsp__inst_executed.min inst 12,090 smsp__inst_executed.sum inst 787,747 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,911.34 smsp__cycles_active.sum cycle 4,602,326 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 184.93 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 159,654.44 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,596.54 smsp__inst_executed.avg inst 132,204.97 smsp__inst_executed.max inst 134,424 smsp__inst_executed.min inst 129,985 smsp__inst_executed.sum inst 8,461,118 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.86 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 smsp__cycles_active.avg cycle 252,486.12 smsp__cycles_active.sum cycle 16,159,112 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.24 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.47 smsp__inst_executed.max inst 12,834 smsp__inst_executed.min inst 11,932 smsp__inst_executed.sum inst 786,526 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,577.20 smsp__cycles_active.sum cycle 4,516,941 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.24 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.45 smsp__inst_executed.max inst 12,702 smsp__inst_executed.min inst 11,912 smsp__inst_executed.sum inst 786,525 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,559.42 smsp__cycles_active.sum cycle 4,579,803 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.02 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,291.50 smsp__inst_executed.max inst 12,638 smsp__inst_executed.min inst 12,088 smsp__inst_executed.sum inst 786,656 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,387.86 smsp__cycles_active.sum cycle 4,568,823 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.56 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,293.50 smsp__inst_executed.max inst 12,785 smsp__inst_executed.min inst 11,630 smsp__inst_executed.sum inst 786,784 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,530.47 smsp__cycles_active.sum cycle 4,577,950 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.98 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.41 smsp__inst_executed.max inst 12,716 smsp__inst_executed.min inst 11,883 smsp__inst_executed.sum inst 787,098 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,755.27 smsp__cycles_active.sum cycle 4,528,337 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.53 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,310.11 smsp__inst_executed.max inst 12,496 smsp__inst_executed.min inst 11,901 smsp__inst_executed.sum inst 787,847 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,085.02 smsp__cycles_active.sum cycle 4,613,441 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 185.02 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 158,201.12 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,582.01 smsp__inst_executed.avg inst 132,195.78 smsp__inst_executed.max inst 134,319 smsp__inst_executed.min inst 130,101 smsp__inst_executed.sum inst 8,460,530 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.71 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 smsp__cycles_active.avg cycle 251,923.50 smsp__cycles_active.sum cycle 16,123,104 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.66 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.02 smsp__inst_executed.max inst 12,668 smsp__inst_executed.min inst 11,912 smsp__inst_executed.sum inst 786,497 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,635.66 smsp__cycles_active.sum cycle 4,648,682 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.37 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.31 smsp__inst_executed.max inst 12,508 smsp__inst_executed.min inst 11,924 smsp__inst_executed.sum inst 786,452 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,858 smsp__cycles_active.sum cycle 4,534,912 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.30 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.89 smsp__inst_executed.max inst 12,659 smsp__inst_executed.min inst 11,942 smsp__inst_executed.sum inst 786,489 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,747.28 smsp__cycles_active.sum cycle 4,527,826 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.98 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,291.03 smsp__inst_executed.max inst 12,683 smsp__inst_executed.min inst 11,982 smsp__inst_executed.sum inst 786,626 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,881.03 smsp__cycles_active.sum cycle 4,536,386 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.43 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,293.31 smsp__inst_executed.max inst 12,752 smsp__inst_executed.min inst 11,612 smsp__inst_executed.sum inst 786,772 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,166.55 smsp__cycles_active.sum cycle 4,554,659 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.05 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.28 smsp__inst_executed.max inst 12,667 smsp__inst_executed.min inst 11,870 smsp__inst_executed.sum inst 787,090 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,893.25 smsp__cycles_active.sum cycle 4,537,168 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.43 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,311.05 smsp__inst_executed.max inst 12,751 smsp__inst_executed.min inst 12,075 smsp__inst_executed.sum inst 787,907 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,743.69 smsp__cycles_active.sum cycle 4,591,596 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 185.66 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 161,553.58 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,615.54 smsp__inst_executed.avg inst 132,193.28 smsp__inst_executed.max inst 134,294 smsp__inst_executed.min inst 130,087 smsp__inst_executed.sum inst 8,460,370 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.09 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 smsp__cycles_active.avg cycle 252,649.62 smsp__cycles_active.sum cycle 16,169,576 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.17 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.12 smsp__inst_executed.max inst 12,484 smsp__inst_executed.min inst 12,084 smsp__inst_executed.sum inst 786,504 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,022.83 smsp__cycles_active.sum cycle 4,609,461 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.82 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.45 smsp__inst_executed.max inst 12,672 smsp__inst_executed.min inst 11,900 smsp__inst_executed.sum inst 786,461 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,587.06 smsp__cycles_active.sum cycle 4,581,572 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.53 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.42 smsp__inst_executed.max inst 12,632 smsp__inst_executed.min inst 12,096 smsp__inst_executed.sum inst 786,459 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,582.89 smsp__cycles_active.sum cycle 4,517,305 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.02 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.80 smsp__inst_executed.max inst 12,500 smsp__inst_executed.min inst 11,924 smsp__inst_executed.sum inst 786,483 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,332.38 smsp__cycles_active.sum cycle 4,565,272 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.92 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,290.08 smsp__inst_executed.max inst 12,636 smsp__inst_executed.min inst 11,868 smsp__inst_executed.sum inst 786,565 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,497.30 smsp__cycles_active.sum cycle 4,575,827 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.43 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,292.33 smsp__inst_executed.max inst 12,709 smsp__inst_executed.min inst 11,780 smsp__inst_executed.sum inst 786,709 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,223.28 smsp__cycles_active.sum cycle 4,622,290 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.08 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,296.56 smsp__inst_executed.max inst 12,676 smsp__inst_executed.min inst 11,885 smsp__inst_executed.sum inst 786,980 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,705.17 smsp__cycles_active.sum cycle 4,525,131 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.72 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,311.58 smsp__inst_executed.max inst 12,710 smsp__inst_executed.min inst 11,851 smsp__inst_executed.sum inst 787,941 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,827.20 smsp__cycles_active.sum cycle 4,596,941 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 185.02 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 159,138.76 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,591.39 smsp__inst_executed.avg inst 132,167.45 smsp__inst_executed.max inst 134,248 smsp__inst_executed.min inst 130,050 smsp__inst_executed.sum inst 8,458,717 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.84 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 smsp__cycles_active.avg cycle 251,922.81 smsp__cycles_active.sum cycle 16,123,060 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.58 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,287.83 smsp__inst_executed.max inst 12,872 smsp__inst_executed.min inst 11,524 smsp__inst_executed.sum inst 786,421 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 73,310.11 smsp__cycles_active.sum cycle 4,691,847 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.10 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.67 smsp__inst_executed.max inst 12,488 smsp__inst_executed.min inst 12,092 smsp__inst_executed.sum inst 786,475 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,120.91 smsp__cycles_active.sum cycle 4,615,738 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.85 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.33 smsp__inst_executed.max inst 12,500 smsp__inst_executed.min inst 11,728 smsp__inst_executed.sum inst 786,453 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,726.75 smsp__cycles_active.sum cycle 4,590,512 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.62 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.86 smsp__inst_executed.max inst 12,522 smsp__inst_executed.min inst 11,924 smsp__inst_executed.sum inst 786,487 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,432.41 smsp__cycles_active.sum cycle 4,571,674 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.18 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.25 smsp__inst_executed.max inst 12,672 smsp__inst_executed.min inst 11,898 smsp__inst_executed.sum inst 786,512 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,698.97 smsp__cycles_active.sum cycle 4,588,734 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.95 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,290.50 smsp__inst_executed.max inst 12,484 smsp__inst_executed.min inst 12,008 smsp__inst_executed.sum inst 786,592 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,943.89 smsp__cycles_active.sum cycle 4,604,409 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.40 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,293.12 smsp__inst_executed.max inst 12,713 smsp__inst_executed.min inst 11,621 smsp__inst_executed.sum inst 786,760 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,649.19 smsp__cycles_active.sum cycle 4,585,548 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.05 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,298.70 smsp__inst_executed.max inst 12,725 smsp__inst_executed.min inst 11,966 smsp__inst_executed.sum inst 787,117 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,102.67 smsp__cycles_active.sum cycle 4,550,571 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.50 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,309.14 smsp__inst_executed.max inst 12,737 smsp__inst_executed.min inst 12,018 smsp__inst_executed.sum inst 787,785 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,724.20 smsp__cycles_active.sum cycle 4,590,349 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 185.76 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 159,061.95 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,590.62 smsp__inst_executed.avg inst 132,213.64 smsp__inst_executed.max inst 134,321 smsp__inst_executed.min inst 130,119 smsp__inst_executed.sum inst 8,461,673 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.80 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 smsp__cycles_active.avg cycle 252,290.12 smsp__cycles_active.sum cycle 16,146,568 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 56.96 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.03 smsp__inst_executed.max inst 12,684 smsp__inst_executed.min inst 12,072 smsp__inst_executed.sum inst 786,434 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 68,948.02 smsp__cycles_active.sum cycle 4,412,673 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.71 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.23 smsp__inst_executed.max inst 12,712 smsp__inst_executed.min inst 11,696 smsp__inst_executed.sum inst 786,447 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,574.20 smsp__cycles_active.sum cycle 4,644,749 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.01 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.03 smsp__inst_executed.max inst 12,668 smsp__inst_executed.min inst 12,068 smsp__inst_executed.sum inst 786,434 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 72,415.67 smsp__cycles_active.sum cycle 4,634,603 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 59.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.58 smsp__inst_executed.max inst 12,676 smsp__inst_executed.min inst 11,938 smsp__inst_executed.sum inst 786,469 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,550.84 smsp__cycles_active.sum cycle 4,579,254 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.34 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,288.86 smsp__inst_executed.max inst 12,476 smsp__inst_executed.min inst 12,078 smsp__inst_executed.sum inst 786,487 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,923.06 smsp__cycles_active.sum cycle 4,539,076 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.14 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,289.56 smsp__inst_executed.max inst 12,503 smsp__inst_executed.min inst 11,928 smsp__inst_executed.sum inst 786,532 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,782.67 smsp__cycles_active.sum cycle 4,530,091 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.79 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,290.62 smsp__inst_executed.max inst 12,556 smsp__inst_executed.min inst 12,068 smsp__inst_executed.sum inst 786,600 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,737.78 smsp__cycles_active.sum cycle 4,527,218 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.75 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,294.31 smsp__inst_executed.max inst 12,661 smsp__inst_executed.min inst 11,903 smsp__inst_executed.sum inst 786,836 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,611.56 smsp__cycles_active.sum cycle 4,583,140 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 57.89 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,300.94 smsp__inst_executed.max inst 12,703 smsp__inst_executed.min inst 11,887 smsp__inst_executed.sum inst 787,260 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 70,911.81 smsp__cycles_active.sum cycle 4,538,356 ---------------------------------------------------------------------- --------------- ------------------------------ void interBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 58.11 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 0 smsp__average_warp_latency_issue_stalled_barrier.ratio 0 smsp__inst_executed.avg inst 12,305.08 smsp__inst_executed.max inst 12,731 smsp__inst_executed.min inst 11,780 smsp__inst_executed.sum inst 787,525 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0 smsp__cycles_active.avg cycle 71,733.38 smsp__cycles_active.sum cycle 4,590,936 ---------------------------------------------------------------------- --------------- ------------------------------ void inBlockStep(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ gpu__time_duration.sum usecond 184.58 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0 l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83 l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0 l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0 smsp__average_warp_latency_issue_stalled_barrier.pct % 161,368.32 smsp__average_warp_latency_issue_stalled_barrier.ratio 1,613.68 smsp__inst_executed.avg inst 131,997.05 smsp__inst_executed.max inst 134,093 smsp__inst_executed.min inst 129,868 smsp__inst_executed.sum inst 8,447,811 smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.12 smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21 smsp__cycles_active.avg cycle 251,939.98 smsp__cycles_active.sum cycle 16,124,159 ---------------------------------------------------------------------- --------------- ------------------------------