PDS/homework_3/reportv2.3

1918 lines
212 KiB
Groff

==PROF== Connected to process 20279 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v2/bitonicCUDA)
==PROF== Profiling "prephase" - 1: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 5 passes
==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 5 passes
==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 5 passes
==PROF== Disconnected from process 20279
[20279] bitonicCUDA@127.0.0.1
void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum msecond 1.20
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 186,368
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 186,368
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 186,368
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 2,981,888
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 111,946.88
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 112,116
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 111,795
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 1,791,150
smsp__average_warp_latency_issue_stalled_barrier.pct % 644,345.26
smsp__average_warp_latency_issue_stalled_barrier.ratio 6,443.45
smsp__inst_executed.avg inst 1,030,868.94
smsp__inst_executed.max inst 1,031,062
smsp__inst_executed.min inst 1,030,675
smsp__inst_executed.sum inst 65,975,612
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.50
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.12
smsp__cycles_active.avg cycle 1,666,829.12
smsp__cycles_active.sum cycle 106,677,064
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.84
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,308.59
smsp__inst_executed.max inst 12,538
smsp__inst_executed.min inst 11,945
smsp__inst_executed.sum inst 787,750
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 73,268.67
smsp__cycles_active.sum cycle 4,689,195
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 231.30
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,642.38
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,963
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,322
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,278
smsp__average_warp_latency_issue_stalled_barrier.pct % 123,392.55
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,233.93
smsp__inst_executed.avg inst 189,292.45
smsp__inst_executed.max inst 192,372
smsp__inst_executed.min inst 186,246
smsp__inst_executed.sum inst 12,114,717
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.81
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
smsp__cycles_active.avg cycle 316,267.31
smsp__cycles_active.sum cycle 20,241,108
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.34
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,298.58
smsp__inst_executed.max inst 12,667
smsp__inst_executed.min inst 11,936
smsp__inst_executed.sum inst 787,109
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,505.30
smsp__cycles_active.sum cycle 4,512,339
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.55
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,309.17
smsp__inst_executed.max inst 12,702
smsp__inst_executed.min inst 11,606
smsp__inst_executed.sum inst 787,787
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,897.17
smsp__cycles_active.sum cycle 4,665,419
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 230.91
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,680
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,009
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,334
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,880
smsp__average_warp_latency_issue_stalled_barrier.pct % 123,674.16
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,236.74
smsp__inst_executed.avg inst 189,294.36
smsp__inst_executed.max inst 192,238
smsp__inst_executed.min inst 186,252
smsp__inst_executed.sum inst 12,114,839
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.85
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
smsp__cycles_active.avg cycle 316,040.81
smsp__cycles_active.sum cycle 20,226,612
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.72
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,293.78
smsp__inst_executed.max inst 12,542
smsp__inst_executed.min inst 11,960
smsp__inst_executed.sum inst 786,802
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,235.28
smsp__cycles_active.sum cycle 4,559,058
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.56
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,298.95
smsp__inst_executed.max inst 12,560
smsp__inst_executed.min inst 12,096
smsp__inst_executed.sum inst 787,133
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,575.53
smsp__cycles_active.sum cycle 4,516,834
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.42
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,308.61
smsp__inst_executed.max inst 12,640
smsp__inst_executed.min inst 12,096
smsp__inst_executed.sum inst 787,751
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,641.39
smsp__cycles_active.sum cycle 4,649,049
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:48, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 231.87
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,674.75
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,354
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,796
smsp__average_warp_latency_issue_stalled_barrier.pct % 123,483.94
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,234.84
smsp__inst_executed.avg inst 189,288.14
smsp__inst_executed.max inst 192,081
smsp__inst_executed.min inst 186,477
smsp__inst_executed.sum inst 12,114,441
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
smsp__cycles_active.avg cycle 315,433.75
smsp__cycles_active.sum cycle 20,187,760
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.14
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,290.34
smsp__inst_executed.max inst 12,724
smsp__inst_executed.min inst 12,076
smsp__inst_executed.sum inst 786,582
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,402.61
smsp__cycles_active.sum cycle 4,505,767
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.56
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,294.27
smsp__inst_executed.max inst 12,717
smsp__inst_executed.min inst 11,988
smsp__inst_executed.sum inst 786,833
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,681.59
smsp__cycles_active.sum cycle 4,523,622
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.05
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,298.42
smsp__inst_executed.max inst 12,663
smsp__inst_executed.min inst 11,882
smsp__inst_executed.sum inst 787,099
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,688.28
smsp__cycles_active.sum cycle 4,524,050
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.49
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,309.03
smsp__inst_executed.max inst 12,686
smsp__inst_executed.min inst 11,852
smsp__inst_executed.sum inst 787,778
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,892.83
smsp__cycles_active.sum cycle 4,665,141
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 231.33
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,677
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,976
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,331
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,832
smsp__average_warp_latency_issue_stalled_barrier.pct % 123,882.24
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,238.82
smsp__inst_executed.avg inst 189,292.19
smsp__inst_executed.max inst 192,340
smsp__inst_executed.min inst 186,215
smsp__inst_executed.sum inst 12,114,700
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.86
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
smsp__cycles_active.avg cycle 316,203.25
smsp__cycles_active.sum cycle 20,237,008
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.08
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.06
smsp__inst_executed.max inst 12,694
smsp__inst_executed.min inst 11,900
smsp__inst_executed.sum inst 786,500
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,488.72
smsp__cycles_active.sum cycle 4,511,278
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.27
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,291.25
smsp__inst_executed.max inst 12,681
smsp__inst_executed.min inst 12,008
smsp__inst_executed.sum inst 786,640
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,605.89
smsp__cycles_active.sum cycle 4,518,777
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.34
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,292.84
smsp__inst_executed.max inst 12,543
smsp__inst_executed.min inst 11,998
smsp__inst_executed.sum inst 786,742
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,795.58
smsp__cycles_active.sum cycle 4,530,917
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.02
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,299.95
smsp__inst_executed.max inst 12,683
smsp__inst_executed.min inst 11,720
smsp__inst_executed.sum inst 787,197
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,136.48
smsp__cycles_active.sum cycle 4,488,735
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.52
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,309.09
smsp__inst_executed.max inst 12,613
smsp__inst_executed.min inst 11,865
smsp__inst_executed.sum inst 787,782
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,887.53
smsp__cycles_active.sum cycle 4,664,802
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 231.30
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,682.56
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,017
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,315
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,921
smsp__average_warp_latency_issue_stalled_barrier.pct % 124,910.64
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,249.11
smsp__inst_executed.avg inst 189,291.42
smsp__inst_executed.max inst 192,361
smsp__inst_executed.min inst 186,192
smsp__inst_executed.sum inst 12,114,651
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
smsp__cycles_active.avg cycle 316,146.12
smsp__cycles_active.sum cycle 20,233,352
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:49, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 60.03
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.48
smsp__inst_executed.max inst 12,672
smsp__inst_executed.min inst 11,868
smsp__inst_executed.sum inst 786,463
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 73,004.22
smsp__cycles_active.sum cycle 4,672,270
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.08
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.81
smsp__inst_executed.max inst 12,480
smsp__inst_executed.min inst 12,068
smsp__inst_executed.sum inst 786,548
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,790.83
smsp__cycles_active.sum cycle 4,530,613
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.46
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,290.59
smsp__inst_executed.max inst 12,701
smsp__inst_executed.min inst 12,068
smsp__inst_executed.sum inst 786,598
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,847.19
smsp__cycles_active.sum cycle 4,534,220
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.27
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,293.72
smsp__inst_executed.max inst 12,656
smsp__inst_executed.min inst 12,038
smsp__inst_executed.sum inst 786,798
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,747
smsp__cycles_active.sum cycle 4,527,808
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.95
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,298.14
smsp__inst_executed.max inst 12,645
smsp__inst_executed.min inst 12,029
smsp__inst_executed.sum inst 787,081
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,059.03
smsp__cycles_active.sum cycle 4,483,778
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.58
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,308.86
smsp__inst_executed.max inst 12,724
smsp__inst_executed.min inst 11,654
smsp__inst_executed.sum inst 787,767
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,813.80
smsp__cycles_active.sum cycle 4,660,083
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 231.90
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,669.44
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,942
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,386
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,711
smsp__average_warp_latency_issue_stalled_barrier.pct % 125,049.38
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,250.49
smsp__inst_executed.avg inst 189,291.03
smsp__inst_executed.max inst 192,313
smsp__inst_executed.min inst 186,310
smsp__inst_executed.sum inst 12,114,626
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.97
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
smsp__cycles_active.avg cycle 316,608.81
smsp__cycles_active.sum cycle 20,262,964
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.78
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,287.95
smsp__inst_executed.max inst 12,856
smsp__inst_executed.min inst 11,904
smsp__inst_executed.sum inst 786,429
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,331.70
smsp__cycles_active.sum cycle 4,565,229
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.94
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.03
smsp__inst_executed.max inst 12,488
smsp__inst_executed.min inst 11,888
smsp__inst_executed.sum inst 786,434
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 73,232.05
smsp__cycles_active.sum cycle 4,686,851
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.27
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.50
smsp__inst_executed.max inst 12,488
smsp__inst_executed.min inst 12,072
smsp__inst_executed.sum inst 786,528
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,846.25
smsp__cycles_active.sum cycle 4,534,160
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.11
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,290.84
smsp__inst_executed.max inst 12,564
smsp__inst_executed.min inst 12,104
smsp__inst_executed.sum inst 786,614
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,881.05
smsp__cycles_active.sum cycle 4,536,387
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:50, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.40
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,293.73
smsp__inst_executed.max inst 12,757
smsp__inst_executed.min inst 11,970
smsp__inst_executed.sum inst 786,799
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,142.94
smsp__cycles_active.sum cycle 4,553,148
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.95
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,298.62
smsp__inst_executed.max inst 12,553
smsp__inst_executed.min inst 12,119
smsp__inst_executed.sum inst 787,112
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,189.52
smsp__cycles_active.sum cycle 4,492,129
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.71
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,309.52
smsp__inst_executed.max inst 12,538
smsp__inst_executed.min inst 12,074
smsp__inst_executed.sum inst 787,809
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,879.23
smsp__cycles_active.sum cycle 4,664,271
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 231.42
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,673
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,007
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,299
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,768
smsp__average_warp_latency_issue_stalled_barrier.pct % 124,557.10
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,245.57
smsp__inst_executed.avg inst 189,303.22
smsp__inst_executed.max inst 192,317
smsp__inst_executed.min inst 186,277
smsp__inst_executed.sum inst 12,115,406
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.96
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
smsp__cycles_active.avg cycle 315,741.19
smsp__cycles_active.sum cycle 20,207,436
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.40
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,287.92
smsp__inst_executed.max inst 12,648
smsp__inst_executed.min inst 11,912
smsp__inst_executed.sum inst 786,427
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,978.88
smsp__cycles_active.sum cycle 4,606,648
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.62
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.30
smsp__inst_executed.max inst 12,848
smsp__inst_executed.min inst 11,904
smsp__inst_executed.sum inst 786,451
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,708.22
smsp__cycles_active.sum cycle 4,589,326
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 60.19
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.11
smsp__inst_executed.max inst 12,876
smsp__inst_executed.min inst 11,688
smsp__inst_executed.sum inst 786,503
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 73,332.14
smsp__cycles_active.sum cycle 4,693,257
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.50
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.89
smsp__inst_executed.max inst 12,507
smsp__inst_executed.min inst 12,092
smsp__inst_executed.sum inst 786,489
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,441.14
smsp__cycles_active.sum cycle 4,508,233
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.30
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,290.69
smsp__inst_executed.max inst 12,682
smsp__inst_executed.min inst 11,866
smsp__inst_executed.sum inst 786,604
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,768.55
smsp__cycles_active.sum cycle 4,529,187
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.62
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,293.67
smsp__inst_executed.max inst 12,534
smsp__inst_executed.min inst 11,732
smsp__inst_executed.sum inst 786,795
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,007.56
smsp__cycles_active.sum cycle 4,544,484
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.05
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,299.09
smsp__inst_executed.max inst 12,656
smsp__inst_executed.min inst 11,912
smsp__inst_executed.sum inst 787,142
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,781.25
smsp__cycles_active.sum cycle 4,530,000
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:51, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.14
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,309.02
smsp__inst_executed.max inst 12,707
smsp__inst_executed.min inst 11,847
smsp__inst_executed.sum inst 787,777
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,505.88
smsp__cycles_active.sum cycle 4,640,376
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 231.14
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,666.06
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,013
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,348
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,657
smsp__average_warp_latency_issue_stalled_barrier.pct % 124,275.15
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,242.75
smsp__inst_executed.avg inst 189,315.86
smsp__inst_executed.max inst 192,371
smsp__inst_executed.min inst 186,294
smsp__inst_executed.sum inst 12,116,215
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.90
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
smsp__cycles_active.avg cycle 316,297.72
smsp__cycles_active.sum cycle 20,243,054
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 60.42
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.20
smsp__inst_executed.max inst 12,484
smsp__inst_executed.min inst 12,092
smsp__inst_executed.sum inst 786,445
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 74,382.31
smsp__cycles_active.sum cycle 4,760,468
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.88
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.11
smsp__inst_executed.max inst 12,484
smsp__inst_executed.min inst 11,716
smsp__inst_executed.sum inst 786,439
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,860.06
smsp__cycles_active.sum cycle 4,599,044
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.04
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.05
smsp__inst_executed.max inst 12,664
smsp__inst_executed.min inst 11,700
smsp__inst_executed.sum inst 786,435
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,882.38
smsp__cycles_active.sum cycle 4,600,472
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 60.13
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.81
smsp__inst_executed.max inst 12,870
smsp__inst_executed.min inst 11,908
smsp__inst_executed.sum inst 786,484
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 73,247.75
smsp__cycles_active.sum cycle 4,687,856
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.89
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.59
smsp__inst_executed.max inst 12,494
smsp__inst_executed.min inst 11,898
smsp__inst_executed.sum inst 786,534
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,630.66
smsp__cycles_active.sum cycle 4,520,362
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.14
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,291.27
smsp__inst_executed.max inst 12,510
smsp__inst_executed.min inst 12,082
smsp__inst_executed.sum inst 786,641
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,548.77
smsp__cycles_active.sum cycle 4,515,121
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.66
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,294.64
smsp__inst_executed.max inst 12,656
smsp__inst_executed.min inst 11,924
smsp__inst_executed.sum inst 786,857
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,171.45
smsp__cycles_active.sum cycle 4,554,973
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 57.86
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,301.05
smsp__inst_executed.max inst 12,725
smsp__inst_executed.min inst 11,871
smsp__inst_executed.sum inst 787,267
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,490.50
smsp__cycles_active.sum cycle 4,511,392
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.17
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,316.05
smsp__inst_executed.max inst 12,594
smsp__inst_executed.min inst 11,865
smsp__inst_executed.sum inst 788,227
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,533.61
smsp__cycles_active.sum cycle 4,642,151
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 231.55
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,681.88
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 20,120
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,332
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 314,910
smsp__average_warp_latency_issue_stalled_barrier.pct % 123,982.60
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.83
smsp__inst_executed.avg inst 189,283.48
smsp__inst_executed.max inst 192,309
smsp__inst_executed.min inst 186,242
smsp__inst_executed.sum inst 12,114,143
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.88
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
smsp__cycles_active.avg cycle 316,209.50
smsp__cycles_active.sum cycle 20,237,408
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:52, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 56.70
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,287.97
smsp__inst_executed.max inst 12,492
smsp__inst_executed.min inst 11,896
smsp__inst_executed.sum inst 786,430
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 68,714
smsp__cycles_active.sum cycle 4,397,696
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 60.64
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.14
smsp__inst_executed.max inst 12,844
smsp__inst_executed.min inst 11,528
smsp__inst_executed.sum inst 786,441
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 74,171.33
smsp__cycles_active.sum cycle 4,746,965
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.72
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.55
smsp__inst_executed.max inst 12,684
smsp__inst_executed.min inst 11,884
smsp__inst_executed.sum inst 786,467
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,696.42
smsp__cycles_active.sum cycle 4,588,571
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.94
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.08
smsp__inst_executed.max inst 12,660
smsp__inst_executed.min inst 11,724
smsp__inst_executed.sum inst 786,437
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,640.89
smsp__cycles_active.sum cycle 4,585,017
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 60.06
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,288.06
smsp__inst_executed.max inst 12,524
smsp__inst_executed.min inst 11,900
smsp__inst_executed.sum inst 786,436
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 73,132.61
smsp__cycles_active.sum cycle 4,680,487
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.08
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,289.61
smsp__inst_executed.max inst 12,634
smsp__inst_executed.min inst 11,884
smsp__inst_executed.sum inst 786,535
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,620.73
smsp__cycles_active.sum cycle 4,519,727
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.24
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,291.28
smsp__inst_executed.max inst 12,704
smsp__inst_executed.min inst 11,892
smsp__inst_executed.sum inst 786,642
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 71,037.52
smsp__cycles_active.sum cycle 4,546,401
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.82
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,293.23
smsp__inst_executed.max inst 12,931
smsp__inst_executed.min inst 11,840
smsp__inst_executed.sum inst 786,767
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,840.56
smsp__cycles_active.sum cycle 4,533,796
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 58.24
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,298.42
smsp__inst_executed.max inst 12,587
smsp__inst_executed.min inst 11,966
smsp__inst_executed.sum inst 787,099
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 70,543.30
smsp__cycles_active.sum cycle 4,514,771
---------------------------------------------------------------------- --------------- ------------------------------
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 59.39
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
smsp__inst_executed.avg inst 12,309.44
smsp__inst_executed.max inst 12,751
smsp__inst_executed.min inst 11,714
smsp__inst_executed.sum inst 787,804
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
smsp__cycles_active.avg cycle 72,313.14
smsp__cycles_active.sum cycle 4,628,041
---------------------------------------------------------------------- --------------- ------------------------------
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:40:53, Context 1, Stream 7
Section: Command line profiler metrics
---------------------------------------------------------------------- --------------- ------------------------------
gpu__time_duration.sum usecond 228.54
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 32,768
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 33,280
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 32,256
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 524,288
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 19,691.25
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 19,988
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 19,367
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 315,060
smsp__average_warp_latency_issue_stalled_barrier.pct % 123,962.42
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,239.62
smsp__inst_executed.avg inst 189,051.73
smsp__inst_executed.max inst 192,054
smsp__inst_executed.min inst 186,060
smsp__inst_executed.sum inst 12,099,311
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 12.83
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.13
smsp__cycles_active.avg cycle 317,268.88
smsp__cycles_active.sum cycle 20,305,208
---------------------------------------------------------------------- --------------- ------------------------------