1918 lines
212 KiB
Groff
1918 lines
212 KiB
Groff
==PROF== Connected to process 19677 (/home/hoo2/Work/AUTH/PDS/homework_3/out/v1/bitonicCUDA)
|
|
==PROF== Profiling "prephase" - 1: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 2: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "inBlockStep" - 3: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 4: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 5: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "inBlockStep" - 6: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 7: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 8: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 9: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "inBlockStep" - 10: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 11: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 12: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 13: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 14: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "inBlockStep" - 15: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 16: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 17: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 18: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 19: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 20: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "inBlockStep" - 21: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 22: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 23: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 24: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 25: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 26: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 27: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "inBlockStep" - 28: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 29: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 30: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 31: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 32: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 33: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 34: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 35: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "inBlockStep" - 36: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 37: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 38: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 39: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 40: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 41: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 42: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 43: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 44: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "inBlockStep" - 45: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 46: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 47: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 48: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 49: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 50: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 51: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 52: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 53: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 54: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "inBlockStep" - 55: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 56: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 57: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 58: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 59: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 60: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 61: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 62: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 63: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 64: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "interBlockStep" - 65: 0%....50%....100% - 5 passes
|
|
==PROF== Profiling "inBlockStep" - 66: 0%....50%....100% - 5 passes
|
|
==PROF== Disconnected from process 19677
|
|
[19677] bitonicCUDA@127.0.0.1
|
|
void prephase<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum msecond 1.06
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.22
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.91
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 1,054,215.71
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 10,542.16
|
|
smsp__inst_executed.avg inst 770,278.16
|
|
smsp__inst_executed.max inst 770,517
|
|
smsp__inst_executed.min inst 770,078
|
|
smsp__inst_executed.sum inst 49,297,802
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 23.29
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.23
|
|
smsp__cycles_active.avg cycle 1,464,763.30
|
|
smsp__cycles_active.sum cycle 93,744,851
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.59
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,434.70
|
|
smsp__inst_executed.max inst 12,627
|
|
smsp__inst_executed.min inst 12,202
|
|
smsp__inst_executed.sum inst 795,821
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,275.05
|
|
smsp__cycles_active.sum cycle 4,625,603
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:57, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 185.54
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 160,167.14
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,601.67
|
|
smsp__inst_executed.avg inst 132,203.41
|
|
smsp__inst_executed.max inst 134,386
|
|
smsp__inst_executed.min inst 130,079
|
|
smsp__inst_executed.sum inst 8,461,018
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.86
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 253,150.12
|
|
smsp__cycles_active.sum cycle 16,201,608
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.34
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,300.38
|
|
smsp__inst_executed.max inst 12,564
|
|
smsp__inst_executed.min inst 12,036
|
|
smsp__inst_executed.sum inst 787,224
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,561.48
|
|
smsp__cycles_active.sum cycle 4,579,935
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.62
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.33
|
|
smsp__inst_executed.max inst 12,555
|
|
smsp__inst_executed.min inst 12,038
|
|
smsp__inst_executed.sum inst 787,733
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,921.98
|
|
smsp__cycles_active.sum cycle 4,667,007
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 183.49
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 160,010.27
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,600.10
|
|
smsp__inst_executed.avg inst 132,209.20
|
|
smsp__inst_executed.max inst 134,250
|
|
smsp__inst_executed.min inst 130,144
|
|
smsp__inst_executed.sum inst 8,461,389
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.92
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,547.31
|
|
smsp__cycles_active.sum cycle 16,163,028
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.59
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,294.86
|
|
smsp__inst_executed.max inst 12,694
|
|
smsp__inst_executed.min inst 12,054
|
|
smsp__inst_executed.sum inst 786,871
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,596.58
|
|
smsp__cycles_active.sum cycle 4,582,181
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.98
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,299
|
|
smsp__inst_executed.max inst 12,638
|
|
smsp__inst_executed.min inst 11,881
|
|
smsp__inst_executed.sum inst 787,136
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,894.47
|
|
smsp__cycles_active.sum cycle 4,601,246
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.53
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.91
|
|
smsp__inst_executed.max inst 12,636
|
|
smsp__inst_executed.min inst 11,910
|
|
smsp__inst_executed.sum inst 787,834
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,313.89
|
|
smsp__cycles_active.sum cycle 4,564,089
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 184.90
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 158,555.84
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,585.56
|
|
smsp__inst_executed.avg inst 132,207.33
|
|
smsp__inst_executed.max inst 134,301
|
|
smsp__inst_executed.min inst 130,116
|
|
smsp__inst_executed.sum inst 8,461,269
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.73
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,473.81
|
|
smsp__cycles_active.sum cycle 16,158,324
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.98
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,291.09
|
|
smsp__inst_executed.max inst 12,593
|
|
smsp__inst_executed.min inst 11,856
|
|
smsp__inst_executed.sum inst 786,630
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,576.33
|
|
smsp__cycles_active.sum cycle 4,516,885
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.18
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.56
|
|
smsp__inst_executed.max inst 12,684
|
|
smsp__inst_executed.min inst 11,908
|
|
smsp__inst_executed.sum inst 786,788
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,507.47
|
|
smsp__cycles_active.sum cycle 4,576,478
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.73
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.97
|
|
smsp__inst_executed.max inst 12,689
|
|
smsp__inst_executed.min inst 11,912
|
|
smsp__inst_executed.sum inst 787,134
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,018.25
|
|
smsp__cycles_active.sum cycle 4,545,168
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:58, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.72
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.84
|
|
smsp__inst_executed.max inst 12,686
|
|
smsp__inst_executed.min inst 12,079
|
|
smsp__inst_executed.sum inst 787,766
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,024.20
|
|
smsp__cycles_active.sum cycle 4,609,549
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 185.95
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 157,276.34
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,572.76
|
|
smsp__inst_executed.avg inst 132,205.28
|
|
smsp__inst_executed.max inst 134,358
|
|
smsp__inst_executed.min inst 130,024
|
|
smsp__inst_executed.sum inst 8,461,138
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.55
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,593.19
|
|
smsp__cycles_active.sum cycle 16,165,964
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.30
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.47
|
|
smsp__inst_executed.max inst 12,560
|
|
smsp__inst_executed.min inst 12,088
|
|
smsp__inst_executed.sum inst 786,526
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,795.55
|
|
smsp__cycles_active.sum cycle 4,530,915
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.76
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.78
|
|
smsp__inst_executed.max inst 12,745
|
|
smsp__inst_executed.min inst 11,874
|
|
smsp__inst_executed.sum inst 786,610
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,441.03
|
|
smsp__cycles_active.sum cycle 4,508,226
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.56
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.70
|
|
smsp__inst_executed.max inst 12,566
|
|
smsp__inst_executed.min inst 12,056
|
|
smsp__inst_executed.sum inst 786,797
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,597.62
|
|
smsp__cycles_active.sum cycle 4,582,248
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.95
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,299.03
|
|
smsp__inst_executed.max inst 12,648
|
|
smsp__inst_executed.min inst 11,910
|
|
smsp__inst_executed.sum inst 787,138
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,625.34
|
|
smsp__cycles_active.sum cycle 4,520,022
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.53
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,308.55
|
|
smsp__inst_executed.max inst 12,690
|
|
smsp__inst_executed.min inst 12,090
|
|
smsp__inst_executed.sum inst 787,747
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,911.34
|
|
smsp__cycles_active.sum cycle 4,602,326
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 184.93
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 159,654.44
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,596.54
|
|
smsp__inst_executed.avg inst 132,204.97
|
|
smsp__inst_executed.max inst 134,424
|
|
smsp__inst_executed.min inst 129,985
|
|
smsp__inst_executed.sum inst 8,461,118
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.86
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,486.12
|
|
smsp__cycles_active.sum cycle 16,159,112
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.24
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.47
|
|
smsp__inst_executed.max inst 12,834
|
|
smsp__inst_executed.min inst 11,932
|
|
smsp__inst_executed.sum inst 786,526
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,577.20
|
|
smsp__cycles_active.sum cycle 4,516,941
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.24
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.45
|
|
smsp__inst_executed.max inst 12,702
|
|
smsp__inst_executed.min inst 11,912
|
|
smsp__inst_executed.sum inst 786,525
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,559.42
|
|
smsp__cycles_active.sum cycle 4,579,803
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.02
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,291.50
|
|
smsp__inst_executed.max inst 12,638
|
|
smsp__inst_executed.min inst 12,088
|
|
smsp__inst_executed.sum inst 786,656
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,387.86
|
|
smsp__cycles_active.sum cycle 4,568,823
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:35:59, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.56
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.50
|
|
smsp__inst_executed.max inst 12,785
|
|
smsp__inst_executed.min inst 11,630
|
|
smsp__inst_executed.sum inst 786,784
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,530.47
|
|
smsp__cycles_active.sum cycle 4,577,950
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.98
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.41
|
|
smsp__inst_executed.max inst 12,716
|
|
smsp__inst_executed.min inst 11,883
|
|
smsp__inst_executed.sum inst 787,098
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,755.27
|
|
smsp__cycles_active.sum cycle 4,528,337
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.53
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,310.11
|
|
smsp__inst_executed.max inst 12,496
|
|
smsp__inst_executed.min inst 11,901
|
|
smsp__inst_executed.sum inst 787,847
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,085.02
|
|
smsp__cycles_active.sum cycle 4,613,441
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 185.02
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 158,201.12
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,582.01
|
|
smsp__inst_executed.avg inst 132,195.78
|
|
smsp__inst_executed.max inst 134,319
|
|
smsp__inst_executed.min inst 130,101
|
|
smsp__inst_executed.sum inst 8,460,530
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.71
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 251,923.50
|
|
smsp__cycles_active.sum cycle 16,123,104
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.66
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.02
|
|
smsp__inst_executed.max inst 12,668
|
|
smsp__inst_executed.min inst 11,912
|
|
smsp__inst_executed.sum inst 786,497
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,635.66
|
|
smsp__cycles_active.sum cycle 4,648,682
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.37
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.31
|
|
smsp__inst_executed.max inst 12,508
|
|
smsp__inst_executed.min inst 11,924
|
|
smsp__inst_executed.sum inst 786,452
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,858
|
|
smsp__cycles_active.sum cycle 4,534,912
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.30
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.89
|
|
smsp__inst_executed.max inst 12,659
|
|
smsp__inst_executed.min inst 11,942
|
|
smsp__inst_executed.sum inst 786,489
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,747.28
|
|
smsp__cycles_active.sum cycle 4,527,826
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.98
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,291.03
|
|
smsp__inst_executed.max inst 12,683
|
|
smsp__inst_executed.min inst 11,982
|
|
smsp__inst_executed.sum inst 786,626
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,881.03
|
|
smsp__cycles_active.sum cycle 4,536,386
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.43
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.31
|
|
smsp__inst_executed.max inst 12,752
|
|
smsp__inst_executed.min inst 11,612
|
|
smsp__inst_executed.sum inst 786,772
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,166.55
|
|
smsp__cycles_active.sum cycle 4,554,659
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.05
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.28
|
|
smsp__inst_executed.max inst 12,667
|
|
smsp__inst_executed.min inst 11,870
|
|
smsp__inst_executed.sum inst 787,090
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,893.25
|
|
smsp__cycles_active.sum cycle 4,537,168
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.43
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,311.05
|
|
smsp__inst_executed.max inst 12,751
|
|
smsp__inst_executed.min inst 12,075
|
|
smsp__inst_executed.sum inst 787,907
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,743.69
|
|
smsp__cycles_active.sum cycle 4,591,596
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 185.66
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 161,553.58
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,615.54
|
|
smsp__inst_executed.avg inst 132,193.28
|
|
smsp__inst_executed.max inst 134,294
|
|
smsp__inst_executed.min inst 130,087
|
|
smsp__inst_executed.sum inst 8,460,370
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.09
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,649.62
|
|
smsp__cycles_active.sum cycle 16,169,576
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:00, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.17
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.12
|
|
smsp__inst_executed.max inst 12,484
|
|
smsp__inst_executed.min inst 12,084
|
|
smsp__inst_executed.sum inst 786,504
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,022.83
|
|
smsp__cycles_active.sum cycle 4,609,461
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.82
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.45
|
|
smsp__inst_executed.max inst 12,672
|
|
smsp__inst_executed.min inst 11,900
|
|
smsp__inst_executed.sum inst 786,461
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,587.06
|
|
smsp__cycles_active.sum cycle 4,581,572
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.53
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.42
|
|
smsp__inst_executed.max inst 12,632
|
|
smsp__inst_executed.min inst 12,096
|
|
smsp__inst_executed.sum inst 786,459
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,582.89
|
|
smsp__cycles_active.sum cycle 4,517,305
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.02
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.80
|
|
smsp__inst_executed.max inst 12,500
|
|
smsp__inst_executed.min inst 11,924
|
|
smsp__inst_executed.sum inst 786,483
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,332.38
|
|
smsp__cycles_active.sum cycle 4,565,272
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.92
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.08
|
|
smsp__inst_executed.max inst 12,636
|
|
smsp__inst_executed.min inst 11,868
|
|
smsp__inst_executed.sum inst 786,565
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,497.30
|
|
smsp__cycles_active.sum cycle 4,575,827
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.43
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,292.33
|
|
smsp__inst_executed.max inst 12,709
|
|
smsp__inst_executed.min inst 11,780
|
|
smsp__inst_executed.sum inst 786,709
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,223.28
|
|
smsp__cycles_active.sum cycle 4,622,290
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.08
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,296.56
|
|
smsp__inst_executed.max inst 12,676
|
|
smsp__inst_executed.min inst 11,885
|
|
smsp__inst_executed.sum inst 786,980
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,705.17
|
|
smsp__cycles_active.sum cycle 4,525,131
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.72
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,311.58
|
|
smsp__inst_executed.max inst 12,710
|
|
smsp__inst_executed.min inst 11,851
|
|
smsp__inst_executed.sum inst 787,941
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,827.20
|
|
smsp__cycles_active.sum cycle 4,596,941
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 185.02
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 159,138.76
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,591.39
|
|
smsp__inst_executed.avg inst 132,167.45
|
|
smsp__inst_executed.max inst 134,248
|
|
smsp__inst_executed.min inst 130,050
|
|
smsp__inst_executed.sum inst 8,458,717
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.84
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 251,922.81
|
|
smsp__cycles_active.sum cycle 16,123,060
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.58
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,287.83
|
|
smsp__inst_executed.max inst 12,872
|
|
smsp__inst_executed.min inst 11,524
|
|
smsp__inst_executed.sum inst 786,421
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 73,310.11
|
|
smsp__cycles_active.sum cycle 4,691,847
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.10
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.67
|
|
smsp__inst_executed.max inst 12,488
|
|
smsp__inst_executed.min inst 12,092
|
|
smsp__inst_executed.sum inst 786,475
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,120.91
|
|
smsp__cycles_active.sum cycle 4,615,738
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:01, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.85
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.33
|
|
smsp__inst_executed.max inst 12,500
|
|
smsp__inst_executed.min inst 11,728
|
|
smsp__inst_executed.sum inst 786,453
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,726.75
|
|
smsp__cycles_active.sum cycle 4,590,512
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.62
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.86
|
|
smsp__inst_executed.max inst 12,522
|
|
smsp__inst_executed.min inst 11,924
|
|
smsp__inst_executed.sum inst 786,487
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,432.41
|
|
smsp__cycles_active.sum cycle 4,571,674
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.18
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.25
|
|
smsp__inst_executed.max inst 12,672
|
|
smsp__inst_executed.min inst 11,898
|
|
smsp__inst_executed.sum inst 786,512
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,698.97
|
|
smsp__cycles_active.sum cycle 4,588,734
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.95
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.50
|
|
smsp__inst_executed.max inst 12,484
|
|
smsp__inst_executed.min inst 12,008
|
|
smsp__inst_executed.sum inst 786,592
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,943.89
|
|
smsp__cycles_active.sum cycle 4,604,409
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.40
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,293.12
|
|
smsp__inst_executed.max inst 12,713
|
|
smsp__inst_executed.min inst 11,621
|
|
smsp__inst_executed.sum inst 786,760
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,649.19
|
|
smsp__cycles_active.sum cycle 4,585,548
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.05
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,298.70
|
|
smsp__inst_executed.max inst 12,725
|
|
smsp__inst_executed.min inst 11,966
|
|
smsp__inst_executed.sum inst 787,117
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,102.67
|
|
smsp__cycles_active.sum cycle 4,550,571
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.50
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,309.14
|
|
smsp__inst_executed.max inst 12,737
|
|
smsp__inst_executed.min inst 12,018
|
|
smsp__inst_executed.sum inst 787,785
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,724.20
|
|
smsp__cycles_active.sum cycle 4,590,349
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 185.76
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 159,061.95
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,590.62
|
|
smsp__inst_executed.avg inst 132,213.64
|
|
smsp__inst_executed.max inst 134,321
|
|
smsp__inst_executed.min inst 130,119
|
|
smsp__inst_executed.sum inst 8,461,673
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 20.80
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 252,290.12
|
|
smsp__cycles_active.sum cycle 16,146,568
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 56.96
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.03
|
|
smsp__inst_executed.max inst 12,684
|
|
smsp__inst_executed.min inst 12,072
|
|
smsp__inst_executed.sum inst 786,434
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 68,948.02
|
|
smsp__cycles_active.sum cycle 4,412,673
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.71
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.23
|
|
smsp__inst_executed.max inst 12,712
|
|
smsp__inst_executed.min inst 11,696
|
|
smsp__inst_executed.sum inst 786,447
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,574.20
|
|
smsp__cycles_active.sum cycle 4,644,749
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.01
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.03
|
|
smsp__inst_executed.max inst 12,668
|
|
smsp__inst_executed.min inst 12,068
|
|
smsp__inst_executed.sum inst 786,434
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 72,415.67
|
|
smsp__cycles_active.sum cycle 4,634,603
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:02, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 59.14
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.58
|
|
smsp__inst_executed.max inst 12,676
|
|
smsp__inst_executed.min inst 11,938
|
|
smsp__inst_executed.sum inst 786,469
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,550.84
|
|
smsp__cycles_active.sum cycle 4,579,254
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.34
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,288.86
|
|
smsp__inst_executed.max inst 12,476
|
|
smsp__inst_executed.min inst 12,078
|
|
smsp__inst_executed.sum inst 786,487
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,923.06
|
|
smsp__cycles_active.sum cycle 4,539,076
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.14
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,289.56
|
|
smsp__inst_executed.max inst 12,503
|
|
smsp__inst_executed.min inst 11,928
|
|
smsp__inst_executed.sum inst 786,532
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,782.67
|
|
smsp__cycles_active.sum cycle 4,530,091
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.79
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,290.62
|
|
smsp__inst_executed.max inst 12,556
|
|
smsp__inst_executed.min inst 12,068
|
|
smsp__inst_executed.sum inst 786,600
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,737.78
|
|
smsp__cycles_active.sum cycle 4,527,218
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.75
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,294.31
|
|
smsp__inst_executed.max inst 12,661
|
|
smsp__inst_executed.min inst 11,903
|
|
smsp__inst_executed.sum inst 786,836
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,611.56
|
|
smsp__cycles_active.sum cycle 4,583,140
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 57.89
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,300.94
|
|
smsp__inst_executed.max inst 12,703
|
|
smsp__inst_executed.min inst 11,887
|
|
smsp__inst_executed.sum inst 787,260
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 70,911.81
|
|
smsp__cycles_active.sum cycle 4,538,356
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void interBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 58.11
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 12.50
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 4
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 0
|
|
smsp__inst_executed.avg inst 12,305.08
|
|
smsp__inst_executed.max inst 12,731
|
|
smsp__inst_executed.min inst 11,780
|
|
smsp__inst_executed.sum inst 787,525
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 0
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0
|
|
smsp__cycles_active.avg cycle 71,733.38
|
|
smsp__cycles_active.sum cycle 4,590,936
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|
|
void inBlockStep<unsigned int>(T1 *, unsigned long, unsigned long, unsigned long), 2025-Feb-16 13:36:03, Context 1, Stream 7
|
|
Section: Command line profiler metrics
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
gpu__time_duration.sum usecond 184.58
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 0
|
|
l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum (!) n/a
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.pct % 10.83
|
|
l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio sector/request 3.47
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_read.sum 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.avg 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.max 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.min 0
|
|
l1tex__data_pipe_lsu_wavefronts_mem_shared_cmd_write.sum 0
|
|
smsp__average_warp_latency_issue_stalled_barrier.pct % 161,368.32
|
|
smsp__average_warp_latency_issue_stalled_barrier.ratio 1,613.68
|
|
smsp__inst_executed.avg inst 131,997.05
|
|
smsp__inst_executed.max inst 134,093
|
|
smsp__inst_executed.min inst 129,868
|
|
smsp__inst_executed.sum inst 8,447,811
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.pct % 21.12
|
|
smsp__warp_issue_stalled_barrier_per_warp_active.ratio 0.21
|
|
smsp__cycles_active.avg cycle 251,939.98
|
|
smsp__cycles_active.sum cycle 16,124,159
|
|
---------------------------------------------------------------------- --------------- ------------------------------
|
|
|