Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4d15bd36e3 | |||
| a5ae5c6bdb |
68
homework_3/analyse/RC3-a5ae5c6/Pending-PIDs-ampere
Normal file
68
homework_3/analyse/RC3-a5ae5c6/Pending-PIDs-ampere
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
[cchoutou@aristotle4 homework_3]$ ./hpc/submitJobs.sh
|
||||||
|
Submitting: hpc/BitncV0Q20.sh
|
||||||
|
Submitted batch job 1914925
|
||||||
|
Submitting: hpc/BitncV0Q21.sh
|
||||||
|
Submitted batch job 1914926
|
||||||
|
Submitting: hpc/BitncV0Q22.sh
|
||||||
|
Submitted batch job 1914927
|
||||||
|
Submitting: hpc/BitncV0Q23.sh
|
||||||
|
Submitted batch job 1914928
|
||||||
|
Submitting: hpc/BitncV0Q24.sh
|
||||||
|
Submitted batch job 1914929
|
||||||
|
Submitting: hpc/BitncV0Q25.sh
|
||||||
|
Submitted batch job 1914930
|
||||||
|
Submitting: hpc/BitncV0Q26.sh
|
||||||
|
Submitted batch job 1914931
|
||||||
|
Submitting: hpc/BitncV0Q27.sh
|
||||||
|
Submitted batch job 1914932
|
||||||
|
Submitting: hpc/BitncV0Q28.sh
|
||||||
|
Submitted batch job 1914933
|
||||||
|
Submitting: hpc/BitncV0Q29.sh
|
||||||
|
Submitted batch job 1914934
|
||||||
|
Submitting: hpc/BitncV0Q30.sh
|
||||||
|
Submitted batch job 1914935
|
||||||
|
Submitting: hpc/BitncV1Q20.sh
|
||||||
|
Submitted batch job 1914936
|
||||||
|
Submitting: hpc/BitncV1Q21.sh
|
||||||
|
Submitted batch job 1914937
|
||||||
|
Submitting: hpc/BitncV1Q22.sh
|
||||||
|
Submitted batch job 1914938
|
||||||
|
Submitting: hpc/BitncV1Q23.sh
|
||||||
|
Submitted batch job 1914939
|
||||||
|
Submitting: hpc/BitncV1Q24.sh
|
||||||
|
Submitted batch job 1914940
|
||||||
|
Submitting: hpc/BitncV1Q25.sh
|
||||||
|
Submitted batch job 1914941
|
||||||
|
Submitting: hpc/BitncV1Q26.sh
|
||||||
|
Submitted batch job 1914942
|
||||||
|
Submitting: hpc/BitncV1Q27.sh
|
||||||
|
Submitted batch job 1914943
|
||||||
|
Submitting: hpc/BitncV1Q28.sh
|
||||||
|
Submitted batch job 1914944
|
||||||
|
Submitting: hpc/BitncV1Q29.sh
|
||||||
|
Submitted batch job 1914945
|
||||||
|
Submitting: hpc/BitncV1Q30.sh
|
||||||
|
Submitted batch job 1914946
|
||||||
|
Submitting: hpc/BitncV2Q20.sh
|
||||||
|
Submitted batch job 1914947
|
||||||
|
Submitting: hpc/BitncV2Q21.sh
|
||||||
|
Submitted batch job 1914948
|
||||||
|
Submitting: hpc/BitncV2Q22.sh
|
||||||
|
Submitted batch job 1914949
|
||||||
|
Submitting: hpc/BitncV2Q23.sh
|
||||||
|
Submitted batch job 1914950
|
||||||
|
Submitting: hpc/BitncV2Q24.sh
|
||||||
|
Submitted batch job 1914951
|
||||||
|
Submitting: hpc/BitncV2Q25.sh
|
||||||
|
Submitted batch job 1914952
|
||||||
|
Submitting: hpc/BitncV2Q26.sh
|
||||||
|
Submitted batch job 1914953
|
||||||
|
Submitting: hpc/BitncV2Q27.sh
|
||||||
|
Submitted batch job 1914954
|
||||||
|
Submitting: hpc/BitncV2Q28.sh
|
||||||
|
Submitted batch job 1914955
|
||||||
|
Submitting: hpc/BitncV2Q29.sh
|
||||||
|
Submitted batch job 1914956
|
||||||
|
Submitting: hpc/BitncV2Q30.sh
|
||||||
|
Submitted batch job 1914957
|
||||||
|
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914925.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914925.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 1048576 (Q=20)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 8694 [usec]
|
||||||
|
[Timing] Mem-xch : 5435 [usec]
|
||||||
|
[Timing] Sorting : 3159 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914926.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914926.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 2097152 (Q=21)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 16 [msec]
|
||||||
|
[Timing] Mem-xch : 12 [msec]
|
||||||
|
[Timing] Sorting : 4093 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914927.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914927.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 4194304 (Q=22)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 53 [msec]
|
||||||
|
[Timing] Mem-xch : 46 [msec]
|
||||||
|
[Timing] Sorting : 6080 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914928.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914928.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 8388608 (Q=23)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 41 [msec]
|
||||||
|
[Timing] Mem-xch : 27 [msec]
|
||||||
|
[Timing] Sorting : 14 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914929.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914929.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 16777216 (Q=24)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 102 [msec]
|
||||||
|
[Timing] Mem-xch : 72 [msec]
|
||||||
|
[Timing] Sorting : 29 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914930.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914930.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 33554432 (Q=25)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 95 [msec]
|
||||||
|
[Timing] Mem-xch : 36 [msec]
|
||||||
|
[Timing] Sorting : 59 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914931.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914931.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 67108864 (Q=26)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 204 [msec]
|
||||||
|
[Timing] Mem-xch : 84 [msec]
|
||||||
|
[Timing] Sorting : 120 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914932.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914932.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 134217728 (Q=27)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 427 [msec]
|
||||||
|
[Timing] Mem-xch : 172 [msec]
|
||||||
|
[Timing] Sorting : 255 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914933.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914933.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 268435456 (Q=28)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 953 [msec]
|
||||||
|
[Timing] Mem-xch : 396 [msec]
|
||||||
|
[Timing] Sorting : 552 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914934.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914934.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 536870912 (Q=29)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 1859 [msec]
|
||||||
|
[Timing] Mem-xch : 694 [msec]
|
||||||
|
[Timing] Sorting : 1164 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914935.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914935.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V0
|
||||||
|
[Log]: Array size: 1073741824 (Q=30)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 3769 [msec]
|
||||||
|
[Timing] Mem-xch : 1316 [msec]
|
||||||
|
[Timing] Sorting : 2452 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914936.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914936.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 1048576 (Q=20)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 3119 [usec]
|
||||||
|
[Timing] Mem-xch : 1708 [usec]
|
||||||
|
[Timing] Sorting : 1412 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914937.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914937.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 2097152 (Q=21)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 5227 [usec]
|
||||||
|
[Timing] Mem-xch : 3052 [usec]
|
||||||
|
[Timing] Sorting : 2200 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914938.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914938.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 4194304 (Q=22)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 9131 [usec]
|
||||||
|
[Timing] Mem-xch : 5421 [usec]
|
||||||
|
[Timing] Sorting : 3712 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914939.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914939.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 8388608 (Q=23)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 21 [msec]
|
||||||
|
[Timing] Mem-xch : 12 [msec]
|
||||||
|
[Timing] Sorting : 8485 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914940.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914940.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 16777216 (Q=24)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 34 [msec]
|
||||||
|
[Timing] Mem-xch : 17 [msec]
|
||||||
|
[Timing] Sorting : 17 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914941.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914941.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 33554432 (Q=25)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 70 [msec]
|
||||||
|
[Timing] Mem-xch : 33 [msec]
|
||||||
|
[Timing] Sorting : 37 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914942.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914942.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 67108864 (Q=26)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 144 [msec]
|
||||||
|
[Timing] Mem-xch : 66 [msec]
|
||||||
|
[Timing] Sorting : 79 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914943.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914943.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 134217728 (Q=27)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 329 [msec]
|
||||||
|
[Timing] Mem-xch : 175 [msec]
|
||||||
|
[Timing] Sorting : 153 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914944.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914944.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 268435456 (Q=28)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 763 [msec]
|
||||||
|
[Timing] Mem-xch : 408 [msec]
|
||||||
|
[Timing] Sorting : 354 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914945.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914945.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 536870912 (Q=29)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 1425 [msec]
|
||||||
|
[Timing] Mem-xch : 685 [msec]
|
||||||
|
[Timing] Sorting : 739 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914946.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914946.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V1
|
||||||
|
[Log]: Array size: 1073741824 (Q=30)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 3201 [msec]
|
||||||
|
[Timing] Mem-xch : 1626 [msec]
|
||||||
|
[Timing] Sorting : 1555 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914947.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914947.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 1048576 (Q=20)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 3192 [usec]
|
||||||
|
[Timing] Mem-xch : 1553 [usec]
|
||||||
|
[Timing] Sorting : 1638 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914948.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914948.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 2097152 (Q=21)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 5811 [usec]
|
||||||
|
[Timing] Mem-xch : 3228 [usec]
|
||||||
|
[Timing] Sorting : 2629 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914949.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914949.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 4194304 (Q=22)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 12 [msec]
|
||||||
|
[Timing] Mem-xch : 7923 [usec]
|
||||||
|
[Timing] Sorting : 4624 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914950.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914950.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 8388608 (Q=23)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 18 [msec]
|
||||||
|
[Timing] Mem-xch : 8502 [usec]
|
||||||
|
[Timing] Sorting : 9900 [usec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914951.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914951.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 16777216 (Q=24)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 38 [msec]
|
||||||
|
[Timing] Mem-xch : 17 [msec]
|
||||||
|
[Timing] Sorting : 21 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914952.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914952.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 33554432 (Q=25)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 73 [msec]
|
||||||
|
[Timing] Mem-xch : 28 [msec]
|
||||||
|
[Timing] Sorting : 44 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914953.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914953.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 67108864 (Q=26)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 163 [msec]
|
||||||
|
[Timing] Mem-xch : 80 [msec]
|
||||||
|
[Timing] Sorting : 82 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914954.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914954.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 134217728 (Q=27)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 479 [msec]
|
||||||
|
[Timing] Mem-xch : 301 [msec]
|
||||||
|
[Timing] Sorting : 178 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914955.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914955.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 268435456 (Q=28)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 736 [msec]
|
||||||
|
[Timing] Mem-xch : 328 [msec]
|
||||||
|
[Timing] Sorting : 410 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914956.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914956.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 536870912 (Q=29)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 1595 [msec]
|
||||||
|
[Timing] Mem-xch : 738 [msec]
|
||||||
|
[Timing] Sorting : 857 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914957.out
Normal file
23
homework_3/analyse/RC3-a5ae5c6/ampere/slurm-1914957.out
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[Log]: Code version: V2
|
||||||
|
[Log]: Array size: 1073741824 (Q=30)
|
||||||
|
[Log]: Repeated sorts: 7
|
||||||
|
[Log]: GPU: NVIDIA A100-SXM4-40GB
|
||||||
|
[Log]: Block size: 512
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Log]: Initialize array ... Done.
|
||||||
|
[Log]: Start sorting ... Done.
|
||||||
|
[Timing] Total : 3237 [msec]
|
||||||
|
[Timing] Mem-xch : 1443 [msec]
|
||||||
|
[Timing] Sorting : 1793 [msec]
|
||||||
|
[Validation] Results validation ...[32m [PASSED] [0m
|
||||||
2049
homework_3/analyse/RC3-a5ae5c6/profReportv2_A-regVals.txt
Normal file
2049
homework_3/analyse/RC3-a5ae5c6/profReportv2_A-regVals.txt
Normal file
File diff suppressed because it is too large
Load Diff
2049
homework_3/analyse/RC3-a5ae5c6/profReportv2_B-CodeRefactor.txt
Normal file
2049
homework_3/analyse/RC3-a5ae5c6/profReportv2_B-CodeRefactor.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -112,8 +112,8 @@ template <typename ValueT>
|
|||||||
__device__ void exchange(ValueT* data, threadId_t tid, threadId_t partner, bool keepSmall) {
|
__device__ void exchange(ValueT* data, threadId_t tid, threadId_t partner, bool keepSmall) {
|
||||||
if (( keepSmall && (data[tid] > data[partner])) ||
|
if (( keepSmall && (data[tid] > data[partner])) ||
|
||||||
(!keepSmall && (data[tid] < data[partner])) ) {
|
(!keepSmall && (data[tid] < data[partner])) ) {
|
||||||
ValueT temp = data[tid];
|
ValueT temp = data[tid];
|
||||||
data[tid] = data[partner];
|
data[tid] = data[partner];
|
||||||
data[partner] = temp;
|
data[partner] = temp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -378,6 +378,7 @@ __global__ void interBlockStep(ValueT* data, size_t n, size_t step, size_t stage
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* This is unrolled part of the bitonic double loop.
|
* This is unrolled part of the bitonic double loop.
|
||||||
*
|
*
|
||||||
@ -399,42 +400,44 @@ __global__ void inBlockStep(ValueT* data, size_t n, size_t innerSteps, size_t st
|
|||||||
* Here we skip blocks every time (one for SizeToThreadsRatio = 2)
|
* Here we skip blocks every time (one for SizeToThreadsRatio = 2)
|
||||||
* And we cache the neighbor block address indexes in local (shared) memory
|
* And we cache the neighbor block address indexes in local (shared) memory
|
||||||
*/
|
*/
|
||||||
threadId_t gIdx0 = threadIdx.x + SizeToThreadsRatio * blockIdx.x * blockDim.x;
|
threadId_t gIdx = threadIdx.x + SizeToThreadsRatio * blockIdx.x * blockDim.x;
|
||||||
threadId_t lIdx0 = toLocal(gIdx0, blockDim.x);
|
threadId_t lIdx = toLocal(gIdx, blockDim.x);
|
||||||
|
|
||||||
if (gIdx0 + blockDim.x >= n) // Boundary check
|
if (gIdx + blockDim.x >= n) // Boundary check
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Fetch to local memory the entire effective block size (2 positions for each thread)
|
// Fetch to local memory the entire effective block size (2 positions for each thread)
|
||||||
shared_data[lIdx0] = data[gIdx0];
|
shared_data[lIdx] = data[gIdx];
|
||||||
shared_data[lIdx0 + blockDim.x] = data[gIdx0 + blockDim.x];
|
shared_data[lIdx + blockDim.x] = data[gIdx + blockDim.x];
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
for (size_t step = innerSteps + 1; step > 0; ) {
|
for (size_t step = innerSteps + 1; step > 0; ) {
|
||||||
--step;
|
--step;
|
||||||
|
|
||||||
// Init thread global and local indices
|
|
||||||
threadId_t gIdx = gIdx0;
|
|
||||||
threadId_t lIdx = lIdx0;
|
|
||||||
// Find partner and keep-small configuration based on the global data positions
|
// Find partner and keep-small configuration based on the global data positions
|
||||||
threadId_t pIdx = partner(gIdx, step);
|
threadId_t pIdx = partner(gIdx, step);
|
||||||
if (gIdx > pIdx) {
|
if (gIdx > pIdx) {
|
||||||
// Shift inside effective block
|
// Work on the right side
|
||||||
gIdx += blockDim.x; // global
|
bool keep = keepSmall(gIdx + blockDim.x, pIdx + blockDim.x, stage);
|
||||||
pIdx += blockDim.x;
|
|
||||||
lIdx += blockDim.x; // local
|
|
||||||
}
|
|
||||||
bool keep = keepSmall(gIdx, pIdx, stage);
|
|
||||||
|
|
||||||
// Exchange data on local(shared) copy
|
// Exchange data on local(shared) copy
|
||||||
threadId_t lpIdx = toLocal(pIdx, blockDim.x);
|
threadId_t lpIdx = toLocal(pIdx + blockDim.x, blockDim.x);
|
||||||
exchange(shared_data, lIdx, lpIdx, keep);
|
exchange(shared_data, lIdx + blockDim.x, lpIdx, keep);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Work on the left side
|
||||||
|
bool keep = keepSmall(gIdx, pIdx, stage);
|
||||||
|
|
||||||
|
// Exchange data on local(shared) copy
|
||||||
|
threadId_t lpIdx = toLocal(pIdx, blockDim.x);
|
||||||
|
exchange(shared_data, lIdx, lpIdx, keep);
|
||||||
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write back to global memory (no sync here, there will be sync from host)
|
// Write back to global memory (no sync here, there will be sync from host)
|
||||||
data[gIdx0] = shared_data[lIdx0];
|
data[gIdx] = shared_data[lIdx];
|
||||||
data[gIdx0 + blockDim.x] = shared_data[lIdx0 + blockDim.x];
|
data[gIdx + blockDim.x] = shared_data[lIdx + blockDim.x];
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -459,6 +462,59 @@ __global__ void prephase(ValueT* data, size_t n, size_t stages, size_t maxStages
|
|||||||
* Here we skip blocks every time (one for SizeToThreadsRatio = 2)
|
* Here we skip blocks every time (one for SizeToThreadsRatio = 2)
|
||||||
* And we cache the neighbor block address indexes in local (shared) memory
|
* And we cache the neighbor block address indexes in local (shared) memory
|
||||||
*/
|
*/
|
||||||
|
threadId_t gIdx = threadIdx.x + SizeToThreadsRatio * blockIdx.x * blockDim.x;
|
||||||
|
threadId_t lIdx = toLocal(gIdx, blockDim.x);
|
||||||
|
|
||||||
|
if (gIdx + blockDim.x >= n) // Boundary check
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Fetch to local memory the entire effective block size (2 positions for each thread)
|
||||||
|
shared_data[lIdx] = data[gIdx];
|
||||||
|
shared_data[lIdx + blockDim.x] = data[gIdx + blockDim.x];
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
for (size_t stage = 1; (stage <= stages) && (stage <= maxStages); ++stage) {
|
||||||
|
for (size_t step = stage; step > 0; ) {
|
||||||
|
--step;
|
||||||
|
|
||||||
|
// Find partner and keep-small configuration based on the global data positions
|
||||||
|
threadId_t pIdx = partner(gIdx, step);
|
||||||
|
if (gIdx > pIdx) {
|
||||||
|
// Work on the right side
|
||||||
|
bool keep = keepSmall(gIdx + blockDim.x, pIdx + blockDim.x, stage);
|
||||||
|
|
||||||
|
// Exchange data on local(shared) copy
|
||||||
|
threadId_t lpIdx = toLocal(pIdx + blockDim.x, blockDim.x);
|
||||||
|
exchange(shared_data, lIdx + blockDim.x, lpIdx, keep);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Work on the left side
|
||||||
|
bool keep = keepSmall(gIdx, pIdx, stage);
|
||||||
|
|
||||||
|
// Exchange data on local(shared) copy
|
||||||
|
threadId_t lpIdx = toLocal(pIdx, blockDim.x);
|
||||||
|
exchange(shared_data, lIdx, lpIdx, keep);
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write back to global memory (no sync here, there will be sync from host)
|
||||||
|
data[gIdx] = shared_data[lIdx];
|
||||||
|
data[gIdx + blockDim.x] = shared_data[lIdx + blockDim.x];
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/*
|
||||||
|
* Idea:
|
||||||
|
* - Keep a register copy of data[gIdx0], and data[gIdx0 + blockDim.x]
|
||||||
|
* - Instead of exchange in shared_data, read in register the partner and exchange there.
|
||||||
|
* - Write back only if there was an exchange
|
||||||
|
*
|
||||||
|
* ^^
|
||||||
|
* Unfortunately this breaks sequential consistency: the register values (lValue) do not match shared_data
|
||||||
|
* or even lValueR0 and lValueL0. Maybe this has something to do with register spilling (lValue keeps spilling
|
||||||
|
* to local memory).
|
||||||
|
*/
|
||||||
threadId_t gIdx0 = threadIdx.x + SizeToThreadsRatio * blockIdx.x * blockDim.x;
|
threadId_t gIdx0 = threadIdx.x + SizeToThreadsRatio * blockIdx.x * blockDim.x;
|
||||||
threadId_t lIdx0 = toLocal(gIdx0, blockDim.x);
|
threadId_t lIdx0 = toLocal(gIdx0, blockDim.x);
|
||||||
|
|
||||||
@ -466,29 +522,40 @@ __global__ void prephase(ValueT* data, size_t n, size_t stages, size_t maxStages
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
// Fetch to local memory the entire effective block size (2 positions for each thread)
|
// Fetch to local memory the entire effective block size (2 positions for each thread)
|
||||||
shared_data[lIdx0] = data[gIdx0];
|
// also keep thread's init values (L and R) on register locations
|
||||||
shared_data[lIdx0 + blockDim.x] = data[gIdx0 + blockDim.x];
|
ValueT lValueL0 = data[gIdx0];
|
||||||
|
ValueT lValueR0 = data[gIdx0 + blockDim.x];
|
||||||
|
shared_data[lIdx0] = lValueL0;
|
||||||
|
shared_data[lIdx0 + blockDim.x] = lValueR0;
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
for (size_t stage = 1; (stage <= stages) && (stage <= maxStages); ++stage) {
|
for (size_t stage = 1; (stage <= stages) && (stage <= maxStages); ++stage) {
|
||||||
for (size_t step = stage; step > 0; ) {
|
for (size_t step = stage; step > 0; ) {
|
||||||
--step;
|
--step;
|
||||||
|
|
||||||
// Init thread global and local indices
|
// Init thread global, local indices and active local register value
|
||||||
threadId_t gIdx = gIdx0;
|
threadId_t gIdx = gIdx0;
|
||||||
threadId_t lIdx = lIdx0;
|
threadId_t lIdx = lIdx0;
|
||||||
|
ValueT lValue = lValueL0; // "Me" on the left side of effective block
|
||||||
|
|
||||||
// Find partner and keep-small configuration based on the global data positions
|
// Find partner and keep-small configuration based on the global data positions
|
||||||
threadId_t pIdx = partner(gIdx, step);
|
threadId_t pIdx = partner(gIdx, step);
|
||||||
if (gIdx > pIdx) {
|
if (gIdx > pIdx) {
|
||||||
// Shift inside effective block
|
// Shift inside effective block
|
||||||
gIdx += blockDim.x; // global
|
gIdx += blockDim.x; // global
|
||||||
pIdx += blockDim.x;
|
pIdx += blockDim.x;
|
||||||
lIdx += blockDim.x; // local
|
lIdx += blockDim.x; // local
|
||||||
|
lValue = lValueR0; // The other me (the right side)
|
||||||
}
|
}
|
||||||
bool keep = keepSmall(gIdx, pIdx, stage);
|
bool keep = keepSmall(gIdx, pIdx, stage);
|
||||||
|
|
||||||
// Exchange data on local(shared) copy
|
// Exchange data on local(shared) copy
|
||||||
threadId_t lpIdx = toLocal(pIdx, blockDim.x);
|
threadId_t lpIdx = toLocal(pIdx, blockDim.x);
|
||||||
exchange(shared_data, lIdx, lpIdx, keep);
|
ValueT pValue = shared_data[lpIdx];
|
||||||
|
if (exchangeVals(&lValue, &pValue, keep)) {
|
||||||
|
shared_data[lIdx] = lValue;
|
||||||
|
shared_data[lpIdx] = pValue;
|
||||||
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -496,8 +563,10 @@ __global__ void prephase(ValueT* data, size_t n, size_t stages, size_t maxStages
|
|||||||
// Write back to global memory (no sync here, there will be sync from host)
|
// Write back to global memory (no sync here, there will be sync from host)
|
||||||
data[gIdx0] = shared_data[lIdx0];
|
data[gIdx0] = shared_data[lIdx0];
|
||||||
data[gIdx0 + blockDim.x] = shared_data[lIdx0 + blockDim.x];
|
data[gIdx0 + blockDim.x] = shared_data[lIdx0 + blockDim.x];
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* A CUDA version of the Bitonic sort algorithm.
|
* A CUDA version of the Bitonic sort algorithm.
|
||||||
*
|
*
|
||||||
|
|||||||
@ -16,8 +16,10 @@
|
|||||||
/*
|
/*
|
||||||
* Versioning:
|
* Versioning:
|
||||||
* - RC1: First version to test on HPC
|
* - RC1: First version to test on HPC
|
||||||
|
* - RC2: A pre-phase added for v1 and v2
|
||||||
|
* - RC3:
|
||||||
*/
|
*/
|
||||||
static constexpr char version[] = "0.1";
|
static constexpr char version[] = "0.2";
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Defines for different version of the exercise
|
* Defines for different version of the exercise
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user