// PDS/homework_2/include/distsort.hpp
//
// 334 lines
// 12 KiB
// C++

/*!
* \file
* \brief Distributed sort implementation header
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#ifndef DISTBITONIC_H_
#define DISTBITONIC_H_
#include <vector>
#include <algorithm>
//#include <parallel/algorithm>
#include <cmath>
#include <cstdint>
#if !defined DEBUG
#define NDEBUG
#endif
#include <cassert>
#include "utils.hpp"
extern Timing TfullSort, Texchange, Tminmax, TelbowSort;
/*!
 * Selects which distributed sorting network is used.
 *
 * The ascending(), partner() and keepSmall() helpers are explicitly specialized
 * on each of these values.
 */
enum class SortMode {
    //! The v0.5 of the algorithm, which uses a bubble-sort like exchange approach
    Bubbletonic,
    //! The v1.0 of the algorithm, which uses the bitonic data-exchange approach
    Bitonic
};
/*
* ============================== Sort utilities ==============================
*/
/*!
* The primary function template of ascending(). It is DISABLED since , it is explicitly specialized
* for each of the \c SortMode
*/
template <SortMode Mode> inline bool ascending(mpi_id_t, [[maybe_unused]] size_t) noexcept = delete;
/*!
 * Tells whether a node keeps its local sequence in ascending order during Bubbletonic sort.
 *
 * Nodes with an even id are ascending, nodes with an odd id are descending.
 *
 * @param node  [mpi_id_t] The current node (MPI process)
 * @param depth [size_t]   Ignored by this specialization
 * @return [bool] True if we need ascending configuration, false otherwise
 */
template <> inline
bool ascending<SortMode::Bubbletonic>(mpi_id_t node, [[maybe_unused]] size_t depth) noexcept {
    const bool evenNode = (node % 2) == 0;
    return evenNode;
}
/*!
 * Tells whether a node keeps its local sequence in ascending order during Bitonic sort.
 *
 * The node is ascending when bit number \p depth of its id is clear and
 * descending when that bit is set.
 *
 * @param node  [mpi_id_t] The current node (MPI process)
 * @param depth [size_t]   The depth of the sorting network, used as the bit position to test
 * @return [bool] True if we need ascending configuration, false otherwise
 */
template <> inline
bool ascending<SortMode::Bitonic>(mpi_id_t node, size_t depth) noexcept {
    const auto depthBit = (1 << depth);     // mask that selects bit 'depth'
    return (node & depthBit) == 0;
}
/*!
 * Primary function template of partner().
 *
 * It is deliberately deleted: only the explicit specializations provided for each
 * \c SortMode can be called.
 */
template <SortMode Mode>
inline mpi_id_t partner(mpi_id_t, size_t) noexcept = delete;
/*!
 * Returns the node's partner for data exchange during the sorting network iterations
 * of Bubbletonic.
 *
 * When \p node + \p step is even the partner is the next node (node + 1),
 * otherwise it is the previous one (node - 1).
 *
 * @note
 *    No range check is done here, so the result can be one below the first node or
 *    one past the last node.
 *
 * @param node [mpi_id_t] The current node
 * @param step [size_t]   The step of the sorting network
 * @return [mpi_id_t] The node id of the partner for data exchange
 */
template <> inline
mpi_id_t partner<SortMode::Bubbletonic>(mpi_id_t node, size_t step) noexcept {
    const bool pairsUpward = ((node + step) % 2) == 0;
    if (pairsUpward)
        return node + 1;
    return node - 1;
}
/*!
 * Returns the node's partner for data exchange during the sorting network iterations
 * of Bitonic.
 *
 * The partner id is the node id with bit number \p step flipped.
 *
 * @param node [mpi_id_t] The current node
 * @param step [size_t]   The step of the sorting network, used as the bit position to flip
 * @return [mpi_id_t] The node id of the partner for data exchange
 */
template <> inline
mpi_id_t partner<SortMode::Bitonic>(mpi_id_t node, size_t step) noexcept {
    const auto stepBit = (1 << step);       // mask that selects bit 'step'
    return node ^ stepBit;
}
/*!
* The primary function template of keepSmall(). It is DISABLED since , it is explicitly specialized
* for each of the \c SortMode
*/
template<SortMode Mode> inline bool keepSmall(mpi_id_t, mpi_id_t, [[maybe_unused]] size_t) = delete;
/*!
 * Predicate to check if a node keeps the small numbers during the bubbletonic sort network exchange.
 *
 * The node with the lower id of the pair keeps the small values.
 *
 * @param node    [mpi_id_t] The node for which we check
 * @param partner [mpi_id_t] The partner of the data exchange
 * @param depth   [size_t]   Ignored by this specialization
 * @return [bool] True if the node should keep the small values, false otherwise
 * @throws std::runtime_error If \p node and \p partner are the same
 */
template <> inline
bool keepSmall<SortMode::Bubbletonic>(mpi_id_t node, mpi_id_t partner, [[maybe_unused]] size_t depth) {
    if (node != partner)
        return node < partner;
    // A node can never exchange data with itself
    throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n");
}
/*!
 * Predicate to check if a node keeps the small numbers during the bitonic sort network exchange.
 *
 * An ascending node keeps the small values when it has the lower id of the pair.
 * A descending node keeps them when it has the higher id.
 *
 * @param node    [mpi_id_t] The node for which we check
 * @param partner [mpi_id_t] The partner of the data exchange
 * @param depth   [size_t]   The depth of the sorting network, forwarded to ascending()
 * @return [bool] True if the node should keep the small values, false otherwise
 * @throws std::runtime_error If \p node and \p partner are the same
 */
template <> inline
bool keepSmall<SortMode::Bitonic>(mpi_id_t node, mpi_id_t partner, size_t depth) {
    if (node == partner) {
        // A node can never exchange data with itself
        throw std::runtime_error("(keepSmall) Node and Partner can not be the same\n");
    }
    const bool lowerOfPair = (node < partner);
    return ascending<SortMode::Bitonic>(node, depth) == lowerOfPair;
}
/*!
 * Predicate to check if the node is active in the current iteration of the bubbletonic
 * sort exchange.
 *
 * Only the declaration is given here; no definition is visible in this header.
 * distBubbletonic() calls it for both the local rank and the computed partner and
 * performs the exchange only when both calls return true.
 *
 * @note
 *    NOTE(review): partner<SortMode::Bubbletonic>() can return -1 or \p nodes at the edges,
 *    so this presumably checks that 0 <= node < nodes -- confirm against the definition.
 *
 * @param node  [mpi_id_t] The node to check
 * @param nodes [size_t] The total number of nodes
 * @return [bool] True if the node is active, false otherwise
 */
bool isActive(mpi_id_t node, size_t nodes);
/*
* ============================== Data utilities ==============================
*/
/*!
 * Sort a whole range in place with std::sort.
 *
 * @tparam RangeT A range type with random access iterator
 *
 * @param data      [RangeT] The data to be sorted
 * @param ascending [bool]   True to sort with std::less (ascending order),
 *                           false to sort with std::greater (descending order)
 */
template<typename RangeT>
void fullSort(RangeT& data, bool ascending) noexcept {
    const auto first = data.begin();
    const auto last  = data.end();
    if (!ascending) {
        std::sort(first, last, std::greater<>());
        return;
    }
    std::sort(first, last, std::less<>());
}
/*!
 * Core functionality of sort for shadowed buffer types using
 * the "elbow sort" algorithm.
 *
 * The elbow is the position of the minimum (ascending) or the maximum (descending)
 * of the active buffer, found with std::min_element / std::max_element.
 * Two cursors start at the elbow and at the item right after it and walk away from each
 * other, wrapping around the ends. In each of the N iterations the item selected by \p comp
 * is written to the next position of the shadow buffer.
 *
 * @note
 *    This algorithm can not work "in place".
 *    The active buffer is the source and the shadow buffer is the target.
 *    At the end the roles of the two buffers are switched.
 * @note
 *    This is the core functionality. Use the elbowSort() function instead
 *
 * @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
 * @tparam CompT         A Comparison type for binary operation comparisons
 *
 * @param data      [ShadowedDataT] The data to sort
 * @param ascending [bool]          Flag to indicate the sorting order
 * @param comp      [CompT]         The binary operator object
 */
template<typename ShadowedDataT, typename CompT>
void elbowSortCore(ShadowedDataT& data, bool ascending, CompT comp) noexcept {
    auto& src = data.getActive();       // source vector (the data to sort)
    auto& dst = data.getShadow();       // target vector (receives the sorted data)
    const size_t count = data.size();   // both vectors are indexed up to this size

    // Locate the elbow of the bitonic sequence
    auto elbow = src.begin();
    if (ascending)
        elbow = std::min_element(src.begin(), src.end());
    else
        elbow = std::max_element(src.begin(), src.end());

    size_t down = std::distance(src.begin(), elbow);    // cursor that walks towards lower indexes
    size_t up   = down + 1;                             // cursor that walks towards higher indexes
    if (down == count - 1)
        up = 0;                                         // wrap around the end

    // Merge the two runs that start at the elbow into the target vector
    for (size_t out = 0; out != count; ++out) {
        if (comp(src[down], src[up])) {
            dst[out] = src[down];
            if (down == 0)
                down = count - 1;                       // cyclic decrease
            else
                --down;
        }
        else {
            dst[out] = src[up];
            up = (up + 1) % count;                      // cyclic increase
        }
    }
    data.switch_active();               // the target becomes the active buffer
}
/*!
 * Sort a shadowed buffer using the "elbow sort" algorithm.
 *
 * Forwards to elbowSortCore() with std::less for ascending order
 * or std::greater for descending order.
 *
 * @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
 *
 * @param data      [ShadowedDataT] The data to sort
 * @param ascending [bool]          Flag to indicate the sorting order
 */
template<typename ShadowedDataT>
void elbowSort(ShadowedDataT& data, bool ascending) noexcept {
    if (!ascending) {
        elbowSortCore(data, ascending, std::greater<>());
        return;
    }
    elbowSortCore(data, ascending, std::less<>());
}
/*!
 * Element-wise selection between the local and the remote sequence.
 *
 * Each local item is replaced with the smaller (or the larger) of itself and the
 * remote item at the same position. The callers pass two sorted sequences where one is in
 * increasing and the other in decreasing order, so that the result is a bitonic sequence.
 *
 * @note
 *    \p remote is read from its beginning for as many items as \p local holds, so it
 *    must provide at least that many.
 *
 * @tparam RangeT A range type with random access iterator
 *
 * @param local     [RangeT]       Reference to the local sequence (updated in place)
 * @param remote    [const RangeT] Reference to the remote sequence (copied locally by MPI)
 * @param keepSmall [bool]         Flag to indicate if we keep the small items in local sequence
 */
template<typename RangeT>
void minmax(RangeT& local, const RangeT& remote, bool keepSmall) noexcept {
    using value_t = typename RangeT::value_type;

    const auto smaller = [](const value_t& mine, const value_t& theirs) {
        return std::min(mine, theirs);
    };
    const auto larger = [](const value_t& mine, const value_t& theirs) {
        return std::max(mine, theirs);
    };

    // Decide once which selection to apply, then run it over the whole range
    if (keepSmall)
        std::transform(local.begin(), local.end(), remote.begin(), local.begin(), smaller);
    else
        std::transform(local.begin(), local.end(), remote.begin(), local.begin(), larger);
}
/*
* ============================== Sort algorithms ==============================
*/
/*!
* A distributed version of the Bubbletonic sort algorithm.
*
* @note
* Each MPI process should run an instance of this function.
*
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
*
* @param data [ShadowedDataT] The local to MPI process data to sort
* @param Processes [mpi_id_t] The total number of MPI processes
* @param rank [mpi_id_t] The current process id
*/
template<typename ShadowedDataT>
void distBubbletonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
// Initially sort to create a half part of a bitonic sequence
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bubbletonic>(rank, 0));
// Sort network (O(N) iterations)
for (size_t step = 0; step < static_cast<size_t>(Processes); ++step) {
// Find out exchange configuration
auto part = partner<SortMode::Bubbletonic>(rank, step);
auto ks = keepSmall<SortMode::Bubbletonic>(rank, part, Processes);
if ( isActive(rank, Processes) &&
isActive(part, Processes) ) {
// Exchange with partner, keep nim-or-max and sort - O(N)
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, step);
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks);
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bubbletonic>(rank, Processes));
}
}
// Invert if the node was descending.
if (!ascending<SortMode::Bubbletonic>(rank, 0))
elbowSort(data, true);
}
/*!
* A distributed version of the Bitonic sort algorithm.
*
* @note
* Each MPI process should run an instance of this function.
*
* @tparam ShadowedDataT A Shadowed buffer type with random access iterator.
*
* @param data [ShadowedDataT] The local to MPI process data to sort
* @param Processes [mpi_id_t] The total number of MPI processes
* @param rank [mpi_id_t] The current process id
*/
template<typename ShadowedDataT>
void distBitonic(ShadowedDataT& data, mpi_id_t Processes, mpi_id_t rank) {
// Initially sort to create a half part of a bitonic sequence
timeCall(TfullSort, fullSort, data, ascending<SortMode::Bitonic>(rank, 0));
// Run through sort network using elbow-sort ( O(LogN * LogN) iterations )
auto p = static_cast<uint32_t>(std::log2(Processes));
for (size_t depth = 1; depth <= p; ++depth) {
for (size_t step = depth; step > 0;) {
--step;
// Find out exchange configuration
auto part = partner<SortMode::Bitonic>(rank, step);
auto ks = keepSmall<SortMode::Bitonic>(rank, part, depth);
// Exchange with partner, keep nim-or-max
timeCall(Texchange, mpi.exchange, data.getActive(), data.getShadow(), part, (depth << 8) | step);
timeCall(Tminmax, minmax, data.getActive(), data.getShadow(), ks);
}
// sort - O(N)
timeCall(TelbowSort, elbowSort, data, ascending<SortMode::Bitonic>(rank, depth));
}
}
#endif //DISTBITONIC_H_