COD/Q6-cache/src/matmul.c
2020-05-17 17:09:48 +03:00

333 lines
12 KiB
C

/*!
\file matmul.c
\brief Matrix multiplication implementation.
\author Nikos Pitsianis
\author Dimitris Floros
\author Christos Choutouridis 8997 <cchoutou@ece.auth.gr>
\date 2020-05-05
*/
#include "matmul.h"
/*!
* Square Matrix multiplication - ijk
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \return none
*/
void matrixMult_ijk(float * const C, float const * const A, float const * const B, int const n) {
for (int i = 0; i < n; ++i)
for (int j = 0; j < n; ++j) {
int k =0;
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
for (k = 1; k < n; ++k)
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
}
}
/*!
* Square Matrix multiplication - ikj
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \return none
*/
void matrixMult_ikj(float * const C, float const * const A, float const * const B, int const n) {
for (int i = 0; i < n; ++i)
for (int k = 0; k < n; ++k) {
if (!k) {
for (int j = 0; j < n; ++j)
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
} else {
for (int j = 0; j < n; ++j)
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
}
}
}
/*!
* Square Matrix multiplication - jik
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \return none
*/
void matrixMult_jik(float * const C, float const * const A, float const * const B, int const n) {
for (int j = 0; j < n; j++)
for (int i = 0; i < n; i++) {
int k =0;
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
for (k = 1; k < n; k++)
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
}
}
/*!
* Square Matrix multiplication - jki
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \return none
*/
void matrixMult_jki(float * const C, float const * const A, float const * const B, int const n) {
for (int j = 0; j < n; ++j)
for (int k = 0; k < n; ++k) {
if (!k) {
for (int i = 0; i < n; ++i)
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
} else {
for (int i = 0; i < n; ++i)
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
}
}
}
/*!
* Square Matrix multiplication - kij
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \return none
*/
void matrixMult_kij(float * const C, float const * const A, float const * const B, int const n) {
for (int k = 0; k < n; ++k) {
if (!k) {
for (int i = 0; i < n; ++i)
for (int j = 0; j < n; ++j)
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
} else {
for (int i = 0; i < n; ++i)
for (int j = 0; j < n; ++j)
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
}
}
}
/*!
* Square Matrix multiplication - kji
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \return none
*/
void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n) {
for (int k = 0; k < n; ++k) {
if (!k) {
for (int j = 0; j < n; ++j)
for (int i = 0; i < n; ++i)
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
} else {
for (int j = 0; j < n; ++j)
for (int i = 0; i < n; ++i)
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
}
}
}
/*!
* Square Matrix multiplication in blocks - ijk
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \param s The block size
* \return none
*/
void matrixMult_ijk_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
for (int I =0; I<n; I +=s)
for (int J =0; J<n; J +=s)
for (int K = 0; K<n; K +=s)
for (int i =0; i<s; ++i)
for (int j =0; j<s; ++j) {
int k =0;
if (K+k == 0)
C[ sub2ind(I+i,J+j,n) ] = 0;
for (k =0; k<s; ++k)
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
}
}
/*!
* Square Matrix multiplication in blocks - ikj
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \param s The block size
* \return none
*/
void matrixMult_ikj_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
for (int I =0; I<n; I +=s)
for (int K =0; K<n; K +=s)
for (int J = 0; J<n; J +=s)
for (int i =0; i<s; ++i)
for (int k =0; k<s; ++k) {
if ((k+K) == 0) {
for (int j =0; j<s; ++j)
C[ sub2ind(I+i,J+j,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
}
else {
for (int j =0; j<s; ++j)
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
}
}
}
/*!
* Square Matrix multiplication in blocks - jik
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \param s The block size
* \return none
*/
void matrixMult_jik_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
for (int J =0; J<n; J +=s)
for (int I =0; I<n; I +=s)
for (int K = 0; K<n; K +=s)
for (int j =0; j<s; ++j)
for (int i =0; i<s; ++i) {
int k =0;
if (K+k == 0)
C[ sub2ind(I+i,J+j,n) ] = 0;
for (k =0; k<s; ++k)
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
}
}
/*!
* Square Matrix multiplication in blocks - jki
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \param s The block size
* \return none
*/
void matrixMult_jki_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
for (int J =0; J<n; J +=s)
for (int K =0; K<n; K +=s)
for (int I = 0; I<n; I +=s)
for (int j =0; j<s; ++j)
for (int k =0; k<s; ++k) {
if ((k+K) == 0) {
for (int i =0; i<s; ++i)
C[ sub2ind(I+i,J+j,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
}
else {
for (int i =0; i<s; ++i)
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
}
}
}
/*!
* Square Matrix multiplication in blocks - kij
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \param s The block size
* \return none
*
* \warning
* Calling this function will trigger undefined result. There is no initialization of C.
*/
void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
for (int K =0; K<n; K +=s)
for (int I =0; I<n; I +=s)
for (int J = 0; J<n; J +=s)
for (int k =0; k<s; ++k)
for (int i =0; i<s; ++i)
for (int j =0; j<s; ++j)
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
}
/*!
* Square Matrix multiplication in blocks - kji
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \param s The block size
* \return none
*
* \warning
* Calling this function will trigger undefined result. There is no initialization of C.
*/
void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
for (int K =0; K<n; K +=s)
for (int J =0; J<n; J +=s)
for (int I = 0; I<n; I +=s)
for (int k =0; k<s; ++k)
for (int j =0; j<s; ++j)
for (int i =0; i<s; ++i)
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
}
/*!
* Square Matrix multiplication in unrolling block of 8 - ikj
* \param C pointer to output matrix
* \param A pointer to input matrix A
* \param B pointer to input matrix B
* \param n Size of matrices (both sizes)
* \param s The block size
* \return none
*/
void matrixMult_ikj8(float * const C, float const * const A, float const * const B, int const n) {
for (int I =0; I<n; I +=8)
for (int K =0; K<n; K +=8)
for (int J = 0; J<n; J +=8)
for (int i =0; i<8; ++i)
for (int k =0; k<8; ++k) {
if ((k+K) == 0) {
C[ sub2ind(I+i,J+0,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+0,n) ];
C[ sub2ind(I+i,J+1,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+1,n) ];
C[ sub2ind(I+i,J+2,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+2,n) ];
C[ sub2ind(I+i,J+3,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+3,n) ];
C[ sub2ind(I+i,J+4,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+4,n) ];
C[ sub2ind(I+i,J+5,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+5,n) ];
C[ sub2ind(I+i,J+6,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+6,n) ];
C[ sub2ind(I+i,J+7,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+7,n) ];
}
else {
C[ sub2ind(I+i,J+0,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+0,n) ];
C[ sub2ind(I+i,J+1,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+1,n) ];
C[ sub2ind(I+i,J+2,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+2,n) ];
C[ sub2ind(I+i,J+3,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+3,n) ];
C[ sub2ind(I+i,J+4,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+4,n) ];
C[ sub2ind(I+i,J+5,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+5,n) ];
C[ sub2ind(I+i,J+6,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+6,n) ];
C[ sub2ind(I+i,J+7,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+7,n) ];
}
}
}
/*!
* Initialize matrix with random indices and return the matrix pointer.
*
* \param n The size of the matrix (both of them)
* \return Pointer to allocated and initialized matrix
*/
float* matrixInit(int const n) {
float *M = (float *) malloc( n*n*sizeof(float) );
for (int i = 0; i < n; i++) /* rows */
for (int j = 0; j < n; j++) /* cols */
M[ sub2ind(i,j,n) ] = (float)rand()/(float)(RAND_MAX);
return M;
}