333 lines
12 KiB
C
333 lines
12 KiB
C
/*!
|
|
\file matmul.c
|
|
\brief Matrix multiplication implementation.
|
|
|
|
\author Nikos Pitsianis
|
|
\author Dimitris Floros
|
|
\author Christos Choutouridis 8997 <cchoutou@ece.auth.gr>
|
|
\date 2020-05-05
|
|
*/
|
|
#include "matmul.h"
|
|
|
|
/*!
|
|
* Square Matrix multiplication - ijk
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \return none
|
|
*/
|
|
void matrixMult_ijk(float * const C, float const * const A, float const * const B, int const n) {
|
|
for (int i = 0; i < n; ++i)
|
|
for (int j = 0; j < n; ++j) {
|
|
int k =0;
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
for (k = 1; k < n; ++k)
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication - ikj
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \return none
|
|
*/
|
|
void matrixMult_ikj(float * const C, float const * const A, float const * const B, int const n) {
|
|
for (int i = 0; i < n; ++i)
|
|
for (int k = 0; k < n; ++k) {
|
|
if (!k) {
|
|
for (int j = 0; j < n; ++j)
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
} else {
|
|
for (int j = 0; j < n; ++j)
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication - jik
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \return none
|
|
*/
|
|
void matrixMult_jik(float * const C, float const * const A, float const * const B, int const n) {
|
|
for (int j = 0; j < n; j++)
|
|
for (int i = 0; i < n; i++) {
|
|
int k =0;
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
for (k = 1; k < n; k++)
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication - jki
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \return none
|
|
*/
|
|
void matrixMult_jki(float * const C, float const * const A, float const * const B, int const n) {
|
|
for (int j = 0; j < n; ++j)
|
|
for (int k = 0; k < n; ++k) {
|
|
if (!k) {
|
|
for (int i = 0; i < n; ++i)
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
} else {
|
|
for (int i = 0; i < n; ++i)
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication - kij
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \return none
|
|
*/
|
|
void matrixMult_kij(float * const C, float const * const A, float const * const B, int const n) {
|
|
for (int k = 0; k < n; ++k) {
|
|
if (!k) {
|
|
for (int i = 0; i < n; ++i)
|
|
for (int j = 0; j < n; ++j)
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
} else {
|
|
for (int i = 0; i < n; ++i)
|
|
for (int j = 0; j < n; ++j)
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication - kji
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \return none
|
|
*/
|
|
void matrixMult_kji(float * const C, float const * const A, float const * const B, int const n) {
|
|
for (int k = 0; k < n; ++k) {
|
|
if (!k) {
|
|
for (int j = 0; j < n; ++j)
|
|
for (int i = 0; i < n; ++i)
|
|
C[ sub2ind(i,j,n) ] = A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
} else {
|
|
for (int j = 0; j < n; ++j)
|
|
for (int i = 0; i < n; ++i)
|
|
C[ sub2ind(i,j,n) ] += A[ sub2ind(i,k,n) ] * B[ sub2ind(k,j,n) ];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication in blocks - ijk
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \param s The block size
|
|
* \return none
|
|
*/
|
|
void matrixMult_ijk_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
|
|
for (int I =0; I<n; I +=s)
|
|
for (int J =0; J<n; J +=s)
|
|
for (int K = 0; K<n; K +=s)
|
|
for (int i =0; i<s; ++i)
|
|
for (int j =0; j<s; ++j) {
|
|
int k =0;
|
|
if (K+k == 0)
|
|
C[ sub2ind(I+i,J+j,n) ] = 0;
|
|
for (k =0; k<s; ++k)
|
|
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication in blocks - ikj
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \param s The block size
|
|
* \return none
|
|
*/
|
|
void matrixMult_ikj_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
|
|
for (int I =0; I<n; I +=s)
|
|
for (int K =0; K<n; K +=s)
|
|
for (int J = 0; J<n; J +=s)
|
|
for (int i =0; i<s; ++i)
|
|
for (int k =0; k<s; ++k) {
|
|
if ((k+K) == 0) {
|
|
for (int j =0; j<s; ++j)
|
|
C[ sub2ind(I+i,J+j,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
|
|
}
|
|
else {
|
|
for (int j =0; j<s; ++j)
|
|
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication in blocks - jik
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \param s The block size
|
|
* \return none
|
|
*/
|
|
void matrixMult_jik_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
|
|
for (int J =0; J<n; J +=s)
|
|
for (int I =0; I<n; I +=s)
|
|
for (int K = 0; K<n; K +=s)
|
|
for (int j =0; j<s; ++j)
|
|
for (int i =0; i<s; ++i) {
|
|
int k =0;
|
|
if (K+k == 0)
|
|
C[ sub2ind(I+i,J+j,n) ] = 0;
|
|
for (k =0; k<s; ++k)
|
|
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication in blocks - jki
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \param s The block size
|
|
* \return none
|
|
*/
|
|
void matrixMult_jki_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
|
|
for (int J =0; J<n; J +=s)
|
|
for (int K =0; K<n; K +=s)
|
|
for (int I = 0; I<n; I +=s)
|
|
for (int j =0; j<s; ++j)
|
|
for (int k =0; k<s; ++k) {
|
|
if ((k+K) == 0) {
|
|
for (int i =0; i<s; ++i)
|
|
C[ sub2ind(I+i,J+j,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
|
|
}
|
|
else {
|
|
for (int i =0; i<s; ++i)
|
|
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication in blocks - kij
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \param s The block size
|
|
* \return none
|
|
*
|
|
* \warning
|
|
* Calling this function will trigger undefined result. There is no initialization of C.
|
|
*/
|
|
void matrixMult_kij_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
|
|
for (int K =0; K<n; K +=s)
|
|
for (int I =0; I<n; I +=s)
|
|
for (int J = 0; J<n; J +=s)
|
|
for (int k =0; k<s; ++k)
|
|
for (int i =0; i<s; ++i)
|
|
for (int j =0; j<s; ++j)
|
|
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication in blocks - kji
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \param s The block size
|
|
* \return none
|
|
*
|
|
* \warning
|
|
* Calling this function will trigger undefined result. There is no initialization of C.
|
|
*/
|
|
void matrixMult_kji_block(float * const C, float const * const A, float const * const B, int const n, int const s) {
|
|
for (int K =0; K<n; K +=s)
|
|
for (int J =0; J<n; J +=s)
|
|
for (int I = 0; I<n; I +=s)
|
|
for (int k =0; k<s; ++k)
|
|
for (int j =0; j<s; ++j)
|
|
for (int i =0; i<s; ++i)
|
|
C[ sub2ind(I+i,J+j,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+j,n) ];
|
|
}
|
|
|
|
/*!
|
|
* Square Matrix multiplication in unrolling block of 8 - ikj
|
|
* \param C pointer to output matrix
|
|
* \param A pointer to input matrix A
|
|
* \param B pointer to input matrix B
|
|
* \param n Size of matrices (both sizes)
|
|
* \param s The block size
|
|
* \return none
|
|
*/
|
|
void matrixMult_ikj8(float * const C, float const * const A, float const * const B, int const n) {
|
|
for (int I =0; I<n; I +=8)
|
|
for (int K =0; K<n; K +=8)
|
|
for (int J = 0; J<n; J +=8)
|
|
for (int i =0; i<8; ++i)
|
|
for (int k =0; k<8; ++k) {
|
|
if ((k+K) == 0) {
|
|
C[ sub2ind(I+i,J+0,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+0,n) ];
|
|
C[ sub2ind(I+i,J+1,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+1,n) ];
|
|
C[ sub2ind(I+i,J+2,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+2,n) ];
|
|
C[ sub2ind(I+i,J+3,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+3,n) ];
|
|
C[ sub2ind(I+i,J+4,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+4,n) ];
|
|
C[ sub2ind(I+i,J+5,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+5,n) ];
|
|
C[ sub2ind(I+i,J+6,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+6,n) ];
|
|
C[ sub2ind(I+i,J+7,n) ] = A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+7,n) ];
|
|
}
|
|
else {
|
|
C[ sub2ind(I+i,J+0,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+0,n) ];
|
|
C[ sub2ind(I+i,J+1,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+1,n) ];
|
|
C[ sub2ind(I+i,J+2,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+2,n) ];
|
|
C[ sub2ind(I+i,J+3,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+3,n) ];
|
|
C[ sub2ind(I+i,J+4,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+4,n) ];
|
|
C[ sub2ind(I+i,J+5,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+5,n) ];
|
|
C[ sub2ind(I+i,J+6,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+6,n) ];
|
|
C[ sub2ind(I+i,J+7,n) ] += A[ sub2ind(I+i,K+k,n) ] * B[ sub2ind(K+k,J+7,n) ];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* Initialize matrix with random indices and return the matrix pointer.
|
|
*
|
|
* \param n The size of the matrix (both of them)
|
|
* \return Pointer to allocated and initialized matrix
|
|
*/
|
|
float* matrixInit(int const n) {
|
|
|
|
float *M = (float *) malloc( n*n*sizeof(float) );
|
|
|
|
for (int i = 0; i < n; i++) /* rows */
|
|
for (int j = 0; j < n; j++) /* cols */
|
|
M[ sub2ind(i,j,n) ] = (float)rand()/(float)(RAND_MAX);
|
|
|
|
return M;
|
|
}
|
|
|
|
|
|
|