ArrayFire: benchmarks/cg.cpp (original) (raw)

#include

using namespace af;

static size_t dimension = 4 * 1024;

static const int maxIter = 10;

static const int sparsityFactor = 7;

static array spA;

void setupInputs() {

array T = randu(dimension, dimension, f32);

A = floor(T * 1000);

A = A * ((A % sparsityFactor) == 0) / 1000;

A = transpose(A) + A + A.dims(0) * identity(A.dims(0), A.dims(0), f32);

spA = sparse(A);

b = matmul(A, x0);

std::cout << "Sparsity of A = "

<< 100.f * (float)sparseGetNNZ(spA) / (float)spA.elements() << "%"

<< std::endl;

std::cout << "Memory Usage of A = " << A.bytes() / (1024.f * 1024.f)

<< " MB" << std::endl;

std::cout << "Memory Usage of spA = "

<< (sparseGetValues(spA).bytes() + sparseGetRowIdx(spA).bytes() +

sparseGetColIdx(spA).bytes()) /

(1024.f * 1024.f)

<< " MB" << std::endl;

}

void sparseConjugateGradient(void) {

array r = b - matmul(spA, x);

for (int i = 0; i < maxIter; ++i) {

array Ap = matmul(spA, p);

array alpha_num = dot(r, r);

array alpha_den = dot(p, Ap);

array alpha = alpha_num / alpha_den;

r -= tile(alpha, Ap.dims()) * Ap;

x += tile(alpha, Ap.dims()) * p;

array beta_num = dot(r, r);

array beta = beta_num / alpha_num;

p = r + tile(beta, p.dims()) * p;

}

}

void denseConjugateGradient(void) {

for (int i = 0; i < maxIter; ++i) {

array alpha = alpha_num / alpha_den;

r -= tile(alpha, Ap.dims()) * Ap;

array beta = beta_num / alpha_num;

p = r + tile(beta, p.dims()) * p;

}

}

void checkConjugateGradient(const af::array in) {

for (int i = 0; i < maxIter; ++i) {

array alpha = alpha_num / alpha_den;

r -= tile(alpha, Ap.dims()) * Ap;

array beta = beta_num / alpha_num;

p = r + tile(beta, p.dims()) * p;

}

std::cout << "Final difference in solutions:\n";

}

int main(int, char **) {

setupInputs();

std::cout << "Verifying Dense Conjugate Gradient:" << std::endl;

checkConjugateGradient(A);

std::cout << "Verifying Sparse Conjugate Gradient:" << std::endl;

checkConjugateGradient(spA);

std::cout << "Dense Conjugate Gradient Time: "

<< timeit(denseConjugateGradient) * 1000 << "ms" << std::endl;

std::cout << "Sparse Conjugate Gradient Time: "

<< timeit(sparseConjugateGradient) * 1000 << "ms" << std::endl;

return 0;

}

A multi dimensional data container.

dim4 dims() const

Get dimensions of the array.

size_t bytes() const

Get the size of the array in bytes.

dim_t elements() const

Get the total number of elements across all dimensions of the array.

@ f32

32-bit floating point values

T dot(const array &lhs, const array &rhs, const matProp optLhs=AF_MAT_NONE, const matProp optRhs=AF_MAT_NONE)

C++ Interface to compute the dot product.

AFAPI array matmul(const array &lhs, const array &rhs, const matProp optLhs=AF_MAT_NONE, const matProp optRhs=AF_MAT_NONE)

C++ Interface to multiply two matrices.

array constant(T val, const dim4 &dims, const dtype ty=(af_dtype) dtype_traits< T >::ctype)

C++ Interface to generate an array with elements set to a specified value.

AFAPI void sync(const int device=-1)

Blocks until the device is finished processing.

AFAPI array tile(const array &in, const unsigned x, const unsigned y=1, const unsigned z=1, const unsigned w=1)

C++ Interface to generate a tiled array.

AFAPI double timeit(void(*fn)())