Ginkgo: The adaptiveprecision-blockjacobi program (original) (raw)

The preconditioned solver example..

This example depends on preconditioned-solver.

Table of contents
Introduction The commented program Results Comments about programming and debugging The plain program

This example shows how to use the adaptive precision block-Jacobi preconditioner.

In this example, we first read in a matrix from file, then generate a right-hand side and an initial guess. The preconditioned CG solver is enhanced with a block-Jacobi preconditioner that optimizes the storage format for the distinct inverted diagonal blocks to the numerical requirements. The example features the iteration count and runtime of the CG solver.

The commented program

const auto executor_string = argc >= 2 ? argv[1] : "reference";

Figure out where to run the code

std::map<std::string, std::function<std::shared_ptrgko::Executor()>>

exec_map{

{"cuda",

[] {

}},

{"hip",

[] {

}},

{"dpcpp",

[] {

}},

{"reference", [] { return gko::ReferenceExecutor::create(); }}};

executor where Ginkgo will perform the computation

const auto exec = exec_map.at(executor_string)();

Read data

auto A = share(gko::read(std::ifstream("data/A.mtx"), exec));

Create RHS and initial guess as 1

auto host_x = vec::create(exec->get_master(), gko::dim<2>(size, 1));

for (auto i = 0; i < size; i++) {

host_x->at(i, 0) = 1.;

}

Calculate initial residual by overwriting b

auto one = gko::initialize({1.0}, exec);

auto neg_one = gko::initialize({-1.0}, exec);

auto initres = gko::initialize<real_vec>({0.0}, exec);

A->apply(one, x, neg_one, b);

b->compute_norm2(initres);

copy b again

Create solver factory

const RealValueType reduction_factor = 1e-7;

auto solver_gen =

cg::build()

.with_criteria(gko::stop::Iteration::build().with_max_iters(10000u),

.with_reduction_factor(reduction_factor))

Add preconditioner, these 2 lines are the only difference from the simple solver example

.with_preconditioner(

bj::build().with_max_block_size(16u).with_storage_optimization(

.on(exec);

Create solver

std::shared_ptr<const gko:🪵:Convergence> logger =

solver_gen->add_logger(logger);

auto solver = solver_gen->generate(A);

Solve system

exec->synchronize();

std::chrono::nanoseconds time(0);

auto tic = std::chrono::steady_clock::now();

auto toc = std::chrono::steady_clock::now();

time += std::chrono::duration_caststd::chrono::nanoseconds(toc - tic);

Get residual

auto res = gko::as<real_vec>(logger->get_residual_norm());

auto impl_res = gko::as<real_vec>(logger->get_implicit_sq_resnorm());

std::cout << "Initial residual norm sqrt(r^T r):\n";

write(std::cout, initres);

std::cout << "Final residual norm sqrt(r^T r):\n";

write(std::cout, res);

std::cout << "Implicit residual norm squared (r^2):\n";

write(std::cout, impl_res);

Print solver statistics

std::cout << "CG iteration count: " << logger->get_num_iterations()

<< std::endl;

std::cout << "CG execution time [ms]: "

<< static_cast(time.count()) / 1000000.0 << std::endl;

}

Results

This is the expected output:

Initial residual norm sqrt(r^T r):

%%MatrixMarket matrix array real general

1 1

194.679

Final residual norm sqrt(r^T r):

%%MatrixMarket matrix array real general

1 1

5.69384e-06

Implicit residual norm squared (r^2):

%%MatrixMarket matrix array real general

1 1

1.27043e-15

CG iteration count: 5

CG execution time [ms]: 0.080041

Comments about programming and debugging

The plain program

#include

#include

#include

#include

#include

#include <ginkgo/ginkgo.hpp>

int main(int argc, char* argv[])

{

using ValueType = double;

using IndexType = int;

if (argc == 2 && (std::string(argv[1]) == "--help")) {

std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;

std::exit(-1);

}

const auto executor_string = argc >= 2 ? argv[1] : "reference";

std::map<std::string, std::function<std::shared_ptrgko::Executor()>>

exec_map{

{"cuda",

[] {

}},

{"hip",

[] {

}},

{"dpcpp",

[] {

}},

{"reference", [] { return gko::ReferenceExecutor::create(); }}};

const auto exec = exec_map.at(executor_string)();

auto A = share(gko::read(std::ifstream("data/A.mtx"), exec));

auto host_x = vec::create(exec->get_master(), gko::dim<2>(size, 1));

for (auto i = 0; i < size; i++) {

host_x->at(i, 0) = 1.;

}

auto one = gko::initialize({1.0}, exec);

auto neg_one = gko::initialize({-1.0}, exec);

auto initres = gko::initialize<real_vec>({0.0}, exec);

A->apply(one, x, neg_one, b);

b->compute_norm2(initres);

b->copy_from(host_x);

const RealValueType reduction_factor = 1e-7;

auto solver_gen =

cg::build()

.with_criteria(gko::stop::Iteration::build().with_max_iters(10000u),

.with_reduction_factor(reduction_factor))

.with_preconditioner(

bj::build().with_max_block_size(16u).with_storage_optimization(

.on(exec);

std::shared_ptr<const gko:🪵:Convergence> logger =

solver_gen->add_logger(logger);

auto solver = solver_gen->generate(A);

exec->synchronize();

std::chrono::nanoseconds time(0);

auto tic = std::chrono::steady_clock::now();

auto toc = std::chrono::steady_clock::now();

time += std::chrono::duration_caststd::chrono::nanoseconds(toc - tic);

auto res = gko::as<real_vec>(logger->get_residual_norm());

auto impl_res = gko::as<real_vec>(logger->get_implicit_sq_resnorm());

std::cout << "Initial residual norm sqrt(r^T r):\n";

write(std::cout, initres);

std::cout << "Final residual norm sqrt(r^T r):\n";

write(std::cout, res);

std::cout << "Implicit residual norm squared (r^2):\n";

write(std::cout, impl_res);

std::cout << "CG iteration count: " << logger->get_num_iterations()

<< std::endl;

std::cout << "CG execution time [ms]: "

<< static_cast(time.count()) / 1000000.0 << std::endl;

}

static std::unique_ptr< Convergence > create(std::shared_ptr< const Executor >, const mask_type &enabled_events=Logger::criterion_events_mask|Logger::iteration_complete_mask)

Creates a convergence logger.

Definition: convergence.hpp:73

constexpr static precision_reduction autodetect() noexcept

Returns a special encoding which instructs the algorithm to automatically detect the best precision.

Definition: types.hpp:314

static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)

Creates a new HipExecutor.

void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType >>>::default_layout)

Writes a matrix into an output stream in matrix market format.

Definition: mtx_io.hpp:295

static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)

Creates a new CudaExecutor.

static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())

Creates a new OmpExecutor.

Definition: executor.hpp:1396

static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)

Creates a new DpcppExecutor.