Ginkgo: The iterative-refinement program

The iterative refinement solver example..

This example depends on simple-solver.

Table of contents
Introduction The commented program	Results Comments about programming and debugging The plain program

The commented program

}

const auto executor_string = argc >= 2 ? argv[1] : "reference";

std::map<std::string, std::function<std::shared_ptrgko::Executor()>>

exec_map{

{"cuda",

[] {

}},

{"hip",

[] {

}},

{"dpcpp",

[] {

}},

{"reference", [] { return gko::ReferenceExecutor::create(); }}};

executor where Ginkgo will perform the computation

const auto exec = exec_map.at(executor_string)();

Read data

auto A = share(gko::read(std::ifstream("data/A.mtx"), exec));

Create RHS and initial guess as 1

for (auto i = 0; i < size; i++) {

host_x->at(i, 0) = 1.;

}

Calculate initial residual by overwriting b

auto one = gko::initialize({1.0}, exec);

auto neg_one = gko::initialize({-1.0}, exec);

auto initres = gko::initialize<real_vec>({0.0}, exec);

A->apply(one, x, neg_one, b);

b->compute_norm2(initres);

copy b again

Create solver factory

RealValueType outer_reduction_factor{1e-12};

RealValueType inner_reduction_factor{1e-2};

auto solver_gen =

ir::build()

.with_solver(cg::build().with_criteria(

.with_reduction_factor(inner_reduction_factor)))

.with_criteria(

gko::stop::Iteration::build().with_max_iters(max_iters),

.with_reduction_factor(outer_reduction_factor))

.on(exec);

Create solver

auto solver = solver_gen->generate(A);

std::shared_ptr<const gko:🪵:Convergence> logger =

solver->add_logger(logger);

Solve system

exec->synchronize();

std::chrono::nanoseconds time(0);

auto tic = std::chrono::steady_clock::now();

auto toc = std::chrono::steady_clock::now();

time += std::chrono::duration_caststd::chrono::nanoseconds(toc - tic);

Get residual

auto res = gko::as<real_vec>(logger->get_residual_norm());

std::cout << "Initial residual norm sqrt(r^T r):\n";

write(std::cout, initres);

std::cout << "Final residual norm sqrt(r^T r):\n";

write(std::cout, res);

Print solver statistics

std::cout << "IR iteration count: " << logger->get_num_iterations()

<< std::endl;

std::cout << "IR execution time [ms]: "

<< static_cast(time.count()) / 1000000.0 << std::endl;

}

Results

This is the expected output:

Initial residual norm sqrt(r^T r):

%%MatrixMarket matrix array real general

1 1

194.679

Final residual norm sqrt(r^T r):

%%MatrixMarket matrix array real general

1 1

4.23821e-11

IR iteration count: 24

IR execution time [ms]: 0.794962

Comments about programming and debugging

The plain program

#include

#include <ginkgo/ginkgo.hpp>

int main(int argc, char* argv[])

{

using ValueType = double;

using IndexType = int;

if (argc == 2 && (std::string(argv[1]) == "--help")) {

std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;

std::exit(-1);

}

const auto executor_string = argc >= 2 ? argv[1] : "reference";

std::map<std::string, std::function<std::shared_ptrgko::Executor()>>

exec_map{

{"cuda",

[] {

}},

{"hip",

[] {

}},

{"dpcpp",

[] {

}},

{"reference", [] { return gko::ReferenceExecutor::create(); }}};

const auto exec = exec_map.at(executor_string)();

auto A = share(gko::read(std::ifstream("data/A.mtx"), exec));

for (auto i = 0; i < size; i++) {

host_x->at(i, 0) = 1.;

}

auto one = gko::initialize({1.0}, exec);

auto neg_one = gko::initialize({-1.0}, exec);

auto initres = gko::initialize<real_vec>({0.0}, exec);

A->apply(one, x, neg_one, b);

b->compute_norm2(initres);

b->copy_from(host_x);

RealValueType outer_reduction_factor{1e-12};

RealValueType inner_reduction_factor{1e-2};

auto solver_gen =

ir::build()

.with_solver(cg::build().with_criteria(

.with_reduction_factor(inner_reduction_factor)))

.with_criteria(

gko::stop::Iteration::build().with_max_iters(max_iters),

.with_reduction_factor(outer_reduction_factor))

.on(exec);

auto solver = solver_gen->generate(A);

std::shared_ptr<const gko:🪵:Convergence> logger =

solver->add_logger(logger);

exec->synchronize();

std::chrono::nanoseconds time(0);

auto tic = std::chrono::steady_clock::now();

auto toc = std::chrono::steady_clock::now();

time += std::chrono::duration_caststd::chrono::nanoseconds(toc - tic);

auto res = gko::as<real_vec>(logger->get_residual_norm());

std::cout << "Initial residual norm sqrt(r^T r):\n";

write(std::cout, initres);

std::cout << "Final residual norm sqrt(r^T r):\n";

write(std::cout, res);

std::cout << "IR iteration count: " << logger->get_num_iterations()

<< std::endl;

std::cout << "IR execution time [ms]: "

<< static_cast(time.count()) / 1000000.0 << std::endl;

}

static std::unique_ptr< Convergence > create(std::shared_ptr< const Executor >, const mask_type &enabled_events=Logger::criterion_events_mask|Logger::iteration_complete_mask)

Creates a convergence logger.

Definition: convergence.hpp:73

static std::unique_ptr< Dense > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type stride=0)

Creates an uninitialized Dense matrix of the specified size.

detail::cloned_type< Pointer > clone(const Pointer &p)

Creates a unique clone of the object pointed to by p.

Definition: utils_helper.hpp:173

static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)

Creates a new HipExecutor.

void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType >>>::default_layout)

Writes a matrix into an output stream in matrix market format.

Definition: mtx_io.hpp:295

detail::shared_type< OwningPointer > share(OwningPointer &&p)

Marks the object pointed to by p as shared.

Definition: utils_helper.hpp:224

static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)

Creates a new CudaExecutor.

static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())

Creates a new OmpExecutor.

Definition: executor.hpp:1396

static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)

Creates a new DpcppExecutor.

Ginkgo: The iterative-refinement program (original) (raw)

This example shows how to use the iterative refinement solver.

In this example, we first read in a matrix from file, then generate a right-hand side and an initial guess. An inaccurate CG solver is used as the inner solver to an iterative refinement (IR) method which solves a linear system. The example features the iteration count and runtime of the IR solver.

The commented program

Results

Comments about programming and debugging

The plain program