Ginkgo: The iterative-refinement program (original) (raw)

The iterative refinement solver example..

This example depends on simple-solver.

Table of contents
Introduction The commented program Results Comments about programming and debugging The plain program

This example shows how to use the iterative refinement solver.

In this example, we first read in a matrix from file, then generate a right-hand side and an initial guess. An inaccurate CG solver is used as the inner solver to an iterative refinement (IR) method which solves a linear system. The example features the iteration count and runtime of the IR solver.

The commented program

}

const auto executor_string = argc >= 2 ? argv[1] : "reference";

std::map<std::string, std::function<std::shared_ptrgko::Executor()>>

exec_map{

{"cuda",

[] {

}},

{"hip",

[] {

}},

{"dpcpp",

[] {

}},

{"reference", [] { return gko::ReferenceExecutor::create(); }}};

executor where Ginkgo will perform the computation

const auto exec = exec_map.at(executor_string)();

Read data

auto A = share(gko::read(std::ifstream("data/A.mtx"), exec));

Create RHS and initial guess as 1

for (auto i = 0; i < size; i++) {

host_x->at(i, 0) = 1.;

}

Calculate initial residual by overwriting b

auto one = gko::initialize({1.0}, exec);

auto neg_one = gko::initialize({-1.0}, exec);

auto initres = gko::initialize<real_vec>({0.0}, exec);

A->apply(one, x, neg_one, b);

b->compute_norm2(initres);

copy b again

Create solver factory

RealValueType outer_reduction_factor{1e-12};

RealValueType inner_reduction_factor{1e-2};

auto solver_gen =

ir::build()

.with_solver(cg::build().with_criteria(

.with_reduction_factor(inner_reduction_factor)))

.with_criteria(

gko::stop::Iteration::build().with_max_iters(max_iters),

.with_reduction_factor(outer_reduction_factor))

.on(exec);

Create solver

auto solver = solver_gen->generate(A);

std::shared_ptr<const gko:🪵:Convergence> logger =

solver->add_logger(logger);

Solve system

exec->synchronize();

std::chrono::nanoseconds time(0);

auto tic = std::chrono::steady_clock::now();

auto toc = std::chrono::steady_clock::now();

time += std::chrono::duration_caststd::chrono::nanoseconds(toc - tic);

Get residual

auto res = gko::as<real_vec>(logger->get_residual_norm());

std::cout << "Initial residual norm sqrt(r^T r):\n";

write(std::cout, initres);

std::cout << "Final residual norm sqrt(r^T r):\n";

write(std::cout, res);

Print solver statistics

std::cout << "IR iteration count: " << logger->get_num_iterations()

<< std::endl;

std::cout << "IR execution time [ms]: "

<< static_cast(time.count()) / 1000000.0 << std::endl;

}

Results

This is the expected output:

Initial residual norm sqrt(r^T r):

%%MatrixMarket matrix array real general

1 1

194.679

Final residual norm sqrt(r^T r):

%%MatrixMarket matrix array real general

1 1

4.23821e-11

IR iteration count: 24

IR execution time [ms]: 0.794962

Comments about programming and debugging

The plain program

#include

#include

#include

#include

#include

#include <ginkgo/ginkgo.hpp>

int main(int argc, char* argv[])

{

using ValueType = double;

using IndexType = int;

if (argc == 2 && (std::string(argv[1]) == "--help")) {

std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;

std::exit(-1);

}

const auto executor_string = argc >= 2 ? argv[1] : "reference";

std::map<std::string, std::function<std::shared_ptrgko::Executor()>>

exec_map{

{"cuda",

[] {

}},

{"hip",

[] {

}},

{"dpcpp",

[] {

}},

{"reference", [] { return gko::ReferenceExecutor::create(); }}};

const auto exec = exec_map.at(executor_string)();

auto A = share(gko::read(std::ifstream("data/A.mtx"), exec));

for (auto i = 0; i < size; i++) {

host_x->at(i, 0) = 1.;

}

auto one = gko::initialize({1.0}, exec);

auto neg_one = gko::initialize({-1.0}, exec);

auto initres = gko::initialize<real_vec>({0.0}, exec);

A->apply(one, x, neg_one, b);

b->compute_norm2(initres);

b->copy_from(host_x);

RealValueType outer_reduction_factor{1e-12};

RealValueType inner_reduction_factor{1e-2};

auto solver_gen =

ir::build()

.with_solver(cg::build().with_criteria(

.with_reduction_factor(inner_reduction_factor)))

.with_criteria(

gko::stop::Iteration::build().with_max_iters(max_iters),

.with_reduction_factor(outer_reduction_factor))

.on(exec);

auto solver = solver_gen->generate(A);

std::shared_ptr<const gko:🪵:Convergence> logger =

solver->add_logger(logger);

exec->synchronize();

std::chrono::nanoseconds time(0);

auto tic = std::chrono::steady_clock::now();

auto toc = std::chrono::steady_clock::now();

time += std::chrono::duration_caststd::chrono::nanoseconds(toc - tic);

auto res = gko::as<real_vec>(logger->get_residual_norm());

std::cout << "Initial residual norm sqrt(r^T r):\n";

write(std::cout, initres);

std::cout << "Final residual norm sqrt(r^T r):\n";

write(std::cout, res);

std::cout << "IR iteration count: " << logger->get_num_iterations()

<< std::endl;

std::cout << "IR execution time [ms]: "

<< static_cast(time.count()) / 1000000.0 << std::endl;

}

static std::unique_ptr< Convergence > create(std::shared_ptr< const Executor >, const mask_type &enabled_events=Logger::criterion_events_mask|Logger::iteration_complete_mask)

Creates a convergence logger.

Definition: convergence.hpp:73

static std::unique_ptr< Dense > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type stride=0)

Creates an uninitialized Dense matrix of the specified size.

detail::cloned_type< Pointer > clone(const Pointer &p)

Creates a unique clone of the object pointed to by p.

Definition: utils_helper.hpp:173

static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)

Creates a new HipExecutor.

void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType >>>::default_layout)

Writes a matrix into an output stream in matrix market format.

Definition: mtx_io.hpp:295

detail::shared_type< OwningPointer > share(OwningPointer &&p)

Marks the object pointed to by p as shared.

Definition: utils_helper.hpp:224

static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)

Creates a new CudaExecutor.

static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())

Creates a new OmpExecutor.

Definition: executor.hpp:1396

static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)

Creates a new DpcppExecutor.