Ginkgo Generated from branch based on master. Ginkgo version 1.7.0
A numerical linear algebra library targeting many-core architectures
Loading...
Searching...
No Matches
The par-ilu-convergence program

The ParILU convergence example.

This example depends on simple-solver.

Table of contents
  1. Introduction
  2. The commented program
  1. Results
  2. The plain program

Introduction

About the example

This example can be used to inspect the convergence behavior of parallel incomplete factorizations. *

The commented program

#include <ginkgo/ginkgo.hpp>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
const std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
executors{
{"reference", [] { return gko::ReferenceExecutor::create(); }},
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp", [] {
}}};
template <typename Function>
auto try_generate(Function fun) -> decltype(fun())
{
decltype(fun()) result;
try {
result = fun();
} catch (const gko::Error& err) {
std::cerr << "Error: " << err.what() << '\n';
std::exit(-1);
}
return result;
}
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
residual->write(residual_data);
mtx->write(mtx_data);
residual_data.ensure_row_major_order();
auto it = mtx_data.nonzeros.begin();
double residual_norm{};
for (auto entry : residual_data.nonzeros) {
auto ref_row = it->row;
auto ref_col = it->column;
if (entry.row == ref_row && entry.column == ref_col) {
residual_norm += gko::squared_norm(entry.value);
++it;
}
}
return std::sqrt(residual_norm);
}
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
Creates a new DpcppExecutor.
The Error class is used to report exceptional behaviour in library functions.
Definition exception.hpp:86
virtual const char * what() const noexcept override
Returns a human-readable string with a more detailed description of the error.
Definition exception.hpp:103
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
Creates a new HipExecutor.
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition executor.hpp:1373
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition csr.hpp:146
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.
constexpr auto squared_norm(const T &x) -> decltype(real(conj(x) *x))
Returns the squared norm of the object.
Definition math.hpp:1085
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:155
std::vector< nonzero_type > nonzeros
A vector of tuples storing the non-zeros of the matrix.
Definition matrix_data.hpp:482
void ensure_row_major_order()
Sorts the nonzero vector so the values follow row-major order.
Definition matrix_data.hpp:487

print usage message

if (argc < 2 || executors.find(argv[1]) == executors.end()) {
std::cerr << "Usage: executable"
<< " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
"[<parilu|parilut|paric|parict] [<max-iterations>] "
"[<num-repetitions>] [<fill-in-limit>]\n";
return -1;
}

generate executor based on first argument

auto exec = try_generate([&] { return executors.at(argv[1])(); });

set matrix and preconditioner name with default values

std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);

load matrix file into Csr format

auto mtx = gko::share(try_generate([&] {
std::ifstream mtx_stream{matrix};
if (!mtx_stream) {
throw GKO_STREAM_ERROR("Unable to open matrix file");
}
std::cerr << "Reading " << matrix << std::endl;
exec);
}));
auto factory_generator =
[&](gko::size_type iteration) -> std::shared_ptr<gko::LinOpFactory> {
if (precond == "parilu") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "paric") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "parilut") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else if (precond == "parict") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else {
GKO_NOT_IMPLEMENTED;
}
};
auto minus_one =
for (int it = 1; it <= max_iterations; ++it) {
auto factory = factory_generator(it);
std::cout << it << ';';
std::vector<long> times;
std::vector<double> residuals;
for (int rep = 0; rep < num_repetitions; ++rep) {
auto tic = std::chrono::high_resolution_clock::now();
auto result =
exec->synchronize();
auto toc = std::chrono::high_resolution_clock::now();
auto residual = gko::clone(exec, mtx);
result->get_operators()[0]->apply(one, result->get_operators()[1],
minus_one, residual);
times.push_back(
std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
.count());
residuals.push_back(
compute_ilu_residual_norm(residual.get(), mtx.get()));
}
for (auto el : times) {
std::cout << el << ';';
}
for (auto el : residuals) {
std::cout << el << ';';
}
std::cout << '\n';
}
}
ParIC is an incomplete Cholesky factorization which is computed in parallel.
Definition par_ic.hpp:96
ParICT is an incomplete threshold-based Cholesky factorization which is computed in parallel.
Definition par_ict.hpp:96
ParILU is an incomplete LU factorization which is computed in parallel.
Definition par_ilu.hpp:97
ParILUT is an incomplete threshold-based LU factorization which is computed in parallel.
Definition par_ilut.hpp:99
@ factory
LinOpFactory events.
constexpr T one()
Returns the multiplicative identity for T.
Definition math.hpp:803
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:120
detail::cloned_type< Pointer > clone(const Pointer &p)
Creates a unique clone of the object pointed to by p.
Definition utils_helper.hpp:203
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition utils_helper.hpp:254

Results

This is the expected output:

Usage: executable <reference|omp|cuda|hip|dpcpp> [<matrix-file>] [<parilu|parilut|paric|parict] [<max-iterations>] [<num-repetitions>] [fill-in-limit]

When specifying an executor:

Reading data/A.mtx
1;71800;10300;8800;8200;8000;7700;7500;7500;7500;7400;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;
2;15500;9100;13500;9000;8600;8800;8700;8600;8600;8500;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
3;16500;10200;10100;10100;9900;10000;9800;9800;9900;9900;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
4;17500;11500;11200;15600;11300;11200;11400;11200;11200;11100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
5;18800;12800;12700;12600;12500;12400;12400;12400;12400;14100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
6;19200;13400;23100;15400;13200;13000;13000;13000;13100;13000;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
7;20500;14500;14400;14200;14200;14300;14200;14100;14300;14200;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
8;21600;15700;86200;16300;15700;15600;15500;15400;15500;15600;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
9;22700;17000;16700;16600;16700;16800;20400;17400;17500;17400;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
10;25500;19000;18800;18700;18700;18800;18600;18700;18600;18700;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;

Comments about programming and debugging

The plain program

/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2023, the Ginkgo authors
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/
#include <ginkgo/ginkgo.hpp>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
const std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
executors{
{"reference", [] { return gko::ReferenceExecutor::create(); }},
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp", [] {
}}};
template <typename Function>
auto try_generate(Function fun) -> decltype(fun())
{
decltype(fun()) result;
try {
result = fun();
} catch (const gko::Error& err) {
std::cerr << "Error: " << err.what() << '\n';
std::exit(-1);
}
return result;
}
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
residual->write(residual_data);
mtx->write(mtx_data);
residual_data.ensure_row_major_order();
auto it = mtx_data.nonzeros.begin();
double residual_norm{};
for (auto entry : residual_data.nonzeros) {
auto ref_row = it->row;
auto ref_col = it->column;
if (entry.row == ref_row && entry.column == ref_col) {
residual_norm += gko::squared_norm(entry.value);
++it;
}
}
return std::sqrt(residual_norm);
}
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;
if (argc < 2 || executors.find(argv[1]) == executors.end()) {
std::cerr << "Usage: executable"
<< " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
"[<parilu|parilut|paric|parict] [<max-iterations>] "
"[<num-repetitions>] [<fill-in-limit>]\n";
return -1;
}
auto exec = try_generate([&] { return executors.at(argv[1])(); });
std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);
auto mtx = gko::share(try_generate([&] {
std::ifstream mtx_stream{matrix};
if (!mtx_stream) {
throw GKO_STREAM_ERROR("Unable to open matrix file");
}
std::cerr << "Reading " << matrix << std::endl;
exec);
}));
auto factory_generator =
[&](gko::size_type iteration) -> std::shared_ptr<gko::LinOpFactory> {
if (precond == "parilu") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "paric") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "parilut") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else if (precond == "parict") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else {
GKO_NOT_IMPLEMENTED;
}
};
auto minus_one =
for (int it = 1; it <= max_iterations; ++it) {
auto factory = factory_generator(it);
std::cout << it << ';';
std::vector<long> times;
std::vector<double> residuals;
for (int rep = 0; rep < num_repetitions; ++rep) {
auto tic = std::chrono::high_resolution_clock::now();
auto result =
exec->synchronize();
auto toc = std::chrono::high_resolution_clock::now();
auto residual = gko::clone(exec, mtx);
result->get_operators()[0]->apply(one, result->get_operators()[1],
minus_one, residual);
times.push_back(
std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
.count());
residuals.push_back(
compute_ilu_residual_norm(residual.get(), mtx.get()));
}
for (auto el : times) {
std::cout << el << ';';
}
for (auto el : residuals) {
std::cout << el << ';';
}
std::cout << '\n';
}
}