159 const params& prm = params(),
160 const backend_params& bprm = backend_params(),
161 const InnerProduct& inner_product = InnerProduct())
164 , inner_product(inner_product)
168 , r(Backend::create_vector(n, bprm))
169 , v(Backend::create_vector(n, bprm))
170 , t(Backend::create_vector(n, bprm))
172 static const scalar_type one = math::identity<scalar_type>();
173 static const scalar_type zero = math::zero<scalar_type>();
176 x_s = Backend::create_vector(n, bprm);
177 r_s = Backend::create_vector(n, bprm);
182 for (
Int32 i = 0; i < prm.s; ++i) {
183 G.push_back(Backend::create_vector(n, bprm));
184 U.push_back(Backend::create_vector(n, bprm));
190 std::vector<rhs_type> p(n);
192 int pid = inner_product.rank();
196 std::mt19937 rng(pid * nt + tid);
197 std::uniform_real_distribution<scalar_type> rnd(-1, 1);
199 for (
unsigned j = 0; j < prm.s; ++j) {
200 for (ptrdiff_t i = 0; i < n; ++i) {
201 p[i] = math::constant<rhs_type>(rnd(rng));
203 P.push_back(Backend::copy_vector(p, bprm));
206 for (
unsigned j = 0; j < prm.s; ++j) {
207 for (
unsigned k = 0; k < j; ++k) {
208 coef_type alpha = inner_product(*P[k], *P[j]);
209 backend::axpby(-alpha, *P[k], one, *P[j]);
211 scalar_type norm_pj = norm(*P[j]);
212 backend::axpby(math::inverse(norm_pj), *P[j], zero, *P[j]);
235 static const scalar_type one = math::identity<scalar_type>();
236 static const scalar_type zero = math::zero<scalar_type>();
240 scalar_type norm_rhs = norm(rhs);
241 if (norm_rhs < Alina::detail::eps<scalar_type>(1)) {
243 norm_rhs = math::identity<scalar_type>();
251 scalar_type eps = std::max(prm.tol * norm_rhs, prm.abstol);
254 backend::residual(rhs, A, x, *r);
256 scalar_type res_norm = norm(*r);
257 if (res_norm <= eps) {
263 backend::copy(x, *x_s);
264 backend::copy(*r, *r_s);
268 coef_type om = math::identity<coef_type>();
270 for (
unsigned i = 0; i < prm.s; ++i) {
271 backend::clear(*G[i]);
272 backend::clear(*U[i]);
274 for (
unsigned j = 0; j < prm.s; ++j)
280 while (iter < prm.maxiter && res_norm > eps) {
282 for (
unsigned i = 0; i < prm.s; ++i)
283 f[i] = inner_product(*r, *P[i]);
285 for (
unsigned k = 0; k < prm.s; ++k) {
287 backend::copy(*r, *v);
291 for (
unsigned i = k; i < prm.s; ++i) {
293 for (
unsigned j = k; j < i; ++j)
294 c[i] -= M(i, j) * c[j];
295 c[i] = math::inverse(M(i, i)) * c[i];
297 backend::axpby(-c[i], *G[i], one, *v);
303 backend::axpby(om, *t, c[k], *U[k]);
304 for (
unsigned i = k + 1; i < prm.s; ++i)
305 backend::axpby(c[i], *U[i], one, *U[k]);
308 backend::spmv(one, A, *U[k], zero, *G[k]);
311 for (
unsigned i = 0; i < k; ++i) {
312 coef_type alpha = inner_product(*G[k], *P[i]) / M(i, i);
314 backend::axpby(-alpha, *G[i], one, *G[k]);
315 backend::axpby(-alpha, *U[i], one, *U[k]);
319 for (
unsigned i = k; i < prm.s; ++i)
320 M(i, k) = inner_product(*G[k], *P[i]);
322 precondition(!math::is_zero(M(k, k)),
"IDR(s) breakdown: zero M[k,k]");
325 coef_type beta = math::inverse(M(k, k)) * f[k];
326 backend::axpby(-beta, *G[k], one, *r);
327 backend::axpby(beta, *U[k], one, x);
333 backend::axpbypcz(one, *r_s, -one, *r, zero, *t);
334 coef_type gamma = inner_product(*t, *r_s) / inner_product(*t, *t);
335 backend::axpby(-gamma, *t, one, *r_s);
336 backend::axpbypcz(-gamma, *x_s, gamma, x, one, *x_s);
337 res_norm = norm(*r_s);
340 if (prm.verbose && iter % 5 == 0)
341 std::cout << iter <<
"\t" << std::scientific << res_norm / norm_rhs << std::endl;
342 if (res_norm <= eps || ++iter >= prm.maxiter)
346 for (
unsigned i = k + 1; i < prm.s; ++i)
347 f[i] -= beta * M(i, k);
350 if (res_norm <= eps || iter >= prm.maxiter)
357 backend::spmv(one, A, *v, zero, *t);
361 precondition(!math::is_zero(om),
"IDR(s) breakdown: zero omega");
363 backend::axpby(-om, *t, one, *r);
364 backend::axpby(om, *v, one, x);
366 if (prm.replacement) {
367 backend::residual(rhs, A, x, *r);
373 backend::axpbypcz(one, *r_s, -one, *r, zero, *t);
374 coef_type gamma = inner_product(*t, *r_s) / inner_product(*t, *t);
375 backend::axpby(-gamma, *t, one, *r_s);
376 backend::axpbypcz(-gamma, *x_s, gamma, x, one, *x_s);
377 res_norm = norm(*r_s);
384 backend::copy(*x_s, x);