Actual source code: bcgsl.c
1: #define PETSCKSP_DLL
2: /*
3: * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
4: * "Enhanced implementation of BiCGStab(L) for solving linear systems
5: * of equations". This uses tricky delayed updating ideas to prevent
6: * round-off buildup.
7: */
8: #include petscblaslapack.h
9: #include private/kspimpl.h
10: #include bcgsl.h
15: static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
16: {
17: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *) ksp->data;
18: PetscScalar alpha, beta, omega, sigma;
19: PetscScalar rho0, rho1;
20: PetscReal kappa0, kappaA, kappa1;
21: PetscReal ghat, epsilon, abstol;
22: PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
23: PetscTruth bUpdateX;
24: PetscTruth bBombed = PETSC_FALSE;
26: PetscInt maxit;
27: PetscInt h, i, j, k, vi, ell;
28: PetscBLASInt ldMZ,bierr;
33: if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL");
34: if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL");
35: if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL");
37: /* set up temporary vectors */
38: vi = 0;
39: ell = bcgsl->ell;
40: bcgsl->vB = ksp->work[vi]; vi++;
41: bcgsl->vRt = ksp->work[vi]; vi++;
42: bcgsl->vTm = ksp->work[vi]; vi++;
43: bcgsl->vvR = ksp->work+vi; vi += ell+1;
44: bcgsl->vvU = ksp->work+vi; vi += ell+1;
45: bcgsl->vXr = ksp->work[vi]; vi++;
46: ldMZ = PetscBLASIntCast(ell+1);
47: {
48: PetscMalloc(ldMZ*sizeof(PetscScalar), &AY0c);
49: PetscMalloc(ldMZ*sizeof(PetscScalar), &AYlc);
50: PetscMalloc(ldMZ*sizeof(PetscScalar), &AYtc);
51: PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZa);
52: PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZb);
53: }
55: /* Prime the iterative solver */
56: KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
57: VecNorm(VVR[0], NORM_2, &zeta0);
58: rnmax_computed = zeta0;
59: rnmax_true = zeta0;
61: (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
62: if (ksp->reason) {
63: PetscObjectTakeAccess(ksp);
64: ksp->its = 0;
65: ksp->rnorm = zeta0;
66: PetscObjectGrantAccess(ksp);
67: PetscFree(AY0c);
68: PetscFree(AYlc);
69: PetscFree(AYtc);
70: PetscFree(MZa);
71: PetscFree(MZb);
73: return(0);
74: }
76: VecSet(VVU[0],0.0);
77: alpha = 0;
78: rho0 = omega = 1;
80: if (bcgsl->delta>0.0) {
81: VecCopy(VX, VXR);
82: VecSet(VX,0.0);
83: VecCopy(VVR[0], VB);
84: } else {
85: VecCopy(ksp->vec_rhs, VB);
86: }
88: /* Life goes on */
89: VecCopy(VVR[0], VRT);
90: zeta = zeta0;
92: KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);
94: for (k=0; k<maxit; k += bcgsl->ell) {
95: ksp->its = k;
96: ksp->rnorm = zeta;
98: KSPLogResidualHistory(ksp, zeta);
99: KSPMonitor(ksp, ksp->its, zeta);
101: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
102: if (ksp->reason) break;
104: /* BiCG part */
105: rho0 = -omega*rho0;
106: nrm0 = zeta;
107: for (j=0; j<bcgsl->ell; j++) {
108: /* rho1 <- r_j' * r_tilde */
109: VecDot(VVR[j], VRT, &rho1);
110: if (rho1 == 0.0) {
111: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
112: bBombed = PETSC_TRUE;
113: break;
114: }
115: beta = alpha*(rho1/rho0);
116: rho0 = rho1;
117: for (i=0; i<=j; i++) {
118: /* u_i <- r_i - beta*u_i */
119: VecAYPX(VVU[i], -beta, VVR[i]);
120: }
121: /* u_{j+1} <- inv(K)*A*u_j */
122: KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
124: VecDot(VVU[j+1], VRT, &sigma);
125: if (sigma == 0.0) {
126: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
127: bBombed = PETSC_TRUE;
128: break;
129: }
130: alpha = rho1/sigma;
132: /* x <- x + alpha*u_0 */
133: VecAXPY(VX, alpha, VVU[0]);
135: for (i=0; i<=j; i++) {
136: /* r_i <- r_i - alpha*u_{i+1} */
137: VecAXPY(VVR[i], -alpha, VVU[i+1]);
138: }
140: /* r_{j+1} <- inv(K)*A*r_j */
141: KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
143: VecNorm(VVR[0], NORM_2, &nrm0);
144: if (bcgsl->delta>0.0) {
145: if (rnmax_computed<nrm0) rnmax_computed = nrm0;
146: if (rnmax_true<nrm0) rnmax_true = nrm0;
147: }
149: /* NEW: check for early exit */
150: (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
151: if (ksp->reason) {
152: PetscObjectTakeAccess(ksp);
153: ksp->its = k+j;
154: ksp->rnorm = nrm0;
155: PetscObjectGrantAccess(ksp);
156: break;
157: }
158: }
160: if (bBombed==PETSC_TRUE) break;
162: /* Polynomial part */
163: for(i = 0; i <= bcgsl->ell; ++i) {
164: VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
165: }
166: /* Symmetrize MZa */
167: for(i = 0; i <= bcgsl->ell; ++i) {
168: for(j = i+1; j <= bcgsl->ell; ++j) {
169: MZa[i*ldMZ+j] = MZa[j*ldMZ+i];
170: }
171: }
172: /* Copy MZa to MZb */
173: for(i = 0; i <= bcgsl->ell; ++i) {
174: for(j = 0; j <= bcgsl->ell; ++j) {
175: MZb[i+ldMZ*j] = MZa[i+ldMZ*j];
176: }
177: }
179: if (!bcgsl->bConvex || bcgsl->ell==1) {
180: PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);
182: AY0c[0] = -1;
183: LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
184: if (ierr!=0) {
185: ksp->reason = KSP_DIVERGED_BREAKDOWN;
186: bBombed = PETSC_TRUE;
187: break;
188: }
189: BLAScopy_(&bell, &MZb[1], &ione, &AY0c[1], &ione);
190: LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
191: } else {
192: PetscBLASInt ione = 1;
193: PetscScalar aone = 1.0, azero = 0.0;
194: PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);
196: LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
197: if (ierr!=0) {
198: ksp->reason = KSP_DIVERGED_BREAKDOWN;
199: bBombed = PETSC_TRUE;
200: break;
201: }
202: BLAScopy_(&neqs, &MZb[1], &ione, &AY0c[1], &ione);
203: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
204: AY0c[0] = -1;
205: AY0c[bcgsl->ell] = 0;
207: BLAScopy_(&neqs, &MZb[1+ldMZ*(bcgsl->ell)], &ione, &AYlc[1], &ione);
208: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);
210: AYlc[0] = 0;
211: AYlc[bcgsl->ell] = -1;
213: BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);
215: kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);
217: /* round-off can cause negative kappa's */
218: if (kappa0<0) kappa0 = -kappa0;
219: kappa0 = sqrt(kappa0);
221: kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
223: BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);
225: kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
227: if (kappa1<0) kappa1 = -kappa1;
228: kappa1 = sqrt(kappa1);
230: if (kappa0!=0.0 && kappa1!=0.0) {
231: if (kappaA<0.7*kappa0*kappa1) {
232: ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
233: } else {
234: ghat = kappaA/(kappa1*kappa1);
235: }
236: for (i=0; i<=bcgsl->ell; i++) {
237: AY0c[i] = AY0c[i] - ghat* AYlc[i];
238: }
239: }
240: }
242: omega = AY0c[bcgsl->ell];
243: for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
244: omega = AY0c[h];
245: }
246: if (omega==0.0) {
247: ksp->reason = KSP_DIVERGED_BREAKDOWN;
248: break;
249: }
251: for (i=1; i<=bcgsl->ell; i++) {
252: VecAXPY(VVU[0], -AY0c[i], VVU[i]);
253: VecAXPY(VX, AY0c[i], VVR[i-1]);
254: VecAXPY(VVR[0], -AY0c[i], VVR[i]);
255: }
257: VecNorm(VVR[0], NORM_2, &zeta);
259: /* Accurate Update */
260: if (bcgsl->delta>0.0) {
261: if (rnmax_computed<zeta) rnmax_computed = zeta;
262: if (rnmax_true<zeta) rnmax_true = zeta;
264: bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
265: if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
266: /* r0 <- b-inv(K)*A*X */
267: KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
268: VecAYPX(VVR[0], -1.0, VB);
269: rnmax_true = zeta;
271: if (bUpdateX) {
272: VecAXPY(VXR,1.0,VX);
273: VecSet(VX,0.0);
274: VecCopy(VVR[0], VB);
275: rnmax_computed = zeta;
276: }
277: }
278: }
279: }
280: if (bcgsl->delta>0.0) {
281: VecAXPY(VX,1.0,VXR);
282: }
284: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
285: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
287: PetscFree(AY0c);
288: PetscFree(AYlc);
289: PetscFree(AYtc);
290: PetscFree(MZa);
291: PetscFree(MZb);
292: return(0);
293: }
297: /*@
298: KSPBCGSLSetXRes - Sets the parameter governing when
299: exact residuals will be used instead of computed residuals.
301: Collective on KSP
303: Input Parameters:
304: + ksp - iterative context obtained from KSPCreate
305: - delta - computed residuals are used alone when delta is not positive
307: Options Database Keys:
309: . -ksp_bcgsl_xres delta
311: Level: intermediate
313: .keywords: KSP, BiCGStab(L), set, exact residuals
315: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
316: @*/
317: PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
318: {
319: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
323: if (ksp->setupcalled) {
324: if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
325: KSPDefaultFreeWork(ksp);
326: ksp->setupcalled = 0;
327: }
328: }
329: bcgsl->delta = delta;
330: return(0);
331: }
335: /*@
336: KSPBCGSLSetPol - Sets the type of polynomial part will
337: be used in the BiCGSTab(L) solver.
339: Collective on KSP
341: Input Parameters:
342: + ksp - iterative context obtained from KSPCreate
343: - uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.
345: Options Database Keys:
347: + -ksp_bcgsl_cxpoly - use enhanced polynomial
348: . -ksp_bcgsl_mrpoly - use standard polynomial
350: Level: intermediate
352: .keywords: KSP, BiCGStab(L), set, polynomial
354: .seealso: @()
355: @*/
356: PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscTruth uMROR)
357: {
358: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
362: if (!ksp->setupcalled) {
363: bcgsl->bConvex = uMROR;
364: } else if (bcgsl->bConvex != uMROR) {
365: /* free the data structures,
366: then create them again
367: */
368: KSPDefaultFreeWork(ksp);
369: bcgsl->bConvex = uMROR;
370: ksp->setupcalled = 0;
371: }
372: return(0);
373: }
377: /*@
378: KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).
380: Collective on KSP
382: Input Parameters:
383: + ksp - iterative context obtained from KSPCreate
384: - ell - number of search directions
386: Options Database Keys:
388: . -ksp_bcgsl_ell ell
390: Level: intermediate
392: .keywords: KSP, BiCGStab(L), set, exact residuals,
394: .seealso: @()
395: @*/
396: PetscErrorCode KSPBCGSLSetEll(KSP ksp, int ell)
397: {
398: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
402: if (ell < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
404: if (!ksp->setupcalled) {
405: bcgsl->ell = ell;
406: } else if (bcgsl->ell != ell) {
407: /* free the data structures, then create them again */
408: KSPDefaultFreeWork(ksp);
410: bcgsl->ell = ell;
411: ksp->setupcalled = 0;
412: }
413: return(0);
414: }
418: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
419: {
420: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
421: PetscErrorCode ierr;
422: PetscTruth isascii, isstring;
425: PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_ASCII, &isascii);
426: PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_STRING, &isstring);
428: if (isascii) {
429: PetscViewerASCIIPrintf(viewer, " BCGSL: Ell = %D\n", bcgsl->ell);
430: PetscViewerASCIIPrintf(viewer, " BCGSL: Delta = %lg\n", bcgsl->delta);
431: } else {
432: SETERRQ1(PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
433: }
434: return(0);
435: }
439: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
440: {
441: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
443: PetscInt this_ell;
444: PetscReal delta;
445: PetscTruth flga, flg;
448: /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
449: don't need to be called here.
450: */
451: PetscOptionsHead("KSP BiCGStab(L) Options");
453: /* Set number of search directions */
454: PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
455: if (flg) {
456: KSPBCGSLSetEll(ksp, this_ell);
457: }
459: /* Set polynomial type */
460: PetscOptionsName("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", &flga);
461: if (flga) {
462: KSPBCGSLSetPol(ksp, PETSC_TRUE);
463: } else {
464: PetscOptionsName("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", &flg);
465: KSPBCGSLSetPol(ksp, PETSC_FALSE);
466: }
468: /* Will computed residual be refreshed? */
469: PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
470: if (flg) {
471: KSPBCGSLSetXRes(ksp, delta);
472: }
473: PetscOptionsTail();
474: return(0);
475: }
479: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
480: {
481: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
482: PetscInt ell = bcgsl->ell;
486: if (ksp->pc_side == PC_SYMMETRIC) {
487: SETERRQ(PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
488: } else if (ksp->pc_side == PC_RIGHT) {
489: SETERRQ(PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
490: }
491: KSPDefaultGetWork(ksp, 6+2*ell);
492: return(0);
493: }
495: /*MC
496: KSPBCGSL - Implements a slight variant of the Enhanced
497: BiCGStab(L) algorithm in (3) and (2). The variation
498: concerns cases when either kappa0**2 or kappa1**2 is
499: negative due to round-off. Kappa0 has also been pulled
500: out of the denominator in the formula for ghat.
502: References:
503: 1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
504: approaches for the stable computation of hybrid BiCG
505: methods", Applied Numerical Mathematics: Transactions
506: f IMACS, 19(3), pp 235-54, 1996.
507: 2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
508: "BiCGStab(L) and other hybrid Bi-CG methods",
509: Numerical Algorithms, 7, pp 75-109, 1994.
510: 3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
511: for solving linear systems of equations", preprint
512: from www.citeseer.com.
514: Contributed by: Joel M. Malard, email jm.malard@pnl.gov
516: Options Database Keys:
517: + -ksp_bcgsl_ell <ell> Number of Krylov search directions
518: - -ksp_bcgsl_cxpol Use a convex function of the MR and OR polynomials after the BiCG step
519: - -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals
521: Level: beginner
523: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS
525: M*/
529: PetscErrorCode KSPCreate_BCGSL(KSP ksp)
530: {
532: KSP_BiCGStabL *bcgsl;
535: /* allocate BiCGStab(L) context */
536: PetscNewLog(ksp, KSP_BiCGStabL, &bcgsl);
537: ksp->data = (void*)bcgsl;
539: ksp->pc_side = PC_LEFT;
540: ksp->ops->setup = KSPSetUp_BCGSL;
541: ksp->ops->solve = KSPSolve_BCGSL;
542: ksp->ops->destroy = KSPDefaultDestroy;
543: ksp->ops->buildsolution = KSPDefaultBuildSolution;
544: ksp->ops->buildresidual = KSPDefaultBuildResidual;
545: ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
546: ksp->ops->view = KSPView_BCGSL;
548: /* Let the user redefine the number of directions vectors */
549: bcgsl->ell = 2;
551: /*Choose between a single MR step or an averaged MR/OR */
552: bcgsl->bConvex = PETSC_FALSE;
554: /* Set the threshold for when exact residuals will be used */
555: bcgsl->delta = 0.0;
556: return(0);
557: }