Actual source code: bcgsl.c

  1: #define PETSCKSP_DLL
  2: /*
  3:  * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
  4:  * "Enhanced implementation of BiCGStab(L) for solving linear systems
  5:  * of equations". This uses tricky delayed updating ideas to prevent
  6:  * round-off buildup.
  7:  */
 8:  #include petscblaslapack.h
 9:  #include private/kspimpl.h
 10:  #include bcgsl.h


 15: static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
 16: {
 17:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *) ksp->data;
 18:   PetscScalar    alpha, beta, omega, sigma;
 19:   PetscScalar    rho0, rho1;
 20:   PetscReal      kappa0, kappaA, kappa1;
 21:   PetscReal      ghat, epsilon, abstol;
 22:   PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
 23:   PetscTruth     bUpdateX;
 24:   PetscTruth     bBombed = PETSC_FALSE;

 26:   PetscInt       maxit;
 27:   PetscInt       h, i, j, k, vi, ell;
 28:   PetscBLASInt   ldMZ,bierr;


 33:   if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL");
 34:   if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL");
 35:   if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL");

 37:   /* set up temporary vectors */
 38:   vi = 0;
 39:   ell = bcgsl->ell;
 40:   bcgsl->vB    = ksp->work[vi]; vi++;
 41:   bcgsl->vRt   = ksp->work[vi]; vi++;
 42:   bcgsl->vTm   = ksp->work[vi]; vi++;
 43:   bcgsl->vvR   = ksp->work+vi; vi += ell+1;
 44:   bcgsl->vvU   = ksp->work+vi; vi += ell+1;
 45:   bcgsl->vXr   = ksp->work[vi]; vi++;
 46:   ldMZ = PetscBLASIntCast(ell+1);
 47:   {
 48:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AY0c);
 49:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AYlc);
 50:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AYtc);
 51:     PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZa);
 52:     PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZb);
 53:   }

 55:   /* Prime the iterative solver */
 56:   KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
 57:   VecNorm(VVR[0], NORM_2, &zeta0);
 58:   rnmax_computed = zeta0;
 59:   rnmax_true = zeta0;

 61:   (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
 62:   if (ksp->reason) {
 63:     PetscObjectTakeAccess(ksp);
 64:     ksp->its   = 0;
 65:     ksp->rnorm = zeta0;
 66:     PetscObjectGrantAccess(ksp);
 67:     PetscFree(AY0c);
 68:     PetscFree(AYlc);
 69:     PetscFree(AYtc);
 70:     PetscFree(MZa);
 71:     PetscFree(MZb);

 73:     return(0);
 74:   }

 76:   VecSet(VVU[0],0.0);
 77:   alpha = 0;
 78:   rho0 = omega = 1;

 80:   if (bcgsl->delta>0.0) {
 81:     VecCopy(VX, VXR);
 82:     VecSet(VX,0.0);
 83:     VecCopy(VVR[0], VB);
 84:   } else {
 85:     VecCopy(ksp->vec_rhs, VB);
 86:   }

 88:   /* Life goes on */
 89:   VecCopy(VVR[0], VRT);
 90:   zeta = zeta0;

 92:   KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);

 94:   for (k=0; k<maxit; k += bcgsl->ell) {
 95:     ksp->its   = k;
 96:     ksp->rnorm = zeta;

 98:     KSPLogResidualHistory(ksp, zeta);
 99:     KSPMonitor(ksp, ksp->its, zeta);

101:     (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
102:     if (ksp->reason) break;

104:     /* BiCG part */
105:     rho0 = -omega*rho0;
106:     nrm0 = zeta;
107:     for (j=0; j<bcgsl->ell; j++) {
108:       /* rho1 <- r_j' * r_tilde */
109:       VecDot(VVR[j], VRT, &rho1);
110:       if (rho1 == 0.0) {
111:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
112:         bBombed = PETSC_TRUE;
113:         break;
114:       }
115:       beta = alpha*(rho1/rho0);
116:       rho0 = rho1;
117:       for (i=0; i<=j; i++) {
118:         /* u_i <- r_i - beta*u_i */
119:         VecAYPX(VVU[i], -beta, VVR[i]);
120:       }
121:       /* u_{j+1} <- inv(K)*A*u_j */
122:       KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);

124:       VecDot(VVU[j+1], VRT, &sigma);
125:       if (sigma == 0.0) {
126:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
127:         bBombed = PETSC_TRUE;
128:         break;
129:       }
130:       alpha = rho1/sigma;

132:       /* x <- x + alpha*u_0 */
133:       VecAXPY(VX, alpha, VVU[0]);

135:       for (i=0; i<=j; i++) {
136:         /* r_i <- r_i - alpha*u_{i+1} */
137:         VecAXPY(VVR[i], -alpha, VVU[i+1]);
138:       }

140:       /* r_{j+1} <- inv(K)*A*r_j */
141:       KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);

143:       VecNorm(VVR[0], NORM_2, &nrm0);
144:       if (bcgsl->delta>0.0) {
145:         if (rnmax_computed<nrm0) rnmax_computed = nrm0;
146:         if (rnmax_true<nrm0) rnmax_true = nrm0;
147:       }

149:       /* NEW: check for early exit */
150:       (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
151:       if (ksp->reason) {
152:         PetscObjectTakeAccess(ksp);
153:         ksp->its   = k+j;
154:         ksp->rnorm = nrm0;
155:         PetscObjectGrantAccess(ksp);
156:         break;
157:       }
158:     }

160:     if (bBombed==PETSC_TRUE) break;

162:     /* Polynomial part */
163:     for(i = 0; i <= bcgsl->ell; ++i) {
164:       VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
165:     }
166:     /* Symmetrize MZa */
167:     for(i = 0; i <= bcgsl->ell; ++i) {
168:       for(j = i+1; j <= bcgsl->ell; ++j) {
169:         MZa[i*ldMZ+j] = MZa[j*ldMZ+i];
170:       }
171:     }
172:     /* Copy MZa to MZb */
173:     for(i = 0; i <= bcgsl->ell; ++i) {
174:       for(j = 0; j <= bcgsl->ell; ++j) {
175:         MZb[i+ldMZ*j] = MZa[i+ldMZ*j];
176:       }
177:     }

179:     if (!bcgsl->bConvex || bcgsl->ell==1) {
180:       PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);

182:       AY0c[0] = -1;
183:       LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
184:       if (ierr!=0) {
185:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
186:         bBombed = PETSC_TRUE;
187:         break;
188:       }
189:       BLAScopy_(&bell, &MZb[1], &ione, &AY0c[1], &ione);
190:       LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
191:     } else {
192:       PetscBLASInt ione = 1;
193:       PetscScalar aone = 1.0, azero = 0.0;
194:       PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);

196:       LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
197:       if (ierr!=0) {
198:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
199:         bBombed = PETSC_TRUE;
200:         break;
201:       }
202:       BLAScopy_(&neqs, &MZb[1], &ione, &AY0c[1], &ione);
203:       LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
204:       AY0c[0] = -1;
205:       AY0c[bcgsl->ell] = 0;

207:       BLAScopy_(&neqs, &MZb[1+ldMZ*(bcgsl->ell)], &ione, &AYlc[1], &ione);
208:       LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

210:       AYlc[0] = 0;
211:       AYlc[bcgsl->ell] = -1;

213:       BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

215:       kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

217:       /* round-off can cause negative kappa's */
218:       if (kappa0<0) kappa0 = -kappa0;
219:       kappa0 = sqrt(kappa0);

221:       kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

223:       BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

225:       kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

227:       if (kappa1<0) kappa1 = -kappa1;
228:       kappa1 = sqrt(kappa1);

230:       if (kappa0!=0.0 && kappa1!=0.0) {
231:         if (kappaA<0.7*kappa0*kappa1) {
232:           ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
233:         } else {
234:           ghat = kappaA/(kappa1*kappa1);
235:         }
236:         for (i=0; i<=bcgsl->ell; i++) {
237:           AY0c[i] = AY0c[i] - ghat* AYlc[i];
238:         }
239:       }
240:     }

242:     omega = AY0c[bcgsl->ell];
243:     for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
244:       omega = AY0c[h];
245:     }
246:     if (omega==0.0) {
247:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
248:       break;
249:     }

251:     for (i=1; i<=bcgsl->ell; i++) {
252:       VecAXPY(VVU[0], -AY0c[i], VVU[i]);
253:       VecAXPY(VX, AY0c[i], VVR[i-1]);
254:       VecAXPY(VVR[0], -AY0c[i], VVR[i]);
255:     }

257:     VecNorm(VVR[0], NORM_2, &zeta);

259:     /* Accurate Update */
260:     if (bcgsl->delta>0.0) {
261:       if (rnmax_computed<zeta) rnmax_computed = zeta;
262:       if (rnmax_true<zeta) rnmax_true = zeta;

264:       bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
265:       if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
266:         /* r0 <- b-inv(K)*A*X */
267:         KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
268:         VecAYPX(VVR[0], -1.0, VB);
269:         rnmax_true = zeta;

271:         if (bUpdateX) {
272:           VecAXPY(VXR,1.0,VX);
273:           VecSet(VX,0.0);
274:           VecCopy(VVR[0], VB);
275:           rnmax_computed = zeta;
276:         }
277:       }
278:     }
279:   }
280:   if (bcgsl->delta>0.0) {
281:     VecAXPY(VX,1.0,VXR);
282:   }

284:   (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
285:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;

287:   PetscFree(AY0c);
288:   PetscFree(AYlc);
289:   PetscFree(AYtc);
290:   PetscFree(MZa);
291:   PetscFree(MZb);
292:   return(0);
293: }

297: /*@
298:    KSPBCGSLSetXRes - Sets the parameter governing when
299:    exact residuals will be used instead of computed residuals.

301:    Collective on KSP

303:    Input Parameters:
304: +  ksp - iterative context obtained from KSPCreate
305: -  delta - computed residuals are used alone when delta is not positive

307:    Options Database Keys:

309: .  -ksp_bcgsl_xres delta

311:    Level: intermediate

313: .keywords: KSP, BiCGStab(L), set, exact residuals

315: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
316: @*/
317: PetscErrorCode  KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
318: {
319:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

323:   if (ksp->setupcalled) {
324:     if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
325:       KSPDefaultFreeWork(ksp);
326:       ksp->setupcalled = 0;
327:     }
328:   }
329:   bcgsl->delta = delta;
330:   return(0);
331: }

335: /*@
336:    KSPBCGSLSetPol - Sets the type of polynomial part will
337:    be used in the BiCGSTab(L) solver.

339:    Collective on KSP

341:    Input Parameters:
342: +  ksp - iterative context obtained from KSPCreate
343: -  uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.

345:    Options Database Keys:

347: +  -ksp_bcgsl_cxpoly - use enhanced polynomial
348: .  -ksp_bcgsl_mrpoly - use standard polynomial

350:    Level: intermediate

352: .keywords: KSP, BiCGStab(L), set, polynomial

354: .seealso: @()
355: @*/
356: PetscErrorCode  KSPBCGSLSetPol(KSP ksp, PetscTruth uMROR)
357: {
358:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

362:   if (!ksp->setupcalled) {
363:     bcgsl->bConvex = uMROR;
364:   } else if (bcgsl->bConvex != uMROR) {
365:     /* free the data structures,
366:        then create them again
367:      */
368:    KSPDefaultFreeWork(ksp);
369:     bcgsl->bConvex = uMROR;
370:     ksp->setupcalled = 0;
371:   }
372:   return(0);
373: }

377: /*@
378:    KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).

380:    Collective on KSP

382:    Input Parameters:
383: +  ksp - iterative context obtained from KSPCreate
384: -  ell - number of search directions

386:    Options Database Keys:

388: .  -ksp_bcgsl_ell ell

390:    Level: intermediate

392: .keywords: KSP, BiCGStab(L), set, exact residuals,

394: .seealso: @()
395: @*/
396: PetscErrorCode  KSPBCGSLSetEll(KSP ksp, int ell)
397: {
398:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

402:   if (ell < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");

404:   if (!ksp->setupcalled) {
405:     bcgsl->ell = ell;
406:   } else if (bcgsl->ell != ell) {
407:     /* free the data structures, then create them again */
408:     KSPDefaultFreeWork(ksp);

410:     bcgsl->ell = ell;
411:     ksp->setupcalled = 0;
412:   }
413:   return(0);
414: }

418: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
419: {
420:   KSP_BiCGStabL       *bcgsl = (KSP_BiCGStabL *)ksp->data;
421:   PetscErrorCode      ierr;
422:   PetscTruth          isascii, isstring;

425:   PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_ASCII, &isascii);
426:   PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_STRING, &isstring);

428:   if (isascii) {
429:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Ell = %D\n", bcgsl->ell);
430:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Delta = %lg\n", bcgsl->delta);
431:   } else {
432:     SETERRQ1(PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
433:   }
434:   return(0);
435: }

439: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
440: {
441:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;
443:   PetscInt       this_ell;
444:   PetscReal      delta;
445:   PetscTruth     flga, flg;

448:   /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
449:      don't need to be called here.
450:   */
451:   PetscOptionsHead("KSP BiCGStab(L) Options");

453:   /* Set number of search directions */
454:   PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
455:   if (flg) {
456:     KSPBCGSLSetEll(ksp, this_ell);
457:   }

459:   /* Set polynomial type */
460:   PetscOptionsName("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", &flga);
461:   if (flga) {
462:     KSPBCGSLSetPol(ksp, PETSC_TRUE);
463:   } else {
464:     PetscOptionsName("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", &flg);
465:     KSPBCGSLSetPol(ksp, PETSC_FALSE);
466:   }

468:   /* Will computed residual be refreshed? */
469:   PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
470:   if (flg) {
471:     KSPBCGSLSetXRes(ksp, delta);
472:   }
473:   PetscOptionsTail();
474:   return(0);
475: }

479: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
480: {
481:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;
482:   PetscInt        ell = bcgsl->ell;

486:   if (ksp->pc_side == PC_SYMMETRIC) {
487:     SETERRQ(PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
488:   } else if (ksp->pc_side == PC_RIGHT) {
489:     SETERRQ(PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
490:   }
491:   KSPDefaultGetWork(ksp, 6+2*ell);
492:   return(0);
493: }

495: /*MC
496:      KSPBCGSL - Implements a slight variant of the Enhanced
497:                 BiCGStab(L) algorithm in (3) and (2).  The variation
498:                 concerns cases when either kappa0**2 or kappa1**2 is
499:                 negative due to round-off. Kappa0 has also been pulled
500:                 out of the denominator in the formula for ghat.

502:     References:
503:       1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
504:          approaches for the stable computation of hybrid BiCG
505:          methods", Applied Numerical Mathematics: Transactions
506:          f IMACS, 19(3), pp 235-54, 1996.
507:       2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
508:          "BiCGStab(L) and other hybrid Bi-CG methods",
509:           Numerical Algorithms, 7, pp 75-109, 1994.
510:       3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
511:          for solving linear systems of equations", preprint
512:          from www.citeseer.com.

514:    Contributed by: Joel M. Malard, email jm.malard@pnl.gov

516:    Options Database Keys:
517: +  -ksp_bcgsl_ell <ell> Number of Krylov search directions
518: -  -ksp_bcgsl_cxpol Use a convex function of the MR and OR polynomials after the BiCG step
519: -  -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals

521:    Level: beginner

523: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS

525: M*/
529: PetscErrorCode  KSPCreate_BCGSL(KSP ksp)
530: {
532:   KSP_BiCGStabL  *bcgsl;

535:   /* allocate BiCGStab(L) context */
536:   PetscNewLog(ksp, KSP_BiCGStabL, &bcgsl);
537:   ksp->data = (void*)bcgsl;

539:   ksp->pc_side              = PC_LEFT;
540:   ksp->ops->setup           = KSPSetUp_BCGSL;
541:   ksp->ops->solve           = KSPSolve_BCGSL;
542:   ksp->ops->destroy         = KSPDefaultDestroy;
543:   ksp->ops->buildsolution   = KSPDefaultBuildSolution;
544:   ksp->ops->buildresidual   = KSPDefaultBuildResidual;
545:   ksp->ops->setfromoptions  = KSPSetFromOptions_BCGSL;
546:   ksp->ops->view            = KSPView_BCGSL;

548:   /* Let the user redefine the number of directions vectors */
549:   bcgsl->ell = 2;

551:   /*Choose between a single MR step or an averaged MR/OR */
552:   bcgsl->bConvex = PETSC_FALSE;

554:   /* Set the threshold for when exact residuals will be used */
555:   bcgsl->delta = 0.0;
556:   return(0);
557: }