Actual source code: baijfact.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Factorization code for BAIJ format. 
  5: */
 6:  #include ../src/mat/impls/baij/seq/baij.h
 7:  #include ../src/inline/ilu.h

  9: /* ------------------------------------------------------------*/
 10: /*
 11:       Version for when blocks are 2 by 2
 12: */
 15: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B,Mat A,const MatFactorInfo *info)
 16: {
 17:   Mat            C = B;
 18:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
 19:   IS             isrow = b->row,isicol = b->icol;
 21:   const PetscInt *r,*ic;
 22:   PetscInt       i,j,n = a->mbs,*bi = b->i,*bj = b->j;
 23:   PetscInt       *ajtmpold,*ajtmp,nz,row;
 24:   PetscInt       *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
 25:   MatScalar      *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
 26:   MatScalar      p1,p2,p3,p4;
 27:   MatScalar      *ba = b->a,*aa = a->a;
 28:   PetscReal      shift = info->shiftinblocks;

 31:   ISGetIndices(isrow,&r);
 32:   ISGetIndices(isicol,&ic);
 33:   PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);

 35:   for (i=0; i<n; i++) {
 36:     nz    = bi[i+1] - bi[i];
 37:     ajtmp = bj + bi[i];
 38:     for  (j=0; j<nz; j++) {
 39:       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
 40:     }
 41:     /* load in initial (unfactored row) */
 42:     idx      = r[i];
 43:     nz       = ai[idx+1] - ai[idx];
 44:     ajtmpold = aj + ai[idx];
 45:     v        = aa + 4*ai[idx];
 46:     for (j=0; j<nz; j++) {
 47:       x    = rtmp+4*ic[ajtmpold[j]];
 48:       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
 49:       v    += 4;
 50:     }
 51:     row = *ajtmp++;
 52:     while (row < i) {
 53:       pc = rtmp + 4*row;
 54:       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
 55:       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
 56:         pv = ba + 4*diag_offset[row];
 57:         pj = bj + diag_offset[row] + 1;
 58:         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
 59:         pc[0] = m1 = p1*x1 + p3*x2;
 60:         pc[1] = m2 = p2*x1 + p4*x2;
 61:         pc[2] = m3 = p1*x3 + p3*x4;
 62:         pc[3] = m4 = p2*x3 + p4*x4;
 63:         nz = bi[row+1] - diag_offset[row] - 1;
 64:         pv += 4;
 65:         for (j=0; j<nz; j++) {
 66:           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
 67:           x    = rtmp + 4*pj[j];
 68:           x[0] -= m1*x1 + m3*x2;
 69:           x[1] -= m2*x1 + m4*x2;
 70:           x[2] -= m1*x3 + m3*x4;
 71:           x[3] -= m2*x3 + m4*x4;
 72:           pv   += 4;
 73:         }
 74:         PetscLogFlops(16*nz+12);
 75:       }
 76:       row = *ajtmp++;
 77:     }
 78:     /* finished row so stick it into b->a */
 79:     pv = ba + 4*bi[i];
 80:     pj = bj + bi[i];
 81:     nz = bi[i+1] - bi[i];
 82:     for (j=0; j<nz; j++) {
 83:       x     = rtmp+4*pj[j];
 84:       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
 85:       pv   += 4;
 86:     }
 87:     /* invert diagonal block */
 88:     w = ba + 4*diag_offset[i];
 89:     Kernel_A_gets_inverse_A_2(w,shift);
 90:   }

 92:   PetscFree(rtmp);
 93:   ISRestoreIndices(isicol,&ic);
 94:   ISRestoreIndices(isrow,&r);
 95:   C->ops->solve          = MatSolve_SeqBAIJ_2;
 96:   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2;
 97:   C->assembled = PETSC_TRUE;
 98:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
 99:   return(0);
100: }
101: /*
102:       Version for when blocks are 2 by 2 Using natural ordering
103: */
106: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
107: {
108:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
110:   PetscInt       i,j,n = a->mbs,*bi = b->i,*bj = b->j;
111:   PetscInt       *ajtmpold,*ajtmp,nz,row;
112:   PetscInt       *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
113:   MatScalar      *pv,*v,*rtmp,*pc,*w,*x;
114:   MatScalar      p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
115:   MatScalar      *ba = b->a,*aa = a->a;
116:   PetscReal      shift = info->shiftinblocks;

119:   PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);

121:   for (i=0; i<n; i++) {
122:     nz    = bi[i+1] - bi[i];
123:     ajtmp = bj + bi[i];
124:     for  (j=0; j<nz; j++) {
125:       x = rtmp+4*ajtmp[j];
126:       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
127:     }
128:     /* load in initial (unfactored row) */
129:     nz       = ai[i+1] - ai[i];
130:     ajtmpold = aj + ai[i];
131:     v        = aa + 4*ai[i];
132:     for (j=0; j<nz; j++) {
133:       x    = rtmp+4*ajtmpold[j];
134:       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
135:       v    += 4;
136:     }
137:     row = *ajtmp++;
138:     while (row < i) {
139:       pc  = rtmp + 4*row;
140:       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
141:       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
142:         pv = ba + 4*diag_offset[row];
143:         pj = bj + diag_offset[row] + 1;
144:         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
145:         pc[0] = m1 = p1*x1 + p3*x2;
146:         pc[1] = m2 = p2*x1 + p4*x2;
147:         pc[2] = m3 = p1*x3 + p3*x4;
148:         pc[3] = m4 = p2*x3 + p4*x4;
149:         nz = bi[row+1] - diag_offset[row] - 1;
150:         pv += 4;
151:         for (j=0; j<nz; j++) {
152:           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
153:           x    = rtmp + 4*pj[j];
154:           x[0] -= m1*x1 + m3*x2;
155:           x[1] -= m2*x1 + m4*x2;
156:           x[2] -= m1*x3 + m3*x4;
157:           x[3] -= m2*x3 + m4*x4;
158:           pv   += 4;
159:         }
160:         PetscLogFlops(16*nz+12);
161:       }
162:       row = *ajtmp++;
163:     }
164:     /* finished row so stick it into b->a */
165:     pv = ba + 4*bi[i];
166:     pj = bj + bi[i];
167:     nz = bi[i+1] - bi[i];
168:     for (j=0; j<nz; j++) {
169:       x      = rtmp+4*pj[j];
170:       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
171:       pv   += 4;
172:     }
173:     /* invert diagonal block */
174:     w = ba + 4*diag_offset[i];
175:     Kernel_A_gets_inverse_A_2(w,shift);
176:   }

178:   PetscFree(rtmp);
179:   C->ops->solve          = MatSolve_SeqBAIJ_2_NaturalOrdering;
180:   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
181:   C->assembled = PETSC_TRUE;
182:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
183:   return(0);
184: }

186: /* ----------------------------------------------------------- */
187: /*
188:      Version for when blocks are 1 by 1.
189: */
192: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat C,Mat A,const MatFactorInfo *info)
193: {
194:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
195:   IS             isrow = b->row,isicol = b->icol;
197:   const PetscInt *r,*ic;
198:   PetscInt       i,j,n = a->mbs,*bi = b->i,*bj = b->j;
199:   PetscInt       *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
200:   PetscInt       *diag_offset = b->diag,diag,*pj;
201:   MatScalar      *pv,*v,*rtmp,multiplier,*pc;
202:   MatScalar      *ba = b->a,*aa = a->a;
203:   PetscTruth     row_identity, col_identity;

206:   ISGetIndices(isrow,&r);
207:   ISGetIndices(isicol,&ic);
208:   PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);

210:   for (i=0; i<n; i++) {
211:     nz    = bi[i+1] - bi[i];
212:     ajtmp = bj + bi[i];
213:     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;

215:     /* load in initial (unfactored row) */
216:     nz       = ai[r[i]+1] - ai[r[i]];
217:     ajtmpold = aj + ai[r[i]];
218:     v        = aa + ai[r[i]];
219:     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];

221:     row = *ajtmp++;
222:     while (row < i) {
223:       pc = rtmp + row;
224:       if (*pc != 0.0) {
225:         pv         = ba + diag_offset[row];
226:         pj         = bj + diag_offset[row] + 1;
227:         multiplier = *pc * *pv++;
228:         *pc        = multiplier;
229:         nz         = bi[row+1] - diag_offset[row] - 1;
230:         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
231:         PetscLogFlops(1+2*nz);
232:       }
233:       row = *ajtmp++;
234:     }
235:     /* finished row so stick it into b->a */
236:     pv = ba + bi[i];
237:     pj = bj + bi[i];
238:     nz = bi[i+1] - bi[i];
239:     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
240:     diag = diag_offset[i] - bi[i];
241:     /* check pivot entry for current row */
242:     if (pv[diag] == 0.0) {
243:       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
244:     }
245:     pv[diag] = 1.0/pv[diag];
246:   }

248:   PetscFree(rtmp);
249:   ISRestoreIndices(isicol,&ic);
250:   ISRestoreIndices(isrow,&r);
251:   ISIdentity(isrow,&row_identity);
252:   ISIdentity(isicol,&col_identity);
253:   if (row_identity && col_identity) {
254:     C->ops->solve          = MatSolve_SeqBAIJ_1_NaturalOrdering;
255:     C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
256:   } else {
257:     C->ops->solve          = MatSolve_SeqBAIJ_1;
258:     C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1;
259:   }
260:   C->assembled = PETSC_TRUE;
261:   PetscLogFlops(C->cmap->n);
262:   return(0);
263: }

268: PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A,MatFactorType ftype,Mat *B)
269: {
270:   PetscInt           n = A->rmap->n;
271:   PetscErrorCode     ierr;

274:   MatCreate(((PetscObject)A)->comm,B);
275:   MatSetSizes(*B,n,n,n,n);
276:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
277:     MatSetType(*B,MATSEQBAIJ);
278:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqBAIJ;
279:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ;
280:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
281:     MatSetType(*B,MATSEQSBAIJ);
282:     MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
283:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqBAIJ;
284:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ;
285:   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
286:   (*B)->factor = ftype;
287:   return(0);
288: }

294: PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
295: {
297:   *flg = PETSC_TRUE;
298:   return(0);
299: }

302: /* ----------------------------------------------------------- */
305: PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
306: {
308:   Mat            C;

311:   MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);
312:   MatLUFactorSymbolic(C,A,row,col,info);
313:   MatLUFactorNumeric(C,A,info);
314:   A->ops->solve            = C->ops->solve;
315:   A->ops->solvetranspose   = C->ops->solvetranspose;
316:   MatHeaderCopy(A,C);
317:   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
318:   return(0);
319: }

321:  #include ../src/mat/impls/sbaij/seq/sbaij.h
324: PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C,Mat A,const MatFactorInfo *info)
325: {
327:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
328:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
329:   IS             ip=b->row;
330:   const PetscInt *rip;
331:   PetscInt       i,j,mbs=a->mbs,bs=A->rmap->bs,*bi=b->i,*bj=b->j,*bcol;
332:   PetscInt       *ai=a->i,*aj=a->j;
333:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
334:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
335:   PetscReal      zeropivot,rs,shiftnz;
336:   PetscReal      shiftpd;
337:   ChShift_Ctx    sctx;
338:   PetscInt       newshift;

341:   if (bs > 1) {
342:     if (!a->sbaijMat){
343:       MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);
344:     }
345:     (a->sbaijMat)->ops->choleskyfactornumeric(C,a->sbaijMat,info);
346:     MatDestroy(a->sbaijMat);
347:     a->sbaijMat = PETSC_NULL;
348:     return(0);
349:   }
350: 
351:   /* initialization */
352:   shiftnz   = info->shiftnz;
353:   shiftpd   = info->shiftpd;
354:   zeropivot = info->zeropivot;

356:   ISGetIndices(ip,&rip);
357:   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
358:   PetscMalloc(nz,&il);
359:   jl   = il + mbs;
360:   rtmp = (MatScalar*)(jl + mbs);

362:   sctx.shift_amount = 0;
363:   sctx.nshift       = 0;
364:   do {
365:     sctx.chshift = PETSC_FALSE;
366:     for (i=0; i<mbs; i++) {
367:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
368:     }
369: 
370:     for (k = 0; k<mbs; k++){
371:       bval = ba + bi[k];
372:       /* initialize k-th row by the perm[k]-th row of A */
373:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
374:       for (j = jmin; j < jmax; j++){
375:         col = rip[aj[j]];
376:         if (col >= k){ /* only take upper triangular entry */
377:           rtmp[col] = aa[j];
378:           *bval++  = 0.0; /* for in-place factorization */
379:         }
380:       }
381: 
382:       /* shift the diagonal of the matrix */
383:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

385:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
386:       dk = rtmp[k];
387:       i = jl[k]; /* first row to be added to k_th row  */

389:       while (i < k){
390:         nexti = jl[i]; /* next row to be added to k_th row */

392:         /* compute multiplier, update diag(k) and U(i,k) */
393:         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
394:         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
395:         dk += uikdi*ba[ili];
396:         ba[ili] = uikdi; /* -U(i,k) */

398:         /* add multiple of row i to k-th row */
399:         jmin = ili + 1; jmax = bi[i+1];
400:         if (jmin < jmax){
401:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
402:           /* update il and jl for row i */
403:           il[i] = jmin;
404:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
405:         }
406:         i = nexti;
407:       }

409:       /* shift the diagonals when zero pivot is detected */
410:       /* compute rs=sum of abs(off-diagonal) */
411:       rs   = 0.0;
412:       jmin = bi[k]+1;
413:       nz   = bi[k+1] - jmin;
414:       if (nz){
415:         bcol = bj + jmin;
416:         while (nz--){
417:           rs += PetscAbsScalar(rtmp[*bcol]);
418:           bcol++;
419:         }
420:       }

422:       sctx.rs = rs;
423:       sctx.pv = dk;
424:       MatCholeskyCheckShift_inline(info,sctx,k,newshift);
425:       if (newshift == 1) break;

427:       /* copy data into U(k,:) */
428:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
429:       jmin = bi[k]+1; jmax = bi[k+1];
430:       if (jmin < jmax) {
431:         for (j=jmin; j<jmax; j++){
432:           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
433:         }
434:         /* add the k-th row into il and jl */
435:         il[k] = jmin;
436:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
437:       }
438:     }
439:   } while (sctx.chshift);
440:   PetscFree(il);

442:   ISRestoreIndices(ip,&rip);
443:   C->assembled    = PETSC_TRUE;
444:   C->preallocated = PETSC_TRUE;
445:   PetscLogFlops(C->rmap->N);
446:   if (sctx.nshift){
447:     if (shiftpd) {
448:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
449:     } else if (shiftnz) {
450:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
451:     }
452:   }
453:   return(0);
454: }

458: PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
459: {
460:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
461:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
463:   PetscInt       i,j,am=a->mbs;
464:   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
465:   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
466:   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
467:   PetscReal      zeropivot,rs,shiftnz;
468:   PetscReal      shiftpd;
469:   ChShift_Ctx    sctx;
470:   PetscInt       newshift;

473:   /* initialization */
474:   shiftnz   = info->shiftnz;
475:   shiftpd   = info->shiftpd;
476:   zeropivot = info->zeropivot;

478:   nz   = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
479:   PetscMalloc(nz,&il);
480:   jl   = il + am;
481:   rtmp = (MatScalar*)(jl + am);

483:   sctx.shift_amount = 0;
484:   sctx.nshift       = 0;
485:   do {
486:     sctx.chshift = PETSC_FALSE;
487:     for (i=0; i<am; i++) {
488:       rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
489:     }

491:     for (k = 0; k<am; k++){
492:     /* initialize k-th row with elements nonzero in row perm(k) of A */
493:       nz   = ai[k+1] - ai[k];
494:       acol = aj + ai[k];
495:       aval = aa + ai[k];
496:       bval = ba + bi[k];
497:       while (nz -- ){
498:         if (*acol < k) { /* skip lower triangular entries */
499:           acol++; aval++;
500:         } else {
501:           rtmp[*acol++] = *aval++;
502:           *bval++       = 0.0; /* for in-place factorization */
503:         }
504:       }
505: 
506:       /* shift the diagonal of the matrix */
507:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
508: 
509:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
510:       dk = rtmp[k];
511:       i  = jl[k]; /* first row to be added to k_th row  */

513:       while (i < k){
514:         nexti = jl[i]; /* next row to be added to k_th row */
515:         /* compute multiplier, update D(k) and U(i,k) */
516:         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
517:         uikdi = - ba[ili]*ba[bi[i]];
518:         dk   += uikdi*ba[ili];
519:         ba[ili] = uikdi; /* -U(i,k) */

521:         /* add multiple of row i to k-th row ... */
522:         jmin = ili + 1;
523:         nz   = bi[i+1] - jmin;
524:         if (nz > 0){
525:           bcol = bj + jmin;
526:           bval = ba + jmin;
527:           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
528:           /* update il and jl for i-th row */
529:           il[i] = jmin;
530:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
531:         }
532:         i = nexti;
533:       }

535:       /* shift the diagonals when zero pivot is detected */
536:       /* compute rs=sum of abs(off-diagonal) */
537:       rs   = 0.0;
538:       jmin = bi[k]+1;
539:       nz   = bi[k+1] - jmin;
540:       if (nz){
541:         bcol = bj + jmin;
542:         while (nz--){
543:           rs += PetscAbsScalar(rtmp[*bcol]);
544:           bcol++;
545:         }
546:       }

548:       sctx.rs = rs;
549:       sctx.pv = dk;
550:       MatCholeskyCheckShift_inline(info,sctx,k,newshift);
551:       if (newshift == 1) break;    /* sctx.shift_amount is updated */

553:       /* copy data into U(k,:) */
554:       ba[bi[k]] = 1.0/dk;
555:       jmin      = bi[k]+1;
556:       nz        = bi[k+1] - jmin;
557:       if (nz){
558:         bcol = bj + jmin;
559:         bval = ba + jmin;
560:         while (nz--){
561:           *bval++       = rtmp[*bcol];
562:           rtmp[*bcol++] = 0.0;
563:         }
564:         /* add k-th row into il and jl */
565:         il[k] = jmin;
566:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
567:       }
568:     }
569:   } while (sctx.chshift);
570:   PetscFree(il);
571: 
572:   C->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
573:   C->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
574:   C->assembled    = PETSC_TRUE;
575:   C->preallocated = PETSC_TRUE;
576:   PetscLogFlops(C->rmap->N);
577:     if (sctx.nshift){
578:     if (shiftnz) {
579:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
580:     } else if (shiftpd) {
581:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
582:     }
583:   }
584:   return(0);
585: }

587:  #include petscbt.h
588:  #include ../src/mat/utils/freespace.h
591: PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
592: {
593:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
594:   Mat_SeqSBAIJ       *b;
595:   Mat                B;
596:   PetscErrorCode     ierr;
597:   PetscTruth         perm_identity;
598:   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->rmap->bs,*ui;
599:   const PetscInt     *rip;
600:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
601:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
602:   PetscReal          fill=info->fill,levels=info->levels;
603:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
604:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
605:   PetscBT            lnkbt;

608:   if (bs > 1){
609:     if (!a->sbaijMat){
610:       MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);
611:     }
612:     (fact)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ;  /* undue the change made in MatGetFactor_seqbaij_petsc */
613:     MatICCFactorSymbolic(fact,a->sbaijMat,perm,info);
614:     return(0);
615:   }

617:   ISIdentity(perm,&perm_identity);
618:   ISGetIndices(perm,&rip);

620:   /* special case that simply copies fill pattern */
621:   if (!levels && perm_identity) {
622:     MatMarkDiagonal_SeqBAIJ(A);
623:     PetscMalloc((am+1)*sizeof(PetscInt),&ui);
624:     for (i=0; i<am; i++) {
625:       ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
626:     }
627:     B = fact;
628:     MatSeqSBAIJSetPreallocation(B,1,0,ui);


631:     b  = (Mat_SeqSBAIJ*)B->data;
632:     uj = b->j;
633:     for (i=0; i<am; i++) {
634:       aj = a->j + a->diag[i];
635:       for (j=0; j<ui[i]; j++){
636:         *uj++ = *aj++;
637:       }
638:       b->ilen[i] = ui[i];
639:     }
640:     PetscFree(ui);
641:     B->factor = MAT_FACTOR_NONE;
642:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
643:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
644:     B->factor = MAT_FACTOR_ICC;

646:     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
647:     return(0);
648:   }

650:   /* initialization */
651:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
652:   ui[0] = 0;
653:   PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);

655:   /* jl: linked list for storing indices of the pivot rows 
656:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
657:   PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);
658:   il         = jl + am;
659:   uj_ptr     = (PetscInt**)(il + am);
660:   uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
661:   for (i=0; i<am; i++){
662:     jl[i] = am; il[i] = 0;
663:   }

665:   /* create and initialize a linked list for storing column indices of the active row k */
666:   nlnk = am + 1;
667:   PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

669:   /* initial FreeSpace size is fill*(ai[am]+1) */
670:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
671:   current_space = free_space;
672:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
673:   current_space_lvl = free_space_lvl;

675:   for (k=0; k<am; k++){  /* for each active row k */
676:     /* initialize lnk by the column indices of row rip[k] of A */
677:     nzk   = 0;
678:     ncols = ai[rip[k]+1] - ai[rip[k]];
679:     ncols_upper = 0;
680:     cols        = cols_lvl + am;
681:     for (j=0; j<ncols; j++){
682:       i = rip[*(aj + ai[rip[k]] + j)];
683:       if (i >= k){ /* only take upper triangular entry */
684:         cols[ncols_upper] = i;
685:         cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
686:         ncols_upper++;
687:       }
688:     }
689:     PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);
690:     nzk += nlnk;

692:     /* update lnk by computing fill-in for each pivot row to be merged in */
693:     prow = jl[k]; /* 1st pivot row */
694: 
695:     while (prow < k){
696:       nextprow = jl[prow];
697: 
698:       /* merge prow into k-th row */
699:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
700:       jmax = ui[prow+1];
701:       ncols = jmax-jmin;
702:       i     = jmin - ui[prow];
703:       cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
704:       for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
705:       PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);
706:       nzk += nlnk;

708:       /* update il and jl for prow */
709:       if (jmin < jmax){
710:         il[prow] = jmin;
711:         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
712:       }
713:       prow = nextprow;
714:     }

716:     /* if free space is not available, make more free space */
717:     if (current_space->local_remaining<nzk) {
718:       i = am - k + 1; /* num of unfactored rows */
719:       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
720:       PetscFreeSpaceGet(i,&current_space);
721:       PetscFreeSpaceGet(i,&current_space_lvl);
722:       reallocs++;
723:     }

725:     /* copy data into free_space and free_space_lvl, then initialize lnk */
726:     PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

728:     /* add the k-th row into il and jl */
729:     if (nzk-1 > 0){
730:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
731:       jl[k] = jl[i]; jl[i] = k;
732:       il[k] = ui[k] + 1;
733:     }
734:     uj_ptr[k]     = current_space->array;
735:     uj_lvl_ptr[k] = current_space_lvl->array;

737:     current_space->array           += nzk;
738:     current_space->local_used      += nzk;
739:     current_space->local_remaining -= nzk;

741:     current_space_lvl->array           += nzk;
742:     current_space_lvl->local_used      += nzk;
743:     current_space_lvl->local_remaining -= nzk;

745:     ui[k+1] = ui[k] + nzk;
746:   }

748: #if defined(PETSC_USE_INFO)
749:   if (ai[am] != 0) {
750:     PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]);
751:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
752:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
753:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
754:   } else {
755:     PetscInfo(A,"Empty matrix.\n");
756:   }
757: #endif

759:   ISRestoreIndices(perm,&rip);
760:   PetscFree(jl);
761:   PetscFree(cols_lvl);

763:   /* destroy list of free space and other temporary array(s) */
764:   PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
765:   PetscFreeSpaceContiguous(&free_space,uj);
766:   PetscIncompleteLLDestroy(lnk,lnkbt);
767:   PetscFreeSpaceDestroy(free_space_lvl);

769:   /* put together the new matrix in MATSEQSBAIJ format */
770:   B = fact;
771:   MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);

773:   b = (Mat_SeqSBAIJ*)B->data;
774:   b->singlemalloc = PETSC_FALSE;
775:   b->free_a       = PETSC_TRUE;
776:   b->free_ij       = PETSC_TRUE;
777:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
778:   b->j    = uj;
779:   b->i    = ui;
780:   b->diag = 0;
781:   b->ilen = 0;
782:   b->imax = 0;
783:   b->row  = perm;
784:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
785:   PetscObjectReference((PetscObject)perm);
786:   b->icol = perm;
787:   PetscObjectReference((PetscObject)perm);
788:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
789:   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
790:   b->maxnz = b->nz = ui[am];
791: 
792:   B->info.factor_mallocs    = reallocs;
793:   B->info.fill_ratio_given  = fill;
794:   if (ai[am] != 0) {
795:     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
796:   } else {
797:     B->info.fill_ratio_needed = 0.0;
798:   }
799:   if (perm_identity){
800:     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
801:     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
802:     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
803:   } else {
804:     (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
805:   }
806:   return(0);
807: }

811: PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
812: {
813:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
814:   Mat_SeqSBAIJ       *b;
815:   Mat                B;
816:   PetscErrorCode     ierr;
817:   PetscTruth         perm_identity;
818:   PetscReal          fill = info->fill;
819:   const PetscInt     *rip;
820:   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,*ai=a->i,*aj=a->j,reallocs=0,prow;
821:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
822:   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
823:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
824:   PetscBT            lnkbt;

827:   if (bs > 1) { /* convert to seqsbaij */
828:     if (!a->sbaijMat){
829:       MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);
830:     }
831:     (fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */
832:     MatCholeskyFactorSymbolic(fact,a->sbaijMat,perm,info);
833:     return(0);
834:   }

836:   /* check whether perm is the identity mapping */
837:   ISIdentity(perm,&perm_identity);
838:   if (!perm_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
839:   ISGetIndices(perm,&rip);

841:   /* initialization */
842:   PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);
843:   ui[0] = 0;

845:   /* jl: linked list for storing indices of the pivot rows 
846:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
847:   PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);
848:   il     = jl + mbs;
849:   cols   = il + mbs;
850:   ui_ptr = (PetscInt**)(cols + mbs);
851:   for (i=0; i<mbs; i++){
852:     jl[i] = mbs; il[i] = 0;
853:   }

855:   /* create and initialize a linked list for storing column indices of the active row k */
856:   nlnk = mbs + 1;
857:   PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);

859:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
860:   PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);
861:   current_space = free_space;

863:   for (k=0; k<mbs; k++){  /* for each active row k */
864:     /* initialize lnk by the column indices of row rip[k] of A */
865:     nzk   = 0;
866:     ncols = ai[rip[k]+1] - ai[rip[k]];
867:     ncols_upper = 0;
868:     for (j=0; j<ncols; j++){
869:       i = rip[*(aj + ai[rip[k]] + j)];
870:       if (i >= k){ /* only take upper triangular entry */
871:         cols[ncols_upper] = i;
872:         ncols_upper++;
873:       }
874:     }
875:     PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);
876:     nzk += nlnk;

878:     /* update lnk by computing fill-in for each pivot row to be merged in */
879:     prow = jl[k]; /* 1st pivot row */
880: 
881:     while (prow < k){
882:       nextprow = jl[prow];
883:       /* merge prow into k-th row */
884:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
885:       jmax = ui[prow+1];
886:       ncols = jmax-jmin;
887:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
888:       PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);
889:       nzk += nlnk;

891:       /* update il and jl for prow */
892:       if (jmin < jmax){
893:         il[prow] = jmin;
894:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
895:       }
896:       prow = nextprow;
897:     }

899:     /* if free space is not available, make more free space */
900:     if (current_space->local_remaining<nzk) {
901:       i = mbs - k + 1; /* num of unfactored rows */
902:       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
903:       PetscFreeSpaceGet(i,&current_space);
904:       reallocs++;
905:     }

907:     /* copy data into free space, then initialize lnk */
908:     PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);

910:     /* add the k-th row into il and jl */
911:     if (nzk-1 > 0){
912:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
913:       jl[k] = jl[i]; jl[i] = k;
914:       il[k] = ui[k] + 1;
915:     }
916:     ui_ptr[k] = current_space->array;
917:     current_space->array           += nzk;
918:     current_space->local_used      += nzk;
919:     current_space->local_remaining -= nzk;

921:     ui[k+1] = ui[k] + nzk;
922:   }

924: #if defined(PETSC_USE_INFO)
925:   if (ai[mbs] != 0) {
926:     PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
927:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
928:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
929:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
930:   } else {
931:     PetscInfo(A,"Empty matrix.\n");
932:   }
933: #endif

935:   ISRestoreIndices(perm,&rip);
936:   PetscFree(jl);

938:   /* destroy list of free space and other temporary array(s) */
939:   PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);
940:   PetscFreeSpaceContiguous(&free_space,uj);
941:   PetscLLDestroy(lnk,lnkbt);

943:   /* put together the new matrix in MATSEQSBAIJ format */
944:   B    = fact;
945:   MatSeqSBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);

947:   b = (Mat_SeqSBAIJ*)B->data;
948:   b->singlemalloc = PETSC_FALSE;
949:   b->free_a       = PETSC_TRUE;
950:   b->free_ij      = PETSC_TRUE;
951:   PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);
952:   b->j    = uj;
953:   b->i    = ui;
954:   b->diag = 0;
955:   b->ilen = 0;
956:   b->imax = 0;
957:   b->row  = perm;
958:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
959:   PetscObjectReference((PetscObject)perm);
960:   b->icol = perm;
961:   PetscObjectReference((PetscObject)perm);
962:   PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);
963:   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
964:   b->maxnz = b->nz = ui[mbs];
965: 
966:   B->info.factor_mallocs    = reallocs;
967:   B->info.fill_ratio_given  = fill;
968:   if (ai[mbs] != 0) {
969:     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
970:   } else {
971:     B->info.fill_ratio_needed = 0.0;
972:   }
973:   if (perm_identity){
974:     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
975:   } else {
976:     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
977:   }
978:   return(0);
979: }