Actual source code: aijfact.c

  1: #define PETSCMAT_DLL

 3:  #include ../src/mat/impls/aij/seq/aij.h
 4:  #include ../src/inline/dot.h
 5:  #include ../src/inline/spops.h
 6:  #include petscbt.h
 7:  #include ../src/mat/utils/freespace.h

 11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
 12: {

 15:   SETERRQ(PETSC_ERR_SUP,"Code not written");
 16: #if !defined(PETSC_USE_DEBUG)
 17:   return(0);
 18: #endif
 19: }


 22: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 23: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
 24: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
 25: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
 26: #endif

 30:   /* ------------------------------------------------------------

 32:           This interface was contribed by Tony Caola

 34:      This routine is an interface to the pivoting drop-tolerance 
 35:      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 
 36:      SPARSEKIT2.

 38:      The SPARSEKIT2 routines used here are covered by the GNU 
 39:      copyright; see the file gnu in this directory.

 41:      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
 42:      help in getting this routine ironed out.

 44:      The major drawback to this routine is that if info->fill is 
 45:      not large enough it fails rather than allocating more space;
 46:      this can be fixed by hacking/improving the f2c version of 
 47:      Yousef Saad's code.

 49:      ------------------------------------------------------------
 50: */
 51: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
 52: {
 53: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 55:   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
 56:   You can obtain the drop tolerance routines by installing PETSc from\n\
 57:   www.mcs.anl.gov/petsc\n");
 58: #else
 59:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
 60:   IS             iscolf,isicol,isirow;
 61:   PetscTruth     reorder;
 62:   PetscErrorCode ierr,sierr;
 63:   const PetscInt *c,*r,*ic;
 64:   PetscInt       i,n = A->rmap->n,*cc,*cr;
 65:   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
 66:   PetscInt       *ordcol,*iwk,*iperm,*jw;
 67:   PetscInt       jmax,lfill,job,*o_i,*o_j;
 68:   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
 69:   PetscReal      af,dt,dtcount,dtcol,fill;


 73:   if (info->dt == PETSC_DEFAULT)      dt      = .005; else dt = info->dt;
 74:   if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);  else dtcount = info->dtcount;
 75:   if (info->dtcol == PETSC_DEFAULT)   dtcol   = .01; else dtcol = info->dtcol;
 76:   if (info->fill == PETSC_DEFAULT)    fill    = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill;
 77:   lfill   = (PetscInt)(dtcount/2.0);
 78:   jmax    = (PetscInt)(fill*a->nz);


 81:   /* ------------------------------------------------------------
 82:      If reorder=.TRUE., then the original matrix has to be 
 83:      reordered to reflect the user selected ordering scheme, and
 84:      then de-reordered so it is in it's original format.  
 85:      Because Saad's dperm() is NOT in place, we have to copy 
 86:      the original matrix and allocate more storage. . . 
 87:      ------------------------------------------------------------
 88:   */

 90:   /* set reorder to true if either isrow or iscol is not identity */
 91:   ISIdentity(isrow,&reorder);
 92:   if (reorder) {ISIdentity(iscol,&reorder);}
 93:   reorder = PetscNot(reorder);

 95: 
 96:   /* storage for ilu factor */
 97:   PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
 98:   PetscMalloc(jmax*sizeof(PetscInt),&new_j);
 99:   PetscMalloc(jmax*sizeof(MatScalar),&new_a);
100:   PetscMalloc(n*sizeof(PetscInt),&ordcol);

102:   /* ------------------------------------------------------------
103:      Make sure that everything is Fortran formatted (1-Based)
104:      ------------------------------------------------------------
105:   */
106:   for (i=old_i[0];i<old_i[n];i++) {
107:     old_j[i]++;
108:   }
109:   for(i=0;i<n+1;i++) {
110:     old_i[i]++;
111:   };
112: 

114:   if (reorder) {
115:     ISGetIndices(iscol,&c);
116:     ISGetIndices(isrow,&r);
117:     PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);
118:     for(i=0;i<n;i++) {
119:       cr[i]  = r[i]+1;
120:       cc[i]  = c[i]+1;
121:     }
122:     ISRestoreIndices(iscol,&c);
123:     ISRestoreIndices(isrow,&r);
124:     PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
125:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
126:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);
127:     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job);
128:     PetscFree2(cc,cr);
129:     o_a = old_a2;
130:     o_j = old_j2;
131:     o_i = old_i2;
132:   } else {
133:     o_a = old_a;
134:     o_j = old_j;
135:     o_i = old_i;
136:   }

138:   /* ------------------------------------------------------------
139:      Call Saad's ilutp() routine to generate the factorization
140:      ------------------------------------------------------------
141:   */

143:   PetscMalloc(2*n*sizeof(PetscInt),&iperm);
144:   PetscMalloc(2*n*sizeof(PetscInt),&jw);
145:   PetscMalloc(n*sizeof(PetscScalar),&w);

147:   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148:   if (sierr) {
149:     switch (sierr) {
150:       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax);
151:       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax);
152:       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153:       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154:       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax);
155:       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156:     }
157:   }

159:   PetscFree(w);
160:   PetscFree(jw);

162:   /* ------------------------------------------------------------
163:      Saad's routine gives the result in Modified Sparse Row (msr)
164:      Convert to Compressed Sparse Row format (csr) 
165:      ------------------------------------------------------------
166:   */

168:   PetscMalloc(n*sizeof(PetscScalar),&wk);
169:   PetscMalloc((n+1)*sizeof(PetscInt),&iwk);

171:   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);

173:   PetscFree(iwk);
174:   PetscFree(wk);

176:   if (reorder) {
177:     PetscFree(old_a2);
178:     PetscFree(old_j2);
179:     PetscFree(old_i2);
180:   } else {
181:     /* fix permutation of old_j that the factorization introduced */
182:     for (i=old_i[0]; i<old_i[n]; i++) {
183:       old_j[i-1] = iperm[old_j[i-1]-1];
184:     }
185:   }

187:   /* get rid of the shift to indices starting at 1 */
188:   for (i=0; i<n+1; i++) {
189:     old_i[i]--;
190:   }
191:   for (i=old_i[0];i<old_i[n];i++) {
192:     old_j[i]--;
193:   }
194: 
195:   /* Make the factored matrix 0-based */
196:   for (i=0; i<n+1; i++) {
197:     new_i[i]--;
198:   }
199:   for (i=new_i[0];i<new_i[n];i++) {
200:     new_j[i]--;
201:   }

203:   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204:   /*-- permute the right-hand-side and solution vectors           --*/
205:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
206:   ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
207:   ISGetIndices(isicol,&ic);
208:   for(i=0; i<n; i++) {
209:     ordcol[i] = ic[iperm[i]-1];
210:   };
211:   ISRestoreIndices(isicol,&ic);
212:   ISDestroy(isicol);

214:   PetscFree(iperm);

216:   ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
217:   PetscFree(ordcol);

219:   /*----- put together the new matrix -----*/

221:   MatCreate(((PetscObject)A)->comm,fact);
222:   MatSetSizes(*fact,n,n,n,n);
223:   MatSetType(*fact,((PetscObject)A)->type_name);
224:   MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
225:   (*fact)->factor    = MAT_FACTOR_LU;
226:   (*fact)->assembled = PETSC_TRUE;

228:   b = (Mat_SeqAIJ*)(*fact)->data;
229:   b->free_a        = PETSC_TRUE;
230:   b->free_ij       = PETSC_TRUE;
231:   b->singlemalloc  = PETSC_FALSE;
232:   b->a             = new_a;
233:   b->j             = new_j;
234:   b->i             = new_i;
235:   b->ilen          = 0;
236:   b->imax          = 0;
237:   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238:   b->row           = isirow;
239:   b->col           = iscolf;
240:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
241:   b->maxnz = b->nz = new_i[n];
242:   MatMarkDiagonal_SeqAIJ(*fact);
243:   (*fact)->info.factor_mallocs = 0;

245:   af = ((double)b->nz)/((double)a->nz) + .001;
246:   PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);
247:   PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
248:   PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
249:   PetscInfo(A,"for best performance.\n");

251:   if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ;
252:   else         (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
253:   (*fact)->ops->solveadd           = MatSolveAdd_SeqAIJ;
254:   (*fact)->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
255:   (*fact)->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
256: 
257:   MatILUDTFactor_Inode(A,isrow,iscol,info,fact);

259:   return(0);
260: #endif
261: }

266: PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
267: {
269:   *flg = PETSC_TRUE;
270:   return(0);
271: }

277: PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
278: {
279:   PetscInt           n = A->rmap->n;
280:   PetscErrorCode     ierr;

283:   MatCreate(((PetscObject)A)->comm,B);
284:   MatSetSizes(*B,n,n,n,n);
285:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
286:     MatSetType(*B,MATSEQAIJ);
287:     if (ftype == MAT_FACTOR_ILU) {
288:       (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ;
289:     } else {
290:       (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
291:     }
292:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
293:     MatSetType(*B,MATSEQSBAIJ);
294:     MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
295:     if (ftype == MAT_FACTOR_ICC) {
296:       (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
297:     } else {
298:       (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
299:     }
300:   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
301:   (*B)->factor = ftype;
302:   return(0);
303: }

308: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
309: {
310:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
311:   IS                 isicol;
312:   PetscErrorCode     ierr;
313:   const PetscInt     *r,*ic;
314:   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
315:   PetscInt           *bi,*bj,*ajtmp;
316:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
317:   PetscReal          f;
318:   PetscInt           nlnk,*lnk,k,**bi_ptr;
319:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
320:   PetscBT            lnkbt;

323:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
324:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
325:   ISGetIndices(isrow,&r);
326:   ISGetIndices(isicol,&ic);

328:   /* get new row pointers */
329:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
330:   bi[0] = 0;

332:   /* bdiag is location of diagonal in factor */
333:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
334:   bdiag[0] = 0;

336:   /* linked list for storing column indices of the active row */
337:   nlnk = n + 1;
338:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

340:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

342:   /* initial FreeSpace size is f*(ai[n]+1) */
343:   f = info->fill;
344:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
345:   current_space = free_space;

347:   for (i=0; i<n; i++) {
348:     /* copy previous fill into linked list */
349:     nzi = 0;
350:     nnz = ai[r[i]+1] - ai[r[i]];
351:     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
352:     ajtmp = aj + ai[r[i]];
353:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
354:     nzi += nlnk;

356:     /* add pivot rows into linked list */
357:     row = lnk[n];
358:     while (row < i) {
359:       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
360:       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
361:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
362:       nzi += nlnk;
363:       row  = lnk[row];
364:     }
365:     bi[i+1] = bi[i] + nzi;
366:     im[i]   = nzi;

368:     /* mark bdiag */
369:     nzbd = 0;
370:     nnz  = nzi;
371:     k    = lnk[n];
372:     while (nnz-- && k < i){
373:       nzbd++;
374:       k = lnk[k];
375:     }
376:     bdiag[i] = bi[i] + nzbd;

378:     /* if free space is not available, make more free space */
379:     if (current_space->local_remaining<nzi) {
380:       nnz = (n - i)*nzi; /* estimated and max additional space needed */
381:       PetscFreeSpaceGet(nnz,&current_space);
382:       reallocs++;
383:     }

385:     /* copy data into free space, then initialize lnk */
386:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
387:     bi_ptr[i] = current_space->array;
388:     current_space->array           += nzi;
389:     current_space->local_used      += nzi;
390:     current_space->local_remaining -= nzi;
391:   }
392: #if defined(PETSC_USE_INFO)
393:   if (ai[n] != 0) {
394:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
395:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
396:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
397:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
398:     PetscInfo(A,"for best performance.\n");
399:   } else {
400:     PetscInfo(A,"Empty matrix\n");
401:   }
402: #endif

404:   ISRestoreIndices(isrow,&r);
405:   ISRestoreIndices(isicol,&ic);

407:   /* destroy list of free space and other temporary array(s) */
408:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
409:   PetscFreeSpaceContiguous(&free_space,bj);
410:   PetscLLDestroy(lnk,lnkbt);
411:   PetscFree2(bi_ptr,im);

413:   /* put together the new matrix */
414:   MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
415:   PetscLogObjectParent(B,isicol);
416:   b    = (Mat_SeqAIJ*)(B)->data;
417:   b->free_a       = PETSC_TRUE;
418:   b->free_ij      = PETSC_TRUE;
419:   b->singlemalloc = PETSC_FALSE;
420:   PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
421:   b->j          = bj;
422:   b->i          = bi;
423:   b->diag       = bdiag;
424:   b->ilen       = 0;
425:   b->imax       = 0;
426:   b->row        = isrow;
427:   b->col        = iscol;
428:   PetscObjectReference((PetscObject)isrow);
429:   PetscObjectReference((PetscObject)iscol);
430:   b->icol       = isicol;
431:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);

433:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
434:   PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
435:   b->maxnz = b->nz = bi[n] ;

437:   (B)->factor                 =  MAT_FACTOR_LU;
438:   (B)->info.factor_mallocs    = reallocs;
439:   (B)->info.fill_ratio_given  = f;

441:   if (ai[n] != 0) {
442:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
443:   } else {
444:     (B)->info.fill_ratio_needed = 0.0;
445:   }
446:   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
447:   (B)->ops->solve            = MatSolve_SeqAIJ;
448:   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
449:   /* switch to inodes if appropriate */
450:   MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);
451:   return(0);
452: }

454: /*
455:     Trouble in factorization, should we dump the original matrix?
456: */
459: PetscErrorCode MatFactorDumpMatrix(Mat A)
460: {
462:   PetscTruth     flg;

465:   PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);
466:   if (flg) {
467:     PetscViewer viewer;
468:     char        filename[PETSC_MAX_PATH_LEN];

470:     PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
471:     PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);
472:     MatView(A,viewer);
473:     PetscViewerDestroy(viewer);
474:   }
475:   return(0);
476: }


480: /* ----------------------------------------------------------- */
483: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
484: {
485:   Mat            C=B;
486:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
487:   IS             isrow = b->row,isicol = b->icol;
489:   const PetscInt  *r,*ic,*ics;
490:   PetscInt       i,j,n=A->rmap->n,*bi=b->i,*bj=b->j;
491:   PetscInt       *ajtmp,*bjtmp,nz,row;
492:   PetscInt       *diag_offset = b->diag,diag,*pj;
493:   PetscScalar    *rtmp,*pc,multiplier,*rtmps;
494:   MatScalar      *v,*pv;
495:   PetscScalar    d;
496:   PetscReal      rs;
497:   LUShift_Ctx    sctx;
498:   PetscInt       newshift,*ddiag;

501:   ISGetIndices(isrow,&r);
502:   ISGetIndices(isicol,&ic);
503:   PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
504:   PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
505:   rtmps = rtmp; ics = ic;

507:   sctx.shift_top      = 0;
508:   sctx.nshift_max     = 0;
509:   sctx.shift_lo       = 0;
510:   sctx.shift_hi       = 0;
511:   sctx.shift_fraction = 0;

513:   /* if both shift schemes are chosen by user, only use info->shiftpd */
514:   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
515:     PetscInt *aai = a->i;
516:     ddiag          = a->diag;
517:     sctx.shift_top = 0;
518:     for (i=0; i<n; i++) {
519:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
520:       d  = (a->a)[ddiag[i]];
521:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
522:       v  = a->a+aai[i];
523:       nz = aai[i+1] - aai[i];
524:       for (j=0; j<nz; j++)
525:         rs += PetscAbsScalar(v[j]);
526:       if (rs>sctx.shift_top) sctx.shift_top = rs;
527:     }
528:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
529:     sctx.shift_top    *= 1.1;
530:     sctx.nshift_max   = 5;
531:     sctx.shift_lo     = 0.;
532:     sctx.shift_hi     = 1.;
533:   }

535:   sctx.shift_amount = 0;
536:   sctx.nshift       = 0;
537:   do {
538:     sctx.lushift = PETSC_FALSE;
539:     for (i=0; i<n; i++){
540:       nz    = bi[i+1] - bi[i];
541:       bjtmp = bj + bi[i];
542:       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;

544:       /* load in initial (unfactored row) */
545:       nz    = a->i[r[i]+1] - a->i[r[i]];
546:       ajtmp = a->j + a->i[r[i]];
547:       v     = a->a + a->i[r[i]];
548:       for (j=0; j<nz; j++) {
549:         rtmp[ics[ajtmp[j]]] = v[j];
550:       }
551:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

553:       row = *bjtmp++;
554:       while  (row < i) {
555:         pc = rtmp + row;
556:         if (*pc != 0.0) {
557:           pv         = b->a + diag_offset[row];
558:           pj         = b->j + diag_offset[row] + 1;
559:           multiplier = *pc / *pv++;
560:           *pc        = multiplier;
561:           nz         = bi[row+1] - diag_offset[row] - 1;
562:           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
563:           PetscLogFlops(2*nz);
564:         }
565:         row = *bjtmp++;
566:       }
567:       /* finished row so stick it into b->a */
568:       pv   = b->a + bi[i] ;
569:       pj   = b->j + bi[i] ;
570:       nz   = bi[i+1] - bi[i];
571:       diag = diag_offset[i] - bi[i];
572:       rs   = 0.0;
573:       for (j=0; j<nz; j++) {
574:         pv[j] = rtmps[pj[j]];
575:         if (j != diag) rs += PetscAbsScalar(pv[j]);
576:       }

578:       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
579:       sctx.rs  = rs;
580:       sctx.pv  = pv[diag];
581:       MatLUCheckShift_inline(info,sctx,i,newshift);
582:       if (newshift == 1) break;
583:     }

585:     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
586:       /*
587:        * if no shift in this attempt & shifting & started shifting & can refine,
588:        * then try lower shift
589:        */
590:       sctx.shift_hi        = sctx.shift_fraction;
591:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
592:       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
593:       sctx.lushift         = PETSC_TRUE;
594:       sctx.nshift++;
595:     }
596:   } while (sctx.lushift);

598:   /* invert diagonal entries for simplier triangular solves */
599:   for (i=0; i<n; i++) {
600:     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
601:   }
602:   PetscFree(rtmp);
603:   ISRestoreIndices(isicol,&ic);
604:   ISRestoreIndices(isrow,&r);
605:   if (b->inode.use) {
606:     C->ops->solve   = MatSolve_Inode;
607:   } else {
608:     PetscTruth row_identity, col_identity;
609:     ISIdentity(isrow,&row_identity);
610:     ISIdentity(isicol,&col_identity);
611:     if (row_identity && col_identity) {
612:       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
613:     } else {
614:       C->ops->solve   = MatSolve_SeqAIJ;
615:     }
616:   }
617:   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
618:   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
619:   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
620:   C->ops->matsolve           = MatMatSolve_SeqAIJ;
621:   C->assembled    = PETSC_TRUE;
622:   C->preallocated = PETSC_TRUE;
623:   PetscLogFlops(C->cmap->n);
624:   if (sctx.nshift){
625:      if (info->shiftpd) {
626:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
627:     } else if (info->shiftnz) {
628:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
629:     }
630:   }
631:   return(0);
632: }

634: /* 
635:    This routine implements inplace ILU(0) with row or/and column permutations. 
636:    Input: 
637:      A - original matrix
638:    Output;
639:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 
640:          a->j (col index) is permuted by the inverse of colperm, then sorted
641:          a->a reordered accordingly with a->j
642:          a->diag (ptr to diagonal elements) is updated.
643: */
646: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
647: {
648:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
649:   IS             isrow = a->row,isicol = a->icol;
651:   const PetscInt *r,*ic,*ics;
652:   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
653:   PetscInt       *ajtmp,nz,row;
654:   PetscInt       *diag = a->diag,nbdiag,*pj;
655:   PetscScalar    *rtmp,*pc,multiplier,d;
656:   MatScalar      *v,*pv;
657:   PetscReal      rs;
658:   LUShift_Ctx    sctx;
659:   PetscInt       newshift;

662:   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
663:   ISGetIndices(isrow,&r);
664:   ISGetIndices(isicol,&ic);
665:   PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
666:   PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
667:   ics = ic;

669:   sctx.shift_top      = 0;
670:   sctx.nshift_max     = 0;
671:   sctx.shift_lo       = 0;
672:   sctx.shift_hi       = 0;
673:   sctx.shift_fraction = 0;

675:   /* if both shift schemes are chosen by user, only use info->shiftpd */
676:   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
677:     sctx.shift_top = 0;
678:     for (i=0; i<n; i++) {
679:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
680:       d  = (a->a)[diag[i]];
681:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
682:       v  = a->a+ai[i];
683:       nz = ai[i+1] - ai[i];
684:       for (j=0; j<nz; j++)
685:         rs += PetscAbsScalar(v[j]);
686:       if (rs>sctx.shift_top) sctx.shift_top = rs;
687:     }
688:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
689:     sctx.shift_top    *= 1.1;
690:     sctx.nshift_max   = 5;
691:     sctx.shift_lo     = 0.;
692:     sctx.shift_hi     = 1.;
693:   }

695:   sctx.shift_amount = 0;
696:   sctx.nshift       = 0;
697:   do {
698:     sctx.lushift = PETSC_FALSE;
699:     for (i=0; i<n; i++){
700:       /* load in initial unfactored row */
701:       nz    = ai[r[i]+1] - ai[r[i]];
702:       ajtmp = aj + ai[r[i]];
703:       v     = a->a + ai[r[i]];
704:       /* sort permuted ajtmp and values v accordingly */
705:       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
706:       PetscSortIntWithScalarArray(nz,ajtmp,v);

708:       diag[r[i]] = ai[r[i]];
709:       for (j=0; j<nz; j++) {
710:         rtmp[ajtmp[j]] = v[j];
711:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
712:       }
713:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

715:       row = *ajtmp++;
716:       while  (row < i) {
717:         pc = rtmp + row;
718:         if (*pc != 0.0) {
719:           pv         = a->a + diag[r[row]];
720:           pj         = aj + diag[r[row]] + 1;

722:           multiplier = *pc / *pv++;
723:           *pc        = multiplier;
724:           nz         = ai[r[row]+1] - diag[r[row]] - 1;
725:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
726:           PetscLogFlops(2*nz);
727:         }
728:         row = *ajtmp++;
729:       }
730:       /* finished row so overwrite it onto a->a */
731:       pv   = a->a + ai[r[i]] ;
732:       pj   = aj + ai[r[i]] ;
733:       nz   = ai[r[i]+1] - ai[r[i]];
734:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
735: 
736:       rs   = 0.0;
737:       for (j=0; j<nz; j++) {
738:         pv[j] = rtmp[pj[j]];
739:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
740:       }

742:       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
743:       sctx.rs  = rs;
744:       sctx.pv  = pv[nbdiag];
745:       MatLUCheckShift_inline(info,sctx,i,newshift);
746:       if (newshift == 1) break;
747:     }

749:     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
750:       /*
751:        * if no shift in this attempt & shifting & started shifting & can refine,
752:        * then try lower shift
753:        */
754:       sctx.shift_hi        = sctx.shift_fraction;
755:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
756:       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
757:       sctx.lushift         = PETSC_TRUE;
758:       sctx.nshift++;
759:     }
760:   } while (sctx.lushift);

762:   /* invert diagonal entries for simplier triangular solves */
763:   for (i=0; i<n; i++) {
764:     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
765:   }

767:   PetscFree(rtmp);
768:   ISRestoreIndices(isicol,&ic);
769:   ISRestoreIndices(isrow,&r);
770:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
771:   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
772:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
773:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
774:   A->assembled = PETSC_TRUE;
775:   A->preallocated = PETSC_TRUE;
776:   PetscLogFlops(A->cmap->n);
777:   if (sctx.nshift){
778:     if (info->shiftpd) {
779:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
780:     } else if (info->shiftnz) {
781:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
782:     }
783:   }
784:   return(0);
785: }

787: /* ----------------------------------------------------------- */
790: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
791: {
793:   Mat            C;

796:   MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);
797:   MatLUFactorSymbolic(C,A,row,col,info);
798:   MatLUFactorNumeric(C,A,info);
799:   A->ops->solve            = C->ops->solve;
800:   A->ops->solvetranspose   = C->ops->solvetranspose;
801:   MatHeaderCopy(A,C);
802:   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
803:   return(0);
804: }
805: /* ----------------------------------------------------------- */
808: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
809: {
810:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
811:   IS                iscol = a->col,isrow = a->row;
812:   PetscErrorCode    ierr;
813:   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
814:   PetscInt          nz;
815:   const PetscInt    *rout,*cout,*r,*c;
816:   PetscScalar       *x,*tmp,*tmps,sum;
817:   const PetscScalar *b;
818:   const MatScalar   *aa = a->a,*v;

821:   if (!n) return(0);

823:   VecGetArray(bb,(PetscScalar**)&b);
824:   VecGetArray(xx,&x);
825:   tmp  = a->solve_work;

827:   ISGetIndices(isrow,&rout); r = rout;
828:   ISGetIndices(iscol,&cout); c = cout + (n-1);

830:   /* forward solve the lower triangular */
831:   tmp[0] = b[*r++];
832:   tmps   = tmp;
833:   for (i=1; i<n; i++) {
834:     v   = aa + ai[i] ;
835:     vi  = aj + ai[i] ;
836:     nz  = a->diag[i] - ai[i];
837:     sum = b[*r++];
838:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
839:     tmp[i] = sum;
840:   }

842:   /* backward solve the upper triangular */
843:   for (i=n-1; i>=0; i--){
844:     v   = aa + a->diag[i] + 1;
845:     vi  = aj + a->diag[i] + 1;
846:     nz  = ai[i+1] - a->diag[i] - 1;
847:     sum = tmp[i];
848:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
849:     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
850:   }

852:   ISRestoreIndices(isrow,&rout);
853:   ISRestoreIndices(iscol,&cout);
854:   VecRestoreArray(bb,(PetscScalar**)&b);
855:   VecRestoreArray(xx,&x);
856:   PetscLogFlops(2*a->nz - A->cmap->n);
857:   return(0);
858: }

862: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
863: {
864:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
865:   IS              iscol = a->col,isrow = a->row;
866:   PetscErrorCode  ierr;
867:   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
868:   PetscInt        nz,neq;
869:   const PetscInt  *rout,*cout,*r,*c;
870:   PetscScalar     *x,*b,*tmp,*tmps,sum;
871:   const MatScalar *aa = a->a,*v;
872:   PetscTruth      bisdense,xisdense;

875:   if (!n) return(0);

877:   PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
878:   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
879:   PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
880:   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");

882:   MatGetArray(B,&b);
883:   MatGetArray(X,&x);
884: 
885:   tmp  = a->solve_work;
886:   ISGetIndices(isrow,&rout); r = rout;
887:   ISGetIndices(iscol,&cout); c = cout;

889:   for (neq=0; neq<B->cmap->n; neq++){
890:     /* forward solve the lower triangular */
891:     tmp[0] = b[r[0]];
892:     tmps   = tmp;
893:     for (i=1; i<n; i++) {
894:       v   = aa + ai[i] ;
895:       vi  = aj + ai[i] ;
896:       nz  = a->diag[i] - ai[i];
897:       sum = b[r[i]];
898:       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
899:       tmp[i] = sum;
900:     }
901:     /* backward solve the upper triangular */
902:     for (i=n-1; i>=0; i--){
903:       v   = aa + a->diag[i] + 1;
904:       vi  = aj + a->diag[i] + 1;
905:       nz  = ai[i+1] - a->diag[i] - 1;
906:       sum = tmp[i];
907:       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
908:       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
909:     }

911:     b += n;
912:     x += n;
913:   }
914:   ISRestoreIndices(isrow,&rout);
915:   ISRestoreIndices(iscol,&cout);
916:   MatRestoreArray(B,&b);
917:   MatRestoreArray(X,&x);
918:   PetscLogFlops(B->cmap->n*(2*a->nz - n));
919:   return(0);
920: }

924: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
925: {
926:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
927:   IS              iscol = a->col,isrow = a->row;
928:   PetscErrorCode  ierr;
929:   const PetscInt  *r,*c,*rout,*cout;
930:   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
931:   PetscInt        nz,row;
932:   PetscScalar     *x,*b,*tmp,*tmps,sum;
933:   const MatScalar *aa = a->a,*v;

936:   if (!n) return(0);

938:   VecGetArray(bb,&b);
939:   VecGetArray(xx,&x);
940:   tmp  = a->solve_work;

942:   ISGetIndices(isrow,&rout); r = rout;
943:   ISGetIndices(iscol,&cout); c = cout + (n-1);

945:   /* forward solve the lower triangular */
946:   tmp[0] = b[*r++];
947:   tmps   = tmp;
948:   for (row=1; row<n; row++) {
949:     i   = rout[row]; /* permuted row */
950:     v   = aa + ai[i] ;
951:     vi  = aj + ai[i] ;
952:     nz  = a->diag[i] - ai[i];
953:     sum = b[*r++];
954:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
955:     tmp[row] = sum;
956:   }

958:   /* backward solve the upper triangular */
959:   for (row=n-1; row>=0; row--){
960:     i   = rout[row]; /* permuted row */
961:     v   = aa + a->diag[i] + 1;
962:     vi  = aj + a->diag[i] + 1;
963:     nz  = ai[i+1] - a->diag[i] - 1;
964:     sum = tmp[row];
965:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
966:     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
967:   }

969:   ISRestoreIndices(isrow,&rout);
970:   ISRestoreIndices(iscol,&cout);
971:   VecRestoreArray(bb,&b);
972:   VecRestoreArray(xx,&x);
973:   PetscLogFlops(2*a->nz - A->cmap->n);
974:   return(0);
975: }

977: /* ----------------------------------------------------------- */
980: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
981: {
982:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
983:   PetscErrorCode    ierr;
984:   PetscInt          n = A->rmap->n;
985:   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
986:   PetscScalar       *x;
987:   const PetscScalar *b;
988:   const MatScalar   *aa = a->a;
989: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
990:   PetscInt          adiag_i,i,nz,ai_i;
991:   const MatScalar   *v;
992:   PetscScalar       sum;
993: #endif

996:   if (!n) return(0);

998:   VecGetArray(bb,(PetscScalar**)&b);
999:   VecGetArray(xx,&x);

1001: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1002:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1003: #else
1004:   /* forward solve the lower triangular */
1005:   x[0] = b[0];
1006:   for (i=1; i<n; i++) {
1007:     ai_i = ai[i];
1008:     v    = aa + ai_i;
1009:     vi   = aj + ai_i;
1010:     nz   = adiag[i] - ai_i;
1011:     sum  = b[i];
1012:     while (nz--) sum -= *v++ * x[*vi++];
1013:     x[i] = sum;
1014:   }

1016:   /* backward solve the upper triangular */
1017:   for (i=n-1; i>=0; i--){
1018:     adiag_i = adiag[i];
1019:     v       = aa + adiag_i + 1;
1020:     vi      = aj + adiag_i + 1;
1021:     nz      = ai[i+1] - adiag_i - 1;
1022:     sum     = x[i];
1023:     while (nz--) sum -= *v++ * x[*vi++];
1024:     x[i]    = sum*aa[adiag_i];
1025:   }
1026: #endif
1027:   PetscLogFlops(2*a->nz - A->cmap->n);
1028:   VecRestoreArray(bb,(PetscScalar**)&b);
1029:   VecRestoreArray(xx,&x);
1030:   return(0);
1031: }

1035: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1036: {
1037:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1038:   IS              iscol = a->col,isrow = a->row;
1039:   PetscErrorCode  ierr;
1040:   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1041:   PetscInt        nz;
1042:   const PetscInt  *rout,*cout,*r,*c;
1043:   PetscScalar     *x,*b,*tmp,sum;
1044:   const MatScalar *aa = a->a,*v;

1047:   if (yy != xx) {VecCopy(yy,xx);}

1049:   VecGetArray(bb,&b);
1050:   VecGetArray(xx,&x);
1051:   tmp  = a->solve_work;

1053:   ISGetIndices(isrow,&rout); r = rout;
1054:   ISGetIndices(iscol,&cout); c = cout + (n-1);

1056:   /* forward solve the lower triangular */
1057:   tmp[0] = b[*r++];
1058:   for (i=1; i<n; i++) {
1059:     v   = aa + ai[i] ;
1060:     vi  = aj + ai[i] ;
1061:     nz  = a->diag[i] - ai[i];
1062:     sum = b[*r++];
1063:     while (nz--) sum -= *v++ * tmp[*vi++ ];
1064:     tmp[i] = sum;
1065:   }

1067:   /* backward solve the upper triangular */
1068:   for (i=n-1; i>=0; i--){
1069:     v   = aa + a->diag[i] + 1;
1070:     vi  = aj + a->diag[i] + 1;
1071:     nz  = ai[i+1] - a->diag[i] - 1;
1072:     sum = tmp[i];
1073:     while (nz--) sum -= *v++ * tmp[*vi++ ];
1074:     tmp[i] = sum*aa[a->diag[i]];
1075:     x[*c--] += tmp[i];
1076:   }

1078:   ISRestoreIndices(isrow,&rout);
1079:   ISRestoreIndices(iscol,&cout);
1080:   VecRestoreArray(bb,&b);
1081:   VecRestoreArray(xx,&x);
1082:   PetscLogFlops(2*a->nz);

1084:   return(0);
1085: }
1086: /* -------------------------------------------------------------------*/
1089: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1090: {
1091:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1092:   IS              iscol = a->col,isrow = a->row;
1093:   PetscErrorCode  ierr;
1094:   const PetscInt  *rout,*cout,*r,*c;
1095:   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1096:   PetscInt        nz,*diag = a->diag;
1097:   PetscScalar     *x,*b,*tmp,s1;
1098:   const MatScalar *aa = a->a,*v;

1101:   VecGetArray(bb,&b);
1102:   VecGetArray(xx,&x);
1103:   tmp  = a->solve_work;

1105:   ISGetIndices(isrow,&rout); r = rout;
1106:   ISGetIndices(iscol,&cout); c = cout;

1108:   /* copy the b into temp work space according to permutation */
1109:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1111:   /* forward solve the U^T */
1112:   for (i=0; i<n; i++) {
1113:     v   = aa + diag[i] ;
1114:     vi  = aj + diag[i] + 1;
1115:     nz  = ai[i+1] - diag[i] - 1;
1116:     s1  = tmp[i];
1117:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1118:     while (nz--) {
1119:       tmp[*vi++ ] -= (*v++)*s1;
1120:     }
1121:     tmp[i] = s1;
1122:   }

1124:   /* backward solve the L^T */
1125:   for (i=n-1; i>=0; i--){
1126:     v   = aa + diag[i] - 1 ;
1127:     vi  = aj + diag[i] - 1 ;
1128:     nz  = diag[i] - ai[i];
1129:     s1  = tmp[i];
1130:     while (nz--) {
1131:       tmp[*vi-- ] -= (*v--)*s1;
1132:     }
1133:   }

1135:   /* copy tmp into x according to permutation */
1136:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

1138:   ISRestoreIndices(isrow,&rout);
1139:   ISRestoreIndices(iscol,&cout);
1140:   VecRestoreArray(bb,&b);
1141:   VecRestoreArray(xx,&x);

1143:   PetscLogFlops(2*a->nz-A->cmap->n);
1144:   return(0);
1145: }

1149: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1150: {
1151:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1152:   IS              iscol = a->col,isrow = a->row;
1153:   PetscErrorCode  ierr;
1154:   const PetscInt  *r,*c,*rout,*cout;
1155:   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1156:   PetscInt        nz,*diag = a->diag;
1157:   PetscScalar     *x,*b,*tmp;
1158:   const MatScalar *aa = a->a,*v;

1161:   if (zz != xx) {VecCopy(zz,xx);}

1163:   VecGetArray(bb,&b);
1164:   VecGetArray(xx,&x);
1165:   tmp = a->solve_work;

1167:   ISGetIndices(isrow,&rout); r = rout;
1168:   ISGetIndices(iscol,&cout); c = cout;

1170:   /* copy the b into temp work space according to permutation */
1171:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1173:   /* forward solve the U^T */
1174:   for (i=0; i<n; i++) {
1175:     v   = aa + diag[i] ;
1176:     vi  = aj + diag[i] + 1;
1177:     nz  = ai[i+1] - diag[i] - 1;
1178:     tmp[i] *= *v++;
1179:     while (nz--) {
1180:       tmp[*vi++ ] -= (*v++)*tmp[i];
1181:     }
1182:   }

1184:   /* backward solve the L^T */
1185:   for (i=n-1; i>=0; i--){
1186:     v   = aa + diag[i] - 1 ;
1187:     vi  = aj + diag[i] - 1 ;
1188:     nz  = diag[i] - ai[i];
1189:     while (nz--) {
1190:       tmp[*vi-- ] -= (*v--)*tmp[i];
1191:     }
1192:   }

1194:   /* copy tmp into x according to permutation */
1195:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

1197:   ISRestoreIndices(isrow,&rout);
1198:   ISRestoreIndices(iscol,&cout);
1199:   VecRestoreArray(bb,&b);
1200:   VecRestoreArray(xx,&x);

1202:   PetscLogFlops(2*a->nz);
1203:   return(0);
1204: }
1205: /* ----------------------------------------------------------------*/
1206: EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1207: EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);

1211: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1212: {
1213:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1214:   IS                 isicol;
1215:   PetscErrorCode     ierr;
1216:   const PetscInt     *r,*ic;
1217:   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1218:   PetscInt           *bi,*cols,nnz,*cols_lvl;
1219:   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1220:   PetscInt           i,levels,diagonal_fill;
1221:   PetscTruth         col_identity,row_identity;
1222:   PetscReal          f;
1223:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1224:   PetscBT            lnkbt;
1225:   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1226:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1227:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1228:   PetscTruth         missing;

1231:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1232:   f             = info->fill;
1233:   levels        = (PetscInt)info->levels;
1234:   diagonal_fill = (PetscInt)info->diagonal_fill;
1235:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

1237:   /* special case that simply copies fill pattern */
1238:   ISIdentity(isrow,&row_identity);
1239:   ISIdentity(iscol,&col_identity);
1240:   if (!levels && row_identity && col_identity) {
1241:     MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);
1242:     fact->factor = MAT_FACTOR_ILU;
1243:     (fact)->info.factor_mallocs    = 0;
1244:     (fact)->info.fill_ratio_given  = info->fill;
1245:     (fact)->info.fill_ratio_needed = 1.0;
1246:     b               = (Mat_SeqAIJ*)(fact)->data;
1247:     MatMissingDiagonal(A,&missing,&d);
1248:     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1249:     b->row              = isrow;
1250:     b->col              = iscol;
1251:     b->icol             = isicol;
1252:     PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1253:     PetscObjectReference((PetscObject)isrow);
1254:     PetscObjectReference((PetscObject)iscol);
1255:     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1256:     MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);
1257:     return(0);
1258:   }

1260:   ISGetIndices(isrow,&r);
1261:   ISGetIndices(isicol,&ic);

1263:   /* get new row pointers */
1264:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1265:   bi[0] = 0;
1266:   /* bdiag is location of diagonal in factor */
1267:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1268:   bdiag[0]  = 0;

1270:   PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
1271:   bjlvl_ptr = (PetscInt**)(bj_ptr + n);

1273:   /* create a linked list for storing column indices of the active row */
1274:   nlnk = n + 1;
1275:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

1277:   /* initial FreeSpace size is f*(ai[n]+1) */
1278:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1279:   current_space = free_space;
1280:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1281:   current_space_lvl = free_space_lvl;
1282: 
1283:   for (i=0; i<n; i++) {
1284:     nzi = 0;
1285:     /* copy current row into linked list */
1286:     nnz  = ai[r[i]+1] - ai[r[i]];
1287:     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1288:     cols = aj + ai[r[i]];
1289:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1290:     PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1291:     nzi += nlnk;

1293:     /* make sure diagonal entry is included */
1294:     if (diagonal_fill && lnk[i] == -1) {
1295:       fm = n;
1296:       while (lnk[fm] < i) fm = lnk[fm];
1297:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1298:       lnk[fm]    = i;
1299:       lnk_lvl[i] = 0;
1300:       nzi++; dcount++;
1301:     }

1303:     /* add pivot rows into the active row */
1304:     nzbd = 0;
1305:     prow = lnk[n];
1306:     while (prow < i) {
1307:       nnz      = bdiag[prow];
1308:       cols     = bj_ptr[prow] + nnz + 1;
1309:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1310:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1311:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1312:       nzi += nlnk;
1313:       prow = lnk[prow];
1314:       nzbd++;
1315:     }
1316:     bdiag[i] = nzbd;
1317:     bi[i+1]  = bi[i] + nzi;

1319:     /* if free space is not available, make more free space */
1320:     if (current_space->local_remaining<nzi) {
1321:       nnz = nzi*(n - i); /* estimated and max additional space needed */
1322:       PetscFreeSpaceGet(nnz,&current_space);
1323:       PetscFreeSpaceGet(nnz,&current_space_lvl);
1324:       reallocs++;
1325:     }

1327:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1328:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1329:     bj_ptr[i]    = current_space->array;
1330:     bjlvl_ptr[i] = current_space_lvl->array;

1332:     /* make sure the active row i has diagonal entry */
1333:     if (*(bj_ptr[i]+bdiag[i]) != i) {
1334:       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1335:     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1336:     }

1338:     current_space->array           += nzi;
1339:     current_space->local_used      += nzi;
1340:     current_space->local_remaining -= nzi;
1341:     current_space_lvl->array           += nzi;
1342:     current_space_lvl->local_used      += nzi;
1343:     current_space_lvl->local_remaining -= nzi;
1344:   }

1346:   ISRestoreIndices(isrow,&r);
1347:   ISRestoreIndices(isicol,&ic);

1349:   /* destroy list of free space and other temporary arrays */
1350:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1351:   PetscFreeSpaceContiguous(&free_space,bj);
1352:   PetscIncompleteLLDestroy(lnk,lnkbt);
1353:   PetscFreeSpaceDestroy(free_space_lvl);
1354:   PetscFree(bj_ptr);

1356: #if defined(PETSC_USE_INFO)
1357:   {
1358:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1359:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1360:     PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1361:     PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1362:     PetscInfo(A,"for best performance.\n");
1363:     if (diagonal_fill) {
1364:       PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1365:     }
1366:   }
1367: #endif

1369:   /* put together the new matrix */
1370:   MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1371:   PetscLogObjectParent(fact,isicol);
1372:   b = (Mat_SeqAIJ*)(fact)->data;
1373:   b->free_a       = PETSC_TRUE;
1374:   b->free_ij      = PETSC_TRUE;
1375:   b->singlemalloc = PETSC_FALSE;
1376:   len = (bi[n] )*sizeof(PetscScalar);
1377:   PetscMalloc(len+1,&b->a);
1378:   b->j          = bj;
1379:   b->i          = bi;
1380:   for (i=0; i<n; i++) bdiag[i] += bi[i];
1381:   b->diag       = bdiag;
1382:   b->ilen       = 0;
1383:   b->imax       = 0;
1384:   b->row        = isrow;
1385:   b->col        = iscol;
1386:   PetscObjectReference((PetscObject)isrow);
1387:   PetscObjectReference((PetscObject)iscol);
1388:   b->icol       = isicol;
1389:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1390:   /* In b structure:  Free imax, ilen, old a, old j.  
1391:      Allocate bdiag, solve_work, new a, new j */
1392:   PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1393:   b->maxnz             = b->nz = bi[n] ;
1394:   (fact)->info.factor_mallocs    = reallocs;
1395:   (fact)->info.fill_ratio_given  = f;
1396:   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1397:   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1398:   MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);
1399:   return(0);
1400: }

1402:  #include ../src/mat/impls/sbaij/seq/sbaij.h
1405: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1406: {
1407:   Mat            C = B;
1408:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1409:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1410:   IS             ip=b->row,iip = b->icol;
1412:   const PetscInt *rip,*riip;
1413:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1414:   PetscInt       *ai=a->i,*aj=a->j;
1415:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1416:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1417:   PetscReal      zeropivot,rs,shiftnz;
1418:   PetscReal      shiftpd;
1419:   ChShift_Ctx    sctx;
1420:   PetscInt       newshift;
1421:   PetscTruth     perm_identity;


1425:   shiftnz   = info->shiftnz;
1426:   shiftpd   = info->shiftpd;
1427:   zeropivot = info->zeropivot;

1429:   ISGetIndices(ip,&rip);
1430:   ISGetIndices(iip,&riip);
1431: 
1432:   /* initialization */
1433:   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1434:   PetscMalloc(nz,&il);
1435:   jl   = il + mbs;
1436:   rtmp = (MatScalar*)(jl + mbs);

1438:   sctx.shift_amount = 0;
1439:   sctx.nshift       = 0;
1440:   do {
1441:     sctx.chshift = PETSC_FALSE;
1442:     for (i=0; i<mbs; i++) {
1443:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1444:     }
1445: 
1446:     for (k = 0; k<mbs; k++){
1447:       bval = ba + bi[k];
1448:       /* initialize k-th row by the perm[k]-th row of A */
1449:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1450:       for (j = jmin; j < jmax; j++){
1451:         col = riip[aj[j]];
1452:         if (col >= k){ /* only take upper triangular entry */
1453:           rtmp[col] = aa[j];
1454:           *bval++  = 0.0; /* for in-place factorization */
1455:         }
1456:       }
1457:       /* shift the diagonal of the matrix */
1458:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1460:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1461:       dk = rtmp[k];
1462:       i = jl[k]; /* first row to be added to k_th row  */

1464:       while (i < k){
1465:         nexti = jl[i]; /* next row to be added to k_th row */

1467:         /* compute multiplier, update diag(k) and U(i,k) */
1468:         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1469:         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1470:         dk += uikdi*ba[ili];
1471:         ba[ili] = uikdi; /* -U(i,k) */

1473:         /* add multiple of row i to k-th row */
1474:         jmin = ili + 1; jmax = bi[i+1];
1475:         if (jmin < jmax){
1476:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1477:           /* update il and jl for row i */
1478:           il[i] = jmin;
1479:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1480:         }
1481:         i = nexti;
1482:       }

1484:       /* shift the diagonals when zero pivot is detected */
1485:       /* compute rs=sum of abs(off-diagonal) */
1486:       rs   = 0.0;
1487:       jmin = bi[k]+1;
1488:       nz   = bi[k+1] - jmin;
1489:       bcol = bj + jmin;
1490:       while (nz--){
1491:         rs += PetscAbsScalar(rtmp[*bcol]);
1492:         bcol++;
1493:       }

1495:       sctx.rs = rs;
1496:       sctx.pv = dk;
1497:       MatCholeskyCheckShift_inline(info,sctx,k,newshift);

1499:       if (newshift == 1) {
1500:         if (!sctx.shift_amount) {
1501:           sctx.shift_amount = 1e-5;
1502:         }
1503:         break;
1504:       }
1505: 
1506:       /* copy data into U(k,:) */
1507:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1508:       jmin = bi[k]+1; jmax = bi[k+1];
1509:       if (jmin < jmax) {
1510:         for (j=jmin; j<jmax; j++){
1511:           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1512:         }
1513:         /* add the k-th row into il and jl */
1514:         il[k] = jmin;
1515:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1516:       }
1517:     }
1518:   } while (sctx.chshift);
1519:   PetscFree(il);

1521:   ISRestoreIndices(ip,&rip);
1522:   ISRestoreIndices(iip,&riip);

1524:   ISIdentity(ip,&perm_identity);
1525:   if (perm_identity){
1526:     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1527:     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1528:     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1529:     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1530:   } else {
1531:     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1532:     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1533:     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1534:     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1535:   }

1537:   C->assembled    = PETSC_TRUE;
1538:   C->preallocated = PETSC_TRUE;
1539:   PetscLogFlops(C->rmap->n);
1540:   if (sctx.nshift){
1541:     if (shiftnz) {
1542:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1543:     } else if (shiftpd) {
1544:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1545:     }
1546:   }
1547:   return(0);
1548: }

1552: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1553: {
1554:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1555:   Mat_SeqSBAIJ       *b;
1556:   PetscErrorCode     ierr;
1557:   PetscTruth         perm_identity,missing;
1558:   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1559:   const PetscInt     *rip,*riip;
1560:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1561:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1562:   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1563:   PetscReal          fill=info->fill,levels=info->levels;
1564:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1565:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1566:   PetscBT            lnkbt;
1567:   IS                 iperm;
1568: 
1570:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1571:   MatMissingDiagonal(A,&missing,&d);
1572:   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1573:   ISIdentity(perm,&perm_identity);
1574:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);

1576:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1577:   ui[0] = 0;

1579:   /* ICC(0) without matrix ordering: simply copies fill pattern */
1580:   if (!levels && perm_identity) {

1582:     for (i=0; i<am; i++) {
1583:       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1584:     }
1585:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1586:     cols = uj;
1587:     for (i=0; i<am; i++) {
1588:       aj    = a->j + a->diag[i];
1589:       ncols = ui[i+1] - ui[i];
1590:       for (j=0; j<ncols; j++) *cols++ = *aj++;
1591:     }
1592:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1593:     ISGetIndices(iperm,&riip);
1594:     ISGetIndices(perm,&rip);

1596:     /* initialization */
1597:     PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);

1599:     /* jl: linked list for storing indices of the pivot rows 
1600:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1601:     PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1602:     il         = jl + am;
1603:     uj_ptr     = (PetscInt**)(il + am);
1604:     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1605:     for (i=0; i<am; i++){
1606:       jl[i] = am; il[i] = 0;
1607:     }

1609:     /* create and initialize a linked list for storing column indices of the active row k */
1610:     nlnk = am + 1;
1611:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

1613:     /* initial FreeSpace size is fill*(ai[am]+1) */
1614:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1615:     current_space = free_space;
1616:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1617:     current_space_lvl = free_space_lvl;

1619:     for (k=0; k<am; k++){  /* for each active row k */
1620:       /* initialize lnk by the column indices of row rip[k] of A */
1621:       nzk   = 0;
1622:       ncols = ai[rip[k]+1] - ai[rip[k]];
1623:       if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1624:       ncols_upper = 0;
1625:       for (j=0; j<ncols; j++){
1626:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1627:         if (riip[i] >= k){ /* only take upper triangular entry */
1628:           ajtmp[ncols_upper] = i;
1629:           ncols_upper++;
1630:         }
1631:       }
1632:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
1633:       nzk += nlnk;

1635:       /* update lnk by computing fill-in for each pivot row to be merged in */
1636:       prow = jl[k]; /* 1st pivot row */
1637: 
1638:       while (prow < k){
1639:         nextprow = jl[prow];
1640: 
1641:         /* merge prow into k-th row */
1642:         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1643:         jmax = ui[prow+1];
1644:         ncols = jmax-jmin;
1645:         i     = jmin - ui[prow];
1646:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1647:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1648:         j     = *(uj - 1);
1649:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1650:         nzk += nlnk;

1652:         /* update il and jl for prow */
1653:         if (jmin < jmax){
1654:           il[prow] = jmin;
1655:           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1656:         }
1657:         prow = nextprow;
1658:       }

1660:       /* if free space is not available, make more free space */
1661:       if (current_space->local_remaining<nzk) {
1662:         i = am - k + 1; /* num of unfactored rows */
1663:         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1664:         PetscFreeSpaceGet(i,&current_space);
1665:         PetscFreeSpaceGet(i,&current_space_lvl);
1666:         reallocs++;
1667:       }

1669:       /* copy data into free_space and free_space_lvl, then initialize lnk */
1670:       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1671:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

1673:       /* add the k-th row into il and jl */
1674:       if (nzk > 1){
1675:         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1676:         jl[k] = jl[i]; jl[i] = k;
1677:         il[k] = ui[k] + 1;
1678:       }
1679:       uj_ptr[k]     = current_space->array;
1680:       uj_lvl_ptr[k] = current_space_lvl->array;

1682:       current_space->array           += nzk;
1683:       current_space->local_used      += nzk;
1684:       current_space->local_remaining -= nzk;

1686:       current_space_lvl->array           += nzk;
1687:       current_space_lvl->local_used      += nzk;
1688:       current_space_lvl->local_remaining -= nzk;

1690:       ui[k+1] = ui[k] + nzk;
1691:     }

1693: #if defined(PETSC_USE_INFO)
1694:     if (ai[am] != 0) {
1695:       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1696:       PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1697:       PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1698:       PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1699:     } else {
1700:       PetscInfo(A,"Empty matrix.\n");
1701:     }
1702: #endif

1704:     ISRestoreIndices(perm,&rip);
1705:     ISRestoreIndices(iperm,&riip);
1706:     PetscFree(jl);
1707:     PetscFree(ajtmp);

1709:     /* destroy list of free space and other temporary array(s) */
1710:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1711:     PetscFreeSpaceContiguous(&free_space,uj);
1712:     PetscIncompleteLLDestroy(lnk,lnkbt);
1713:     PetscFreeSpaceDestroy(free_space_lvl);

1715:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

1717:   /* put together the new matrix in MATSEQSBAIJ format */

1719:   b    = (Mat_SeqSBAIJ*)(fact)->data;
1720:   b->singlemalloc = PETSC_FALSE;
1721:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1722:   b->j    = uj;
1723:   b->i    = ui;
1724:   b->diag = 0;
1725:   b->ilen = 0;
1726:   b->imax = 0;
1727:   b->row  = perm;
1728:   b->col  = perm;
1729:   PetscObjectReference((PetscObject)perm);
1730:   PetscObjectReference((PetscObject)perm);
1731:   b->icol = iperm;
1732:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1733:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1734:   PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1735:   b->maxnz   = b->nz = ui[am];
1736:   b->free_a  = PETSC_TRUE;
1737:   b->free_ij = PETSC_TRUE;
1738: 
1739:   (fact)->info.factor_mallocs    = reallocs;
1740:   (fact)->info.fill_ratio_given  = fill;
1741:   if (ai[am] != 0) {
1742:     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1743:   } else {
1744:     (fact)->info.fill_ratio_needed = 0.0;
1745:   }
1746:   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1747:   return(0);
1748: }

1752: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1753: {
1754:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1755:   Mat_SeqSBAIJ       *b;
1756:   PetscErrorCode     ierr;
1757:   PetscTruth         perm_identity;
1758:   PetscReal          fill = info->fill;
1759:   const PetscInt     *rip,*riip;
1760:   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1761:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1762:   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1763:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1764:   PetscBT            lnkbt;
1765:   IS                 iperm;

1768:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1769:   /* check whether perm is the identity mapping */
1770:   ISIdentity(perm,&perm_identity);
1771:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1772:   ISGetIndices(iperm,&riip);
1773:   ISGetIndices(perm,&rip);

1775:   /* initialization */
1776:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1777:   ui[0] = 0;

1779:   /* jl: linked list for storing indices of the pivot rows 
1780:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1781:   PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1782:   il     = jl + am;
1783:   cols   = il + am;
1784:   ui_ptr = (PetscInt**)(cols + am);
1785:   for (i=0; i<am; i++){
1786:     jl[i] = am; il[i] = 0;
1787:   }

1789:   /* create and initialize a linked list for storing column indices of the active row k */
1790:   nlnk = am + 1;
1791:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

1793:   /* initial FreeSpace size is fill*(ai[am]+1) */
1794:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1795:   current_space = free_space;

1797:   for (k=0; k<am; k++){  /* for each active row k */
1798:     /* initialize lnk by the column indices of row rip[k] of A */
1799:     nzk   = 0;
1800:     ncols = ai[rip[k]+1] - ai[rip[k]];
1801:     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1802:     ncols_upper = 0;
1803:     for (j=0; j<ncols; j++){
1804:       i = riip[*(aj + ai[rip[k]] + j)];
1805:       if (i >= k){ /* only take upper triangular entry */
1806:         cols[ncols_upper] = i;
1807:         ncols_upper++;
1808:       }
1809:     }
1810:     PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1811:     nzk += nlnk;

1813:     /* update lnk by computing fill-in for each pivot row to be merged in */
1814:     prow = jl[k]; /* 1st pivot row */
1815: 
1816:     while (prow < k){
1817:       nextprow = jl[prow];
1818:       /* merge prow into k-th row */
1819:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1820:       jmax = ui[prow+1];
1821:       ncols = jmax-jmin;
1822:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1823:       PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1824:       nzk += nlnk;

1826:       /* update il and jl for prow */
1827:       if (jmin < jmax){
1828:         il[prow] = jmin;
1829:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1830:       }
1831:       prow = nextprow;
1832:     }

1834:     /* if free space is not available, make more free space */
1835:     if (current_space->local_remaining<nzk) {
1836:       i = am - k + 1; /* num of unfactored rows */
1837:       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1838:       PetscFreeSpaceGet(i,&current_space);
1839:       reallocs++;
1840:     }

1842:     /* copy data into free space, then initialize lnk */
1843:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

1845:     /* add the k-th row into il and jl */
1846:     if (nzk-1 > 0){
1847:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1848:       jl[k] = jl[i]; jl[i] = k;
1849:       il[k] = ui[k] + 1;
1850:     }
1851:     ui_ptr[k] = current_space->array;
1852:     current_space->array           += nzk;
1853:     current_space->local_used      += nzk;
1854:     current_space->local_remaining -= nzk;

1856:     ui[k+1] = ui[k] + nzk;
1857:   }

1859: #if defined(PETSC_USE_INFO)
1860:   if (ai[am] != 0) {
1861:     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1862:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1863:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1864:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1865:   } else {
1866:      PetscInfo(A,"Empty matrix.\n");
1867:   }
1868: #endif

1870:   ISRestoreIndices(perm,&rip);
1871:   ISRestoreIndices(iperm,&riip);
1872:   PetscFree(jl);

1874:   /* destroy list of free space and other temporary array(s) */
1875:   PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1876:   PetscFreeSpaceContiguous(&free_space,uj);
1877:   PetscLLDestroy(lnk,lnkbt);

1879:   /* put together the new matrix in MATSEQSBAIJ format */

1881:   b = (Mat_SeqSBAIJ*)(fact)->data;
1882:   b->singlemalloc = PETSC_FALSE;
1883:   b->free_a       = PETSC_TRUE;
1884:   b->free_ij      = PETSC_TRUE;
1885:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1886:   b->j    = uj;
1887:   b->i    = ui;
1888:   b->diag = 0;
1889:   b->ilen = 0;
1890:   b->imax = 0;
1891:   b->row  = perm;
1892:   b->col  = perm;
1893:   PetscObjectReference((PetscObject)perm);
1894:   PetscObjectReference((PetscObject)perm);
1895:   b->icol = iperm;
1896:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1897:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1898:   PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1899:   b->maxnz = b->nz = ui[am];
1900: 
1901:   (fact)->info.factor_mallocs    = reallocs;
1902:   (fact)->info.fill_ratio_given  = fill;
1903:   if (ai[am] != 0) {
1904:     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1905:   } else {
1906:     (fact)->info.fill_ratio_needed = 0.0;
1907:   }
1908:   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1909:   return(0);
1910: }