Actual source code: aijfact.c
1: #define PETSCMAT_DLL
3: #include ../src/mat/impls/aij/seq/aij.h
4: #include ../src/inline/dot.h
5: #include ../src/inline/spops.h
6: #include petscbt.h
7: #include ../src/mat/utils/freespace.h
11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12: {
15: SETERRQ(PETSC_ERR_SUP,"Code not written");
16: #if !defined(PETSC_USE_DEBUG)
17: return(0);
18: #endif
19: }
22: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26: #endif
30: /* ------------------------------------------------------------
32: This interface was contribed by Tony Caola
34: This routine is an interface to the pivoting drop-tolerance
35: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36: SPARSEKIT2.
38: The SPARSEKIT2 routines used here are covered by the GNU
39: copyright; see the file gnu in this directory.
41: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42: help in getting this routine ironed out.
44: The major drawback to this routine is that if info->fill is
45: not large enough it fails rather than allocating more space;
46: this can be fixed by hacking/improving the f2c version of
47: Yousef Saad's code.
49: ------------------------------------------------------------
50: */
51: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
52: {
53: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
55: SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56: You can obtain the drop tolerance routines by installing PETSc from\n\
57: www.mcs.anl.gov/petsc\n");
58: #else
59: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
60: IS iscolf,isicol,isirow;
61: PetscTruth reorder;
62: PetscErrorCode ierr,sierr;
63: const PetscInt *c,*r,*ic;
64: PetscInt i,n = A->rmap->n,*cc,*cr;
65: PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
66: PetscInt *ordcol,*iwk,*iperm,*jw;
67: PetscInt jmax,lfill,job,*o_i,*o_j;
68: MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
69: PetscReal af,dt,dtcount,dtcol,fill;
73: if (info->dt == PETSC_DEFAULT) dt = .005; else dt = info->dt;
74: if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); else dtcount = info->dtcount;
75: if (info->dtcol == PETSC_DEFAULT) dtcol = .01; else dtcol = info->dtcol;
76: if (info->fill == PETSC_DEFAULT) fill = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill;
77: lfill = (PetscInt)(dtcount/2.0);
78: jmax = (PetscInt)(fill*a->nz);
81: /* ------------------------------------------------------------
82: If reorder=.TRUE., then the original matrix has to be
83: reordered to reflect the user selected ordering scheme, and
84: then de-reordered so it is in it's original format.
85: Because Saad's dperm() is NOT in place, we have to copy
86: the original matrix and allocate more storage. . .
87: ------------------------------------------------------------
88: */
90: /* set reorder to true if either isrow or iscol is not identity */
91: ISIdentity(isrow,&reorder);
92: if (reorder) {ISIdentity(iscol,&reorder);}
93: reorder = PetscNot(reorder);
95:
96: /* storage for ilu factor */
97: PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
98: PetscMalloc(jmax*sizeof(PetscInt),&new_j);
99: PetscMalloc(jmax*sizeof(MatScalar),&new_a);
100: PetscMalloc(n*sizeof(PetscInt),&ordcol);
102: /* ------------------------------------------------------------
103: Make sure that everything is Fortran formatted (1-Based)
104: ------------------------------------------------------------
105: */
106: for (i=old_i[0];i<old_i[n];i++) {
107: old_j[i]++;
108: }
109: for(i=0;i<n+1;i++) {
110: old_i[i]++;
111: };
112:
114: if (reorder) {
115: ISGetIndices(iscol,&c);
116: ISGetIndices(isrow,&r);
117: PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);
118: for(i=0;i<n;i++) {
119: cr[i] = r[i]+1;
120: cc[i] = c[i]+1;
121: }
122: ISRestoreIndices(iscol,&c);
123: ISRestoreIndices(isrow,&r);
124: PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
125: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
126: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);
127: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job);
128: PetscFree2(cc,cr);
129: o_a = old_a2;
130: o_j = old_j2;
131: o_i = old_i2;
132: } else {
133: o_a = old_a;
134: o_j = old_j;
135: o_i = old_i;
136: }
138: /* ------------------------------------------------------------
139: Call Saad's ilutp() routine to generate the factorization
140: ------------------------------------------------------------
141: */
143: PetscMalloc(2*n*sizeof(PetscInt),&iperm);
144: PetscMalloc(2*n*sizeof(PetscInt),&jw);
145: PetscMalloc(n*sizeof(PetscScalar),&w);
147: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148: if (sierr) {
149: switch (sierr) {
150: case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax);
151: case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax);
152: case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153: case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154: case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax);
155: default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156: }
157: }
159: PetscFree(w);
160: PetscFree(jw);
162: /* ------------------------------------------------------------
163: Saad's routine gives the result in Modified Sparse Row (msr)
164: Convert to Compressed Sparse Row format (csr)
165: ------------------------------------------------------------
166: */
168: PetscMalloc(n*sizeof(PetscScalar),&wk);
169: PetscMalloc((n+1)*sizeof(PetscInt),&iwk);
171: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173: PetscFree(iwk);
174: PetscFree(wk);
176: if (reorder) {
177: PetscFree(old_a2);
178: PetscFree(old_j2);
179: PetscFree(old_i2);
180: } else {
181: /* fix permutation of old_j that the factorization introduced */
182: for (i=old_i[0]; i<old_i[n]; i++) {
183: old_j[i-1] = iperm[old_j[i-1]-1];
184: }
185: }
187: /* get rid of the shift to indices starting at 1 */
188: for (i=0; i<n+1; i++) {
189: old_i[i]--;
190: }
191: for (i=old_i[0];i<old_i[n];i++) {
192: old_j[i]--;
193: }
194:
195: /* Make the factored matrix 0-based */
196: for (i=0; i<n+1; i++) {
197: new_i[i]--;
198: }
199: for (i=new_i[0];i<new_i[n];i++) {
200: new_j[i]--;
201: }
203: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204: /*-- permute the right-hand-side and solution vectors --*/
205: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
206: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
207: ISGetIndices(isicol,&ic);
208: for(i=0; i<n; i++) {
209: ordcol[i] = ic[iperm[i]-1];
210: };
211: ISRestoreIndices(isicol,&ic);
212: ISDestroy(isicol);
214: PetscFree(iperm);
216: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
217: PetscFree(ordcol);
219: /*----- put together the new matrix -----*/
221: MatCreate(((PetscObject)A)->comm,fact);
222: MatSetSizes(*fact,n,n,n,n);
223: MatSetType(*fact,((PetscObject)A)->type_name);
224: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
225: (*fact)->factor = MAT_FACTOR_LU;
226: (*fact)->assembled = PETSC_TRUE;
228: b = (Mat_SeqAIJ*)(*fact)->data;
229: b->free_a = PETSC_TRUE;
230: b->free_ij = PETSC_TRUE;
231: b->singlemalloc = PETSC_FALSE;
232: b->a = new_a;
233: b->j = new_j;
234: b->i = new_i;
235: b->ilen = 0;
236: b->imax = 0;
237: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238: b->row = isirow;
239: b->col = iscolf;
240: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
241: b->maxnz = b->nz = new_i[n];
242: MatMarkDiagonal_SeqAIJ(*fact);
243: (*fact)->info.factor_mallocs = 0;
245: af = ((double)b->nz)/((double)a->nz) + .001;
246: PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);
247: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
248: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
249: PetscInfo(A,"for best performance.\n");
251: if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ;
252: else (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
253: (*fact)->ops->solveadd = MatSolveAdd_SeqAIJ;
254: (*fact)->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
255: (*fact)->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
256:
257: MatILUDTFactor_Inode(A,isrow,iscol,info,fact);
259: return(0);
260: #endif
261: }
266: PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
267: {
269: *flg = PETSC_TRUE;
270: return(0);
271: }
277: PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
278: {
279: PetscInt n = A->rmap->n;
280: PetscErrorCode ierr;
283: MatCreate(((PetscObject)A)->comm,B);
284: MatSetSizes(*B,n,n,n,n);
285: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
286: MatSetType(*B,MATSEQAIJ);
287: if (ftype == MAT_FACTOR_ILU) {
288: (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ;
289: } else {
290: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
291: }
292: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
293: MatSetType(*B,MATSEQSBAIJ);
294: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
295: if (ftype == MAT_FACTOR_ICC) {
296: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
297: } else {
298: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
299: }
300: } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
301: (*B)->factor = ftype;
302: return(0);
303: }
308: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
309: {
310: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
311: IS isicol;
312: PetscErrorCode ierr;
313: const PetscInt *r,*ic;
314: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
315: PetscInt *bi,*bj,*ajtmp;
316: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
317: PetscReal f;
318: PetscInt nlnk,*lnk,k,**bi_ptr;
319: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
320: PetscBT lnkbt;
323: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
324: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
325: ISGetIndices(isrow,&r);
326: ISGetIndices(isicol,&ic);
328: /* get new row pointers */
329: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
330: bi[0] = 0;
332: /* bdiag is location of diagonal in factor */
333: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
334: bdiag[0] = 0;
336: /* linked list for storing column indices of the active row */
337: nlnk = n + 1;
338: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
340: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
342: /* initial FreeSpace size is f*(ai[n]+1) */
343: f = info->fill;
344: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
345: current_space = free_space;
347: for (i=0; i<n; i++) {
348: /* copy previous fill into linked list */
349: nzi = 0;
350: nnz = ai[r[i]+1] - ai[r[i]];
351: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
352: ajtmp = aj + ai[r[i]];
353: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
354: nzi += nlnk;
356: /* add pivot rows into linked list */
357: row = lnk[n];
358: while (row < i) {
359: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
360: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
361: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
362: nzi += nlnk;
363: row = lnk[row];
364: }
365: bi[i+1] = bi[i] + nzi;
366: im[i] = nzi;
368: /* mark bdiag */
369: nzbd = 0;
370: nnz = nzi;
371: k = lnk[n];
372: while (nnz-- && k < i){
373: nzbd++;
374: k = lnk[k];
375: }
376: bdiag[i] = bi[i] + nzbd;
378: /* if free space is not available, make more free space */
379: if (current_space->local_remaining<nzi) {
380: nnz = (n - i)*nzi; /* estimated and max additional space needed */
381: PetscFreeSpaceGet(nnz,¤t_space);
382: reallocs++;
383: }
385: /* copy data into free space, then initialize lnk */
386: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
387: bi_ptr[i] = current_space->array;
388: current_space->array += nzi;
389: current_space->local_used += nzi;
390: current_space->local_remaining -= nzi;
391: }
392: #if defined(PETSC_USE_INFO)
393: if (ai[n] != 0) {
394: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
395: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
396: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
397: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
398: PetscInfo(A,"for best performance.\n");
399: } else {
400: PetscInfo(A,"Empty matrix\n");
401: }
402: #endif
404: ISRestoreIndices(isrow,&r);
405: ISRestoreIndices(isicol,&ic);
407: /* destroy list of free space and other temporary array(s) */
408: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
409: PetscFreeSpaceContiguous(&free_space,bj);
410: PetscLLDestroy(lnk,lnkbt);
411: PetscFree2(bi_ptr,im);
413: /* put together the new matrix */
414: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
415: PetscLogObjectParent(B,isicol);
416: b = (Mat_SeqAIJ*)(B)->data;
417: b->free_a = PETSC_TRUE;
418: b->free_ij = PETSC_TRUE;
419: b->singlemalloc = PETSC_FALSE;
420: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
421: b->j = bj;
422: b->i = bi;
423: b->diag = bdiag;
424: b->ilen = 0;
425: b->imax = 0;
426: b->row = isrow;
427: b->col = iscol;
428: PetscObjectReference((PetscObject)isrow);
429: PetscObjectReference((PetscObject)iscol);
430: b->icol = isicol;
431: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
433: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
434: PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
435: b->maxnz = b->nz = bi[n] ;
437: (B)->factor = MAT_FACTOR_LU;
438: (B)->info.factor_mallocs = reallocs;
439: (B)->info.fill_ratio_given = f;
441: if (ai[n] != 0) {
442: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
443: } else {
444: (B)->info.fill_ratio_needed = 0.0;
445: }
446: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
447: (B)->ops->solve = MatSolve_SeqAIJ;
448: (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
449: /* switch to inodes if appropriate */
450: MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);
451: return(0);
452: }
454: /*
455: Trouble in factorization, should we dump the original matrix?
456: */
459: PetscErrorCode MatFactorDumpMatrix(Mat A)
460: {
462: PetscTruth flg;
465: PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);
466: if (flg) {
467: PetscViewer viewer;
468: char filename[PETSC_MAX_PATH_LEN];
470: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
471: PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);
472: MatView(A,viewer);
473: PetscViewerDestroy(viewer);
474: }
475: return(0);
476: }
480: /* ----------------------------------------------------------- */
483: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
484: {
485: Mat C=B;
486: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
487: IS isrow = b->row,isicol = b->icol;
489: const PetscInt *r,*ic,*ics;
490: PetscInt i,j,n=A->rmap->n,*bi=b->i,*bj=b->j;
491: PetscInt *ajtmp,*bjtmp,nz,row;
492: PetscInt *diag_offset = b->diag,diag,*pj;
493: PetscScalar *rtmp,*pc,multiplier,*rtmps;
494: MatScalar *v,*pv;
495: PetscScalar d;
496: PetscReal rs;
497: LUShift_Ctx sctx;
498: PetscInt newshift,*ddiag;
501: ISGetIndices(isrow,&r);
502: ISGetIndices(isicol,&ic);
503: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
504: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
505: rtmps = rtmp; ics = ic;
507: sctx.shift_top = 0;
508: sctx.nshift_max = 0;
509: sctx.shift_lo = 0;
510: sctx.shift_hi = 0;
511: sctx.shift_fraction = 0;
513: /* if both shift schemes are chosen by user, only use info->shiftpd */
514: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
515: PetscInt *aai = a->i;
516: ddiag = a->diag;
517: sctx.shift_top = 0;
518: for (i=0; i<n; i++) {
519: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
520: d = (a->a)[ddiag[i]];
521: rs = -PetscAbsScalar(d) - PetscRealPart(d);
522: v = a->a+aai[i];
523: nz = aai[i+1] - aai[i];
524: for (j=0; j<nz; j++)
525: rs += PetscAbsScalar(v[j]);
526: if (rs>sctx.shift_top) sctx.shift_top = rs;
527: }
528: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
529: sctx.shift_top *= 1.1;
530: sctx.nshift_max = 5;
531: sctx.shift_lo = 0.;
532: sctx.shift_hi = 1.;
533: }
535: sctx.shift_amount = 0;
536: sctx.nshift = 0;
537: do {
538: sctx.lushift = PETSC_FALSE;
539: for (i=0; i<n; i++){
540: nz = bi[i+1] - bi[i];
541: bjtmp = bj + bi[i];
542: for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
544: /* load in initial (unfactored row) */
545: nz = a->i[r[i]+1] - a->i[r[i]];
546: ajtmp = a->j + a->i[r[i]];
547: v = a->a + a->i[r[i]];
548: for (j=0; j<nz; j++) {
549: rtmp[ics[ajtmp[j]]] = v[j];
550: }
551: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
553: row = *bjtmp++;
554: while (row < i) {
555: pc = rtmp + row;
556: if (*pc != 0.0) {
557: pv = b->a + diag_offset[row];
558: pj = b->j + diag_offset[row] + 1;
559: multiplier = *pc / *pv++;
560: *pc = multiplier;
561: nz = bi[row+1] - diag_offset[row] - 1;
562: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
563: PetscLogFlops(2*nz);
564: }
565: row = *bjtmp++;
566: }
567: /* finished row so stick it into b->a */
568: pv = b->a + bi[i] ;
569: pj = b->j + bi[i] ;
570: nz = bi[i+1] - bi[i];
571: diag = diag_offset[i] - bi[i];
572: rs = 0.0;
573: for (j=0; j<nz; j++) {
574: pv[j] = rtmps[pj[j]];
575: if (j != diag) rs += PetscAbsScalar(pv[j]);
576: }
578: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
579: sctx.rs = rs;
580: sctx.pv = pv[diag];
581: MatLUCheckShift_inline(info,sctx,i,newshift);
582: if (newshift == 1) break;
583: }
585: if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
586: /*
587: * if no shift in this attempt & shifting & started shifting & can refine,
588: * then try lower shift
589: */
590: sctx.shift_hi = sctx.shift_fraction;
591: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
592: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
593: sctx.lushift = PETSC_TRUE;
594: sctx.nshift++;
595: }
596: } while (sctx.lushift);
598: /* invert diagonal entries for simplier triangular solves */
599: for (i=0; i<n; i++) {
600: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
601: }
602: PetscFree(rtmp);
603: ISRestoreIndices(isicol,&ic);
604: ISRestoreIndices(isrow,&r);
605: if (b->inode.use) {
606: C->ops->solve = MatSolve_Inode;
607: } else {
608: PetscTruth row_identity, col_identity;
609: ISIdentity(isrow,&row_identity);
610: ISIdentity(isicol,&col_identity);
611: if (row_identity && col_identity) {
612: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
613: } else {
614: C->ops->solve = MatSolve_SeqAIJ;
615: }
616: }
617: C->ops->solveadd = MatSolveAdd_SeqAIJ;
618: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
619: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
620: C->ops->matsolve = MatMatSolve_SeqAIJ;
621: C->assembled = PETSC_TRUE;
622: C->preallocated = PETSC_TRUE;
623: PetscLogFlops(C->cmap->n);
624: if (sctx.nshift){
625: if (info->shiftpd) {
626: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
627: } else if (info->shiftnz) {
628: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
629: }
630: }
631: return(0);
632: }
634: /*
635: This routine implements inplace ILU(0) with row or/and column permutations.
636: Input:
637: A - original matrix
638: Output;
639: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
640: a->j (col index) is permuted by the inverse of colperm, then sorted
641: a->a reordered accordingly with a->j
642: a->diag (ptr to diagonal elements) is updated.
643: */
646: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
647: {
648: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
649: IS isrow = a->row,isicol = a->icol;
651: const PetscInt *r,*ic,*ics;
652: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
653: PetscInt *ajtmp,nz,row;
654: PetscInt *diag = a->diag,nbdiag,*pj;
655: PetscScalar *rtmp,*pc,multiplier,d;
656: MatScalar *v,*pv;
657: PetscReal rs;
658: LUShift_Ctx sctx;
659: PetscInt newshift;
662: if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
663: ISGetIndices(isrow,&r);
664: ISGetIndices(isicol,&ic);
665: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
666: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
667: ics = ic;
669: sctx.shift_top = 0;
670: sctx.nshift_max = 0;
671: sctx.shift_lo = 0;
672: sctx.shift_hi = 0;
673: sctx.shift_fraction = 0;
675: /* if both shift schemes are chosen by user, only use info->shiftpd */
676: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
677: sctx.shift_top = 0;
678: for (i=0; i<n; i++) {
679: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
680: d = (a->a)[diag[i]];
681: rs = -PetscAbsScalar(d) - PetscRealPart(d);
682: v = a->a+ai[i];
683: nz = ai[i+1] - ai[i];
684: for (j=0; j<nz; j++)
685: rs += PetscAbsScalar(v[j]);
686: if (rs>sctx.shift_top) sctx.shift_top = rs;
687: }
688: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
689: sctx.shift_top *= 1.1;
690: sctx.nshift_max = 5;
691: sctx.shift_lo = 0.;
692: sctx.shift_hi = 1.;
693: }
695: sctx.shift_amount = 0;
696: sctx.nshift = 0;
697: do {
698: sctx.lushift = PETSC_FALSE;
699: for (i=0; i<n; i++){
700: /* load in initial unfactored row */
701: nz = ai[r[i]+1] - ai[r[i]];
702: ajtmp = aj + ai[r[i]];
703: v = a->a + ai[r[i]];
704: /* sort permuted ajtmp and values v accordingly */
705: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
706: PetscSortIntWithScalarArray(nz,ajtmp,v);
708: diag[r[i]] = ai[r[i]];
709: for (j=0; j<nz; j++) {
710: rtmp[ajtmp[j]] = v[j];
711: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
712: }
713: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
715: row = *ajtmp++;
716: while (row < i) {
717: pc = rtmp + row;
718: if (*pc != 0.0) {
719: pv = a->a + diag[r[row]];
720: pj = aj + diag[r[row]] + 1;
722: multiplier = *pc / *pv++;
723: *pc = multiplier;
724: nz = ai[r[row]+1] - diag[r[row]] - 1;
725: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
726: PetscLogFlops(2*nz);
727: }
728: row = *ajtmp++;
729: }
730: /* finished row so overwrite it onto a->a */
731: pv = a->a + ai[r[i]] ;
732: pj = aj + ai[r[i]] ;
733: nz = ai[r[i]+1] - ai[r[i]];
734: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
735:
736: rs = 0.0;
737: for (j=0; j<nz; j++) {
738: pv[j] = rtmp[pj[j]];
739: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
740: }
742: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
743: sctx.rs = rs;
744: sctx.pv = pv[nbdiag];
745: MatLUCheckShift_inline(info,sctx,i,newshift);
746: if (newshift == 1) break;
747: }
749: if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
750: /*
751: * if no shift in this attempt & shifting & started shifting & can refine,
752: * then try lower shift
753: */
754: sctx.shift_hi = sctx.shift_fraction;
755: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
756: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
757: sctx.lushift = PETSC_TRUE;
758: sctx.nshift++;
759: }
760: } while (sctx.lushift);
762: /* invert diagonal entries for simplier triangular solves */
763: for (i=0; i<n; i++) {
764: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
765: }
767: PetscFree(rtmp);
768: ISRestoreIndices(isicol,&ic);
769: ISRestoreIndices(isrow,&r);
770: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
771: A->ops->solveadd = MatSolveAdd_SeqAIJ;
772: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
773: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
774: A->assembled = PETSC_TRUE;
775: A->preallocated = PETSC_TRUE;
776: PetscLogFlops(A->cmap->n);
777: if (sctx.nshift){
778: if (info->shiftpd) {
779: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
780: } else if (info->shiftnz) {
781: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
782: }
783: }
784: return(0);
785: }
787: /* ----------------------------------------------------------- */
790: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
791: {
793: Mat C;
796: MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);
797: MatLUFactorSymbolic(C,A,row,col,info);
798: MatLUFactorNumeric(C,A,info);
799: A->ops->solve = C->ops->solve;
800: A->ops->solvetranspose = C->ops->solvetranspose;
801: MatHeaderCopy(A,C);
802: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
803: return(0);
804: }
805: /* ----------------------------------------------------------- */
808: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
809: {
810: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
811: IS iscol = a->col,isrow = a->row;
812: PetscErrorCode ierr;
813: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
814: PetscInt nz;
815: const PetscInt *rout,*cout,*r,*c;
816: PetscScalar *x,*tmp,*tmps,sum;
817: const PetscScalar *b;
818: const MatScalar *aa = a->a,*v;
821: if (!n) return(0);
823: VecGetArray(bb,(PetscScalar**)&b);
824: VecGetArray(xx,&x);
825: tmp = a->solve_work;
827: ISGetIndices(isrow,&rout); r = rout;
828: ISGetIndices(iscol,&cout); c = cout + (n-1);
830: /* forward solve the lower triangular */
831: tmp[0] = b[*r++];
832: tmps = tmp;
833: for (i=1; i<n; i++) {
834: v = aa + ai[i] ;
835: vi = aj + ai[i] ;
836: nz = a->diag[i] - ai[i];
837: sum = b[*r++];
838: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
839: tmp[i] = sum;
840: }
842: /* backward solve the upper triangular */
843: for (i=n-1; i>=0; i--){
844: v = aa + a->diag[i] + 1;
845: vi = aj + a->diag[i] + 1;
846: nz = ai[i+1] - a->diag[i] - 1;
847: sum = tmp[i];
848: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
849: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
850: }
852: ISRestoreIndices(isrow,&rout);
853: ISRestoreIndices(iscol,&cout);
854: VecRestoreArray(bb,(PetscScalar**)&b);
855: VecRestoreArray(xx,&x);
856: PetscLogFlops(2*a->nz - A->cmap->n);
857: return(0);
858: }
862: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
863: {
864: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
865: IS iscol = a->col,isrow = a->row;
866: PetscErrorCode ierr;
867: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
868: PetscInt nz,neq;
869: const PetscInt *rout,*cout,*r,*c;
870: PetscScalar *x,*b,*tmp,*tmps,sum;
871: const MatScalar *aa = a->a,*v;
872: PetscTruth bisdense,xisdense;
875: if (!n) return(0);
877: PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
878: if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
879: PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
880: if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
882: MatGetArray(B,&b);
883: MatGetArray(X,&x);
884:
885: tmp = a->solve_work;
886: ISGetIndices(isrow,&rout); r = rout;
887: ISGetIndices(iscol,&cout); c = cout;
889: for (neq=0; neq<B->cmap->n; neq++){
890: /* forward solve the lower triangular */
891: tmp[0] = b[r[0]];
892: tmps = tmp;
893: for (i=1; i<n; i++) {
894: v = aa + ai[i] ;
895: vi = aj + ai[i] ;
896: nz = a->diag[i] - ai[i];
897: sum = b[r[i]];
898: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
899: tmp[i] = sum;
900: }
901: /* backward solve the upper triangular */
902: for (i=n-1; i>=0; i--){
903: v = aa + a->diag[i] + 1;
904: vi = aj + a->diag[i] + 1;
905: nz = ai[i+1] - a->diag[i] - 1;
906: sum = tmp[i];
907: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
908: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
909: }
911: b += n;
912: x += n;
913: }
914: ISRestoreIndices(isrow,&rout);
915: ISRestoreIndices(iscol,&cout);
916: MatRestoreArray(B,&b);
917: MatRestoreArray(X,&x);
918: PetscLogFlops(B->cmap->n*(2*a->nz - n));
919: return(0);
920: }
924: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
925: {
926: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
927: IS iscol = a->col,isrow = a->row;
928: PetscErrorCode ierr;
929: const PetscInt *r,*c,*rout,*cout;
930: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
931: PetscInt nz,row;
932: PetscScalar *x,*b,*tmp,*tmps,sum;
933: const MatScalar *aa = a->a,*v;
936: if (!n) return(0);
938: VecGetArray(bb,&b);
939: VecGetArray(xx,&x);
940: tmp = a->solve_work;
942: ISGetIndices(isrow,&rout); r = rout;
943: ISGetIndices(iscol,&cout); c = cout + (n-1);
945: /* forward solve the lower triangular */
946: tmp[0] = b[*r++];
947: tmps = tmp;
948: for (row=1; row<n; row++) {
949: i = rout[row]; /* permuted row */
950: v = aa + ai[i] ;
951: vi = aj + ai[i] ;
952: nz = a->diag[i] - ai[i];
953: sum = b[*r++];
954: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
955: tmp[row] = sum;
956: }
958: /* backward solve the upper triangular */
959: for (row=n-1; row>=0; row--){
960: i = rout[row]; /* permuted row */
961: v = aa + a->diag[i] + 1;
962: vi = aj + a->diag[i] + 1;
963: nz = ai[i+1] - a->diag[i] - 1;
964: sum = tmp[row];
965: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
966: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
967: }
969: ISRestoreIndices(isrow,&rout);
970: ISRestoreIndices(iscol,&cout);
971: VecRestoreArray(bb,&b);
972: VecRestoreArray(xx,&x);
973: PetscLogFlops(2*a->nz - A->cmap->n);
974: return(0);
975: }
977: /* ----------------------------------------------------------- */
980: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
981: {
982: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
983: PetscErrorCode ierr;
984: PetscInt n = A->rmap->n;
985: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
986: PetscScalar *x;
987: const PetscScalar *b;
988: const MatScalar *aa = a->a;
989: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
990: PetscInt adiag_i,i,nz,ai_i;
991: const MatScalar *v;
992: PetscScalar sum;
993: #endif
996: if (!n) return(0);
998: VecGetArray(bb,(PetscScalar**)&b);
999: VecGetArray(xx,&x);
1001: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1002: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1003: #else
1004: /* forward solve the lower triangular */
1005: x[0] = b[0];
1006: for (i=1; i<n; i++) {
1007: ai_i = ai[i];
1008: v = aa + ai_i;
1009: vi = aj + ai_i;
1010: nz = adiag[i] - ai_i;
1011: sum = b[i];
1012: while (nz--) sum -= *v++ * x[*vi++];
1013: x[i] = sum;
1014: }
1016: /* backward solve the upper triangular */
1017: for (i=n-1; i>=0; i--){
1018: adiag_i = adiag[i];
1019: v = aa + adiag_i + 1;
1020: vi = aj + adiag_i + 1;
1021: nz = ai[i+1] - adiag_i - 1;
1022: sum = x[i];
1023: while (nz--) sum -= *v++ * x[*vi++];
1024: x[i] = sum*aa[adiag_i];
1025: }
1026: #endif
1027: PetscLogFlops(2*a->nz - A->cmap->n);
1028: VecRestoreArray(bb,(PetscScalar**)&b);
1029: VecRestoreArray(xx,&x);
1030: return(0);
1031: }
1035: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1036: {
1037: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1038: IS iscol = a->col,isrow = a->row;
1039: PetscErrorCode ierr;
1040: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1041: PetscInt nz;
1042: const PetscInt *rout,*cout,*r,*c;
1043: PetscScalar *x,*b,*tmp,sum;
1044: const MatScalar *aa = a->a,*v;
1047: if (yy != xx) {VecCopy(yy,xx);}
1049: VecGetArray(bb,&b);
1050: VecGetArray(xx,&x);
1051: tmp = a->solve_work;
1053: ISGetIndices(isrow,&rout); r = rout;
1054: ISGetIndices(iscol,&cout); c = cout + (n-1);
1056: /* forward solve the lower triangular */
1057: tmp[0] = b[*r++];
1058: for (i=1; i<n; i++) {
1059: v = aa + ai[i] ;
1060: vi = aj + ai[i] ;
1061: nz = a->diag[i] - ai[i];
1062: sum = b[*r++];
1063: while (nz--) sum -= *v++ * tmp[*vi++ ];
1064: tmp[i] = sum;
1065: }
1067: /* backward solve the upper triangular */
1068: for (i=n-1; i>=0; i--){
1069: v = aa + a->diag[i] + 1;
1070: vi = aj + a->diag[i] + 1;
1071: nz = ai[i+1] - a->diag[i] - 1;
1072: sum = tmp[i];
1073: while (nz--) sum -= *v++ * tmp[*vi++ ];
1074: tmp[i] = sum*aa[a->diag[i]];
1075: x[*c--] += tmp[i];
1076: }
1078: ISRestoreIndices(isrow,&rout);
1079: ISRestoreIndices(iscol,&cout);
1080: VecRestoreArray(bb,&b);
1081: VecRestoreArray(xx,&x);
1082: PetscLogFlops(2*a->nz);
1084: return(0);
1085: }
1086: /* -------------------------------------------------------------------*/
1089: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1090: {
1091: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1092: IS iscol = a->col,isrow = a->row;
1093: PetscErrorCode ierr;
1094: const PetscInt *rout,*cout,*r,*c;
1095: PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1096: PetscInt nz,*diag = a->diag;
1097: PetscScalar *x,*b,*tmp,s1;
1098: const MatScalar *aa = a->a,*v;
1101: VecGetArray(bb,&b);
1102: VecGetArray(xx,&x);
1103: tmp = a->solve_work;
1105: ISGetIndices(isrow,&rout); r = rout;
1106: ISGetIndices(iscol,&cout); c = cout;
1108: /* copy the b into temp work space according to permutation */
1109: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1111: /* forward solve the U^T */
1112: for (i=0; i<n; i++) {
1113: v = aa + diag[i] ;
1114: vi = aj + diag[i] + 1;
1115: nz = ai[i+1] - diag[i] - 1;
1116: s1 = tmp[i];
1117: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1118: while (nz--) {
1119: tmp[*vi++ ] -= (*v++)*s1;
1120: }
1121: tmp[i] = s1;
1122: }
1124: /* backward solve the L^T */
1125: for (i=n-1; i>=0; i--){
1126: v = aa + diag[i] - 1 ;
1127: vi = aj + diag[i] - 1 ;
1128: nz = diag[i] - ai[i];
1129: s1 = tmp[i];
1130: while (nz--) {
1131: tmp[*vi-- ] -= (*v--)*s1;
1132: }
1133: }
1135: /* copy tmp into x according to permutation */
1136: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1138: ISRestoreIndices(isrow,&rout);
1139: ISRestoreIndices(iscol,&cout);
1140: VecRestoreArray(bb,&b);
1141: VecRestoreArray(xx,&x);
1143: PetscLogFlops(2*a->nz-A->cmap->n);
1144: return(0);
1145: }
1149: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1150: {
1151: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1152: IS iscol = a->col,isrow = a->row;
1153: PetscErrorCode ierr;
1154: const PetscInt *r,*c,*rout,*cout;
1155: PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1156: PetscInt nz,*diag = a->diag;
1157: PetscScalar *x,*b,*tmp;
1158: const MatScalar *aa = a->a,*v;
1161: if (zz != xx) {VecCopy(zz,xx);}
1163: VecGetArray(bb,&b);
1164: VecGetArray(xx,&x);
1165: tmp = a->solve_work;
1167: ISGetIndices(isrow,&rout); r = rout;
1168: ISGetIndices(iscol,&cout); c = cout;
1170: /* copy the b into temp work space according to permutation */
1171: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1173: /* forward solve the U^T */
1174: for (i=0; i<n; i++) {
1175: v = aa + diag[i] ;
1176: vi = aj + diag[i] + 1;
1177: nz = ai[i+1] - diag[i] - 1;
1178: tmp[i] *= *v++;
1179: while (nz--) {
1180: tmp[*vi++ ] -= (*v++)*tmp[i];
1181: }
1182: }
1184: /* backward solve the L^T */
1185: for (i=n-1; i>=0; i--){
1186: v = aa + diag[i] - 1 ;
1187: vi = aj + diag[i] - 1 ;
1188: nz = diag[i] - ai[i];
1189: while (nz--) {
1190: tmp[*vi-- ] -= (*v--)*tmp[i];
1191: }
1192: }
1194: /* copy tmp into x according to permutation */
1195: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1197: ISRestoreIndices(isrow,&rout);
1198: ISRestoreIndices(iscol,&cout);
1199: VecRestoreArray(bb,&b);
1200: VecRestoreArray(xx,&x);
1202: PetscLogFlops(2*a->nz);
1203: return(0);
1204: }
1205: /* ----------------------------------------------------------------*/
1206: EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1207: EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
1211: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1212: {
1213: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1214: IS isicol;
1215: PetscErrorCode ierr;
1216: const PetscInt *r,*ic;
1217: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1218: PetscInt *bi,*cols,nnz,*cols_lvl;
1219: PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1220: PetscInt i,levels,diagonal_fill;
1221: PetscTruth col_identity,row_identity;
1222: PetscReal f;
1223: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1224: PetscBT lnkbt;
1225: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1226: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1227: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1228: PetscTruth missing;
1231: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1232: f = info->fill;
1233: levels = (PetscInt)info->levels;
1234: diagonal_fill = (PetscInt)info->diagonal_fill;
1235: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1237: /* special case that simply copies fill pattern */
1238: ISIdentity(isrow,&row_identity);
1239: ISIdentity(iscol,&col_identity);
1240: if (!levels && row_identity && col_identity) {
1241: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);
1242: fact->factor = MAT_FACTOR_ILU;
1243: (fact)->info.factor_mallocs = 0;
1244: (fact)->info.fill_ratio_given = info->fill;
1245: (fact)->info.fill_ratio_needed = 1.0;
1246: b = (Mat_SeqAIJ*)(fact)->data;
1247: MatMissingDiagonal(A,&missing,&d);
1248: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1249: b->row = isrow;
1250: b->col = iscol;
1251: b->icol = isicol;
1252: PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1253: PetscObjectReference((PetscObject)isrow);
1254: PetscObjectReference((PetscObject)iscol);
1255: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1256: MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);
1257: return(0);
1258: }
1260: ISGetIndices(isrow,&r);
1261: ISGetIndices(isicol,&ic);
1263: /* get new row pointers */
1264: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1265: bi[0] = 0;
1266: /* bdiag is location of diagonal in factor */
1267: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1268: bdiag[0] = 0;
1270: PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
1271: bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1273: /* create a linked list for storing column indices of the active row */
1274: nlnk = n + 1;
1275: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1277: /* initial FreeSpace size is f*(ai[n]+1) */
1278: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1279: current_space = free_space;
1280: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1281: current_space_lvl = free_space_lvl;
1282:
1283: for (i=0; i<n; i++) {
1284: nzi = 0;
1285: /* copy current row into linked list */
1286: nnz = ai[r[i]+1] - ai[r[i]];
1287: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1288: cols = aj + ai[r[i]];
1289: lnk[i] = -1; /* marker to indicate if diagonal exists */
1290: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1291: nzi += nlnk;
1293: /* make sure diagonal entry is included */
1294: if (diagonal_fill && lnk[i] == -1) {
1295: fm = n;
1296: while (lnk[fm] < i) fm = lnk[fm];
1297: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1298: lnk[fm] = i;
1299: lnk_lvl[i] = 0;
1300: nzi++; dcount++;
1301: }
1303: /* add pivot rows into the active row */
1304: nzbd = 0;
1305: prow = lnk[n];
1306: while (prow < i) {
1307: nnz = bdiag[prow];
1308: cols = bj_ptr[prow] + nnz + 1;
1309: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1310: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1311: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1312: nzi += nlnk;
1313: prow = lnk[prow];
1314: nzbd++;
1315: }
1316: bdiag[i] = nzbd;
1317: bi[i+1] = bi[i] + nzi;
1319: /* if free space is not available, make more free space */
1320: if (current_space->local_remaining<nzi) {
1321: nnz = nzi*(n - i); /* estimated and max additional space needed */
1322: PetscFreeSpaceGet(nnz,¤t_space);
1323: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1324: reallocs++;
1325: }
1327: /* copy data into free_space and free_space_lvl, then initialize lnk */
1328: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1329: bj_ptr[i] = current_space->array;
1330: bjlvl_ptr[i] = current_space_lvl->array;
1332: /* make sure the active row i has diagonal entry */
1333: if (*(bj_ptr[i]+bdiag[i]) != i) {
1334: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1335: try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1336: }
1338: current_space->array += nzi;
1339: current_space->local_used += nzi;
1340: current_space->local_remaining -= nzi;
1341: current_space_lvl->array += nzi;
1342: current_space_lvl->local_used += nzi;
1343: current_space_lvl->local_remaining -= nzi;
1344: }
1346: ISRestoreIndices(isrow,&r);
1347: ISRestoreIndices(isicol,&ic);
1349: /* destroy list of free space and other temporary arrays */
1350: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1351: PetscFreeSpaceContiguous(&free_space,bj);
1352: PetscIncompleteLLDestroy(lnk,lnkbt);
1353: PetscFreeSpaceDestroy(free_space_lvl);
1354: PetscFree(bj_ptr);
1356: #if defined(PETSC_USE_INFO)
1357: {
1358: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1359: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1360: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1361: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1362: PetscInfo(A,"for best performance.\n");
1363: if (diagonal_fill) {
1364: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1365: }
1366: }
1367: #endif
1369: /* put together the new matrix */
1370: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1371: PetscLogObjectParent(fact,isicol);
1372: b = (Mat_SeqAIJ*)(fact)->data;
1373: b->free_a = PETSC_TRUE;
1374: b->free_ij = PETSC_TRUE;
1375: b->singlemalloc = PETSC_FALSE;
1376: len = (bi[n] )*sizeof(PetscScalar);
1377: PetscMalloc(len+1,&b->a);
1378: b->j = bj;
1379: b->i = bi;
1380: for (i=0; i<n; i++) bdiag[i] += bi[i];
1381: b->diag = bdiag;
1382: b->ilen = 0;
1383: b->imax = 0;
1384: b->row = isrow;
1385: b->col = iscol;
1386: PetscObjectReference((PetscObject)isrow);
1387: PetscObjectReference((PetscObject)iscol);
1388: b->icol = isicol;
1389: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1390: /* In b structure: Free imax, ilen, old a, old j.
1391: Allocate bdiag, solve_work, new a, new j */
1392: PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1393: b->maxnz = b->nz = bi[n] ;
1394: (fact)->info.factor_mallocs = reallocs;
1395: (fact)->info.fill_ratio_given = f;
1396: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1397: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1398: MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);
1399: return(0);
1400: }
1402: #include ../src/mat/impls/sbaij/seq/sbaij.h
1405: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1406: {
1407: Mat C = B;
1408: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1409: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
1410: IS ip=b->row,iip = b->icol;
1412: const PetscInt *rip,*riip;
1413: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1414: PetscInt *ai=a->i,*aj=a->j;
1415: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1416: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1417: PetscReal zeropivot,rs,shiftnz;
1418: PetscReal shiftpd;
1419: ChShift_Ctx sctx;
1420: PetscInt newshift;
1421: PetscTruth perm_identity;
1425: shiftnz = info->shiftnz;
1426: shiftpd = info->shiftpd;
1427: zeropivot = info->zeropivot;
1429: ISGetIndices(ip,&rip);
1430: ISGetIndices(iip,&riip);
1431:
1432: /* initialization */
1433: nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1434: PetscMalloc(nz,&il);
1435: jl = il + mbs;
1436: rtmp = (MatScalar*)(jl + mbs);
1438: sctx.shift_amount = 0;
1439: sctx.nshift = 0;
1440: do {
1441: sctx.chshift = PETSC_FALSE;
1442: for (i=0; i<mbs; i++) {
1443: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1444: }
1445:
1446: for (k = 0; k<mbs; k++){
1447: bval = ba + bi[k];
1448: /* initialize k-th row by the perm[k]-th row of A */
1449: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1450: for (j = jmin; j < jmax; j++){
1451: col = riip[aj[j]];
1452: if (col >= k){ /* only take upper triangular entry */
1453: rtmp[col] = aa[j];
1454: *bval++ = 0.0; /* for in-place factorization */
1455: }
1456: }
1457: /* shift the diagonal of the matrix */
1458: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1460: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1461: dk = rtmp[k];
1462: i = jl[k]; /* first row to be added to k_th row */
1464: while (i < k){
1465: nexti = jl[i]; /* next row to be added to k_th row */
1467: /* compute multiplier, update diag(k) and U(i,k) */
1468: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1469: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
1470: dk += uikdi*ba[ili];
1471: ba[ili] = uikdi; /* -U(i,k) */
1473: /* add multiple of row i to k-th row */
1474: jmin = ili + 1; jmax = bi[i+1];
1475: if (jmin < jmax){
1476: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1477: /* update il and jl for row i */
1478: il[i] = jmin;
1479: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1480: }
1481: i = nexti;
1482: }
1484: /* shift the diagonals when zero pivot is detected */
1485: /* compute rs=sum of abs(off-diagonal) */
1486: rs = 0.0;
1487: jmin = bi[k]+1;
1488: nz = bi[k+1] - jmin;
1489: bcol = bj + jmin;
1490: while (nz--){
1491: rs += PetscAbsScalar(rtmp[*bcol]);
1492: bcol++;
1493: }
1495: sctx.rs = rs;
1496: sctx.pv = dk;
1497: MatCholeskyCheckShift_inline(info,sctx,k,newshift);
1499: if (newshift == 1) {
1500: if (!sctx.shift_amount) {
1501: sctx.shift_amount = 1e-5;
1502: }
1503: break;
1504: }
1505:
1506: /* copy data into U(k,:) */
1507: ba[bi[k]] = 1.0/dk; /* U(k,k) */
1508: jmin = bi[k]+1; jmax = bi[k+1];
1509: if (jmin < jmax) {
1510: for (j=jmin; j<jmax; j++){
1511: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1512: }
1513: /* add the k-th row into il and jl */
1514: il[k] = jmin;
1515: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1516: }
1517: }
1518: } while (sctx.chshift);
1519: PetscFree(il);
1521: ISRestoreIndices(ip,&rip);
1522: ISRestoreIndices(iip,&riip);
1524: ISIdentity(ip,&perm_identity);
1525: if (perm_identity){
1526: (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1527: (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1528: (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1529: (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1530: } else {
1531: (B)->ops->solve = MatSolve_SeqSBAIJ_1;
1532: (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1533: (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
1534: (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
1535: }
1537: C->assembled = PETSC_TRUE;
1538: C->preallocated = PETSC_TRUE;
1539: PetscLogFlops(C->rmap->n);
1540: if (sctx.nshift){
1541: if (shiftnz) {
1542: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1543: } else if (shiftpd) {
1544: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1545: }
1546: }
1547: return(0);
1548: }
1552: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1553: {
1554: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1555: Mat_SeqSBAIJ *b;
1556: PetscErrorCode ierr;
1557: PetscTruth perm_identity,missing;
1558: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1559: const PetscInt *rip,*riip;
1560: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1561: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1562: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1563: PetscReal fill=info->fill,levels=info->levels;
1564: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1565: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1566: PetscBT lnkbt;
1567: IS iperm;
1568:
1570: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1571: MatMissingDiagonal(A,&missing,&d);
1572: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1573: ISIdentity(perm,&perm_identity);
1574: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1576: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1577: ui[0] = 0;
1579: /* ICC(0) without matrix ordering: simply copies fill pattern */
1580: if (!levels && perm_identity) {
1582: for (i=0; i<am; i++) {
1583: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1584: }
1585: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1586: cols = uj;
1587: for (i=0; i<am; i++) {
1588: aj = a->j + a->diag[i];
1589: ncols = ui[i+1] - ui[i];
1590: for (j=0; j<ncols; j++) *cols++ = *aj++;
1591: }
1592: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1593: ISGetIndices(iperm,&riip);
1594: ISGetIndices(perm,&rip);
1596: /* initialization */
1597: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
1599: /* jl: linked list for storing indices of the pivot rows
1600: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1601: PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1602: il = jl + am;
1603: uj_ptr = (PetscInt**)(il + am);
1604: uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1605: for (i=0; i<am; i++){
1606: jl[i] = am; il[i] = 0;
1607: }
1609: /* create and initialize a linked list for storing column indices of the active row k */
1610: nlnk = am + 1;
1611: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
1613: /* initial FreeSpace size is fill*(ai[am]+1) */
1614: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1615: current_space = free_space;
1616: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1617: current_space_lvl = free_space_lvl;
1619: for (k=0; k<am; k++){ /* for each active row k */
1620: /* initialize lnk by the column indices of row rip[k] of A */
1621: nzk = 0;
1622: ncols = ai[rip[k]+1] - ai[rip[k]];
1623: if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1624: ncols_upper = 0;
1625: for (j=0; j<ncols; j++){
1626: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1627: if (riip[i] >= k){ /* only take upper triangular entry */
1628: ajtmp[ncols_upper] = i;
1629: ncols_upper++;
1630: }
1631: }
1632: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
1633: nzk += nlnk;
1635: /* update lnk by computing fill-in for each pivot row to be merged in */
1636: prow = jl[k]; /* 1st pivot row */
1637:
1638: while (prow < k){
1639: nextprow = jl[prow];
1640:
1641: /* merge prow into k-th row */
1642: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1643: jmax = ui[prow+1];
1644: ncols = jmax-jmin;
1645: i = jmin - ui[prow];
1646: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1647: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
1648: j = *(uj - 1);
1649: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1650: nzk += nlnk;
1652: /* update il and jl for prow */
1653: if (jmin < jmax){
1654: il[prow] = jmin;
1655: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1656: }
1657: prow = nextprow;
1658: }
1660: /* if free space is not available, make more free space */
1661: if (current_space->local_remaining<nzk) {
1662: i = am - k + 1; /* num of unfactored rows */
1663: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1664: PetscFreeSpaceGet(i,¤t_space);
1665: PetscFreeSpaceGet(i,¤t_space_lvl);
1666: reallocs++;
1667: }
1669: /* copy data into free_space and free_space_lvl, then initialize lnk */
1670: if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1671: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1673: /* add the k-th row into il and jl */
1674: if (nzk > 1){
1675: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1676: jl[k] = jl[i]; jl[i] = k;
1677: il[k] = ui[k] + 1;
1678: }
1679: uj_ptr[k] = current_space->array;
1680: uj_lvl_ptr[k] = current_space_lvl->array;
1682: current_space->array += nzk;
1683: current_space->local_used += nzk;
1684: current_space->local_remaining -= nzk;
1686: current_space_lvl->array += nzk;
1687: current_space_lvl->local_used += nzk;
1688: current_space_lvl->local_remaining -= nzk;
1690: ui[k+1] = ui[k] + nzk;
1691: }
1693: #if defined(PETSC_USE_INFO)
1694: if (ai[am] != 0) {
1695: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1696: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1697: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1698: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1699: } else {
1700: PetscInfo(A,"Empty matrix.\n");
1701: }
1702: #endif
1704: ISRestoreIndices(perm,&rip);
1705: ISRestoreIndices(iperm,&riip);
1706: PetscFree(jl);
1707: PetscFree(ajtmp);
1709: /* destroy list of free space and other temporary array(s) */
1710: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1711: PetscFreeSpaceContiguous(&free_space,uj);
1712: PetscIncompleteLLDestroy(lnk,lnkbt);
1713: PetscFreeSpaceDestroy(free_space_lvl);
1715: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1717: /* put together the new matrix in MATSEQSBAIJ format */
1719: b = (Mat_SeqSBAIJ*)(fact)->data;
1720: b->singlemalloc = PETSC_FALSE;
1721: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1722: b->j = uj;
1723: b->i = ui;
1724: b->diag = 0;
1725: b->ilen = 0;
1726: b->imax = 0;
1727: b->row = perm;
1728: b->col = perm;
1729: PetscObjectReference((PetscObject)perm);
1730: PetscObjectReference((PetscObject)perm);
1731: b->icol = iperm;
1732: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1733: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1734: PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1735: b->maxnz = b->nz = ui[am];
1736: b->free_a = PETSC_TRUE;
1737: b->free_ij = PETSC_TRUE;
1738:
1739: (fact)->info.factor_mallocs = reallocs;
1740: (fact)->info.fill_ratio_given = fill;
1741: if (ai[am] != 0) {
1742: (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1743: } else {
1744: (fact)->info.fill_ratio_needed = 0.0;
1745: }
1746: (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1747: return(0);
1748: }
1752: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1753: {
1754: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1755: Mat_SeqSBAIJ *b;
1756: PetscErrorCode ierr;
1757: PetscTruth perm_identity;
1758: PetscReal fill = info->fill;
1759: const PetscInt *rip,*riip;
1760: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1761: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1762: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1763: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1764: PetscBT lnkbt;
1765: IS iperm;
1768: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1769: /* check whether perm is the identity mapping */
1770: ISIdentity(perm,&perm_identity);
1771: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1772: ISGetIndices(iperm,&riip);
1773: ISGetIndices(perm,&rip);
1775: /* initialization */
1776: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1777: ui[0] = 0;
1779: /* jl: linked list for storing indices of the pivot rows
1780: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1781: PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1782: il = jl + am;
1783: cols = il + am;
1784: ui_ptr = (PetscInt**)(cols + am);
1785: for (i=0; i<am; i++){
1786: jl[i] = am; il[i] = 0;
1787: }
1789: /* create and initialize a linked list for storing column indices of the active row k */
1790: nlnk = am + 1;
1791: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
1793: /* initial FreeSpace size is fill*(ai[am]+1) */
1794: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1795: current_space = free_space;
1797: for (k=0; k<am; k++){ /* for each active row k */
1798: /* initialize lnk by the column indices of row rip[k] of A */
1799: nzk = 0;
1800: ncols = ai[rip[k]+1] - ai[rip[k]];
1801: if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1802: ncols_upper = 0;
1803: for (j=0; j<ncols; j++){
1804: i = riip[*(aj + ai[rip[k]] + j)];
1805: if (i >= k){ /* only take upper triangular entry */
1806: cols[ncols_upper] = i;
1807: ncols_upper++;
1808: }
1809: }
1810: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1811: nzk += nlnk;
1813: /* update lnk by computing fill-in for each pivot row to be merged in */
1814: prow = jl[k]; /* 1st pivot row */
1815:
1816: while (prow < k){
1817: nextprow = jl[prow];
1818: /* merge prow into k-th row */
1819: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1820: jmax = ui[prow+1];
1821: ncols = jmax-jmin;
1822: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1823: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1824: nzk += nlnk;
1826: /* update il and jl for prow */
1827: if (jmin < jmax){
1828: il[prow] = jmin;
1829: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1830: }
1831: prow = nextprow;
1832: }
1834: /* if free space is not available, make more free space */
1835: if (current_space->local_remaining<nzk) {
1836: i = am - k + 1; /* num of unfactored rows */
1837: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1838: PetscFreeSpaceGet(i,¤t_space);
1839: reallocs++;
1840: }
1842: /* copy data into free space, then initialize lnk */
1843: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
1845: /* add the k-th row into il and jl */
1846: if (nzk-1 > 0){
1847: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1848: jl[k] = jl[i]; jl[i] = k;
1849: il[k] = ui[k] + 1;
1850: }
1851: ui_ptr[k] = current_space->array;
1852: current_space->array += nzk;
1853: current_space->local_used += nzk;
1854: current_space->local_remaining -= nzk;
1856: ui[k+1] = ui[k] + nzk;
1857: }
1859: #if defined(PETSC_USE_INFO)
1860: if (ai[am] != 0) {
1861: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1862: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1863: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1864: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1865: } else {
1866: PetscInfo(A,"Empty matrix.\n");
1867: }
1868: #endif
1870: ISRestoreIndices(perm,&rip);
1871: ISRestoreIndices(iperm,&riip);
1872: PetscFree(jl);
1874: /* destroy list of free space and other temporary array(s) */
1875: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1876: PetscFreeSpaceContiguous(&free_space,uj);
1877: PetscLLDestroy(lnk,lnkbt);
1879: /* put together the new matrix in MATSEQSBAIJ format */
1881: b = (Mat_SeqSBAIJ*)(fact)->data;
1882: b->singlemalloc = PETSC_FALSE;
1883: b->free_a = PETSC_TRUE;
1884: b->free_ij = PETSC_TRUE;
1885: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1886: b->j = uj;
1887: b->i = ui;
1888: b->diag = 0;
1889: b->ilen = 0;
1890: b->imax = 0;
1891: b->row = perm;
1892: b->col = perm;
1893: PetscObjectReference((PetscObject)perm);
1894: PetscObjectReference((PetscObject)perm);
1895: b->icol = iperm;
1896: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1897: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1898: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1899: b->maxnz = b->nz = ui[am];
1900:
1901: (fact)->info.factor_mallocs = reallocs;
1902: (fact)->info.fill_ratio_given = fill;
1903: if (ai[am] != 0) {
1904: (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1905: } else {
1906: (fact)->info.fill_ratio_needed = 0.0;
1907: }
1908: (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1909: return(0);
1910: }