Actual source code: baijfact.c
1: #define PETSCMAT_DLL
3: /*
4: Factorization code for BAIJ format.
5: */
6: #include ../src/mat/impls/baij/seq/baij.h
7: #include ../src/inline/ilu.h
9: /* ------------------------------------------------------------*/
10: /*
11: Version for when blocks are 2 by 2
12: */
15: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B,Mat A,const MatFactorInfo *info)
16: {
17: Mat C = B;
18: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
19: IS isrow = b->row,isicol = b->icol;
21: const PetscInt *r,*ic;
22: PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j;
23: PetscInt *ajtmpold,*ajtmp,nz,row;
24: PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
25: MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
26: MatScalar p1,p2,p3,p4;
27: MatScalar *ba = b->a,*aa = a->a;
28: PetscReal shift = info->shiftinblocks;
31: ISGetIndices(isrow,&r);
32: ISGetIndices(isicol,&ic);
33: PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);
35: for (i=0; i<n; i++) {
36: nz = bi[i+1] - bi[i];
37: ajtmp = bj + bi[i];
38: for (j=0; j<nz; j++) {
39: x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
40: }
41: /* load in initial (unfactored row) */
42: idx = r[i];
43: nz = ai[idx+1] - ai[idx];
44: ajtmpold = aj + ai[idx];
45: v = aa + 4*ai[idx];
46: for (j=0; j<nz; j++) {
47: x = rtmp+4*ic[ajtmpold[j]];
48: x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
49: v += 4;
50: }
51: row = *ajtmp++;
52: while (row < i) {
53: pc = rtmp + 4*row;
54: p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
55: if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
56: pv = ba + 4*diag_offset[row];
57: pj = bj + diag_offset[row] + 1;
58: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
59: pc[0] = m1 = p1*x1 + p3*x2;
60: pc[1] = m2 = p2*x1 + p4*x2;
61: pc[2] = m3 = p1*x3 + p3*x4;
62: pc[3] = m4 = p2*x3 + p4*x4;
63: nz = bi[row+1] - diag_offset[row] - 1;
64: pv += 4;
65: for (j=0; j<nz; j++) {
66: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
67: x = rtmp + 4*pj[j];
68: x[0] -= m1*x1 + m3*x2;
69: x[1] -= m2*x1 + m4*x2;
70: x[2] -= m1*x3 + m3*x4;
71: x[3] -= m2*x3 + m4*x4;
72: pv += 4;
73: }
74: PetscLogFlops(16*nz+12);
75: }
76: row = *ajtmp++;
77: }
78: /* finished row so stick it into b->a */
79: pv = ba + 4*bi[i];
80: pj = bj + bi[i];
81: nz = bi[i+1] - bi[i];
82: for (j=0; j<nz; j++) {
83: x = rtmp+4*pj[j];
84: pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
85: pv += 4;
86: }
87: /* invert diagonal block */
88: w = ba + 4*diag_offset[i];
89: Kernel_A_gets_inverse_A_2(w,shift);
90: }
92: PetscFree(rtmp);
93: ISRestoreIndices(isicol,&ic);
94: ISRestoreIndices(isrow,&r);
95: C->ops->solve = MatSolve_SeqBAIJ_2;
96: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2;
97: C->assembled = PETSC_TRUE;
98: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
99: return(0);
100: }
101: /*
102: Version for when blocks are 2 by 2 Using natural ordering
103: */
106: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
107: {
108: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
110: PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j;
111: PetscInt *ajtmpold,*ajtmp,nz,row;
112: PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
113: MatScalar *pv,*v,*rtmp,*pc,*w,*x;
114: MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
115: MatScalar *ba = b->a,*aa = a->a;
116: PetscReal shift = info->shiftinblocks;
119: PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);
121: for (i=0; i<n; i++) {
122: nz = bi[i+1] - bi[i];
123: ajtmp = bj + bi[i];
124: for (j=0; j<nz; j++) {
125: x = rtmp+4*ajtmp[j];
126: x[0] = x[1] = x[2] = x[3] = 0.0;
127: }
128: /* load in initial (unfactored row) */
129: nz = ai[i+1] - ai[i];
130: ajtmpold = aj + ai[i];
131: v = aa + 4*ai[i];
132: for (j=0; j<nz; j++) {
133: x = rtmp+4*ajtmpold[j];
134: x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
135: v += 4;
136: }
137: row = *ajtmp++;
138: while (row < i) {
139: pc = rtmp + 4*row;
140: p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
141: if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
142: pv = ba + 4*diag_offset[row];
143: pj = bj + diag_offset[row] + 1;
144: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
145: pc[0] = m1 = p1*x1 + p3*x2;
146: pc[1] = m2 = p2*x1 + p4*x2;
147: pc[2] = m3 = p1*x3 + p3*x4;
148: pc[3] = m4 = p2*x3 + p4*x4;
149: nz = bi[row+1] - diag_offset[row] - 1;
150: pv += 4;
151: for (j=0; j<nz; j++) {
152: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
153: x = rtmp + 4*pj[j];
154: x[0] -= m1*x1 + m3*x2;
155: x[1] -= m2*x1 + m4*x2;
156: x[2] -= m1*x3 + m3*x4;
157: x[3] -= m2*x3 + m4*x4;
158: pv += 4;
159: }
160: PetscLogFlops(16*nz+12);
161: }
162: row = *ajtmp++;
163: }
164: /* finished row so stick it into b->a */
165: pv = ba + 4*bi[i];
166: pj = bj + bi[i];
167: nz = bi[i+1] - bi[i];
168: for (j=0; j<nz; j++) {
169: x = rtmp+4*pj[j];
170: pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
171: pv += 4;
172: }
173: /* invert diagonal block */
174: w = ba + 4*diag_offset[i];
175: Kernel_A_gets_inverse_A_2(w,shift);
176: }
178: PetscFree(rtmp);
179: C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering;
180: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
181: C->assembled = PETSC_TRUE;
182: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
183: return(0);
184: }
186: /* ----------------------------------------------------------- */
187: /*
188: Version for when blocks are 1 by 1.
189: */
192: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat C,Mat A,const MatFactorInfo *info)
193: {
194: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
195: IS isrow = b->row,isicol = b->icol;
197: const PetscInt *r,*ic;
198: PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j;
199: PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
200: PetscInt *diag_offset = b->diag,diag,*pj;
201: MatScalar *pv,*v,*rtmp,multiplier,*pc;
202: MatScalar *ba = b->a,*aa = a->a;
203: PetscTruth row_identity, col_identity;
206: ISGetIndices(isrow,&r);
207: ISGetIndices(isicol,&ic);
208: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
210: for (i=0; i<n; i++) {
211: nz = bi[i+1] - bi[i];
212: ajtmp = bj + bi[i];
213: for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
215: /* load in initial (unfactored row) */
216: nz = ai[r[i]+1] - ai[r[i]];
217: ajtmpold = aj + ai[r[i]];
218: v = aa + ai[r[i]];
219: for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j];
221: row = *ajtmp++;
222: while (row < i) {
223: pc = rtmp + row;
224: if (*pc != 0.0) {
225: pv = ba + diag_offset[row];
226: pj = bj + diag_offset[row] + 1;
227: multiplier = *pc * *pv++;
228: *pc = multiplier;
229: nz = bi[row+1] - diag_offset[row] - 1;
230: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
231: PetscLogFlops(1+2*nz);
232: }
233: row = *ajtmp++;
234: }
235: /* finished row so stick it into b->a */
236: pv = ba + bi[i];
237: pj = bj + bi[i];
238: nz = bi[i+1] - bi[i];
239: for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
240: diag = diag_offset[i] - bi[i];
241: /* check pivot entry for current row */
242: if (pv[diag] == 0.0) {
243: SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
244: }
245: pv[diag] = 1.0/pv[diag];
246: }
248: PetscFree(rtmp);
249: ISRestoreIndices(isicol,&ic);
250: ISRestoreIndices(isrow,&r);
251: ISIdentity(isrow,&row_identity);
252: ISIdentity(isicol,&col_identity);
253: if (row_identity && col_identity) {
254: C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering;
255: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
256: } else {
257: C->ops->solve = MatSolve_SeqBAIJ_1;
258: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1;
259: }
260: C->assembled = PETSC_TRUE;
261: PetscLogFlops(C->cmap->n);
262: return(0);
263: }
268: PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A,MatFactorType ftype,Mat *B)
269: {
270: PetscInt n = A->rmap->n;
271: PetscErrorCode ierr;
274: MatCreate(((PetscObject)A)->comm,B);
275: MatSetSizes(*B,n,n,n,n);
276: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
277: MatSetType(*B,MATSEQBAIJ);
278: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqBAIJ;
279: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ;
280: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
281: MatSetType(*B,MATSEQSBAIJ);
282: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
283: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqBAIJ;
284: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ;
285: } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
286: (*B)->factor = ftype;
287: return(0);
288: }
294: PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
295: {
297: *flg = PETSC_TRUE;
298: return(0);
299: }
302: /* ----------------------------------------------------------- */
305: PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
306: {
308: Mat C;
311: MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);
312: MatLUFactorSymbolic(C,A,row,col,info);
313: MatLUFactorNumeric(C,A,info);
314: A->ops->solve = C->ops->solve;
315: A->ops->solvetranspose = C->ops->solvetranspose;
316: MatHeaderCopy(A,C);
317: PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
318: return(0);
319: }
321: #include ../src/mat/impls/sbaij/seq/sbaij.h
324: PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C,Mat A,const MatFactorInfo *info)
325: {
327: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
328: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
329: IS ip=b->row;
330: const PetscInt *rip;
331: PetscInt i,j,mbs=a->mbs,bs=A->rmap->bs,*bi=b->i,*bj=b->j,*bcol;
332: PetscInt *ai=a->i,*aj=a->j;
333: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
334: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
335: PetscReal zeropivot,rs,shiftnz;
336: PetscReal shiftpd;
337: ChShift_Ctx sctx;
338: PetscInt newshift;
341: if (bs > 1) {
342: if (!a->sbaijMat){
343: MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);
344: }
345: (a->sbaijMat)->ops->choleskyfactornumeric(C,a->sbaijMat,info);
346: MatDestroy(a->sbaijMat);
347: a->sbaijMat = PETSC_NULL;
348: return(0);
349: }
350:
351: /* initialization */
352: shiftnz = info->shiftnz;
353: shiftpd = info->shiftpd;
354: zeropivot = info->zeropivot;
356: ISGetIndices(ip,&rip);
357: nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
358: PetscMalloc(nz,&il);
359: jl = il + mbs;
360: rtmp = (MatScalar*)(jl + mbs);
362: sctx.shift_amount = 0;
363: sctx.nshift = 0;
364: do {
365: sctx.chshift = PETSC_FALSE;
366: for (i=0; i<mbs; i++) {
367: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
368: }
369:
370: for (k = 0; k<mbs; k++){
371: bval = ba + bi[k];
372: /* initialize k-th row by the perm[k]-th row of A */
373: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
374: for (j = jmin; j < jmax; j++){
375: col = rip[aj[j]];
376: if (col >= k){ /* only take upper triangular entry */
377: rtmp[col] = aa[j];
378: *bval++ = 0.0; /* for in-place factorization */
379: }
380: }
381:
382: /* shift the diagonal of the matrix */
383: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
385: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
386: dk = rtmp[k];
387: i = jl[k]; /* first row to be added to k_th row */
389: while (i < k){
390: nexti = jl[i]; /* next row to be added to k_th row */
392: /* compute multiplier, update diag(k) and U(i,k) */
393: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
394: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
395: dk += uikdi*ba[ili];
396: ba[ili] = uikdi; /* -U(i,k) */
398: /* add multiple of row i to k-th row */
399: jmin = ili + 1; jmax = bi[i+1];
400: if (jmin < jmax){
401: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
402: /* update il and jl for row i */
403: il[i] = jmin;
404: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
405: }
406: i = nexti;
407: }
409: /* shift the diagonals when zero pivot is detected */
410: /* compute rs=sum of abs(off-diagonal) */
411: rs = 0.0;
412: jmin = bi[k]+1;
413: nz = bi[k+1] - jmin;
414: if (nz){
415: bcol = bj + jmin;
416: while (nz--){
417: rs += PetscAbsScalar(rtmp[*bcol]);
418: bcol++;
419: }
420: }
422: sctx.rs = rs;
423: sctx.pv = dk;
424: MatCholeskyCheckShift_inline(info,sctx,k,newshift);
425: if (newshift == 1) break;
427: /* copy data into U(k,:) */
428: ba[bi[k]] = 1.0/dk; /* U(k,k) */
429: jmin = bi[k]+1; jmax = bi[k+1];
430: if (jmin < jmax) {
431: for (j=jmin; j<jmax; j++){
432: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
433: }
434: /* add the k-th row into il and jl */
435: il[k] = jmin;
436: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
437: }
438: }
439: } while (sctx.chshift);
440: PetscFree(il);
442: ISRestoreIndices(ip,&rip);
443: C->assembled = PETSC_TRUE;
444: C->preallocated = PETSC_TRUE;
445: PetscLogFlops(C->rmap->N);
446: if (sctx.nshift){
447: if (shiftpd) {
448: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
449: } else if (shiftnz) {
450: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
451: }
452: }
453: return(0);
454: }
458: PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
459: {
460: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
461: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
463: PetscInt i,j,am=a->mbs;
464: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
465: PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
466: MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
467: PetscReal zeropivot,rs,shiftnz;
468: PetscReal shiftpd;
469: ChShift_Ctx sctx;
470: PetscInt newshift;
473: /* initialization */
474: shiftnz = info->shiftnz;
475: shiftpd = info->shiftpd;
476: zeropivot = info->zeropivot;
478: nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
479: PetscMalloc(nz,&il);
480: jl = il + am;
481: rtmp = (MatScalar*)(jl + am);
483: sctx.shift_amount = 0;
484: sctx.nshift = 0;
485: do {
486: sctx.chshift = PETSC_FALSE;
487: for (i=0; i<am; i++) {
488: rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
489: }
491: for (k = 0; k<am; k++){
492: /* initialize k-th row with elements nonzero in row perm(k) of A */
493: nz = ai[k+1] - ai[k];
494: acol = aj + ai[k];
495: aval = aa + ai[k];
496: bval = ba + bi[k];
497: while (nz -- ){
498: if (*acol < k) { /* skip lower triangular entries */
499: acol++; aval++;
500: } else {
501: rtmp[*acol++] = *aval++;
502: *bval++ = 0.0; /* for in-place factorization */
503: }
504: }
505:
506: /* shift the diagonal of the matrix */
507: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
508:
509: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
510: dk = rtmp[k];
511: i = jl[k]; /* first row to be added to k_th row */
513: while (i < k){
514: nexti = jl[i]; /* next row to be added to k_th row */
515: /* compute multiplier, update D(k) and U(i,k) */
516: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
517: uikdi = - ba[ili]*ba[bi[i]];
518: dk += uikdi*ba[ili];
519: ba[ili] = uikdi; /* -U(i,k) */
521: /* add multiple of row i to k-th row ... */
522: jmin = ili + 1;
523: nz = bi[i+1] - jmin;
524: if (nz > 0){
525: bcol = bj + jmin;
526: bval = ba + jmin;
527: while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
528: /* update il and jl for i-th row */
529: il[i] = jmin;
530: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
531: }
532: i = nexti;
533: }
535: /* shift the diagonals when zero pivot is detected */
536: /* compute rs=sum of abs(off-diagonal) */
537: rs = 0.0;
538: jmin = bi[k]+1;
539: nz = bi[k+1] - jmin;
540: if (nz){
541: bcol = bj + jmin;
542: while (nz--){
543: rs += PetscAbsScalar(rtmp[*bcol]);
544: bcol++;
545: }
546: }
548: sctx.rs = rs;
549: sctx.pv = dk;
550: MatCholeskyCheckShift_inline(info,sctx,k,newshift);
551: if (newshift == 1) break; /* sctx.shift_amount is updated */
553: /* copy data into U(k,:) */
554: ba[bi[k]] = 1.0/dk;
555: jmin = bi[k]+1;
556: nz = bi[k+1] - jmin;
557: if (nz){
558: bcol = bj + jmin;
559: bval = ba + jmin;
560: while (nz--){
561: *bval++ = rtmp[*bcol];
562: rtmp[*bcol++] = 0.0;
563: }
564: /* add k-th row into il and jl */
565: il[k] = jmin;
566: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
567: }
568: }
569: } while (sctx.chshift);
570: PetscFree(il);
571:
572: C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
573: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
574: C->assembled = PETSC_TRUE;
575: C->preallocated = PETSC_TRUE;
576: PetscLogFlops(C->rmap->N);
577: if (sctx.nshift){
578: if (shiftnz) {
579: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
580: } else if (shiftpd) {
581: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
582: }
583: }
584: return(0);
585: }
587: #include petscbt.h
588: #include ../src/mat/utils/freespace.h
591: PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
592: {
593: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
594: Mat_SeqSBAIJ *b;
595: Mat B;
596: PetscErrorCode ierr;
597: PetscTruth perm_identity;
598: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->rmap->bs,*ui;
599: const PetscInt *rip;
600: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
601: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
602: PetscReal fill=info->fill,levels=info->levels;
603: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
604: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
605: PetscBT lnkbt;
608: if (bs > 1){
609: if (!a->sbaijMat){
610: MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);
611: }
612: (fact)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */
613: MatICCFactorSymbolic(fact,a->sbaijMat,perm,info);
614: return(0);
615: }
617: ISIdentity(perm,&perm_identity);
618: ISGetIndices(perm,&rip);
620: /* special case that simply copies fill pattern */
621: if (!levels && perm_identity) {
622: MatMarkDiagonal_SeqBAIJ(A);
623: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
624: for (i=0; i<am; i++) {
625: ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
626: }
627: B = fact;
628: MatSeqSBAIJSetPreallocation(B,1,0,ui);
631: b = (Mat_SeqSBAIJ*)B->data;
632: uj = b->j;
633: for (i=0; i<am; i++) {
634: aj = a->j + a->diag[i];
635: for (j=0; j<ui[i]; j++){
636: *uj++ = *aj++;
637: }
638: b->ilen[i] = ui[i];
639: }
640: PetscFree(ui);
641: B->factor = MAT_FACTOR_NONE;
642: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
643: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
644: B->factor = MAT_FACTOR_ICC;
646: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
647: return(0);
648: }
650: /* initialization */
651: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
652: ui[0] = 0;
653: PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);
655: /* jl: linked list for storing indices of the pivot rows
656: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
657: PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);
658: il = jl + am;
659: uj_ptr = (PetscInt**)(il + am);
660: uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
661: for (i=0; i<am; i++){
662: jl[i] = am; il[i] = 0;
663: }
665: /* create and initialize a linked list for storing column indices of the active row k */
666: nlnk = am + 1;
667: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
669: /* initial FreeSpace size is fill*(ai[am]+1) */
670: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
671: current_space = free_space;
672: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
673: current_space_lvl = free_space_lvl;
675: for (k=0; k<am; k++){ /* for each active row k */
676: /* initialize lnk by the column indices of row rip[k] of A */
677: nzk = 0;
678: ncols = ai[rip[k]+1] - ai[rip[k]];
679: ncols_upper = 0;
680: cols = cols_lvl + am;
681: for (j=0; j<ncols; j++){
682: i = rip[*(aj + ai[rip[k]] + j)];
683: if (i >= k){ /* only take upper triangular entry */
684: cols[ncols_upper] = i;
685: cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */
686: ncols_upper++;
687: }
688: }
689: PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);
690: nzk += nlnk;
692: /* update lnk by computing fill-in for each pivot row to be merged in */
693: prow = jl[k]; /* 1st pivot row */
694:
695: while (prow < k){
696: nextprow = jl[prow];
697:
698: /* merge prow into k-th row */
699: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
700: jmax = ui[prow+1];
701: ncols = jmax-jmin;
702: i = jmin - ui[prow];
703: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
704: for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
705: PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);
706: nzk += nlnk;
708: /* update il and jl for prow */
709: if (jmin < jmax){
710: il[prow] = jmin;
711: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
712: }
713: prow = nextprow;
714: }
716: /* if free space is not available, make more free space */
717: if (current_space->local_remaining<nzk) {
718: i = am - k + 1; /* num of unfactored rows */
719: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
720: PetscFreeSpaceGet(i,¤t_space);
721: PetscFreeSpaceGet(i,¤t_space_lvl);
722: reallocs++;
723: }
725: /* copy data into free_space and free_space_lvl, then initialize lnk */
726: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
728: /* add the k-th row into il and jl */
729: if (nzk-1 > 0){
730: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
731: jl[k] = jl[i]; jl[i] = k;
732: il[k] = ui[k] + 1;
733: }
734: uj_ptr[k] = current_space->array;
735: uj_lvl_ptr[k] = current_space_lvl->array;
737: current_space->array += nzk;
738: current_space->local_used += nzk;
739: current_space->local_remaining -= nzk;
741: current_space_lvl->array += nzk;
742: current_space_lvl->local_used += nzk;
743: current_space_lvl->local_remaining -= nzk;
745: ui[k+1] = ui[k] + nzk;
746: }
748: #if defined(PETSC_USE_INFO)
749: if (ai[am] != 0) {
750: PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]);
751: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
752: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
753: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
754: } else {
755: PetscInfo(A,"Empty matrix.\n");
756: }
757: #endif
759: ISRestoreIndices(perm,&rip);
760: PetscFree(jl);
761: PetscFree(cols_lvl);
763: /* destroy list of free space and other temporary array(s) */
764: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
765: PetscFreeSpaceContiguous(&free_space,uj);
766: PetscIncompleteLLDestroy(lnk,lnkbt);
767: PetscFreeSpaceDestroy(free_space_lvl);
769: /* put together the new matrix in MATSEQSBAIJ format */
770: B = fact;
771: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
773: b = (Mat_SeqSBAIJ*)B->data;
774: b->singlemalloc = PETSC_FALSE;
775: b->free_a = PETSC_TRUE;
776: b->free_ij = PETSC_TRUE;
777: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
778: b->j = uj;
779: b->i = ui;
780: b->diag = 0;
781: b->ilen = 0;
782: b->imax = 0;
783: b->row = perm;
784: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
785: PetscObjectReference((PetscObject)perm);
786: b->icol = perm;
787: PetscObjectReference((PetscObject)perm);
788: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
789: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
790: b->maxnz = b->nz = ui[am];
791:
792: B->info.factor_mallocs = reallocs;
793: B->info.fill_ratio_given = fill;
794: if (ai[am] != 0) {
795: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
796: } else {
797: B->info.fill_ratio_needed = 0.0;
798: }
799: if (perm_identity){
800: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
801: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
802: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
803: } else {
804: (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
805: }
806: return(0);
807: }
811: PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
812: {
813: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
814: Mat_SeqSBAIJ *b;
815: Mat B;
816: PetscErrorCode ierr;
817: PetscTruth perm_identity;
818: PetscReal fill = info->fill;
819: const PetscInt *rip;
820: PetscInt i,mbs=a->mbs,bs=A->rmap->bs,*ai=a->i,*aj=a->j,reallocs=0,prow;
821: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
822: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
823: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
824: PetscBT lnkbt;
827: if (bs > 1) { /* convert to seqsbaij */
828: if (!a->sbaijMat){
829: MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);
830: }
831: (fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */
832: MatCholeskyFactorSymbolic(fact,a->sbaijMat,perm,info);
833: return(0);
834: }
836: /* check whether perm is the identity mapping */
837: ISIdentity(perm,&perm_identity);
838: if (!perm_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
839: ISGetIndices(perm,&rip);
841: /* initialization */
842: PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);
843: ui[0] = 0;
845: /* jl: linked list for storing indices of the pivot rows
846: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
847: PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);
848: il = jl + mbs;
849: cols = il + mbs;
850: ui_ptr = (PetscInt**)(cols + mbs);
851: for (i=0; i<mbs; i++){
852: jl[i] = mbs; il[i] = 0;
853: }
855: /* create and initialize a linked list for storing column indices of the active row k */
856: nlnk = mbs + 1;
857: PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);
859: /* initial FreeSpace size is fill*(ai[mbs]+1) */
860: PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);
861: current_space = free_space;
863: for (k=0; k<mbs; k++){ /* for each active row k */
864: /* initialize lnk by the column indices of row rip[k] of A */
865: nzk = 0;
866: ncols = ai[rip[k]+1] - ai[rip[k]];
867: ncols_upper = 0;
868: for (j=0; j<ncols; j++){
869: i = rip[*(aj + ai[rip[k]] + j)];
870: if (i >= k){ /* only take upper triangular entry */
871: cols[ncols_upper] = i;
872: ncols_upper++;
873: }
874: }
875: PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);
876: nzk += nlnk;
878: /* update lnk by computing fill-in for each pivot row to be merged in */
879: prow = jl[k]; /* 1st pivot row */
880:
881: while (prow < k){
882: nextprow = jl[prow];
883: /* merge prow into k-th row */
884: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
885: jmax = ui[prow+1];
886: ncols = jmax-jmin;
887: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
888: PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);
889: nzk += nlnk;
891: /* update il and jl for prow */
892: if (jmin < jmax){
893: il[prow] = jmin;
894: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
895: }
896: prow = nextprow;
897: }
899: /* if free space is not available, make more free space */
900: if (current_space->local_remaining<nzk) {
901: i = mbs - k + 1; /* num of unfactored rows */
902: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
903: PetscFreeSpaceGet(i,¤t_space);
904: reallocs++;
905: }
907: /* copy data into free space, then initialize lnk */
908: PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);
910: /* add the k-th row into il and jl */
911: if (nzk-1 > 0){
912: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
913: jl[k] = jl[i]; jl[i] = k;
914: il[k] = ui[k] + 1;
915: }
916: ui_ptr[k] = current_space->array;
917: current_space->array += nzk;
918: current_space->local_used += nzk;
919: current_space->local_remaining -= nzk;
921: ui[k+1] = ui[k] + nzk;
922: }
924: #if defined(PETSC_USE_INFO)
925: if (ai[mbs] != 0) {
926: PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
927: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
928: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
929: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
930: } else {
931: PetscInfo(A,"Empty matrix.\n");
932: }
933: #endif
935: ISRestoreIndices(perm,&rip);
936: PetscFree(jl);
938: /* destroy list of free space and other temporary array(s) */
939: PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);
940: PetscFreeSpaceContiguous(&free_space,uj);
941: PetscLLDestroy(lnk,lnkbt);
943: /* put together the new matrix in MATSEQSBAIJ format */
944: B = fact;
945: MatSeqSBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
947: b = (Mat_SeqSBAIJ*)B->data;
948: b->singlemalloc = PETSC_FALSE;
949: b->free_a = PETSC_TRUE;
950: b->free_ij = PETSC_TRUE;
951: PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);
952: b->j = uj;
953: b->i = ui;
954: b->diag = 0;
955: b->ilen = 0;
956: b->imax = 0;
957: b->row = perm;
958: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
959: PetscObjectReference((PetscObject)perm);
960: b->icol = perm;
961: PetscObjectReference((PetscObject)perm);
962: PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);
963: PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
964: b->maxnz = b->nz = ui[mbs];
965:
966: B->info.factor_mallocs = reallocs;
967: B->info.fill_ratio_given = fill;
968: if (ai[mbs] != 0) {
969: B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
970: } else {
971: B->info.fill_ratio_needed = 0.0;
972: }
973: if (perm_identity){
974: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
975: } else {
976: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
977: }
978: return(0);
979: }