Actual source code: superlu_dist.c
#define PETSCMAT_DLL

/*
        Provides an interface to the SuperLU_DIST_2.2 sparse solver
*/

#include "../src/mat/impls/aij/seq/aij.h"
#include "../src/mat/impls/aij/mpi/mpiaij.h"
#if defined(PETSC_HAVE_STDLIB_H) /* This is to get around a weird problem with SuperLU on cray */
#include "stdlib.h"
#endif

#if defined(PETSC_USE_64BIT_INDICES)
/* ugly SuperLU_Dist variable telling it to use long long int */
#define _LONGINT
#endif

#if defined(PETSC_USE_COMPLEX)
#include "superlu_zdefs.h"
#else
#include "superlu_ddefs.h"
#endif

typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
const char *SuperLU_MatInputModes[] = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};
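
/* How the PETSc matrix is handed to SuperLU_DIST:
     GLOBAL      - the entire matrix is gathered onto every process and passed to the
                   p[dz]gssvx_ABglobal() drivers;
     DISTRIBUTED - each process passes only its own rows (SLU_NR_loc format) to the
                   p[dz]gssvx() drivers, which scales to much larger problems. */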

typedef struct {
  int_t                nprow,npcol,*row,*col;
  gridinfo_t           grid;
  superlu_options_t    options;
  SuperMatrix          A_sup;
  ScalePermstruct_t    ScalePermstruct;
  LUstruct_t           LUstruct;
  int                  StatPrint;
  SuperLU_MatInputMode MatInputMode;
  SOLVEstruct_t        SOLVEstruct;
  fact_t               FactPattern;
  MPI_Comm             comm_superlu;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex        *val;
#else
  double               *val;
#endif
  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth           CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;

PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;

  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      if (lu->options.SolveInitialized) {
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    MPI_Comm_free(&(lu->comm_superlu));
  }
  MatDestroy_MPIAIJ(A);
  return(0);
}

PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->N, N=A->cmap->N;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  PetscInt         nrhs=1;
  Vec              x_seq;
  IS               iden;
  VecScatter       scat;
  int              info;  /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */

  MPI_Comm_size(((PetscObject)A)->comm,&size);
  if (size > 1) {
    if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
      VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
      ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
      VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
      ISDestroy(iden);

      VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
      VecGetArray(x_seq,&bptr);
    } else { /* distributed mat input */
      VecCopy(b_mpi,x);
      VecGetArray(x,&bptr);
    }
  } else { /* size == 1 */
    VecCopy(b_mpi,x);
    VecGetArray(x,&bptr);
  }
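
  /* At this point bptr addresses the dense right-hand side in the layout the chosen
     driver expects: the full vector of length N (replicated) in GLOBAL mode, or just
     the local part in DISTRIBUTED mode.  SuperLU_DIST overwrites it in place with
     the solution. */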

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");

  PStatInit(&stat);  /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) {
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
                     &lu->grid, &lu->LUstruct, berr, &stat, &info);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
                     &lu->grid, &lu->LUstruct, berr, &stat, &info);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
    if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
    if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
#endif
  }
  if (lu->options.PrintStat) {
    PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  }
  PStatFree(&stat);

  if (size > 1) {
    if (lu->MatInputMode == GLOBAL) { /* convert seq x to mpi x */
      VecRestoreArray(x_seq,&bptr);
      VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
      VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
      VecScatterDestroy(scat);
      VecDestroy(x_seq);
    } else {
      VecRestoreArray(x,&bptr);
    }
  } else {
    VecRestoreArray(x,&bptr);
  }
  return(0);
}

PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat              *tseq,A_seq = PETSC_NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  int              sinfo;  /* SuperLU_Dist info flag is always an int even with long long indices */
  PetscMPIInt      size,rank;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  PetscLogDouble   time0,time,time_min,time_max;
  Mat              F_diag=PETSC_NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv;
#else
  double           *av, *bv;
#endif

  MPI_Comm_size(((PetscObject)A)->comm,&size);
  MPI_Comm_rank(((PetscObject)A)->comm,&rank);

  if (lu->options.PrintStat) { /* collect time for mat conversion */
    MPI_Barrier(((PetscObject)A)->comm);
    PetscGetTime(&time0);
  }

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
      MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
      ISDestroy(isrow);

      A_seq = *tseq;
      PetscFree(tseq);
      aa = (Mat_SeqAIJ*)A_seq->data;
    } else {
      aa = (Mat_SeqAIJ*)A->data;
    }

    /* Convert the PETSc compressed-row (NR) matrix to SuperLU_DIST compressed-column (NC) format.
       Note: the memory for lu->val, lu->col and lu->row is allocated by CompRow_to_CompCol_dist()! */
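
    /* On repeated factorizations, SamePattern reuses the sparsity pattern and the
       column permutation of the previous factorization, while SamePattern_SameRowPerm
       additionally reuses the row permutation and scalings - cheaper, but intended
       for matrices whose numerical values stay close to the previous ones. */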
    if (lu->options.Fact != DOFACT) { /* successive numeric factorization, sparsity pattern is reused */
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        /* Destroy_LU(N, &lu->grid, &lu->LUstruct); crashes here; omitting it does not appear to leak memory */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        Destroy_LU(N, &lu->grid, &lu->LUstruct);
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
#else
    dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
#else
    dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa = (Mat_SeqAIJ*)(mat->A)->data;
    bb = (Mat_SeqAIJ*)(mat->B)->data;
    ai = aa->i; aj = aa->j;
    bi = bb->i; bj = bb->j;
#if defined(PETSC_USE_COMPLEX)
    av = (doublecomplex*)aa->a;
    bv = (doublecomplex*)bb->a;
#else
    av = aa->a;
    bv = bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;

    if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#else
      dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused */
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        /* Destroy_LU(N, &lu->grid, &lu->LUstruct); crashes here; omitting it does not appear to leak memory */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }
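
    /* Merge the diagonal block (A, local columns) and the off-diagonal block (B,
       global columns via garray) of the MPIAIJ matrix into one local compressed-row
       structure with global column indices increasing within each row: first the B
       entries whose global column precedes the diagonal block, then the A entries,
       then the remaining B entries.  For example (hypothetical), with rstart=4, A
       columns {4,6} and B global columns {1,9}, the row is emitted as 1,4,6,9. */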
    nz = 0; irow = rstart;
    for (i=0; i<m; i++) {
      lu->row[i] = nz;
      countA = ai[i+1] - ai[i];
      countB = bi[i+1] - bi[i];
      ajj = aj + ai[i];  /* ptr to the beginning of this row */
      bjj = bj + bi[i];

      /* B part, smaller col index */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB = 0;
      for (j=0; j<countB; j++) {
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz]   = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++) {
        lu->col[nz]   = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++) {
        lu->col[nz]   = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
                                   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
#else
    dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
                                   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
#endif
  }
  if (lu->options.PrintStat) {
    PetscGetTime(&time);
    time0 = time - time0;
  }

  /* Factor the matrix. */
  PStatInit(&stat);  /* Initialize the statistics variables. */
  CHKMEMQ;
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                     &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                     &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  if (lu->MatInputMode == GLOBAL && size > 1) {
    MatDestroy(A_seq);
  }

  if (lu->options.PrintStat) {
    if (size > 1) {
      MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
      MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
      MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
      time = time/size; /* average time */
      if (!rank) {
        PetscPrintf(PETSC_COMM_SELF, "  Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n    %g / %g / %g\n",time_max,time_min,time);
      }
    } else {
      PetscPrintf(PETSC_COMM_SELF, "  Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);
    }
    PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1) {
    F_diag = ((Mat_MPIAIJ*)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (F)->assembled    = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;
  lu->options.Fact  = FACTORED; /* The factored form of A is supplied. Local option used by this function only. */
  return(0);
}

/* Note: the PETSc r and c permutations are ignored */
PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscInt         M=A->rmap->N,N=A->cmap->N;

  /* Initialize the SuperLU process grid. */
  superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

  /* Initialize ScalePermstruct and LUstruct. */
  ScalePermstructInit(M, N, &lu->ScalePermstruct);
  LUstructInit(M, N, &lu->LUstruct);
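
  /* Note that no symbolic analysis actually happens here: SuperLU_DIST performs the
     ordering and symbolic factorization inside p[dz]gssvx() during the first numeric
     factorization (while options.Fact == DOFACT), so this routine only creates the
     process grid and the ScalePermstruct/LUstruct containers. */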

  (F)->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  (F)->ops->solve           = MatSolve_SuperLU_DIST;
  return(0);
}

PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
{
  *type = MAT_SOLVER_SUPERLU_DIST;
  return(0);
}

PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  Mat               B;
  Mat_SuperLU_DIST  *lu;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
  PetscMPIInt       size;
  superlu_options_t options;
  PetscTruth        flg;
  const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
  const char        *prtype[] = {"LargeDiag","NATURAL"};
  const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

  /* Create the factorization matrix */
  MatCreate(((PetscObject)A)->comm,&B);
  MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);
  MatSetType(B,((PetscObject)A)->type_name);
  MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
  MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

  B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  B->ops->view             = MatView_SuperLU_DIST;
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);
  B->factor = MAT_FACTOR_LU;

  PetscNewLog(B,Mat_SuperLU_DIST,&lu);
  B->spptr = lu;

  /* Set the default input options:
       options.Fact              = DOFACT;
       options.Equil             = YES;
       options.ParSymbFact       = NO;
       options.ColPerm           = MMD_AT_PLUS_A;
       options.RowPerm           = LargeDiag;
       options.ReplaceTinyPivot  = YES;
       options.IterRefine        = DOUBLE;
       options.Trans             = NOTRANS;
       options.SolveInitialized  = NO;
       options.RefineInitialized = NO;
       options.PrintStat         = YES;
  */
  set_default_options_dist(&options);

  MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));
  MPI_Comm_size(((PetscObject)A)->comm,&size);
  /* Default number of process columns and rows */
  lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
  if (!lu->npcol) lu->npcol = 1;
  while (lu->npcol > 0) {
    lu->nprow = PetscMPIIntCast(size/lu->npcol);
    if (size == lu->nprow * lu->npcol) break;
    lu->npcol--;
  }
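
  /* The loop above picks the most nearly square grid that exactly tiles the
     communicator: npcol starts near sqrt(size) and decreases until it divides size,
     so nprow*npcol == size always holds (falling back to a size x 1 grid).
     E.g. size=6 gives a 3 x 2 grid; size=7 (prime) gives 7 x 1. */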

  PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
  PetscOptionsInt("-mat_superlu_dist_r","Number of rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
  PetscOptionsInt("-mat_superlu_dist_c","Number of columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
  if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %d * npcol %d",size,lu->nprow,lu->npcol);

  lu->MatInputMode = DISTRIBUTED;
  PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);
  if (lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

  PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
  if (!flg) {
    options.Equil = NO;
  }

  PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);
  if (flg) {
    switch (indx) {
    case 0:
      options.RowPerm = LargeDiag;
      break;
    case 1:
      options.RowPerm = NOROWPERM;
      break;
    }
  }

  PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);
  if (flg) {
    switch (indx) {
    case 0:
      options.ColPerm = MMD_AT_PLUS_A;
      break;
    case 1:
      options.ColPerm = NATURAL;
      break;
    case 2:
      options.ColPerm = MMD_ATA;
      break;
    case 3:
      options.ColPerm = PARMETIS;
      break;
    }
  }

  PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
  if (!flg) {
    options.ReplaceTinyPivot = NO;
  }

  options.ParSymbFact = NO;
  PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);
  if (flg) {
#if defined(PETSC_HAVE_PARMETIS)
    options.ParSymbFact = YES;
    options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of the user ordering setting */
#else
    SETERRQ(PETSC_ERR_SUP,"Parallel symbolic factorization requires ParMETIS");
#endif
  }

  lu->FactPattern = SamePattern;
  PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);
  if (flg) {
    switch (indx) {
    case 0:
      lu->FactPattern = SamePattern;
      break;
    case 1:
      lu->FactPattern = SamePattern_SameRowPerm;
      break;
    }
  }

  options.IterRefine = NOREFINE;
  PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
  if (flg) {
    options.IterRefine = DOUBLE;
  }

  if (PetscLogPrintInfo) {
    options.PrintStat = YES;
  } else {
    options.PrintStat = NO;
  }
  PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
                    (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);
  PetscOptionsEnd();

  lu->options             = options;
  lu->options.Fact        = DOFACT;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  *F = B;
  return(0);
}
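
/*
   A minimal usage sketch for this interface (hypothetical driver code; assumes an
   assembled AIJ matrix A and conforming vectors b,x, and omits error checking):

      Mat           F;
      IS            r,c;
      MatFactorInfo info;

      MatFactorInfoInitialize(&info);
      MatGetOrdering(A,MATORDERING_NATURAL,&r,&c);   (the orderings are ignored by this package)
      MatGetFactor(A,MAT_SOLVER_SUPERLU_DIST,MAT_FACTOR_LU,&F);
      MatLUFactorSymbolic(F,A,r,c,&info);
      MatLUFactorNumeric(F,A,&info);
      MatSolve(F,b,x);
      ISDestroy(r); ISDestroy(c);
      MatDestroy(F);
*/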

PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  MatGetFactor_aij_superlu_dist(A,ftype,F);
  return(0);
}

PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  MatGetFactor_aij_superlu_dist(A,ftype,F);
  return(0);
}

PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
  superlu_options_t options;
  PetscErrorCode    ierr;

  /* check if matrix is superlu_dist type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);

  options = lu->options;
  PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
  PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
  PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);
  PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);
  PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);
  PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);
  PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
  PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL" : "LargeDiag");
  if (options.ColPerm == NATURAL) {
    PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");
  } else if (options.ColPerm == MMD_AT_PLUS_A) {
    PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");
  } else if (options.ColPerm == MMD_ATA) {
    PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");
  } else if (options.ColPerm == PARMETIS) {
    PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
  }
  PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);

  if (lu->FactPattern == SamePattern) {
    PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");
  } else {
    PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");
  }
  return(0);
}

PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscTruth        iascii;
  PetscViewerFormat format;

  PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
  if (iascii) {
    PetscViewerGetFormat(viewer,&format);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      MatFactorInfo_SuperLU_DIST(A,viewer);
    }
  }
  return(0);
}

/*MC
  MAT_SOLVER_SUPERLU_DIST - Parallel direct solver package for LU factorization

  Works with AIJ matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_matinput <GLOBAL,DISTRIBUTED> - matrix input mode
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
. -mat_superlu_dist_colperm <MMD_AT_PLUS_A,NATURAL,MMD_ATA,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_fact <SamePattern,SamePattern_SameRowPerm> - sparsity pattern reuse for repeated factorizations
. -mat_superlu_dist_iterrefine - use iterative refinement
- -mat_superlu_dist_statprint - print factorization information
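
  Notes: A typical way to select this solver from the command line (a sketch, assuming
  the standard KSP/PC setup) is
.vb
     -ksp_type preonly -pc_type lu -pc_factor_mat_solver_package superlu_dist
.ve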

  Level: beginner

.seealso: PCLU, PCFactorSetMatSolverPackage(), MatSolverPackage

M*/