Actual source code: superlu_dist.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:         Provides an interface to the SuperLU_DIST_2.2 sparse solver
  5: */

  7: #include "../src/mat/impls/aij/seq/aij.h"
  8: #include "../src/mat/impls/aij/mpi/mpiaij.h"
  9: #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
 10: #include "stdlib.h"
 11: #endif

 13: #if defined(PETSC_USE_64BIT_INDICES)
 14: /* ugly SuperLU_Dist variable telling it to use long long int */
 15: #define _LONGINT
 16: #endif

 19: #if defined(PETSC_USE_COMPLEX)
 20: #include "superlu_zdefs.h"
 21: #else
 22: #include "superlu_ddefs.h"
 23: #endif

 26: typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
 27: const char *SuperLU_MatInputModes[]    = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};

 29: typedef struct {
 30:   int_t                   nprow,npcol,*row,*col;
 31:   gridinfo_t              grid;
 32:   superlu_options_t       options;
 33:   SuperMatrix             A_sup;
 34:   ScalePermstruct_t       ScalePermstruct;
 35:   LUstruct_t              LUstruct;
 36:   int                     StatPrint;
 37:   SuperLU_MatInputMode    MatInputMode;
 38:   SOLVEstruct_t           SOLVEstruct;
 39:   fact_t                  FactPattern;
 40:   MPI_Comm                comm_superlu;
 41: #if defined(PETSC_USE_COMPLEX)
 42:   doublecomplex           *val;
 43: #else
 44:   double                  *val;
 45: #endif

 47:   /* Flag to clean up (non-global) SuperLU objects during Destroy */
 48:   PetscTruth CleanUpSuperLU_Dist;
 49: } Mat_SuperLU_DIST;


 61: PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
 62: {
 63:   PetscErrorCode   ierr;
 64:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
 65: 
 67:   if (lu->CleanUpSuperLU_Dist) {
 68:     /* Deallocate SuperLU_DIST storage */
 69:     if (lu->MatInputMode == GLOBAL) {
 70:       Destroy_CompCol_Matrix_dist(&lu->A_sup);
 71:     } else {
 72:       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
 73:       if ( lu->options.SolveInitialized ) {
 74: #if defined(PETSC_USE_COMPLEX)
 75:         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
 76: #else
 77:         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
 78: #endif
 79:       }
 80:     }
 81:     Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
 82:     ScalePermstructFree(&lu->ScalePermstruct);
 83:     LUstructFree(&lu->LUstruct);
 84: 
 85:     /* Release the SuperLU_DIST process grid. */
 86:     superlu_gridexit(&lu->grid);
 87: 
 88:     MPI_Comm_free(&(lu->comm_superlu));
 89:   }

 91:   MatDestroy_MPIAIJ(A);
 92:   return(0);
 93: }

 97: PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
 98: {
 99:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
100:   PetscErrorCode   ierr;
101:   PetscMPIInt      size;
102:   PetscInt         m=A->rmap->N, N=A->cmap->N;
103:   SuperLUStat_t    stat;
104:   double           berr[1];
105:   PetscScalar      *bptr;
106:   PetscInt         nrhs=1;
107:   Vec              x_seq;
108:   IS               iden;
109:   VecScatter       scat;
110:   int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */

113:   MPI_Comm_size(((PetscObject)A)->comm,&size);
114:   if (size > 1) {
115:     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
116:       VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
117:       ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
118:       VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
119:       ISDestroy(iden);

121:       VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
122:       VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
123:       VecGetArray(x_seq,&bptr);
124:     } else { /* distributed mat input */
125:       VecCopy(b_mpi,x);
126:       VecGetArray(x,&bptr);
127:     }
128:   } else { /* size == 1 */
129:     VecCopy(b_mpi,x);
130:     VecGetArray(x,&bptr);
131:   }
132: 
133:   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");

135:   PStatInit(&stat);        /* Initialize the statistics variables. */
136:   if (lu->MatInputMode == GLOBAL) {
137: #if defined(PETSC_USE_COMPLEX)
138:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
139:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
140: #else
141:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
142:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
143: #endif 
144:   } else { /* distributed mat input */
145: #if defined(PETSC_USE_COMPLEX)
146:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid,
147:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
148:     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
149: #else
150:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid,
151:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
152:     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
153: #endif
154:   }
155:   if (lu->options.PrintStat) {
156:      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
157:   }
158:   PStatFree(&stat);
159: 
160:   if (size > 1) {
161:     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
162:       VecRestoreArray(x_seq,&bptr);
163:       VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
164:       VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
165:       VecScatterDestroy(scat);
166:       VecDestroy(x_seq);
167:     } else {
168:       VecRestoreArray(x,&bptr);
169:     }
170:   } else {
171:     VecRestoreArray(x,&bptr);
172:   }
173:   return(0);
174: }

178: PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
179: {
180:   Mat              *tseq,A_seq = PETSC_NULL;
181:   Mat_SeqAIJ       *aa,*bb;
182:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
183:   PetscErrorCode   ierr;
184:   PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
185:                    m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
186:   int              sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
187:   PetscMPIInt      size,rank;
188:   SuperLUStat_t    stat;
189:   double           *berr=0;
190:   IS               isrow;
191:   PetscLogDouble   time0,time,time_min,time_max;
192:   Mat              F_diag=PETSC_NULL;
193: #if defined(PETSC_USE_COMPLEX)
194:   doublecomplex    *av, *bv;
195: #else
196:   double           *av, *bv;
197: #endif

200:   MPI_Comm_size(((PetscObject)A)->comm,&size);
201:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);
202: 
203:   if (lu->options.PrintStat) { /* collect time for mat conversion */
204:     MPI_Barrier(((PetscObject)A)->comm);
205:     PetscGetTime(&time0);
206:   }

208:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
209:     if (size > 1) { /* convert mpi A to seq mat A */
210:       ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
211:       MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
212:       ISDestroy(isrow);
213: 
214:       A_seq = *tseq;
215:       PetscFree(tseq);
216:       aa =  (Mat_SeqAIJ*)A_seq->data;
217:     } else {
218:       aa =  (Mat_SeqAIJ*)A->data;
219:     }

221:     /* Convert Petsc NR matrix to SuperLU_DIST NC. 
222:        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
223:     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
224:       if (lu->FactPattern == SamePattern_SameRowPerm){
225:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
226:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
227:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
228:       } else {
229:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
230:         Destroy_LU(N, &lu->grid, &lu->LUstruct);
231:         lu->options.Fact = SamePattern;
232:       }
233:     }
234: #if defined(PETSC_USE_COMPLEX)
235:     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
236: #else
237:     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
238: #endif

240:     /* Create compressed column matrix A_sup. */
241: #if defined(PETSC_USE_COMPLEX)
242:     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
243: #else
244:     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
245: #endif
246:   } else { /* distributed mat input */
247:     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
248:     aa=(Mat_SeqAIJ*)(mat->A)->data;
249:     bb=(Mat_SeqAIJ*)(mat->B)->data;
250:     ai=aa->i; aj=aa->j;
251:     bi=bb->i; bj=bb->j;
252: #if defined(PETSC_USE_COMPLEX)
253:     av=(doublecomplex*)aa->a;
254:     bv=(doublecomplex*)bb->a;
255: #else
256:     av=aa->a;
257:     bv=bb->a;
258: #endif
259:     rstart = A->rmap->rstart;
260:     nz     = aa->nz + bb->nz;
261:     garray = mat->garray;
262: 
263:     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
264: #if defined(PETSC_USE_COMPLEX)
265:       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
266: #else
267:       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
268: #endif
269:     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
270:       if (lu->FactPattern == SamePattern_SameRowPerm){
271:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
272:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
273:       } else {
274:         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
275:         lu->options.Fact = SamePattern;
276:       }
277:     }
278:     nz = 0; irow = rstart;
279:     for ( i=0; i<m; i++ ) {
280:       lu->row[i] = nz;
281:       countA = ai[i+1] - ai[i];
282:       countB = bi[i+1] - bi[i];
283:       ajj = aj + ai[i];  /* ptr to the beginning of this row */
284:       bjj = bj + bi[i];

286:       /* B part, smaller col index */
287:       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
288:       jB = 0;
289:       for (j=0; j<countB; j++){
290:         jcol = garray[bjj[j]];
291:         if (jcol > colA_start) {
292:           jB = j;
293:           break;
294:         }
295:         lu->col[nz] = jcol;
296:         lu->val[nz++] = *bv++;
297:         if (j==countB-1) jB = countB;
298:       }

300:       /* A part */
301:       for (j=0; j<countA; j++){
302:         lu->col[nz] = rstart + ajj[j];
303:         lu->val[nz++] = *av++;
304:       }

306:       /* B part, larger col index */
307:       for (j=jB; j<countB; j++){
308:         lu->col[nz] = garray[bjj[j]];
309:         lu->val[nz++] = *bv++;
310:       }
311:     }
312:     lu->row[m] = nz;
313: #if defined(PETSC_USE_COMPLEX)
314:     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
315:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
316: #else
317:     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
318:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
319: #endif
320:   }
321:   if (lu->options.PrintStat) {
322:     PetscGetTime(&time);
323:     time0 = time - time0;
324:   }

326:   /* Factor the matrix. */
327:   PStatInit(&stat);   /* Initialize the statistics variables. */
328:   CHKMEMQ;
329:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
330: #if defined(PETSC_USE_COMPLEX)
331:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
332:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
333: #else
334:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
335:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
336: #endif 
337:   } else { /* distributed mat input */
338: #if defined(PETSC_USE_COMPLEX)
339:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
340:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
341:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
342: #else
343:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
344:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
345:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
346: #endif
347:   }

349:   if (lu->MatInputMode == GLOBAL && size > 1){
350:     MatDestroy(A_seq);
351:   }

353:   if (lu->options.PrintStat) {
354:     if (size > 1){
355:       MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
356:       MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
357:       MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
358:       time = time/size; /* average time */
359:       if (!rank) {
360:         PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);
361:       }
362:     } else {
363:       PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);
364:     }
365:     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
366:   }
367:   PStatFree(&stat);
368:   if (size > 1){
369:     F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
370:     F_diag->assembled = PETSC_TRUE;
371:   }
372:   (F)->assembled    = PETSC_TRUE;
373:   (F)->preallocated = PETSC_TRUE;
374:   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
375:   return(0);
376: }

378: /* Note the Petsc r and c permutations are ignored */
381: PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
382: {
383:   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (F)->spptr;
384:   PetscInt          M=A->rmap->N,N=A->cmap->N;

387:   /* Initialize the SuperLU process grid. */
388:   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

390:   /* Initialize ScalePermstruct and LUstruct. */
391:   ScalePermstructInit(M, N, &lu->ScalePermstruct);
392:   LUstructInit(M, N, &lu->LUstruct);
393:   (F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
394:   (F)->ops->solve            = MatSolve_SuperLU_DIST;
395:   return(0);
396: }

401: PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
402: {
404:   *type = MAT_SOLVER_SUPERLU_DIST;
405:   return(0);
406: }

411: PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
412: {
413:   Mat               B;
414:   Mat_SuperLU_DIST  *lu;
415:   PetscErrorCode    ierr;
416:   PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
417:   PetscMPIInt       size;
418:   superlu_options_t options;
419:   PetscTruth        flg;
420:   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
421:   const char        *prtype[] = {"LargeDiag","NATURAL"};
422:   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

425:   /* Create the factorization matrix */
426:   MatCreate(((PetscObject)A)->comm,&B);
427:   MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);
428:   MatSetType(B,((PetscObject)A)->type_name);
429:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
430:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

432:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
433:   B->ops->view             = MatView_SuperLU_DIST;
434:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);
435:   B->factor                = MAT_FACTOR_LU;

437:   PetscNewLog(B,Mat_SuperLU_DIST,&lu);
438:   B->spptr = lu;

440:   /* Set the default input options:
441:      options.Fact              = DOFACT;
442:      options.Equil             = YES;
443:      options.ParSymbFact       = NO;
444:      options.ColPerm           = MMD_AT_PLUS_A;
445:      options.RowPerm           = LargeDiag;
446:      options.ReplaceTinyPivot  = YES;
447:      options.IterRefine        = DOUBLE;
448:      options.Trans             = NOTRANS;
449:      options.SolveInitialized  = NO;
450:      options.RefineInitialized = NO;
451:      options.PrintStat         = YES;
452:   */
453:   set_default_options_dist(&options);

455:   MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));
456:   MPI_Comm_size(((PetscObject)A)->comm,&size);
457:   /* Default num of process columns and rows */
458:   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
459:   if (!lu->npcol) lu->npcol = 1;
460:   while (lu->npcol > 0) {
461:     lu->nprow = PetscMPIIntCast(size/lu->npcol);
462:     if (size == lu->nprow * lu->npcol) break;
463:     lu->npcol --;
464:   }
465: 
466:   PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
467:     PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
468:     PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
469:     if (size != lu->nprow * lu->npcol)
470:       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
471: 
472:     lu->MatInputMode = DISTRIBUTED;
473:     PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);
474:     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

476:     PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
477:     if (!flg) {
478:       options.Equil = NO;
479:     }

481:     PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);
482:     if (flg) {
483:       switch (indx) {
484:       case 0:
485:         options.RowPerm = LargeDiag;
486:         break;
487:       case 1:
488:         options.RowPerm = NOROWPERM;
489:         break;
490:       }
491:     }

493:     PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);
494:     if (flg) {
495:       switch (indx) {
496:       case 0:
497:         options.ColPerm = MMD_AT_PLUS_A;
498:         break;
499:       case 1:
500:         options.ColPerm = NATURAL;
501:         break;
502:       case 2:
503:         options.ColPerm = MMD_ATA;
504:         break;
505:       case 3:
506:         options.ColPerm = PARMETIS;
507:         break;
508:       }
509:     }

511:     PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
512:     if (!flg) {
513:       options.ReplaceTinyPivot = NO;
514:     }

516:     options.ParSymbFact = NO;
517:     PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);
518:     if (flg){
519: #ifdef PETSC_HAVE_PARMETIS
520:       options.ParSymbFact = YES;
521:       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
522: #else
523:       printf("parsymbfact needs PARMETIS");
524: #endif
525:     }

527:     lu->FactPattern = SamePattern;
528:     PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);
529:     if (flg) {
530:       switch (indx) {
531:       case 0:
532:         lu->FactPattern = SamePattern;
533:         break;
534:       case 1:
535:         lu->FactPattern = SamePattern_SameRowPerm;
536:         break;
537:       }
538:     }
539: 
540:     options.IterRefine = NOREFINE;
541:     PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
542:     if (flg) {
543:       options.IterRefine = DOUBLE;
544:     }

546:     if (PetscLogPrintInfo) {
547:       options.PrintStat = YES;
548:     } else {
549:       options.PrintStat = NO;
550:     }
551:     PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
552:                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);
553:   PetscOptionsEnd();

555:   lu->options             = options;
556:   lu->options.Fact        = DOFACT;
557:   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
558:   *F = B;
559:   return(0);
560: }

565: PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
566: {

570:   MatGetFactor_aij_superlu_dist(A,ftype,F);
571:   return(0);
572: }

578: PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
579: {

583:   MatGetFactor_aij_superlu_dist(A,ftype,F);
584:   return(0);
585: }

590: PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
591: {
592:   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
593:   superlu_options_t options;
594:   PetscErrorCode    ierr;

597:   /* check if matrix is superlu_dist type */
598:   if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);

600:   options = lu->options;
601:   PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
602:   PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
603:   PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);
604:   PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);
605:   PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);
606:   PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);
607:   PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
608:   PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
609:   if (options.ColPerm == NATURAL) {
610:     PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");
611:   } else if (options.ColPerm == MMD_AT_PLUS_A) {
612:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");
613:   } else if (options.ColPerm == MMD_ATA) {
614:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");
615:   } else if (options.ColPerm == PARMETIS) {
616:     PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");
617:   } else {
618:     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
619:   }

621:   PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);
622: 
623:   if (lu->FactPattern == SamePattern){
624:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");
625:   } else {
626:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");
627:   }
628:   return(0);
629: }

633: PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
634: {
635:   PetscErrorCode    ierr;
636:   PetscTruth        iascii;
637:   PetscViewerFormat format;

640:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
641:   if (iascii) {
642:     PetscViewerGetFormat(viewer,&format);
643:     if (format == PETSC_VIEWER_ASCII_INFO) {
644:       MatFactorInfo_SuperLU_DIST(A,viewer);
645:     }
646:   }
647:   return(0);
648: }


651: /*MC
652:   MAT_SOLVER_SUPERLU_DIST - Parallel direct solver package for LU factorization

654:    Works with AIJ matrices  

656:   Options Database Keys:
657: + -mat_superlu_dist_r <n> - number of rows in processor partition
658: . -mat_superlu_dist_c <n> - number of columns in processor partition
659: . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
660: . -mat_superlu_dist_equil - equilibrate the matrix
661: . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
662: . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
663: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
664: . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
665: . -mat_superlu_dist_iterrefine - use iterative refinement
666: - -mat_superlu_dist_statprint - print factorization information

668:    Level: beginner

670: .seealso: PCLU

672: .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage

674: M*/