Actual source code: mpispooles.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:    Provides an interface to the Spooles parallel sparse solver (MPI SPOOLES)
  5: */

 7:  #include ../src/mat/impls/aij/seq/aij.h
 8:  #include ../src/mat/impls/sbaij/seq/sbaij.h
 9:  #include ../src/mat/impls/baij/seq/baij.h
 10:  #include ../src/mat/impls/aij/mpi/mpiaij.h
 11:  #include ../src/mat/impls/sbaij/mpi/mpisbaij.h
 12:  #include ../src/mat/impls/aij/seq/spooles/spooles.h

 /* Routines declared here and defined outside this file:
    SetSpoolesOptions() is called by MatFactorNumeric_MPISpooles() on the first
    numeric factorization to fill lu->options; MatDestroy_MPIAIJ() is the last
    call made by MatDestroy_MPIAIJSpooles(). */
 14: EXTERN int SetSpoolesOptions(Mat, Spooles_options *);
 15: EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);

 19: PetscErrorCode MatDestroy_MPIAIJSpooles(Mat A)
 20: {
 21:   Mat_Spooles   *lu = (Mat_Spooles*)A->spptr;
 23: 
 25:   if (lu->CleanUpSpooles) {
 26:     FrontMtx_free(lu->frontmtx);
 27:     IV_free(lu->newToOldIV);
 28:     IV_free(lu->oldToNewIV);
 29:     IV_free(lu->vtxmapIV);
 30:     InpMtx_free(lu->mtxA);
 31:     ETree_free(lu->frontETree);
 32:     IVL_free(lu->symbfacIVL);
 33:     SubMtxManager_free(lu->mtxmanager);
 34:     DenseMtx_free(lu->mtxX);
 35:     DenseMtx_free(lu->mtxY);
 36:     MPI_Comm_free(&(lu->comm_spooles));
 37:     if ( lu->scat ){
 38:       VecDestroy(lu->vec_spooles);
 39:       ISDestroy(lu->iden);
 40:       ISDestroy(lu->is_petsc);
 41:       VecScatterDestroy(lu->scat);
 42:     }
 43:   }
 44:   MatDestroy_MPIAIJ(A);

 46:   return(0);
 47: }

 51: PetscErrorCode MatSolve_MPISpooles(Mat A,Vec b,Vec x)
 52: {
 53:   Mat_Spooles   *lu = (Mat_Spooles*)A->spptr;
 55:   int           size,rank,m=A->rmap->n,irow,*rowindY;
 56:   PetscScalar   *array;
 57:   DenseMtx      *newY ;
 58:   SubMtxManager *solvemanager ;
 59: #if defined(PETSC_USE_COMPLEX)
 60:   double x_real,x_imag;
 61: #endif

 64:   MPI_Comm_size(((PetscObject)A)->comm,&size);
 65:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);
 66: 
 67:   /* copy b into spooles' rhs mtxY */
 68:   DenseMtx_init(lu->mtxY, lu->options.typeflag, 0, 0, m, 1, 1, m);
 69:   VecGetArray(b,&array);

 71:   DenseMtx_rowIndices(lu->mtxY, &m, &rowindY);  /* get m, rowind */
 72:   for ( irow = 0 ; irow < m ; irow++ ) {
 73:     rowindY[irow] = irow + lu->rstart;           /* global rowind */
 74: #if !defined(PETSC_USE_COMPLEX)
 75:     DenseMtx_setRealEntry(lu->mtxY, irow, 0, *array++);
 76: #else
 77:     DenseMtx_setComplexEntry(lu->mtxY,irow,0,PetscRealPart(*array),PetscImaginaryPart(*array));
 78:     array++;
 79: #endif
 80:   }
 81:   VecRestoreArray(b,&array);
 82: 
 83:   if ( lu->options.msglvl > 2 ) {
 84:     int err;
 85:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n 1 matrix in original ordering");
 86:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
 87:     err = fflush(lu->options.msgFile);
 88:     if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
 89:   }
 90: 
 91:   /* permute and redistribute Y if necessary */
 92:   DenseMtx_permuteRows(lu->mtxY, lu->oldToNewIV);
 93:   if ( lu->options.msglvl > 2 ) {
 94:     int err;
 95:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix in new ordering");
 96:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
 97:     err = fflush(lu->options.msgFile);
 98:     if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
 99:   }

101:   MPI_Barrier(((PetscObject)A)->comm); /* for initializing firsttag, because the num. of tags used
102:                                    by FrontMtx_MPI_split() is unknown */
103:   lu->firsttag = 0;
104:   newY = DenseMtx_MPI_splitByRows(lu->mtxY, lu->vtxmapIV, lu->stats, lu->options.msglvl,
105:                                 lu->options.msgFile, lu->firsttag, lu->comm_spooles);
106:   DenseMtx_free(lu->mtxY);
107:   lu->mtxY = newY ;
108:   lu->firsttag += size ;
109:   if ( lu->options.msglvl > 2 ) {
110:     int err;
111:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split DenseMtx Y");
112:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
113:     err = fflush(lu->options.msgFile);
114:     if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
115:   }

117:   if ( FRONTMTX_IS_PIVOTING(lu->frontmtx) ) {
118:     /*   pivoting has taken place, redistribute the right hand side
119:          to match the final rows and columns in the fronts             */
120:     IV *rowmapIV ;
121:     rowmapIV = FrontMtx_MPI_rowmapIV(lu->frontmtx, lu->ownersIV, lu->options.msglvl,
122:                                     lu->options.msgFile, lu->comm_spooles);
123:     newY = DenseMtx_MPI_splitByRows(lu->mtxY, rowmapIV, lu->stats, lu->options.msglvl,
124:                                    lu->options.msgFile, lu->firsttag, lu->comm_spooles);
125:     DenseMtx_free(lu->mtxY);
126:     lu->mtxY = newY ;
127:     IV_free(rowmapIV);
128:     lu->firsttag += size;
129:   }
130:   if ( lu->options.msglvl > 2 ) {
131:     int err;
132:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix after split");
133:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
134:     err = fflush(lu->options.msgFile);
135:     if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
136:   }

138:   if ( lu->nmycol > 0 ) IVcopy(lu->nmycol,lu->rowindX,IV_entries(lu->ownedColumnsIV)); /* must do for each solve */
139: 
140:   /* solve the linear system */
141:   solvemanager = SubMtxManager_new();
142:   SubMtxManager_init(solvemanager, NO_LOCK, 0);
143:   FrontMtx_MPI_solve(lu->frontmtx, lu->mtxX, lu->mtxY, solvemanager, lu->solvemap, lu->cpus,
144:                    lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
145:   SubMtxManager_free(solvemanager);
146:   if ( lu->options.msglvl > 2 ) {
147:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n solution in new ordering");
148:     DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
149:   }

151:   /* permute the solution into the original ordering */
152:   DenseMtx_permuteRows(lu->mtxX, lu->newToOldIV);
153:   if ( lu->options.msglvl > 2 ) {
154:     int err;
155:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution in old ordering");
156:     DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
157:     err = fflush(lu->options.msgFile);
158:     if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
159:   }
160: 
161:   /* scatter local solution mtxX into mpi vector x */
162:   if( !lu->scat ){ /* create followings once for each numfactorization */
163:     /* vec_spooles <- mtxX */
164: #if !defined(PETSC_USE_COMPLEX) 
165:     VecCreateSeqWithArray(PETSC_COMM_SELF,lu->nmycol,lu->entX,&lu->vec_spooles);
166: #else    
167:     VecCreateSeq(PETSC_COMM_SELF,lu->nmycol,&lu->vec_spooles);
168: #endif 
169:     ISCreateStride(PETSC_COMM_SELF,lu->nmycol,0,1,&lu->iden);
170:     ISCreateGeneral(PETSC_COMM_SELF,lu->nmycol,lu->rowindX,&lu->is_petsc);
171:     VecScatterCreate(lu->vec_spooles,lu->iden,x,lu->is_petsc,&lu->scat);
172:   }
173: #if defined(PETSC_USE_COMPLEX)
174:     VecGetArray(lu->vec_spooles,&array);
175:     for (irow = 0; irow < lu->nmycol; irow++){
176:       DenseMtx_complexEntry(lu->mtxX,irow,0,&x_real,&x_imag);
177:       array[irow] = x_real+x_imag*PETSC_i;
178:     }
179:     VecRestoreArray(lu->vec_spooles,&array);
180: #endif 
181:   VecScatterBegin(lu->scat,lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD);
182:   VecScatterEnd(lu->scat,lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD);
183:   return(0);
184: }

188: PetscErrorCode MatFactorNumeric_MPISpooles(Mat F,Mat A,const MatFactorInfo *info)
189: {
190:   Mat_Spooles     *lu = (Mat_Spooles*)(F)->spptr;
191:   PetscErrorCode  ierr;
192:   int             rank,size,lookahead=0,sierr;
193:   ChvManager      *chvmanager ;
194:   Chv             *rootchv ;
195:   Graph           *graph ;
196:   IVL             *adjIVL;
197:   DV              *cumopsDV ;
198:   double          droptol=0.0,*opcounts,minops,cutoff;
199: #if !defined(PETSC_USE_COMPLEX)
200:   double          *val;
201: #endif
202:   InpMtx          *newA ;
203:   PetscScalar     *av, *bv;
204:   PetscInt        *ai, *aj, *bi,*bj, nz, *ajj, *bjj, *garray,
205:                   i,j,irow,jcol,countA,countB,jB,*row,*col,colA_start,jj;
206:   PetscInt        M=A->rmap->N,m=A->rmap->n,root,nedges,tagbound,lasttag;
207:   Mat             F_diag;
208: 
210:   MPI_Comm_size(((PetscObject)A)->comm,&size);
211:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);

213:   if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
214:     /* get input parameters */
215:     SetSpoolesOptions(A, &lu->options);

217:     (F)->assembled    = PETSC_TRUE;
218:     if ((F)->factor == MAT_FACTOR_LU){
219:       F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
220:     } else {
221:       F_diag = ((Mat_MPISBAIJ *)(F)->data)->A;
222:     }
223:     F_diag->assembled  = PETSC_TRUE;

225:     /* to be used by MatSolve() */
226:     lu->mtxY = DenseMtx_new();
227:     lu->mtxX = DenseMtx_new();
228:     lu->scat = PETSC_NULL;

230:     IVzero(20, lu->stats);
231:     DVzero(20, lu->cpus);

233:     lu->mtxA = InpMtx_new();
234:   }
235: 
236:   /* copy A to Spooles' InpMtx object */
237:   if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
238:     Mat_MPIAIJ  *mat =  (Mat_MPIAIJ*)A->data;
239:     Mat_SeqAIJ  *aa=(Mat_SeqAIJ*)(mat->A)->data;
240:     Mat_SeqAIJ  *bb=(Mat_SeqAIJ*)(mat->B)->data;
241:     ai=aa->i; aj=aa->j; av=aa->a;
242:     bi=bb->i; bj=bb->j; bv=bb->a;
243:     lu->rstart = A->rmap->rstart;
244:     nz         = aa->nz + bb->nz;
245:     garray     = mat->garray;
246:   } else {         /* SPOOLES_SYMMETRIC  */
247:     Mat_MPISBAIJ  *mat = (Mat_MPISBAIJ*)A->data;
248:     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
249:     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
250:     ai=aa->i; aj=aa->j; av=aa->a;
251:     bi=bb->i; bj=bb->j; bv=bb->a;
252:     lu->rstart = A->rmap->rstart;
253:     nz         = aa->nz + bb->nz;
254:     garray     = mat->garray;
255:   }
256: 
257:   InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0);
258:   row   = InpMtx_ivec1(lu->mtxA);
259:   col   = InpMtx_ivec2(lu->mtxA);
260: #if !defined(PETSC_USE_COMPLEX)
261:   val   = InpMtx_dvec(lu->mtxA);
262: #endif

264:   jj = 0; irow = lu->rstart;
265:   for ( i=0; i<m; i++ ) {
266:     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
267:     countA = ai[i+1] - ai[i];
268:     countB = bi[i+1] - bi[i];
269:     bjj = bj + bi[i];
270:     jB = 0;
271: 
272:     if (lu->options.symflag == SPOOLES_NONSYMMETRIC ){
273:       /* B part, smaller col index */
274:       colA_start = lu->rstart + ajj[0]; /* the smallest col index for A */
275:       for (j=0; j<countB; j++){
276:         jcol = garray[bjj[j]];
277:         if (jcol > colA_start) {
278:           jB = j;
279:           break;
280:         }
281:         row[jj] = irow; col[jj] = jcol;
282: #if !defined(PETSC_USE_COMPLEX)
283:         val[jj++] = *bv++;
284: #else
285:         InpMtx_inputComplexEntry(lu->mtxA,irow,jcol,PetscRealPart(*bv),PetscImaginaryPart(*bv));
286:         bv++; jj++;
287: #endif
288:         if (j==countB-1) jB = countB;
289:       }
290:     }
291:     /* A part */
292:     for (j=0; j<countA; j++){
293:       row[jj] = irow; col[jj] = lu->rstart + ajj[j];
294: #if !defined(PETSC_USE_COMPLEX)
295:       val[jj++] = *av++;
296: #else
297:       InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*av),PetscImaginaryPart(*av));
298:       av++; jj++;
299: #endif
300:     }
301:     /* B part, larger col index */
302:     for (j=jB; j<countB; j++){
303:       row[jj] = irow; col[jj] = garray[bjj[j]];
304: #if !defined(PETSC_USE_COMPLEX)
305:       val[jj++] = *bv++;
306: #else
307:      InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*bv),PetscImaginaryPart(*bv));
308:      bv++; jj++;
309: #endif
310:     }
311:     irow++;
312:   }
313: #if !defined(PETSC_USE_COMPLEX)
314:   InpMtx_inputRealTriples(lu->mtxA, nz, row, col, val);
315: #endif
316:   InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
317:   if ( lu->options.msglvl > 0 ) {
318:     int err;
319:     printf("[%d] input matrix\n",rank);
320:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n [%d] input matrix\n",rank);
321:     InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
322:     err = fflush(lu->options.msgFile);
323:     if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
324:   }

326:   if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
327:     /*
328:       find a low-fill ordering
329:       (1) create the Graph object
330:       (2) order the graph using multiple minimum degree
331:       (3) find out who has the best ordering w.r.t. op count,
332:           and broadcast that front tree object
333:     */
334:     graph = Graph_new();
335:     adjIVL = InpMtx_MPI_fullAdjacency(lu->mtxA, lu->stats,
336:               lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
337:     nedges = IVL_tsize(adjIVL);
338:     Graph_init2(graph, 0, M, 0, nedges, M, nedges, adjIVL, NULL, NULL);
339:     if ( lu->options.msglvl > 2 ) {
340:       int err;
341:       err = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");
342:       Graph_writeForHumanEye(graph, lu->options.msgFile);
343:       fflush(lu->options.msgFile);
344:       if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
345:     }

347:     switch (lu->options.ordering) {
348:     case 0:
349:       lu->frontETree = orderViaBestOfNDandMS(graph,
350:                      lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
351:                      lu->options.seed + rank, lu->options.msglvl, lu->options.msgFile); break;
352:     case 1:
353:       lu->frontETree = orderViaMMD(graph,lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
354:     case 2:
355:       lu->frontETree = orderViaMS(graph, lu->options.maxdomainsize,
356:                      lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
357:     case 3:
358:       lu->frontETree = orderViaND(graph, lu->options.maxdomainsize,
359:                      lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
360:     default:
361:       SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering");
362:     }

364:     Graph_free(graph);
365:     if ( lu->options.msglvl > 2 ) {
366:       int err;
367:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");
368:       ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
369:       err = fflush(lu->options.msgFile);
370:       if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
371:     }

373:     opcounts = DVinit(size, 0.0);
374:     opcounts[rank] = ETree_nFactorOps(lu->frontETree, lu->options.typeflag, lu->options.symflag);
375:     MPI_Allgather((void*) &opcounts[rank], 1, MPI_DOUBLE,
376:               (void*) opcounts, 1, MPI_DOUBLE, ((PetscObject)A)->comm);
377:     minops = DVmin(size, opcounts, &root);
378:     DVfree(opcounts);
379: 
380:     lu->frontETree = ETree_MPI_Bcast(lu->frontETree, root,
381:                              lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
382:     if ( lu->options.msglvl > 2 ) {
383:       int err;
384:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n best front tree");
385:       ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
386:       err = fflush(lu->options.msgFile);
387:       if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
388:     }
389: 
390:     /* get the permutations, permute the front tree, permute the matrix */
391:     lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree);
392:     lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree);

394:     ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);

396:     InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
397: 
398:     if (  lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);

400:     InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
401:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);

403:     /* generate the owners map IV object and the map from vertices to owners */
404:     cutoff   = 1./(2*size);
405:     cumopsDV = DV_new();
406:     DV_init(cumopsDV, size, NULL);
407:     lu->ownersIV = ETree_ddMap(lu->frontETree,
408:                        lu->options.typeflag, lu->options.symflag, cumopsDV, cutoff);
409:     DV_free(cumopsDV);
410:     lu->vtxmapIV = IV_new();
411:     IV_init(lu->vtxmapIV, M, NULL);
412:     IVgather(M, IV_entries(lu->vtxmapIV),
413:              IV_entries(lu->ownersIV), ETree_vtxToFront(lu->frontETree));
414:     if ( lu->options.msglvl > 2 ) {
415:       int err;

417:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from fronts to owning processes");
418:       IV_writeForHumanEye(lu->ownersIV, lu->options.msgFile);
419:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from vertices to owning processes");
420:       IV_writeForHumanEye(lu->vtxmapIV, lu->options.msgFile);
421:       err = fflush(lu->options.msgFile);
422:       if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
423:     }

425:     /* redistribute the matrix */
426:     lu->firsttag = 0 ;
427:     newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
428:                         lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
429:     lu->firsttag += size ;

431:     InpMtx_free(lu->mtxA);
432:     lu->mtxA = newA ;
433:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
434:     if ( lu->options.msglvl > 2 ) {
435:       int err;
436:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
437:       InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
438:       err = fflush(lu->options.msgFile);
439:       if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
440:     }
441: 
442:     /* compute the symbolic factorization */
443:     lu->symbfacIVL = SymbFac_MPI_initFromInpMtx(lu->frontETree, lu->ownersIV, lu->mtxA,
444:                      lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
445:     lu->firsttag += lu->frontETree->nfront ;
446:     if ( lu->options.msglvl > 2 ) {
447:       int err;
448:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n local symbolic factorization");
449:       IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile);
450:       err = fflush(lu->options.msgFile);
451:       if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
452:     }

454:     lu->mtxmanager = SubMtxManager_new();
455:     SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
456:     lu->frontmtx = FrontMtx_new();

458:   } else { /* new num factorization using previously computed symbolic factor */
459:     if (lu->options.pivotingflag) {                  /* different FrontMtx is required */
460:       FrontMtx_free(lu->frontmtx);
461:       lu->frontmtx   = FrontMtx_new();
462:     }

464:     SubMtxManager_free(lu->mtxmanager);
465:     lu->mtxmanager = SubMtxManager_new();
466:     SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);

468:     /* permute mtxA */
469:     InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
470:     if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
471: 
472:     InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
473:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);

475:     /* redistribute the matrix */
476:     MPI_Barrier(((PetscObject)A)->comm);
477:     lu->firsttag = 0;
478:     newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
479:                         lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
480:     lu->firsttag += size ;

482:     InpMtx_free(lu->mtxA);
483:     lu->mtxA = newA ;
484:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
485:     if ( lu->options.msglvl > 2 ) {
486:       int err;
487:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
488:       InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
489:       err = fflush(lu->options.msgFile);
490:       if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
491:     }
492:   } /* end of if ( lu->flg == DIFFERENT_NONZERO_PATTERN) */

494:   FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
495:               FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, rank,
496:               lu->ownersIV, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);

498:     if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
499:     if ( lu->options.patchAndGoFlag == 1 ) {
500:       lu->frontmtx->patchinfo = PatchAndGoInfo_new();
501:       PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
502:                        lu->options.storeids, lu->options.storevalues);
503:     } else if ( lu->options.patchAndGoFlag == 2 ) {
504:       lu->frontmtx->patchinfo = PatchAndGoInfo_new();
505:       PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
506:                        lu->options.storeids, lu->options.storevalues);
507:     }
508:   }

510:   /* numerical factorization */
511:   chvmanager = ChvManager_new();
512:   ChvManager_init(chvmanager, NO_LOCK, 0);

514:   tagbound = maxTagMPI(lu->comm_spooles);
515:   lasttag  = lu->firsttag + 3*lu->frontETree->nfront + 2;
516:   /* if(!rank) PetscPrintf(PETSC_COMM_SELF,"\n firsttag: %d, nfront: %d\n",lu->firsttag, lu->frontETree->nfront);*/
517:   if ( lasttag > tagbound ) {
518:       SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_factorInpMtx(), tag range is [%d,%d], tag_bound = %d",\
519:                lu->firsttag, lasttag, tagbound);
520:   }
521:   rootchv = FrontMtx_MPI_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, droptol,
522:                      chvmanager, lu->ownersIV, lookahead, &sierr, lu->cpus,
523:                      lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
524:   ChvManager_free(chvmanager);
525:   lu->firsttag = lasttag;
526:   if ( lu->options.msglvl > 2 ) {
527:     int err;
528:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization");
529:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
530:     err = fflush(lu->options.msgFile);
531:     if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
532:   }

534:   if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
535:     if ( lu->options.patchAndGoFlag == 1 ) {
536:       if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
537:         if (lu->options.msglvl > 0 ){
538:           PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
539:           IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
540:         }
541:       }
542:       PatchAndGoInfo_free(lu->frontmtx->patchinfo);
543:     } else if ( lu->options.patchAndGoFlag == 2 ) {
544:       if (lu->options.msglvl > 0 ){
545:         if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
546:           PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
547:           IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
548:         }
549:         if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
550:           PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");
551:           DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile);
552:         }
553:       }
554:       PatchAndGoInfo_free(lu->frontmtx->patchinfo);
555:     }
556:   }
557:   if ( sierr >= 0 ) SETERRQ2(PETSC_ERR_LIB,"\n proc %d : factorization error at front %d", rank, sierr);
558: 
559:   /*  post-process the factorization and split 
560:       the factor matrices into submatrices */
561:   lasttag  = lu->firsttag + 5*size;
562:   if ( lasttag > tagbound ) {
563:       SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_postProcess(), tag range is [%d,%d], tag_bound = %d",\
564:                lu->firsttag, lasttag, tagbound);
565:   }
566:   FrontMtx_MPI_postProcess(lu->frontmtx, lu->ownersIV, lu->stats, lu->options.msglvl,
567:                          lu->options.msgFile, lu->firsttag, lu->comm_spooles);
568:   lu->firsttag += 5*size ;
569:   if ( lu->options.msglvl > 2 ) {
570:     int err;
571:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after post-processing");
572:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
573:     err = fflush(lu->options.msgFile);
574:     if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
575:   }
576: 
577:   /* create the solve map object */
578:   lu->solvemap = SolveMap_new();
579:   SolveMap_ddMap(lu->solvemap, lu->frontmtx->symmetryflag,
580:                FrontMtx_upperBlockIVL(lu->frontmtx),
581:                FrontMtx_lowerBlockIVL(lu->frontmtx),
582:                size, lu->ownersIV, FrontMtx_frontTree(lu->frontmtx),
583:                lu->options.seed, lu->options.msglvl, lu->options.msgFile);
584:   if ( lu->options.msglvl > 2 ) {
585:     int err;
586:     SolveMap_writeForHumanEye(lu->solvemap, lu->options.msgFile);
587:     err = fflush(lu->options.msgFile);
588:     if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
589:   }

591:   /* redistribute the submatrices of the factors */
592:   FrontMtx_MPI_split(lu->frontmtx, lu->solvemap,
593:                    lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
594:   if ( lu->options.msglvl > 2 ) {
595:     int err;
596:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after split");
597:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
598:     err = fflush(lu->options.msgFile);
599:     if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
600:   }

602:   /* create a solution DenseMtx object */
603:   lu->ownedColumnsIV = FrontMtx_ownedColumnsIV(lu->frontmtx, rank, lu->ownersIV,
604:                                          lu->options.msglvl, lu->options.msgFile);
605:   lu->nmycol = IV_size(lu->ownedColumnsIV);
606:   if ( lu->nmycol > 0) {
607:     DenseMtx_init(lu->mtxX, lu->options.typeflag, 0, 0, lu->nmycol, 1, 1, lu->nmycol);
608:     /* get pointers rowindX and entX */
609:     DenseMtx_rowIndices(lu->mtxX, &lu->nmycol, &lu->rowindX);
610:     lu->entX = DenseMtx_entries(lu->mtxX);
611:   } else { /* lu->nmycol == 0 */
612:     lu->entX    = 0;
613:     lu->rowindX = 0;
614:   }

616:   if ( lu->scat ){
617:     VecDestroy(lu->vec_spooles);
618:     ISDestroy(lu->iden);
619:     ISDestroy(lu->is_petsc);
620:     VecScatterDestroy(lu->scat);
621:   }
622:   lu->scat = PETSC_NULL;
623:   lu->flg = SAME_NONZERO_PATTERN;
624:   F->ops->solve            = MatSolve_MPISpooles;

626:   lu->CleanUpSpooles = PETSC_TRUE;
627:   return(0);
628: }

630: /*MC
631:   MATMPIAIJSPOOLES - MATMPIAIJSPOOLES = "mpiaijspooles" - A matrix type providing direct solvers (LU) for distributed matrices 
632:   via the external package Spooles.

634:   If Spooles is installed (see the manual for
635:   instructions on how to declare the existence of external packages),
636:   a matrix type can be constructed which invokes SPOOLES solvers.
637:   After calling MatCreate(...,A), simply call MatSetType(A,MATMPIAIJSPOOLES), then 
638:   optionally call MatMPIAIJSetPreallocation() etc. DO NOT
639:   call MatCreateMPIAIJ() directly or the preallocation information will be LOST!

641:   This matrix inherits from MATMPIAIJ.  As a result, MatMPIAIJSetPreallocation() is 
642:   supported for this matrix type.  One can also call MatConvert() for an inplace conversion to or from 
643:   the MATMPIAIJ type without data copy AFTER the matrix values have been set.

645:   Consult Spooles documentation for more information about the options database keys below.

647:   Options Database Keys:
648: + -mat_type mpiaijspooles - sets the matrix type to "mpiaijspooles" during a call to MatSetFromOptions()
649: . -mat_spooles_tau <tau> - upper bound on the magnitude of the largest element in L or U
650: . -mat_spooles_seed <seed> - random number seed used for ordering
651: . -mat_spooles_msglvl <msglvl> - message output level
652: . -mat_spooles_ordering <BestOfNDandMS,MMD,MS,ND> - ordering used
653: . -mat_spooles_maxdomainsize <n> - maximum subgraph size used by Spooles orderings
654: . -mat_spooles_maxzeros <n> - maximum number of zeros inside a supernode
655: . -mat_spooles_maxsize <n> - maximum size of a supernode
656: . -mat_spooles_FrontMtxInfo <true,false> - print Spooles information about the computed factorization
657: . -mat_spooles_symmetryflag <0,1,2> - 0: SPOOLES_SYMMETRIC, 1: SPOOLES_HERMITIAN, 2: SPOOLES_NONSYMMETRIC
658: . -mat_spooles_patchAndGoFlag <0,1,2> - 0: no patch, 1: use PatchAndGo strategy 1, 2: use PatchAndGo strategy 2
659: . -mat_spooles_toosmall <dt> - drop tolerance for PatchAndGo strategy 1
660: . -mat_spooles_storeids <bool integer> - if nonzero, stores row and col numbers where patches were applied in an IV object
661: . -mat_spooles_fudge <delta> - fudge factor for rescaling diagonals with PatchAndGo strategy 2
662: - -mat_spooles_storevalues <bool integer> - if nonzero and PatchAndGo strategy 2 is used, store change in diagonal value in a DV object

664:    Level: beginner

666: .seealso: PCLU
667: M*/