Actual source code: mpispooles.c
1: #define PETSCMAT_DLL
3: /*
4: Provides an interface to the Spooles parallel sparse solver (MPI SPOOLES)
5: */
7: #include ../src/mat/impls/aij/seq/aij.h
8: #include ../src/mat/impls/sbaij/seq/sbaij.h
9: #include ../src/mat/impls/baij/seq/baij.h
10: #include ../src/mat/impls/aij/mpi/mpiaij.h
11: #include ../src/mat/impls/sbaij/mpi/mpisbaij.h
12: #include ../src/mat/impls/aij/seq/spooles/spooles.h
14: EXTERN int SetSpoolesOptions(Mat, Spooles_options *);
15: EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
19: PetscErrorCode MatDestroy_MPIAIJSpooles(Mat A)
20: {
21: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
23:
25: if (lu->CleanUpSpooles) {
26: FrontMtx_free(lu->frontmtx);
27: IV_free(lu->newToOldIV);
28: IV_free(lu->oldToNewIV);
29: IV_free(lu->vtxmapIV);
30: InpMtx_free(lu->mtxA);
31: ETree_free(lu->frontETree);
32: IVL_free(lu->symbfacIVL);
33: SubMtxManager_free(lu->mtxmanager);
34: DenseMtx_free(lu->mtxX);
35: DenseMtx_free(lu->mtxY);
36: MPI_Comm_free(&(lu->comm_spooles));
37: if ( lu->scat ){
38: VecDestroy(lu->vec_spooles);
39: ISDestroy(lu->iden);
40: ISDestroy(lu->is_petsc);
41: VecScatterDestroy(lu->scat);
42: }
43: }
44: MatDestroy_MPIAIJ(A);
46: return(0);
47: }
51: PetscErrorCode MatSolve_MPISpooles(Mat A,Vec b,Vec x)
52: {
53: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
55: int size,rank,m=A->rmap->n,irow,*rowindY;
56: PetscScalar *array;
57: DenseMtx *newY ;
58: SubMtxManager *solvemanager ;
59: #if defined(PETSC_USE_COMPLEX)
60: double x_real,x_imag;
61: #endif
64: MPI_Comm_size(((PetscObject)A)->comm,&size);
65: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
66:
67: /* copy b into spooles' rhs mtxY */
68: DenseMtx_init(lu->mtxY, lu->options.typeflag, 0, 0, m, 1, 1, m);
69: VecGetArray(b,&array);
71: DenseMtx_rowIndices(lu->mtxY, &m, &rowindY); /* get m, rowind */
72: for ( irow = 0 ; irow < m ; irow++ ) {
73: rowindY[irow] = irow + lu->rstart; /* global rowind */
74: #if !defined(PETSC_USE_COMPLEX)
75: DenseMtx_setRealEntry(lu->mtxY, irow, 0, *array++);
76: #else
77: DenseMtx_setComplexEntry(lu->mtxY,irow,0,PetscRealPart(*array),PetscImaginaryPart(*array));
78: array++;
79: #endif
80: }
81: VecRestoreArray(b,&array);
82:
83: if ( lu->options.msglvl > 2 ) {
84: int err;
85: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n 1 matrix in original ordering");
86: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
87: err = fflush(lu->options.msgFile);
88: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
89: }
90:
91: /* permute and redistribute Y if necessary */
92: DenseMtx_permuteRows(lu->mtxY, lu->oldToNewIV);
93: if ( lu->options.msglvl > 2 ) {
94: int err;
95: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix in new ordering");
96: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
97: err = fflush(lu->options.msgFile);
98: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
99: }
101: MPI_Barrier(((PetscObject)A)->comm); /* for initializing firsttag, because the num. of tags used
102: by FrontMtx_MPI_split() is unknown */
103: lu->firsttag = 0;
104: newY = DenseMtx_MPI_splitByRows(lu->mtxY, lu->vtxmapIV, lu->stats, lu->options.msglvl,
105: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
106: DenseMtx_free(lu->mtxY);
107: lu->mtxY = newY ;
108: lu->firsttag += size ;
109: if ( lu->options.msglvl > 2 ) {
110: int err;
111: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split DenseMtx Y");
112: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
113: err = fflush(lu->options.msgFile);
114: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
115: }
117: if ( FRONTMTX_IS_PIVOTING(lu->frontmtx) ) {
118: /* pivoting has taken place, redistribute the right hand side
119: to match the final rows and columns in the fronts */
120: IV *rowmapIV ;
121: rowmapIV = FrontMtx_MPI_rowmapIV(lu->frontmtx, lu->ownersIV, lu->options.msglvl,
122: lu->options.msgFile, lu->comm_spooles);
123: newY = DenseMtx_MPI_splitByRows(lu->mtxY, rowmapIV, lu->stats, lu->options.msglvl,
124: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
125: DenseMtx_free(lu->mtxY);
126: lu->mtxY = newY ;
127: IV_free(rowmapIV);
128: lu->firsttag += size;
129: }
130: if ( lu->options.msglvl > 2 ) {
131: int err;
132: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix after split");
133: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
134: err = fflush(lu->options.msgFile);
135: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
136: }
138: if ( lu->nmycol > 0 ) IVcopy(lu->nmycol,lu->rowindX,IV_entries(lu->ownedColumnsIV)); /* must do for each solve */
139:
140: /* solve the linear system */
141: solvemanager = SubMtxManager_new();
142: SubMtxManager_init(solvemanager, NO_LOCK, 0);
143: FrontMtx_MPI_solve(lu->frontmtx, lu->mtxX, lu->mtxY, solvemanager, lu->solvemap, lu->cpus,
144: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
145: SubMtxManager_free(solvemanager);
146: if ( lu->options.msglvl > 2 ) {
147: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n solution in new ordering");
148: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
149: }
151: /* permute the solution into the original ordering */
152: DenseMtx_permuteRows(lu->mtxX, lu->newToOldIV);
153: if ( lu->options.msglvl > 2 ) {
154: int err;
155: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution in old ordering");
156: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
157: err = fflush(lu->options.msgFile);
158: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
159: }
160:
161: /* scatter local solution mtxX into mpi vector x */
162: if( !lu->scat ){ /* create followings once for each numfactorization */
163: /* vec_spooles <- mtxX */
164: #if !defined(PETSC_USE_COMPLEX)
165: VecCreateSeqWithArray(PETSC_COMM_SELF,lu->nmycol,lu->entX,&lu->vec_spooles);
166: #else
167: VecCreateSeq(PETSC_COMM_SELF,lu->nmycol,&lu->vec_spooles);
168: #endif
169: ISCreateStride(PETSC_COMM_SELF,lu->nmycol,0,1,&lu->iden);
170: ISCreateGeneral(PETSC_COMM_SELF,lu->nmycol,lu->rowindX,&lu->is_petsc);
171: VecScatterCreate(lu->vec_spooles,lu->iden,x,lu->is_petsc,&lu->scat);
172: }
173: #if defined(PETSC_USE_COMPLEX)
174: VecGetArray(lu->vec_spooles,&array);
175: for (irow = 0; irow < lu->nmycol; irow++){
176: DenseMtx_complexEntry(lu->mtxX,irow,0,&x_real,&x_imag);
177: array[irow] = x_real+x_imag*PETSC_i;
178: }
179: VecRestoreArray(lu->vec_spooles,&array);
180: #endif
181: VecScatterBegin(lu->scat,lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD);
182: VecScatterEnd(lu->scat,lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD);
183: return(0);
184: }
188: PetscErrorCode MatFactorNumeric_MPISpooles(Mat F,Mat A,const MatFactorInfo *info)
189: {
190: Mat_Spooles *lu = (Mat_Spooles*)(F)->spptr;
191: PetscErrorCode ierr;
192: int rank,size,lookahead=0,sierr;
193: ChvManager *chvmanager ;
194: Chv *rootchv ;
195: Graph *graph ;
196: IVL *adjIVL;
197: DV *cumopsDV ;
198: double droptol=0.0,*opcounts,minops,cutoff;
199: #if !defined(PETSC_USE_COMPLEX)
200: double *val;
201: #endif
202: InpMtx *newA ;
203: PetscScalar *av, *bv;
204: PetscInt *ai, *aj, *bi,*bj, nz, *ajj, *bjj, *garray,
205: i,j,irow,jcol,countA,countB,jB,*row,*col,colA_start,jj;
206: PetscInt M=A->rmap->N,m=A->rmap->n,root,nedges,tagbound,lasttag;
207: Mat F_diag;
208:
210: MPI_Comm_size(((PetscObject)A)->comm,&size);
211: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
213: if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
214: /* get input parameters */
215: SetSpoolesOptions(A, &lu->options);
217: (F)->assembled = PETSC_TRUE;
218: if ((F)->factor == MAT_FACTOR_LU){
219: F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
220: } else {
221: F_diag = ((Mat_MPISBAIJ *)(F)->data)->A;
222: }
223: F_diag->assembled = PETSC_TRUE;
225: /* to be used by MatSolve() */
226: lu->mtxY = DenseMtx_new();
227: lu->mtxX = DenseMtx_new();
228: lu->scat = PETSC_NULL;
230: IVzero(20, lu->stats);
231: DVzero(20, lu->cpus);
233: lu->mtxA = InpMtx_new();
234: }
235:
236: /* copy A to Spooles' InpMtx object */
237: if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
238: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
239: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data;
240: Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data;
241: ai=aa->i; aj=aa->j; av=aa->a;
242: bi=bb->i; bj=bb->j; bv=bb->a;
243: lu->rstart = A->rmap->rstart;
244: nz = aa->nz + bb->nz;
245: garray = mat->garray;
246: } else { /* SPOOLES_SYMMETRIC */
247: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
248: Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
249: Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data;
250: ai=aa->i; aj=aa->j; av=aa->a;
251: bi=bb->i; bj=bb->j; bv=bb->a;
252: lu->rstart = A->rmap->rstart;
253: nz = aa->nz + bb->nz;
254: garray = mat->garray;
255: }
256:
257: InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0);
258: row = InpMtx_ivec1(lu->mtxA);
259: col = InpMtx_ivec2(lu->mtxA);
260: #if !defined(PETSC_USE_COMPLEX)
261: val = InpMtx_dvec(lu->mtxA);
262: #endif
264: jj = 0; irow = lu->rstart;
265: for ( i=0; i<m; i++ ) {
266: ajj = aj + ai[i]; /* ptr to the beginning of this row */
267: countA = ai[i+1] - ai[i];
268: countB = bi[i+1] - bi[i];
269: bjj = bj + bi[i];
270: jB = 0;
271:
272: if (lu->options.symflag == SPOOLES_NONSYMMETRIC ){
273: /* B part, smaller col index */
274: colA_start = lu->rstart + ajj[0]; /* the smallest col index for A */
275: for (j=0; j<countB; j++){
276: jcol = garray[bjj[j]];
277: if (jcol > colA_start) {
278: jB = j;
279: break;
280: }
281: row[jj] = irow; col[jj] = jcol;
282: #if !defined(PETSC_USE_COMPLEX)
283: val[jj++] = *bv++;
284: #else
285: InpMtx_inputComplexEntry(lu->mtxA,irow,jcol,PetscRealPart(*bv),PetscImaginaryPart(*bv));
286: bv++; jj++;
287: #endif
288: if (j==countB-1) jB = countB;
289: }
290: }
291: /* A part */
292: for (j=0; j<countA; j++){
293: row[jj] = irow; col[jj] = lu->rstart + ajj[j];
294: #if !defined(PETSC_USE_COMPLEX)
295: val[jj++] = *av++;
296: #else
297: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*av),PetscImaginaryPart(*av));
298: av++; jj++;
299: #endif
300: }
301: /* B part, larger col index */
302: for (j=jB; j<countB; j++){
303: row[jj] = irow; col[jj] = garray[bjj[j]];
304: #if !defined(PETSC_USE_COMPLEX)
305: val[jj++] = *bv++;
306: #else
307: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*bv),PetscImaginaryPart(*bv));
308: bv++; jj++;
309: #endif
310: }
311: irow++;
312: }
313: #if !defined(PETSC_USE_COMPLEX)
314: InpMtx_inputRealTriples(lu->mtxA, nz, row, col, val);
315: #endif
316: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
317: if ( lu->options.msglvl > 0 ) {
318: int err;
319: printf("[%d] input matrix\n",rank);
320: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n [%d] input matrix\n",rank);
321: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
322: err = fflush(lu->options.msgFile);
323: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
324: }
326: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
327: /*
328: find a low-fill ordering
329: (1) create the Graph object
330: (2) order the graph using multiple minimum degree
331: (3) find out who has the best ordering w.r.t. op count,
332: and broadcast that front tree object
333: */
334: graph = Graph_new();
335: adjIVL = InpMtx_MPI_fullAdjacency(lu->mtxA, lu->stats,
336: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
337: nedges = IVL_tsize(adjIVL);
338: Graph_init2(graph, 0, M, 0, nedges, M, nedges, adjIVL, NULL, NULL);
339: if ( lu->options.msglvl > 2 ) {
340: int err;
341: err = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");
342: Graph_writeForHumanEye(graph, lu->options.msgFile);
343: fflush(lu->options.msgFile);
344: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
345: }
347: switch (lu->options.ordering) {
348: case 0:
349: lu->frontETree = orderViaBestOfNDandMS(graph,
350: lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
351: lu->options.seed + rank, lu->options.msglvl, lu->options.msgFile); break;
352: case 1:
353: lu->frontETree = orderViaMMD(graph,lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
354: case 2:
355: lu->frontETree = orderViaMS(graph, lu->options.maxdomainsize,
356: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
357: case 3:
358: lu->frontETree = orderViaND(graph, lu->options.maxdomainsize,
359: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
360: default:
361: SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering");
362: }
364: Graph_free(graph);
365: if ( lu->options.msglvl > 2 ) {
366: int err;
367: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");
368: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
369: err = fflush(lu->options.msgFile);
370: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
371: }
373: opcounts = DVinit(size, 0.0);
374: opcounts[rank] = ETree_nFactorOps(lu->frontETree, lu->options.typeflag, lu->options.symflag);
375: MPI_Allgather((void*) &opcounts[rank], 1, MPI_DOUBLE,
376: (void*) opcounts, 1, MPI_DOUBLE, ((PetscObject)A)->comm);
377: minops = DVmin(size, opcounts, &root);
378: DVfree(opcounts);
379:
380: lu->frontETree = ETree_MPI_Bcast(lu->frontETree, root,
381: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
382: if ( lu->options.msglvl > 2 ) {
383: int err;
384: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n best front tree");
385: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
386: err = fflush(lu->options.msgFile);
387: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
388: }
389:
390: /* get the permutations, permute the front tree, permute the matrix */
391: lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree);
392: lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree);
394: ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);
396: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
397:
398: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
400: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
401: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
403: /* generate the owners map IV object and the map from vertices to owners */
404: cutoff = 1./(2*size);
405: cumopsDV = DV_new();
406: DV_init(cumopsDV, size, NULL);
407: lu->ownersIV = ETree_ddMap(lu->frontETree,
408: lu->options.typeflag, lu->options.symflag, cumopsDV, cutoff);
409: DV_free(cumopsDV);
410: lu->vtxmapIV = IV_new();
411: IV_init(lu->vtxmapIV, M, NULL);
412: IVgather(M, IV_entries(lu->vtxmapIV),
413: IV_entries(lu->ownersIV), ETree_vtxToFront(lu->frontETree));
414: if ( lu->options.msglvl > 2 ) {
415: int err;
417: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from fronts to owning processes");
418: IV_writeForHumanEye(lu->ownersIV, lu->options.msgFile);
419: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from vertices to owning processes");
420: IV_writeForHumanEye(lu->vtxmapIV, lu->options.msgFile);
421: err = fflush(lu->options.msgFile);
422: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
423: }
425: /* redistribute the matrix */
426: lu->firsttag = 0 ;
427: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
428: lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
429: lu->firsttag += size ;
431: InpMtx_free(lu->mtxA);
432: lu->mtxA = newA ;
433: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
434: if ( lu->options.msglvl > 2 ) {
435: int err;
436: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
437: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
438: err = fflush(lu->options.msgFile);
439: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
440: }
441:
442: /* compute the symbolic factorization */
443: lu->symbfacIVL = SymbFac_MPI_initFromInpMtx(lu->frontETree, lu->ownersIV, lu->mtxA,
444: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
445: lu->firsttag += lu->frontETree->nfront ;
446: if ( lu->options.msglvl > 2 ) {
447: int err;
448: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n local symbolic factorization");
449: IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile);
450: err = fflush(lu->options.msgFile);
451: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
452: }
454: lu->mtxmanager = SubMtxManager_new();
455: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
456: lu->frontmtx = FrontMtx_new();
458: } else { /* new num factorization using previously computed symbolic factor */
459: if (lu->options.pivotingflag) { /* different FrontMtx is required */
460: FrontMtx_free(lu->frontmtx);
461: lu->frontmtx = FrontMtx_new();
462: }
464: SubMtxManager_free(lu->mtxmanager);
465: lu->mtxmanager = SubMtxManager_new();
466: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
468: /* permute mtxA */
469: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
470: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
471:
472: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
473: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
475: /* redistribute the matrix */
476: MPI_Barrier(((PetscObject)A)->comm);
477: lu->firsttag = 0;
478: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
479: lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
480: lu->firsttag += size ;
482: InpMtx_free(lu->mtxA);
483: lu->mtxA = newA ;
484: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
485: if ( lu->options.msglvl > 2 ) {
486: int err;
487: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
488: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
489: err = fflush(lu->options.msgFile);
490: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
491: }
492: } /* end of if ( lu->flg == DIFFERENT_NONZERO_PATTERN) */
494: FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
495: FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, rank,
496: lu->ownersIV, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);
498: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
499: if ( lu->options.patchAndGoFlag == 1 ) {
500: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
501: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
502: lu->options.storeids, lu->options.storevalues);
503: } else if ( lu->options.patchAndGoFlag == 2 ) {
504: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
505: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
506: lu->options.storeids, lu->options.storevalues);
507: }
508: }
510: /* numerical factorization */
511: chvmanager = ChvManager_new();
512: ChvManager_init(chvmanager, NO_LOCK, 0);
514: tagbound = maxTagMPI(lu->comm_spooles);
515: lasttag = lu->firsttag + 3*lu->frontETree->nfront + 2;
516: /* if(!rank) PetscPrintf(PETSC_COMM_SELF,"\n firsttag: %d, nfront: %d\n",lu->firsttag, lu->frontETree->nfront);*/
517: if ( lasttag > tagbound ) {
518: SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_factorInpMtx(), tag range is [%d,%d], tag_bound = %d",\
519: lu->firsttag, lasttag, tagbound);
520: }
521: rootchv = FrontMtx_MPI_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, droptol,
522: chvmanager, lu->ownersIV, lookahead, &sierr, lu->cpus,
523: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
524: ChvManager_free(chvmanager);
525: lu->firsttag = lasttag;
526: if ( lu->options.msglvl > 2 ) {
527: int err;
528: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization");
529: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
530: err = fflush(lu->options.msgFile);
531: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
532: }
534: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
535: if ( lu->options.patchAndGoFlag == 1 ) {
536: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
537: if (lu->options.msglvl > 0 ){
538: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
539: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
540: }
541: }
542: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
543: } else if ( lu->options.patchAndGoFlag == 2 ) {
544: if (lu->options.msglvl > 0 ){
545: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
546: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
547: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
548: }
549: if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
550: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");
551: DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile);
552: }
553: }
554: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
555: }
556: }
557: if ( sierr >= 0 ) SETERRQ2(PETSC_ERR_LIB,"\n proc %d : factorization error at front %d", rank, sierr);
558:
559: /* post-process the factorization and split
560: the factor matrices into submatrices */
561: lasttag = lu->firsttag + 5*size;
562: if ( lasttag > tagbound ) {
563: SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_postProcess(), tag range is [%d,%d], tag_bound = %d",\
564: lu->firsttag, lasttag, tagbound);
565: }
566: FrontMtx_MPI_postProcess(lu->frontmtx, lu->ownersIV, lu->stats, lu->options.msglvl,
567: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
568: lu->firsttag += 5*size ;
569: if ( lu->options.msglvl > 2 ) {
570: int err;
571: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after post-processing");
572: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
573: err = fflush(lu->options.msgFile);
574: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
575: }
576:
577: /* create the solve map object */
578: lu->solvemap = SolveMap_new();
579: SolveMap_ddMap(lu->solvemap, lu->frontmtx->symmetryflag,
580: FrontMtx_upperBlockIVL(lu->frontmtx),
581: FrontMtx_lowerBlockIVL(lu->frontmtx),
582: size, lu->ownersIV, FrontMtx_frontTree(lu->frontmtx),
583: lu->options.seed, lu->options.msglvl, lu->options.msgFile);
584: if ( lu->options.msglvl > 2 ) {
585: int err;
586: SolveMap_writeForHumanEye(lu->solvemap, lu->options.msgFile);
587: err = fflush(lu->options.msgFile);
588: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
589: }
591: /* redistribute the submatrices of the factors */
592: FrontMtx_MPI_split(lu->frontmtx, lu->solvemap,
593: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
594: if ( lu->options.msglvl > 2 ) {
595: int err;
596: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after split");
597: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
598: err = fflush(lu->options.msgFile);
599: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
600: }
602: /* create a solution DenseMtx object */
603: lu->ownedColumnsIV = FrontMtx_ownedColumnsIV(lu->frontmtx, rank, lu->ownersIV,
604: lu->options.msglvl, lu->options.msgFile);
605: lu->nmycol = IV_size(lu->ownedColumnsIV);
606: if ( lu->nmycol > 0) {
607: DenseMtx_init(lu->mtxX, lu->options.typeflag, 0, 0, lu->nmycol, 1, 1, lu->nmycol);
608: /* get pointers rowindX and entX */
609: DenseMtx_rowIndices(lu->mtxX, &lu->nmycol, &lu->rowindX);
610: lu->entX = DenseMtx_entries(lu->mtxX);
611: } else { /* lu->nmycol == 0 */
612: lu->entX = 0;
613: lu->rowindX = 0;
614: }
616: if ( lu->scat ){
617: VecDestroy(lu->vec_spooles);
618: ISDestroy(lu->iden);
619: ISDestroy(lu->is_petsc);
620: VecScatterDestroy(lu->scat);
621: }
622: lu->scat = PETSC_NULL;
623: lu->flg = SAME_NONZERO_PATTERN;
624: F->ops->solve = MatSolve_MPISpooles;
626: lu->CleanUpSpooles = PETSC_TRUE;
627: return(0);
628: }
630: /*MC
631: MATMPIAIJSPOOLES - MATMPIAIJSPOOLES = "mpiaijspooles" - A matrix type providing direct solvers (LU) for distributed matrices
632: via the external package Spooles.
634: If MPIAIJSPOOLES is installed (see the manual for
635: instructions on how to declare the existence of external packages),
636: a matrix type can be constructed which invokes SPOOLES solvers.
637: After calling MatCreate(...,A), simply call MatSetType(A,MATMPIAIJSPOOLES), then
638: optionally call MatMPIAIJSetPreallocation() etc. DO NOT
639: call MatCreateMPIAIJ() directly or the preallocation information will be LOST!
641: This matrix inherits from MATMPIAIJ. As a result, MatMPIAIJSetPreallocation() is
642: supported for this matrix type. One can also call MatConvert() for an inplace conversion to or from
643: the MATMPIAIJ type without data copy AFTER the matrix values have been set.
645: Consult Spooles documentation for more information about the options database keys below.
647: Options Database Keys:
648: + -mat_type mpiaijspooles - sets the matrix type to "mpiaijspooles" during a call to MatSetFromOptions()
649: . -mat_spooles_tau <tau> - upper bound on the magnitude of the largest element in L or U
650: . -mat_spooles_seed <seed> - random number seed used for ordering
651: . -mat_spooles_msglvl <msglvl> - message output level
652: . -mat_spooles_ordering <BestOfNDandMS,MMD,MS,ND> - ordering used
653: . -mat_spooles_maxdomainsize <n> - maximum subgraph size used by Spooles orderings
654: . -mat_spooles_maxzeros <n> - maximum number of zeros inside a supernode
655: . -mat_spooles_maxsize <n> - maximum size of a supernode
656: . -mat_spooles_FrontMtxInfo <true,false> - print Spooles information about the computed factorization
657: . -mat_spooles_symmetryflag <0,1,2> - 0: SPOOLES_SYMMETRIC, 1: SPOOLES_HERMITIAN, 2: SPOOLES_NONSYMMETRIC
658: . -mat_spooles_patchAndGoFlag <0,1,2> - 0: no patch, 1: use PatchAndGo strategy 1, 2: use PatchAndGo strategy 2
659: . -mat_spooles_toosmall <dt> - drop tolerance for PatchAndGo strategy 1
660: . -mat_spooles_storeids <bool integer> - if nonzero, stores row and col numbers where patches were applied in an IV object
661: . -mat_spooles_fudge <delta> - fudge factor for rescaling diagonals with PatchAndGo strategy 2
662: - -mat_spooles_storevalues <bool integer> - if nonzero and PatchAndGo strategy 2 is used, store change in diagonal value in a DV object
664: Level: beginner
666: .seealso: PCLU
667: M*/