Actual source code: blockmat.c

#define PETSCMAT_DLL

/*
   This provides a matrix that consists of Mats
*/

#include "private/matimpl.h"
#include "../src/mat/impls/baij/seq/baij.h"    /* use the common AIJ data-structure */
#include "petscksp.h"

#define CHUNKSIZE  15

typedef struct {
  SEQAIJHEADER(Mat);
  SEQBAIJHEADER;
  Mat  *diags;

  Vec  left,right,middle,workb;   /* dummy vectors to perform local parts of product */
} Mat_BlockMat;
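
/*
   Layout sketch (illustrative values, not from the original source): the
   SEQAIJHEADER(Mat) macro provides the usual CSR arrays i, j, and a, except
   that each entry of a[] is a Mat (a bs x bs sequential matrix) rather than
   a scalar.  For a 2x2 block matrix with nonzero blocks A00, A01, A11:

       i[] = {0, 2, 3}         block-row pointers
       j[] = {0, 1, 1}         block-column indices
       a[] = {A00, A01, A11}   one Mat per nonzero block

   The routines below walk this structure exactly like a scalar CSR matrix,
   calling MatMultAdd()/MatSolve() on the blocks; left, right, and middle are
   bs-long work vectors pointed at segments of the input/output arrays with
   VecPlaceArray().
*/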

PetscErrorCode MatRelax_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscScalar       *x;
  const Mat         *v = a->a;
  const PetscScalar *b;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                row,col;
  MatFactorInfo     info;
  Vec               left = a->left,right = a->right, middle = a->middle;
  Mat               *diag;

  its = its*lits;
  if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
  if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
  if (fshift) SETERRQ(PETSC_ERR_SUP,"No support yet for fshift");
  if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP))
    SETERRQ(PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");

  if (!a->diags) {
    PetscMalloc(mbs*sizeof(Mat),&a->diags);
    MatFactorInfoInitialize(&info);
    for (i=0; i<mbs; i++) {
      MatGetOrdering(a->a[a->diag[i]], MATORDERING_ND,&row,&col);
      /* create the factor matrix before the symbolic factorization fills it in;
         a->diags[i] would otherwise be used uninitialized */
      MatGetFactor(a->a[a->diag[i]],MAT_SOLVER_PETSC,MAT_FACTOR_CHOLESKY,&a->diags[i]);
      MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info);
      MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
      ISDestroy(row);
      ISDestroy(col);
    }
    VecDuplicate(bb,&a->workb);
  }
  diag = a->diags;

  VecSet(xx,0.0);
  VecGetArray(xx,&x);
  /* copy right hand side because it must be modified during iteration */
  VecCopy(bb,a->workb);
  VecGetArray(a->workb,(PetscScalar**)&b);

  /* need to add code for when initial guess is zero, see MatRelax_SeqAIJ */
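  /*
     Each sweep below performs one block Gauss-Seidel update per block row i,

         x_i <- inv(A_ii) * ( b_i - sum_{j != i} A_ij * x_j ),

     solving with the Cholesky factor of each diagonal block.  Because only
     the upper triangular blocks are stored, the forward sweep also subtracts
     A_ij^T * x_i from the pending right-hand-side entries b_j (the "adjust
     right hand side" loop), which supplies the lower triangular contributions.
  */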
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){

      for (i=0; i<mbs; i++) {
        n   = a->i[i+1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        VecSet(left,0.0);
        for (j=0; j<n; j++) {
          VecPlaceArray(right,x + idx[j]*bs);
          MatMultAdd(v[j],right,left,left);
          VecResetArray(right);
        }
        VecPlaceArray(right,b + i*bs);
        VecAYPX(left,-1.0,right);
        VecResetArray(right);

        VecPlaceArray(right,x + i*bs);
        MatSolve(diag[i],left,right);

        /* now adjust right hand side, see MatRelax_SeqSBAIJ */
        for (j=0; j<n; j++) {
          MatMultTranspose(v[j],right,left);
          VecPlaceArray(middle,b + idx[j]*bs);
          VecAXPY(middle,-1.0,left);
          VecResetArray(middle);
        }
        VecResetArray(right);
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){

      for (i=mbs-1; i>=0; i--) {
        n   = a->i[i+1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        VecSet(left,0.0);
        for (j=0; j<n; j++) {
          VecPlaceArray(right,x + idx[j]*bs);
          MatMultAdd(v[j],right,left,left);
          VecResetArray(right);
        }
        VecPlaceArray(right,b + i*bs);
        VecAYPX(left,-1.0,right);
        VecResetArray(right);

        VecPlaceArray(right,x + i*bs);
        MatSolve(diag[i],left,right);
        VecResetArray(right);
      }
    }
  }
  VecRestoreArray(xx,&x);
  VecRestoreArray(a->workb,(PetscScalar**)&b);
  return(0);
}

PetscErrorCode MatRelax_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscScalar       *x;
  const Mat         *v = a->a;
  const PetscScalar *b;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                row,col;
  MatFactorInfo     info;
  Vec               left = a->left,right = a->right;
  Mat               *diag;

  its = its*lits;
  if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
  if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
  if (fshift) SETERRQ(PETSC_ERR_SUP,"No support yet for fshift");

  if (!a->diags) {
    PetscMalloc(mbs*sizeof(Mat),&a->diags);
    MatFactorInfoInitialize(&info);
    for (i=0; i<mbs; i++) {
      MatGetOrdering(a->a[a->diag[i]], MATORDERING_ND,&row,&col);
      /* create the factor matrix before the symbolic factorization fills it in;
         a->diags[i] would otherwise be used uninitialized */
      MatGetFactor(a->a[a->diag[i]],MAT_SOLVER_PETSC,MAT_FACTOR_LU,&a->diags[i]);
      MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info);
      MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
      ISDestroy(row);
      ISDestroy(col);
    }
  }
  diag = a->diags;

  VecSet(xx,0.0);
  VecGetArray(xx,&x);
  VecGetArray(bb,(PetscScalar**)&b);

  /* need to add code for when initial guess is zero, see MatRelax_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){

      for (i=0; i<mbs; i++) {
        n   = a->i[i+1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        VecSet(left,0.0);
        for (j=0; j<n; j++) {
          if (idx[j] != i) {
            VecPlaceArray(right,x + idx[j]*bs);
            MatMultAdd(v[j],right,left,left);
            VecResetArray(right);
          }
        }
        VecPlaceArray(right,b + i*bs);
        VecAYPX(left,-1.0,right);
        VecResetArray(right);

        VecPlaceArray(right,x + i*bs);
        MatSolve(diag[i],left,right);
        VecResetArray(right);
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){

      for (i=mbs-1; i>=0; i--) {
        n   = a->i[i+1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        VecSet(left,0.0);
        for (j=0; j<n; j++) {
          if (idx[j] != i) {
            VecPlaceArray(right,x + idx[j]*bs);
            MatMultAdd(v[j],right,left,left);
            VecResetArray(right);
          }
        }
        VecPlaceArray(right,b + i*bs);
        VecAYPX(left,-1.0,right);
        VecResetArray(right);

        VecPlaceArray(right,x + i*bs);
        MatSolve(diag[i],left,right);
        VecResetArray(right);
      }
    }
  }
  VecRestoreArray(xx,&x);
  VecRestoreArray(bb,(PetscScalar**)&b);
  return(0);
}

PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
{
  Mat_BlockMat *a = (Mat_BlockMat*)A->data;
  PetscInt     *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
  PetscInt     *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt     *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;

  PetscInt     ridx,cidx;
  PetscTruth   roworiented=a->roworiented;
  MatScalar    value;
  Mat          *ap,*aa = a->a;

  for (k=0; k<m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row/bs;
    if (row < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
#endif
    rp   = aj + ai[brow];
    ap   = aa + ai[brow];
    rmax = imax[brow];
    nrow = ailen[brow];
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
#endif
      col = in[l]; bcol = col/bs;
      if (A->symmetric && brow > bcol) continue;
      ridx = row % bs; cidx = col % bs;
      if (roworiented) {
        value = v[l + k*n];
      } else {
        value = v[k + l*m];
      }
      if (col <= lastcol) low = 0; else high = nrow;
      lastcol = col;
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > bcol) high = t;
        else              low  = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) {
          goto noinsert1;
        }
      }
      if (nonew == 1) goto noinsert1;
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
      MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
      N = nrow++ - 1; high++;
      /* shift up all the later entries in this row */
      for (ii=N; ii>=i; ii--) {
        rp[ii+1] = rp[ii];
        ap[ii+1] = ap[ii];
      }
      if (N>=i) ap[i] = 0;
      rp[i] = bcol;
      a->nz++;
noinsert1:;
      if (!*(ap+i)) {
        MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,0,ap+i);
      }
      MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is);
      low = i;
    }
    ailen[brow] = nrow;
  }
  A->same_nonzero = PETSC_FALSE;
  return(0);
}

PetscErrorCode MatLoad_BlockMat(PetscViewer viewer, const MatType type,Mat *A)
{
  PetscErrorCode    ierr;
  Mat               tmpA;
  PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
  const PetscInt    *cols;
  const PetscScalar *values;
  PetscTruth        flg,notdone;
  Mat_SeqAIJ        *a;
  Mat_BlockMat      *amat;

  MatLoad_SeqAIJ(viewer,MATSEQAIJ,&tmpA);

  MatGetLocalSize(tmpA,&m,&n);
  PetscOptionsBegin(PETSC_COMM_SELF,PETSC_NULL,"Options for loading BlockMat matrix 1","Mat");
  PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
  PetscOptionsName("-matload_symmetric","Store the matrix as symmetric","MatLoad",&flg);
  PetscOptionsEnd();

  /* Determine number of nonzero blocks for each block row */
  a   = (Mat_SeqAIJ*) tmpA->data;
  mbs = m/bs;
  PetscMalloc3(mbs,PetscInt,&lens,bs,PetscInt*,&ii,bs,PetscInt,&ilens);
  PetscMemzero(lens,mbs*sizeof(PetscInt));

  for (i=0; i<mbs; i++) {
    for (j=0; j<bs; j++) {
      ii[j]    = a->j + a->i[i*bs + j];
      ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
    }

    currentcol = -1;
    notdone    = PETSC_TRUE;
    while (PETSC_TRUE) {
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      for (j=0; j<bs; j++) {
        while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
          ii[j]++;
          ilens[j]--;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol,ii[j][0]/bs);
        }
      }
      if (!notdone) break;
      if (!flg || (nextcol >= i)) lens[i]++;
      currentcol = nextcol;
    }
  }

  MatCreateBlockMat(PETSC_COMM_SELF,m,n,bs,0,lens,A);
  if (flg) {
    MatSetOption(*A,MAT_SYMMETRIC,PETSC_TRUE);
  }
  amat = (Mat_BlockMat*)(*A)->data;

  /* preallocate the submatrices */
  PetscMalloc(bs*sizeof(PetscInt),&llens);
  for (i=0; i<mbs; i++) { /* loops for block rows */
    for (j=0; j<bs; j++) {
      ii[j]    = a->j + a->i[i*bs + j];
      ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
    }

    currentcol = 1000000000;
    for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
      if (ilens[j] > 0) {
        currentcol = PetscMin(currentcol,ii[j][0]/bs);
      }
    }

    notdone = PETSC_TRUE;
    while (PETSC_TRUE) { /* loops over blocks in block row */

      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      PetscMemzero(llens,bs*sizeof(PetscInt));
      for (j=0; j<bs; j++) { /* loop over rows in block */
        while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
          ii[j]++;
          ilens[j]--;
          llens[j]++;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol,ii[j][0]/bs);
        }
      }
      if (cnt >= amat->maxnz) SETERRQ1(PETSC_ERR_PLIB,"Number of blocks found greater than expected %D",cnt);
      if (!flg || currentcol >= i) {
        amat->j[cnt] = currentcol;
        MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++);
      }

      if (!notdone) break;
      currentcol = nextcol;
    }
    amat->ilen[i] = lens[i];
  }
  CHKMEMQ;

  PetscFree3(lens,ii,ilens);
  PetscFree(llens);

  /* copy over the matrix, one row at a time */
  for (i=0; i<m; i++) {
    MatGetRow(tmpA,i,&ncols,&cols,&values);
    MatSetValues(*A,1,&i,ncols,cols,values,INSERT_VALUES);
    MatRestoreRow(tmpA,i,&ncols,&cols,&values);
  }
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return(0);
}
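
/*
   Usage sketch (illustrative, not part of the original source): given a
   matrix stored in a PETSc binary file, the options read above let it be
   loaded in blocked form from the command line, e.g.

       ./app -matload_block_size 4 -matload_symmetric

   where the application calls MatLoad() with a binary PetscViewer and the
   MATBLOCKMAT type; -matload_symmetric keeps only the upper triangular
   blocks and switches on the symmetric kernels via MatSetOption().
*/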

PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscErrorCode    ierr;
  const char        *name;
  PetscViewerFormat format;

  PetscObjectGetName((PetscObject)A,&name);
  PetscViewerGetFormat(viewer,&format);
  if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
    PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %D \n",a->nz);
    if (A->symmetric) {
      PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n");
    }
  }
  return(0);
}

PetscErrorCode MatDestroy_BlockMat(Mat mat)
{
  Mat_BlockMat *bmat = (Mat_BlockMat*)mat->data;
  PetscInt     i;

  if (bmat->right) {
    VecDestroy(bmat->right);
  }
  if (bmat->left) {
    VecDestroy(bmat->left);
  }
  if (bmat->middle) {
    VecDestroy(bmat->middle);
  }
  if (bmat->workb) {
    VecDestroy(bmat->workb);
  }
  if (bmat->diags) {
    for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
      if (bmat->diags[i]) {MatDestroy(bmat->diags[i]);}
    }
  }
  if (bmat->a) {
    for (i=0; i<bmat->nz; i++) {
      if (bmat->a[i]) {MatDestroy(bmat->a[i]);}
    }
  }
  MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
  PetscFree(bmat);
  return(0);
}

PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
{
  Mat_BlockMat *bmat = (Mat_BlockMat*)A->data;
  PetscScalar  *xx,*yy;
  PetscInt     *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
  Mat          *aa;

  CHKMEMQ;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  VecGetArray(x,&xx);

  VecSet(y,0.0);
  VecGetArray(y,&yy);
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    VecPlaceArray(bmat->left,yy + bs*i);
    n = ii[i+1] - jrow;
    for (j=0; j<n; j++) {
      VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
      MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
      VecResetArray(bmat->right);
      jrow++;
    }
    VecResetArray(bmat->left);
  }
  VecRestoreArray(x,&xx);
  VecRestoreArray(y,&yy);
  CHKMEMQ;
  return(0);
}

PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
{
  Mat_BlockMat *bmat = (Mat_BlockMat*)A->data;
  PetscScalar  *xx,*yy;
  PetscInt     *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
  Mat          *aa;

  CHKMEMQ;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  VecGetArray(x,&xx);

  VecSet(y,0.0);
  VecGetArray(y,&yy);
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    VecPlaceArray(bmat->left,yy + bs*i);
    VecPlaceArray(bmat->middle,xx + bs*i);
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    if (aj[jrow] == i) {
      VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
      MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
      VecResetArray(bmat->right);
      jrow++;
      n--;
    }
    for (j=0; j<n; j++) {
      VecPlaceArray(bmat->right,xx + bs*aj[jrow]);            /* upper triangular part */
      MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
      VecResetArray(bmat->right);

      VecPlaceArray(bmat->right,yy + bs*aj[jrow]);            /* lower triangular part */
      MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right);
      VecResetArray(bmat->right);
      jrow++;
    }
    VecResetArray(bmat->left);
    VecResetArray(bmat->middle);
  }
  VecRestoreArray(x,&xx);
  VecRestoreArray(y,&yy);
  CHKMEMQ;
  return(0);
}

/* The following three operations are not yet implemented; they return
   immediately without computing anything. */
PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  return(0);
}

PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
{
  return(0);
}

PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  return(0);
}

/*
     Adds diagonal pointers to sparse matrix structure.
*/
PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
{
  Mat_BlockMat *a = (Mat_BlockMat*)A->data;
  PetscInt     i,j,mbs = A->rmap->n/A->rmap->bs;

  if (!a->diag) {
    PetscMalloc(mbs*sizeof(PetscInt),&a->diag);
  }
  for (i=0; i<mbs; i++) {
    a->diag[i] = a->i[i+1];
    for (j=a->i[i]; j<a->i[i+1]; j++) {
      if (a->j[j] == i) {
        a->diag[i] = j;
        break;
      }
    }
  }
  return(0);
}

PetscErrorCode MatGetSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,PetscInt csize,MatReuse scall,Mat *B)
{
  Mat_BlockMat *a = (Mat_BlockMat*)A->data;
  Mat_SeqAIJ   *c;

  PetscInt     i,k,first,step,lensi,nrows,ncols;
  PetscInt     *j_new,*i_new,*aj = a->j,*ai = a->i,ii,*ailen = a->ilen;
  PetscScalar  *a_new;
  Mat          C,*aa = a->a;
  PetscTruth   stride,equal;

  ISEqual(isrow,iscol,&equal);
  if (!equal) SETERRQ(PETSC_ERR_SUP,"Only for identical column and row indices");
  ISStride(iscol,&stride);
  if (!stride) SETERRQ(PETSC_ERR_SUP,"Only for stride indices");
  ISStrideGetInfo(iscol,&first,&step);
  if (step != A->rmap->bs) SETERRQ(PETSC_ERR_SUP,"Can only select one entry from each block");

  ISGetLocalSize(isrow,&nrows);
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols,n_rows;
    C = *B;
    MatGetSize(C,&n_rows,&n_cols);
    if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
    MatZeroEntries(C);
  } else {
    MatCreate(((PetscObject)A)->comm,&C);
    MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);
    if (A->symmetric) {
      MatSetType(C,MATSEQSBAIJ);
    } else {
      MatSetType(C,MATSEQAIJ);
    }
    MatSeqAIJSetPreallocation(C,0,ailen);
    MatSeqSBAIJSetPreallocation(C,1,0,ailen);
  }
  c = (Mat_SeqAIJ*)C->data;

  /* loop over rows inserting into submatrix */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i=0; i<nrows; i++) {
    ii    = ai[i];
    lensi = ailen[i];
    for (k=0; k<lensi; k++) {
      *j_new++ = *aj++;
      MatGetValue(*aa++,first,first,a_new++);
    }
    i_new[i+1] = i_new[i] + lensi;
    c->ilen[i] = lensi;
  }

  MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
  *B = C;
  return(0);
}

PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
{
  Mat_BlockMat *a = (Mat_BlockMat*)A->data;

  PetscInt     fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
  PetscInt     m = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
  Mat          *aa = a->a,*ap;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i=1; i<m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it */
    fshift += imax[i-1] - ailen[i-1];
    rmax    = PetscMax(rmax,ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j=0; j<N; j++) {
        ip[j-fshift] = ip[j];
        ap[j-fshift] = ap[j];
      }
    }
    ai[i] = ai[i-1] + ailen[i-1];
  }
  if (m) {
    fshift += imax[m-1] - ailen[m-1];
    ai[m]   = ai[m-1] + ailen[m-1];
  }
  /* reset ilen and imax for each row */
  for (i=0; i<m; i++) {
    ailen[i] = imax[i] = ai[i+1] - ai[i];
  }
  a->nz = ai[m];
  for (i=0; i<a->nz; i++) {
#if defined(PETSC_USE_DEBUG)
    if (!aa[i]) SETERRQ3(PETSC_ERR_PLIB,"Null matrix at location %D column %D nz %D",i,aj[i],a->nz);
#endif
    MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY);
  }
  CHKMEMQ;
  PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz);
  PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);
  PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;

  A->same_nonzero = PETSC_TRUE;
  MatMarkDiagonal_BlockMat(A);
  return(0);
}

PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscTruth flg)
{
  if (opt == MAT_SYMMETRIC && flg) {
    A->ops->relax = MatRelax_BlockMat_Symmetric;
    A->ops->mult  = MatMult_BlockMat_Symmetric;
  } else {
    PetscInfo1(A,"Unused matrix option %s\n",MatOptions[opt]);
  }
  return(0);
}

static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
       0,
       0,
       MatMult_BlockMat,
/* 4*/ MatMultAdd_BlockMat,
       MatMultTranspose_BlockMat,
       MatMultTransposeAdd_BlockMat,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_BlockMat,
       0,
/*15*/ 0,
       0,
       0,
       0,
       0,
/*20*/ 0,
       MatAssemblyEnd_BlockMat,
       0,
       MatSetOption_BlockMat,
       0,
/*25*/ 0,
       0,
       0,
       0,
       0,
/*30*/ 0,
       0,
       0,
       0,
       0,
/*35*/ 0,
       0,
       0,
       0,
       0,
/*40*/ 0,
       0,
       0,
       0,
       0,
/*45*/ 0,
       0,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_BlockMat,
       MatDestroy_BlockMat,
       MatView_BlockMat,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_BlockMat,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

/*@C
   MatBlockMatSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameter nz
   (or the array nnz).  By setting these parameters accurately, performance
   during matrix assembly can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - The matrix
.  bs - size of each block in matrix
.  nz - number of nonzeros per block row (same for all rows)
-  nnz - array containing the number of nonzeros in the various block rows
         (possibly different for each row) or PETSC_NULL

   Notes:
     If nnz is given then nz is ignored

     Specify the preallocated storage with either nz or nnz (not both).
     Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
     allocation.  For large problems you MUST preallocate memory or you
     will get TERRIBLE performance, see the users' manual chapter on matrices.

   Level: intermediate

.seealso: MatCreate(), MatCreateBlockMat(), MatSetValues()

@*/
PetscErrorCode MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);

  PetscObjectQueryFunction((PetscObject)B,"MatBlockMatSetPreallocation_C",(void (**)(void))&f);
  if (f) {
    (*f)(B,bs,nz,nnz);
  }
  return(0);
}
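
/*
   Usage sketch (illustrative, not part of the original source): preallocate a
   100 x 100 matrix of 10 x 10 blocks with at most 3 nonzero blocks per block
   row.  The nnz array is required, since nz-only preallocation is rejected by
   the type-specific routine below.

       Mat      B;
       PetscInt i,nnz[10];
       for (i=0; i<10; i++) nnz[i] = 3;
       MatCreate(PETSC_COMM_SELF,&B);
       MatSetSizes(B,100,100,PETSC_DETERMINE,PETSC_DETERMINE);
       MatSetType(B,MATBLOCKMAT);
       MatBlockMatSetPreallocation(B,10,0,nnz);
*/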

PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
{
  Mat_BlockMat *bmat = (Mat_BlockMat*)A->data;
  PetscInt     i;

  if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Block size given %D must be greater than zero",bs);
  if (A->rmap->n % bs) SETERRQ2(PETSC_ERR_ARG_INCOMP,"Blocksize %D does not divide number of rows %D",bs,A->rmap->n);
  if (A->cmap->n % bs) SETERRQ2(PETSC_ERR_ARG_INCOMP,"Blocksize %D does not divide number of columns %D",bs,A->cmap->n);
  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
  if (nnz) {
    for (i=0; i<A->rmap->n/bs; i++) {
      if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
      if (nnz[i] > A->cmap->n/bs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %d value %d rowlength %d",i,nnz[i],A->cmap->n/bs);
    }
  }
  A->rmap->bs = A->cmap->bs = bs;
  bmat->mbs   = A->rmap->n/bs;

  VecCreateSeqWithArray(PETSC_COMM_SELF,bs,PETSC_NULL,&bmat->right);
  VecCreateSeqWithArray(PETSC_COMM_SELF,bs,PETSC_NULL,&bmat->middle);
  VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);

  if (!bmat->imax) {
    PetscMalloc2(A->rmap->n,PetscInt,&bmat->imax,A->rmap->n,PetscInt,&bmat->ilen);
    PetscLogObjectMemory(A,2*A->rmap->n*sizeof(PetscInt));
  }
  if (nnz) {
    nz = 0;
    for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
      bmat->imax[i] = nnz[i];
      nz           += nnz[i];
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Currently requires block row by row preallocation");
  }

  /* bmat->ilen will count nonzeros in each row so far. */
  for (i=0; i<bmat->mbs; i++) { bmat->ilen[i] = 0;}

  /* allocate the matrix space */
  MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
  PetscMalloc3(nz,Mat,&bmat->a,nz,PetscInt,&bmat->j,A->rmap->n+1,PetscInt,&bmat->i);
  PetscLogObjectMemory(A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));
  bmat->i[0] = 0;
  for (i=1; i<bmat->mbs+1; i++) {
    bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
  }
  bmat->singlemalloc = PETSC_TRUE;
  bmat->free_a       = PETSC_TRUE;
  bmat->free_ij      = PETSC_TRUE;

  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;

  return(0);
}

/*MC
   MATBLOCKMAT - A matrix defined by a set of Mat objects; it represents a
      sparse block matrix whose blocks are (usually) themselves sparse.

   Level: advanced

.seealso: MatCreateBlockMat()

M*/

PetscErrorCode MatCreate_BlockMat(Mat A)
{
  Mat_BlockMat *b;

  PetscNewLog(A,Mat_BlockMat,&b);
  A->data = (void*)b;
  PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));

  PetscMapSetBlockSize(A->rmap,1);
  PetscMapSetBlockSize(A->cmap,1);
  PetscMapSetUp(A->rmap);
  PetscMapSetUp(A->cmap);

  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_FALSE;
  PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT);

  PetscObjectComposeFunctionDynamic((PetscObject)A,"MatBlockMatSetPreallocation_C",
                                    "MatBlockMatSetPreallocation_BlockMat",
                                     MatBlockMatSetPreallocation_BlockMat);

  return(0);
}

/*@C
   MatCreateBlockMat - Creates a new matrix based on sparse Mat storage

  Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of rows
.  n - number of columns
.  bs - size of each submatrix
.  nz - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
-  nnz - expected number of nonzero blocks per block row if known (use PETSC_NULL otherwise)

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
   PETSc requires that matrices and vectors being used for certain
   operations are partitioned accordingly.  For example, when
   creating a bmat matrix, A, that supports parallel matrix-vector
   products using MatMult(A,x,y) the user should set the number
   of local matrix rows to be the number of local elements of the
   corresponding result vector, y.  Note that this information is
   required for use of the matrix interface routines, even though
   the bmat matrix may not actually be physically partitioned.

.keywords: matrix, bmat, create

.seealso: MATBLOCKMAT
@*/
PetscErrorCode MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
  MatSetType(*A,MATBLOCKMAT);
  MatBlockMatSetPreallocation(*A,bs,nz,nnz);
  return(0);
}
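
/*
   Usage sketch (illustrative, not part of the original source): create a
   100 x 100 symmetric block matrix of 4 x 4 blocks with at most 5 nonzero
   blocks per block row, then apply it.

       Mat      A;
       PetscInt i,nnz[25];
       for (i=0; i<25; i++) nnz[i] = 5;
       MatCreateBlockMat(PETSC_COMM_SELF,100,100,4,PETSC_DEFAULT,nnz,&A);
       MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);  // switches to the symmetric mult/relax kernels
       // ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ...
       // MatMult(A,x,y);
*/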