Actual source code: blockmat.c

  1: #define PETSCMAT_DLL

  3: /*
  4:    This provides a matrix that consists of Mats
  5: */

 7:  #include private/matimpl.h
  8: #include "../src/mat/impls/baij/seq/baij.h"    /* use the common AIJ data-structure */
 9:  #include petscksp.h

 11: #define CHUNKSIZE   15

 13: typedef struct {
 14:   SEQAIJHEADER(Mat);
 15:   SEQBAIJHEADER;
 16:   Mat               *diags;

 18:   Vec               left,right,middle,workb;   /* dummy vectors to perform local parts of product */
 19: } Mat_BlockMat;

 23: PetscErrorCode MatRelax_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
 24: {
 25:   Mat_BlockMat       *a = (Mat_BlockMat*)A->data;
 26:   PetscScalar        *x;
 27:   const Mat          *v = a->a;
 28:   const PetscScalar  *b;
 29:   PetscErrorCode     ierr;
 30:   PetscInt           n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
 31:   const PetscInt     *idx;
 32:   IS                 row,col;
 33:   MatFactorInfo      info;
 34:   Vec                left = a->left,right = a->right, middle = a->middle;
 35:   Mat                *diag;

 38:   its = its*lits;
 39:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
 40:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
 41:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
 42:   if (fshift) SETERRQ(PETSC_ERR_SUP,"No support yet for fshift");
 43:   if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP))
 44:     SETERRQ(PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");

 46:   if (!a->diags) {
 47:     PetscMalloc(mbs*sizeof(Mat),&a->diags);
 48:     MatFactorInfoInitialize(&info);
 49:     for (i=0; i<mbs; i++) {
 50:       MatGetOrdering(a->a[a->diag[i]], MATORDERING_ND,&row,&col);
 51:       MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info);
 52:       MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
 53:       ISDestroy(row);
 54:       ISDestroy(col);
 55:     }
 56:     VecDuplicate(bb,&a->workb);
 57:   }
 58:   diag    = a->diags;

 60:   VecSet(xx,0.0);
 61:   VecGetArray(xx,&x);
 62:   /* copy right hand side because it must be modified during iteration */
 63:   VecCopy(bb,a->workb);
 64:   VecGetArray(a->workb,(PetscScalar**)&b);

 66:   /* need to add code for when initial guess is zero, see MatRelax_SeqAIJ */
 67:   while (its--) {
 68:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){

 70:       for (i=0; i<mbs; i++) {
 71:         n    = a->i[i+1] - a->i[i] - 1;
 72:         idx  = a->j + a->i[i] + 1;
 73:         v    = a->a + a->i[i] + 1;

 75:         VecSet(left,0.0);
 76:         for (j=0; j<n; j++) {
 77:           VecPlaceArray(right,x + idx[j]*bs);
 78:           MatMultAdd(v[j],right,left,left);
 79:           VecResetArray(right);
 80:         }
 81:         VecPlaceArray(right,b + i*bs);
 82:         VecAYPX(left,-1.0,right);
 83:         VecResetArray(right);

 85:         VecPlaceArray(right,x + i*bs);
 86:         MatSolve(diag[i],left,right);

 88:         /* now adjust right hand side, see MatRelax_SeqSBAIJ */
 89:         for (j=0; j<n; j++) {
 90:           MatMultTranspose(v[j],right,left);
 91:           VecPlaceArray(middle,b + idx[j]*bs);
 92:           VecAXPY(middle,-1.0,left);
 93:           VecResetArray(middle);
 94:         }
 95:         VecResetArray(right);

 97:       }
 98:     }
 99:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){

101:       for (i=mbs-1; i>=0; i--) {
102:         n    = a->i[i+1] - a->i[i] - 1;
103:         idx  = a->j + a->i[i] + 1;
104:         v    = a->a + a->i[i] + 1;

106:         VecSet(left,0.0);
107:         for (j=0; j<n; j++) {
108:           VecPlaceArray(right,x + idx[j]*bs);
109:           MatMultAdd(v[j],right,left,left);
110:           VecResetArray(right);
111:         }
112:         VecPlaceArray(right,b + i*bs);
113:         VecAYPX(left,-1.0,right);
114:         VecResetArray(right);

116:         VecPlaceArray(right,x + i*bs);
117:         MatSolve(diag[i],left,right);
118:         VecResetArray(right);

120:       }
121:     }
122:   }
123:   VecRestoreArray(xx,&x);
124:   VecRestoreArray(a->workb,(PetscScalar**)&b);
125:   return(0);
126: }

130: PetscErrorCode MatRelax_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
131: {
132:   Mat_BlockMat       *a = (Mat_BlockMat*)A->data;
133:   PetscScalar        *x;
134:   const Mat          *v = a->a;
135:   const PetscScalar  *b;
136:   PetscErrorCode     ierr;
137:   PetscInt           n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
138:   const PetscInt     *idx;
139:   IS                 row,col;
140:   MatFactorInfo      info;
141:   Vec                left = a->left,right = a->right;
142:   Mat                *diag;

145:   its = its*lits;
146:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
147:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
148:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
149:   if (fshift) SETERRQ(PETSC_ERR_SUP,"No support yet for fshift");

151:   if (!a->diags) {
152:     PetscMalloc(mbs*sizeof(Mat),&a->diags);
153:     MatFactorInfoInitialize(&info);
154:     for (i=0; i<mbs; i++) {
155:       MatGetOrdering(a->a[a->diag[i]], MATORDERING_ND,&row,&col);
156:       MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info);
157:       MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
158:       ISDestroy(row);
159:       ISDestroy(col);
160:     }
161:   }
162:   diag = a->diags;

164:   VecSet(xx,0.0);
165:   VecGetArray(xx,&x);
166:   VecGetArray(bb,(PetscScalar**)&b);

168:   /* need to add code for when initial guess is zero, see MatRelax_SeqAIJ */
169:   while (its--) {
170:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){

172:       for (i=0; i<mbs; i++) {
173:         n    = a->i[i+1] - a->i[i];
174:         idx  = a->j + a->i[i];
175:         v    = a->a + a->i[i];

177:         VecSet(left,0.0);
178:         for (j=0; j<n; j++) {
179:           if (idx[j] != i) {
180:             VecPlaceArray(right,x + idx[j]*bs);
181:             MatMultAdd(v[j],right,left,left);
182:             VecResetArray(right);
183:           }
184:         }
185:         VecPlaceArray(right,b + i*bs);
186:         VecAYPX(left,-1.0,right);
187:         VecResetArray(right);

189:         VecPlaceArray(right,x + i*bs);
190:         MatSolve(diag[i],left,right);
191:         VecResetArray(right);
192:       }
193:     }
194:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){

196:       for (i=mbs-1; i>=0; i--) {
197:         n    = a->i[i+1] - a->i[i];
198:         idx  = a->j + a->i[i];
199:         v    = a->a + a->i[i];

201:         VecSet(left,0.0);
202:         for (j=0; j<n; j++) {
203:           if (idx[j] != i) {
204:             VecPlaceArray(right,x + idx[j]*bs);
205:             MatMultAdd(v[j],right,left,left);
206:             VecResetArray(right);
207:           }
208:         }
209:         VecPlaceArray(right,b + i*bs);
210:         VecAYPX(left,-1.0,right);
211:         VecResetArray(right);

213:         VecPlaceArray(right,x + i*bs);
214:         MatSolve(diag[i],left,right);
215:         VecResetArray(right);

217:       }
218:     }
219:   }
220:   VecRestoreArray(xx,&x);
221:   VecRestoreArray(bb,(PetscScalar**)&b);
222:   return(0);
223: }

227: PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
228: {
229:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
230:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
231:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
232:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
234:   PetscInt       ridx,cidx;
235:   PetscTruth     roworiented=a->roworiented;
236:   MatScalar      value;
237:   Mat            *ap,*aa = a->a;

240:   for (k=0; k<m; k++) { /* loop over added rows */
241:     row  = im[k];
242:     brow = row/bs;
243:     if (row < 0) continue;
244: #if defined(PETSC_USE_DEBUG)  
245:     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
246: #endif
247:     rp   = aj + ai[brow];
248:     ap   = aa + ai[brow];
249:     rmax = imax[brow];
250:     nrow = ailen[brow];
251:     low  = 0;
252:     high = nrow;
253:     for (l=0; l<n; l++) { /* loop over added columns */
254:       if (in[l] < 0) continue;
255: #if defined(PETSC_USE_DEBUG)  
256:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
257: #endif
258:       col = in[l]; bcol = col/bs;
259:       if (A->symmetric && brow > bcol) continue;
260:       ridx = row % bs; cidx = col % bs;
261:       if (roworiented) {
262:         value = v[l + k*n];
263:       } else {
264:         value = v[k + l*m];
265:       }
266:       if (col <= lastcol) low = 0; else high = nrow;
267:       lastcol = col;
268:       while (high-low > 7) {
269:         t = (low+high)/2;
270:         if (rp[t] > bcol) high = t;
271:         else              low  = t;
272:       }
273:       for (i=low; i<high; i++) {
274:         if (rp[i] > bcol) break;
275:         if (rp[i] == bcol) {
276:           goto noinsert1;
277:         }
278:       }
279:       if (nonew == 1) goto noinsert1;
280:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
281:       MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
282:       N = nrow++ - 1; high++;
283:       /* shift up all the later entries in this row */
284:       for (ii=N; ii>=i; ii--) {
285:         rp[ii+1] = rp[ii];
286:         ap[ii+1] = ap[ii];
287:       }
288:       if (N>=i) ap[i] = 0;
289:       rp[i]           = bcol;
290:       a->nz++;
291:       noinsert1:;
292:       if (!*(ap+i)) {
293:         MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,0,ap+i);
294:       }
295:       MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is);
296:       low = i;
297:     }
298:     ailen[brow] = nrow;
299:   }
300:   A->same_nonzero = PETSC_FALSE;
301:   return(0);
302: }

306: PetscErrorCode MatLoad_BlockMat(PetscViewer viewer, const MatType type,Mat *A)
307: {
308:   PetscErrorCode    ierr;
309:   Mat               tmpA;
310:   PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
311:   const PetscInt    *cols;
312:   const PetscScalar *values;
313:   PetscTruth        flg,notdone;
314:   Mat_SeqAIJ        *a;
315:   Mat_BlockMat      *amat;

318:   MatLoad_SeqAIJ(viewer,MATSEQAIJ,&tmpA);

320:   MatGetLocalSize(tmpA,&m,&n);
321:   PetscOptionsBegin(PETSC_COMM_SELF,PETSC_NULL,"Options for loading BlockMat matrix 1","Mat");
322:     PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
323:     PetscOptionsName("-matload_symmetric","Store the matrix as symmetric","MatLoad",&flg);
324:   PetscOptionsEnd();

326:   /* Determine number of nonzero blocks for each block row */
327:   a    = (Mat_SeqAIJ*) tmpA->data;
328:   mbs  = m/bs;
329:   PetscMalloc3(mbs,PetscInt,&lens,bs,PetscInt*,&ii,bs,PetscInt,&ilens);
330:   PetscMemzero(lens,mbs*sizeof(PetscInt));

332:   for (i=0; i<mbs; i++) {
333:     for (j=0; j<bs; j++) {
334:       ii[j]         = a->j + a->i[i*bs + j];
335:       ilens[j]      = a->i[i*bs + j + 1] - a->i[i*bs + j];
336:     }

338:     currentcol = -1;
339:     notdone = PETSC_TRUE;
340:     while (PETSC_TRUE) {
341:       notdone = PETSC_FALSE;
342:       nextcol = 1000000000;
343:       for (j=0; j<bs; j++) {
344:         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
345:           ii[j]++;
346:           ilens[j]--;
347:         }
348:         if (ilens[j] > 0) {
349:           notdone = PETSC_TRUE;
350:           nextcol = PetscMin(nextcol,ii[j][0]/bs);
351:         }
352:       }
353:       if (!notdone) break;
354:       if (!flg || (nextcol >= i)) lens[i]++;
355:       currentcol = nextcol;
356:     }
357:   }

359:   MatCreateBlockMat(PETSC_COMM_SELF,m,n,bs,0,lens,A);
360:   if (flg) {
361:     MatSetOption(*A,MAT_SYMMETRIC,PETSC_TRUE);
362:   }
363:   amat = (Mat_BlockMat*)(*A)->data;

365:   /* preallocate the submatrices */
366:   PetscMalloc(bs*sizeof(PetscInt),&llens);
367:   for (i=0; i<mbs; i++) { /* loops for block rows */
368:     for (j=0; j<bs; j++) {
369:       ii[j]         = a->j + a->i[i*bs + j];
370:       ilens[j]      = a->i[i*bs + j + 1] - a->i[i*bs + j];
371:     }

373:     currentcol = 1000000000;
374:     for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
375:       if (ilens[j] > 0) {
376:         currentcol = PetscMin(currentcol,ii[j][0]/bs);
377:       }
378:     }

380:     notdone = PETSC_TRUE;
381:     while (PETSC_TRUE) {  /* loops over blocks in block row */

383:       notdone = PETSC_FALSE;
384:       nextcol = 1000000000;
385:       PetscMemzero(llens,bs*sizeof(PetscInt));
386:       for (j=0; j<bs; j++) { /* loop over rows in block */
387:         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
388:           ii[j]++;
389:           ilens[j]--;
390:           llens[j]++;
391:         }
392:         if (ilens[j] > 0) {
393:           notdone = PETSC_TRUE;
394:           nextcol = PetscMin(nextcol,ii[j][0]/bs);
395:         }
396:       }
397:       if (cnt >= amat->maxnz) SETERRQ1(PETSC_ERR_PLIB,"Number of blocks found greater than expected %D",cnt);
398:       if (!flg || currentcol >= i) {
399:         amat->j[cnt] = currentcol;
400:         MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++);
401:       }

403:       if (!notdone) break;
404:       currentcol = nextcol;
405:     }
406:     amat->ilen[i] = lens[i];
407:   }
408:   CHKMEMQ;

410:   PetscFree3(lens,ii,ilens);
411:   PetscFree(llens);

413:   /* copy over the matrix, one row at a time */
414:   for (i=0; i<m; i++) {
415:     MatGetRow(tmpA,i,&ncols,&cols,&values);
416:     MatSetValues(*A,1,&i,ncols,cols,values,INSERT_VALUES);
417:     MatRestoreRow(tmpA,i,&ncols,&cols,&values);
418:   }
419:   MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
420:   MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
421:   return(0);
422: }

426: PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
427: {
428:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
429:   PetscErrorCode    ierr;
430:   const char        *name;
431:   PetscViewerFormat format;

434:   PetscObjectGetName((PetscObject)A,&name);
435:   PetscViewerGetFormat(viewer,&format);
436:   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
437:     PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %D \n",a->nz);
438:     if (A->symmetric) {
439:       PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n");
440:     }
441:   }
442:   return(0);
443: }

447: PetscErrorCode MatDestroy_BlockMat(Mat mat)
448: {
450:   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
451:   PetscInt       i;

454:   if (bmat->right) {
455:     VecDestroy(bmat->right);
456:   }
457:   if (bmat->left) {
458:     VecDestroy(bmat->left);
459:   }
460:   if (bmat->middle) {
461:     VecDestroy(bmat->middle);
462:   }
463:   if (bmat->workb) {
464:     VecDestroy(bmat->workb);
465:   }
466:   if (bmat->diags) {
467:     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
468:       if (bmat->diags[i]) {MatDestroy(bmat->diags[i]);}
469:     }
470:   }
471:   if (bmat->a) {
472:     for (i=0; i<bmat->nz; i++) {
473:       if (bmat->a[i]) {MatDestroy(bmat->a[i]);}
474:     }
475:   }
476:   MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
477:   PetscFree(bmat);
478:   return(0);
479: }

483: PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
484: {
485:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
487:   PetscScalar    *xx,*yy;
488:   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
489:   Mat            *aa;

492:   CHKMEMQ;
493:   /*
494:      Standard CSR multiply except each entry is a Mat
495:   */
496:   VecGetArray(x,&xx);

498:   VecSet(y,0.0);
499:   VecGetArray(y,&yy);
500:   aj  = bmat->j;
501:   aa  = bmat->a;
502:   ii  = bmat->i;
503:   for (i=0; i<m; i++) {
504:     jrow = ii[i];
505:     VecPlaceArray(bmat->left,yy + bs*i);
506:     n    = ii[i+1] - jrow;
507:     for (j=0; j<n; j++) {
508:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
509:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
510:       VecResetArray(bmat->right);
511:       jrow++;
512:     }
513:     VecResetArray(bmat->left);
514:   }
515:   VecRestoreArray(x,&xx);
516:   VecRestoreArray(y,&yy);
517:   CHKMEMQ;
518:   return(0);
519: }

523: PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
524: {
525:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
527:   PetscScalar    *xx,*yy;
528:   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
529:   Mat            *aa;

532:   CHKMEMQ;
533:   /*
534:      Standard CSR multiply except each entry is a Mat
535:   */
536:   VecGetArray(x,&xx);

538:   VecSet(y,0.0);
539:   VecGetArray(y,&yy);
540:   aj  = bmat->j;
541:   aa  = bmat->a;
542:   ii  = bmat->i;
543:   for (i=0; i<m; i++) {
544:     jrow = ii[i];
545:     n    = ii[i+1] - jrow;
546:     VecPlaceArray(bmat->left,yy + bs*i);
547:     VecPlaceArray(bmat->middle,xx + bs*i);
548:     /* if we ALWAYS required a diagonal entry then could remove this if test */
549:     if (aj[jrow] == i) {
550:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
551:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
552:       VecResetArray(bmat->right);
553:       jrow++;
554:       n--;
555:     }
556:     for (j=0; j<n; j++) {
557:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);            /* upper triangular part */
558:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
559:       VecResetArray(bmat->right);

561:       VecPlaceArray(bmat->right,yy + bs*aj[jrow]);            /* lower triangular part */
562:       MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right);
563:       VecResetArray(bmat->right);
564:       jrow++;
565:     }
566:     VecResetArray(bmat->left);
567:     VecResetArray(bmat->middle);
568:   }
569:   VecRestoreArray(x,&xx);
570:   VecRestoreArray(y,&yy);
571:   CHKMEMQ;
572:   return(0);
573: }

577: PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
578: {
580:   return(0);
581: }

585: PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
586: {
588:   return(0);
589: }

593: PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
594: {
596:   return(0);
597: }

599: /*
600:      Adds diagonal pointers to sparse matrix structure.
601: */
604: PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
605: {
606:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
608:   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;

611:   if (!a->diag) {
612:     PetscMalloc(mbs*sizeof(PetscInt),&a->diag);
613:   }
614:   for (i=0; i<mbs; i++) {
615:     a->diag[i] = a->i[i+1];
616:     for (j=a->i[i]; j<a->i[i+1]; j++) {
617:       if (a->j[j] == i) {
618:         a->diag[i] = j;
619:         break;
620:       }
621:     }
622:   }
623:   return(0);
624: }

628: PetscErrorCode MatGetSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,PetscInt csize,MatReuse scall,Mat *B)
629: {
630:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
631:   Mat_SeqAIJ     *c;
633:   PetscInt       i,k,first,step,lensi,nrows,ncols;
634:   PetscInt       *j_new,*i_new,*aj = a->j,*ai = a->i,ii,*ailen = a->ilen;
635:   PetscScalar    *a_new;
636:   Mat            C,*aa = a->a;
637:   PetscTruth     stride,equal;

640:   ISEqual(isrow,iscol,&equal);
641:   if (!equal) SETERRQ(PETSC_ERR_SUP,"Only for idential column and row indices");
642:   ISStride(iscol,&stride);
643:   if (!stride) SETERRQ(PETSC_ERR_SUP,"Only for stride indices");
644:   ISStrideGetInfo(iscol,&first,&step);
645:   if (step != A->rmap->bs) SETERRQ(PETSC_ERR_SUP,"Can only select one entry from each block");

647:   ISGetLocalSize(isrow,&nrows);
648:   ncols = nrows;

650:   /* create submatrix */
651:   if (scall == MAT_REUSE_MATRIX) {
652:     PetscInt n_cols,n_rows;
653:     C = *B;
654:     MatGetSize(C,&n_rows,&n_cols);
655:     if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
656:     MatZeroEntries(C);
657:   } else {
658:     MatCreate(((PetscObject)A)->comm,&C);
659:     MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);
660:     if (A->symmetric) {
661:       MatSetType(C,MATSEQSBAIJ);
662:     } else {
663:       MatSetType(C,MATSEQAIJ);
664:     }
665:     MatSeqAIJSetPreallocation(C,0,ailen);
666:     MatSeqSBAIJSetPreallocation(C,1,0,ailen);
667:   }
668:   c = (Mat_SeqAIJ*)C->data;
669: 
670:   /* loop over rows inserting into submatrix */
671:   a_new    = c->a;
672:   j_new    = c->j;
673:   i_new    = c->i;
674: 
675:   for (i=0; i<nrows; i++) {
676:     ii    = ai[i];
677:     lensi = ailen[i];
678:     for (k=0; k<lensi; k++) {
679:       *j_new++ = *aj++;
680:       MatGetValue(*aa++,first,first,a_new++);
681:     }
682:     i_new[i+1]  = i_new[i] + lensi;
683:     c->ilen[i]  = lensi;
684:   }

686:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
687:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
688:   *B = C;
689:   return(0);
690: }

694: PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
695: {
696:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
698:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
699:   PetscInt       m = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
700:   Mat            *aa = a->a,*ap;

703:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

705:   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
706:   for (i=1; i<m; i++) {
707:     /* move each row back by the amount of empty slots (fshift) before it*/
708:     fshift += imax[i-1] - ailen[i-1];
709:     rmax   = PetscMax(rmax,ailen[i]);
710:     if (fshift) {
711:       ip = aj + ai[i] ;
712:       ap = aa + ai[i] ;
713:       N  = ailen[i];
714:       for (j=0; j<N; j++) {
715:         ip[j-fshift] = ip[j];
716:         ap[j-fshift] = ap[j];
717:       }
718:     }
719:     ai[i] = ai[i-1] + ailen[i-1];
720:   }
721:   if (m) {
722:     fshift += imax[m-1] - ailen[m-1];
723:     ai[m]  = ai[m-1] + ailen[m-1];
724:   }
725:   /* reset ilen and imax for each row */
726:   for (i=0; i<m; i++) {
727:     ailen[i] = imax[i] = ai[i+1] - ai[i];
728:   }
729:   a->nz = ai[m];
730:   for (i=0; i<a->nz; i++) {
731: #if defined(PETSC_USE_DEBUG)
732:     if (!aa[i]) SETERRQ3(PETSC_ERR_PLIB,"Null matrix at location %D column %D nz %D",i,aj[i],a->nz);
733: #endif
734:     MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY);
735:     MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY);
736:   }
737:   CHKMEMQ;
738:   PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz);
739:   PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);
740:   PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);
741:   a->reallocs          = 0;
742:   A->info.nz_unneeded  = (double)fshift;
743:   a->rmax              = rmax;

745:   A->same_nonzero = PETSC_TRUE;
746:   MatMarkDiagonal_BlockMat(A);
747:   return(0);
748: }

752: PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscTruth flg)
753: {
755:   if (opt == MAT_SYMMETRIC && flg) {
756:     A->ops->relax = MatRelax_BlockMat_Symmetric;
757:     A->ops->mult  = MatMult_BlockMat_Symmetric;
758:   } else {
759:     PetscInfo1(A,"Unused matrix option %s\n",MatOptions[opt]);
760:   }
761:   return(0);
762: }


765: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
766:        0,
767:        0,
768:        MatMult_BlockMat,
769: /* 4*/ MatMultAdd_BlockMat,
770:        MatMultTranspose_BlockMat,
771:        MatMultTransposeAdd_BlockMat,
772:        0,
773:        0,
774:        0,
775: /*10*/ 0,
776:        0,
777:        0,
778:        MatRelax_BlockMat,
779:        0,
780: /*15*/ 0,
781:        0,
782:        0,
783:        0,
784:        0,
785: /*20*/ 0,
786:        MatAssemblyEnd_BlockMat,
787:        0,
788:        MatSetOption_BlockMat,
789:        0,
790: /*25*/ 0,
791:        0,
792:        0,
793:        0,
794:        0,
795: /*30*/ 0,
796:        0,
797:        0,
798:        0,
799:        0,
800: /*35*/ 0,
801:        0,
802:        0,
803:        0,
804:        0,
805: /*40*/ 0,
806:        0,
807:        0,
808:        0,
809:        0,
810: /*45*/ 0,
811:        0,
812:        0,
813:        0,
814:        0,
815: /*50*/ 0,
816:        0,
817:        0,
818:        0,
819:        0,
820: /*55*/ 0,
821:        0,
822:        0,
823:        0,
824:        0,
825: /*60*/ MatGetSubMatrix_BlockMat,
826:        MatDestroy_BlockMat,
827:        MatView_BlockMat,
828:        0,
829:        0,
830: /*65*/ 0,
831:        0,
832:        0,
833:        0,
834:        0,
835: /*70*/ 0,
836:        0,
837:        0,
838:        0,
839:        0,
840: /*75*/ 0,
841:        0,
842:        0,
843:        0,
844:        0,
845: /*80*/ 0,
846:        0,
847:        0,
848:        0,
849:        MatLoad_BlockMat,
850: /*85*/ 0,
851:        0,
852:        0,
853:        0,
854:        0,
855: /*90*/ 0,
856:        0,
857:        0,
858:        0,
859:        0,
860: /*95*/ 0,
861:        0,
862:        0,
863:        0};

867: /*@C
868:    MatBlockMatSetPreallocation - For good matrix assembly performance
869:    the user should preallocate the matrix storage by setting the parameter nz
870:    (or the array nnz).  By setting these parameters accurately, performance
871:    during matrix assembly can be increased by more than a factor of 50.

873:    Collective on MPI_Comm

875:    Input Parameters:
876: +  B - The matrix
877: .  bs - size of each block in matrix
878: .  nz - number of nonzeros per block row (same for all rows)
879: -  nnz - array containing the number of nonzeros in the various block rows 
880:          (possibly different for each row) or PETSC_NULL

882:    Notes:
883:      If nnz is given then nz is ignored

885:    Specify the preallocated storage with either nz or nnz (not both).
886:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
887:    allocation.  For large problems you MUST preallocate memory or you 
888:    will get TERRIBLE performance, see the users' manual chapter on matrices.

890:    Level: intermediate

892: .seealso: MatCreate(), MatCreateBlockMat(), MatSetValues()

894: @*/
895: PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
896: {
897:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);

900:   PetscObjectQueryFunction((PetscObject)B,"MatBlockMatSetPreallocation_C",(void (**)(void))&f);
901:   if (f) {
902:     (*f)(B,bs,nz,nnz);
903:   }
904:   return(0);
905: }

910: PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
911: {
912:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
914:   PetscInt       i;

917:   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Block size given %D must be great than zero",bs);
918:   if (A->rmap->n % bs) SETERRQ2(PETSC_ERR_ARG_INCOMP,"Blocksize %D does not divide number of rows %D",bs,A->rmap->n);
919:   if (A->cmap->n % bs) SETERRQ2(PETSC_ERR_ARG_INCOMP,"Blocksize %D does not divide number of columns %D",bs,A->cmap->n);
920:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
921:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
922:   if (nnz) {
923:     for (i=0; i<A->rmap->n/bs; i++) {
924:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
925:       if (nnz[i] > A->cmap->n/bs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %d value %d rowlength %d",i,nnz[i],A->cmap->n/bs);
926:     }
927:   }
928:   A->rmap->bs = A->cmap->bs = bs;
929:   bmat->mbs  = A->rmap->n/bs;

931:   VecCreateSeqWithArray(PETSC_COMM_SELF,bs,PETSC_NULL,&bmat->right);
932:   VecCreateSeqWithArray(PETSC_COMM_SELF,bs,PETSC_NULL,&bmat->middle);
933:   VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);

935:   if (!bmat->imax) {
936:     PetscMalloc2(A->rmap->n,PetscInt,&bmat->imax,A->rmap->n,PetscInt,&bmat->ilen);
937:     PetscLogObjectMemory(A,2*A->rmap->n*sizeof(PetscInt));
938:   }
939:   if (nnz) {
940:     nz = 0;
941:     for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
942:       bmat->imax[i] = nnz[i];
943:       nz           += nnz[i];
944:     }
945:   } else {
946:     SETERRQ(PETSC_ERR_SUP,"Currently requires block row by row preallocation");
947:   }

949:   /* bmat->ilen will count nonzeros in each row so far. */
950:   for (i=0; i<bmat->mbs; i++) { bmat->ilen[i] = 0;}

952:   /* allocate the matrix space */
953:   MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
954:   PetscMalloc3(nz,Mat,&bmat->a,nz,PetscInt,&bmat->j,A->rmap->n+1,PetscInt,&bmat->i);
955:   PetscLogObjectMemory(A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));
956:   bmat->i[0] = 0;
957:   for (i=1; i<bmat->mbs+1; i++) {
958:     bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
959:   }
960:   bmat->singlemalloc = PETSC_TRUE;
961:   bmat->free_a       = PETSC_TRUE;
962:   bmat->free_ij      = PETSC_TRUE;

964:   bmat->nz                = 0;
965:   bmat->maxnz             = nz;
966:   A->info.nz_unneeded  = (double)bmat->maxnz;

968:   return(0);
969: }

972: /*MC
973:    MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix
974:                  consisting of (usually) sparse blocks.

976:   Level: advanced

978: .seealso: MatCreateBlockMat()

980: M*/

985: PetscErrorCode  MatCreate_BlockMat(Mat A)
986: {
987:   Mat_BlockMat   *b;

991:   PetscNewLog(A,Mat_BlockMat,&b);
992:   A->data = (void*)b;
993:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));

995:   PetscMapSetBlockSize(A->rmap,1);
996:   PetscMapSetBlockSize(A->cmap,1);
997:   PetscMapSetUp(A->rmap);
998:   PetscMapSetUp(A->cmap);

1000:   A->assembled     = PETSC_TRUE;
1001:   A->preallocated  = PETSC_FALSE;
1002:   PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT);

1004:   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatBlockMatSetPreallocation_C",
1005:                                      "MatBlockMatSetPreallocation_BlockMat",
1006:                                       MatBlockMatSetPreallocation_BlockMat);

1008:   return(0);
1009: }

1014: /*@C
1015:    MatCreateBlockMat - Creates a new matrix based sparse Mat storage

1017:   Collective on MPI_Comm

1019:    Input Parameters:
1020: +  comm - MPI communicator
1021: .  m - number of rows
1022: .  n  - number of columns
1023: .  bs - size of each submatrix
1024: .  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
1025: -  nnz - expected number of nonzers per block row if known (use PETSC_NULL otherwise)


1028:    Output Parameter:
1029: .  A - the matrix

1031:    Level: intermediate

1033:    PETSc requires that matrices and vectors being used for certain
1034:    operations are partitioned accordingly.  For example, when
1035:    creating a bmat matrix, A, that supports parallel matrix-vector
1036:    products using MatMult(A,x,y) the user should set the number
1037:    of local matrix rows to be the number of local elements of the
1038:    corresponding result vector, y. Note that this is information is
1039:    required for use of the matrix interface routines, even though
1040:    the bmat matrix may not actually be physically partitioned.
1041:    For example,

1043: .keywords: matrix, bmat, create

1045: .seealso: MATBLOCKMAT
1046: @*/
1047: PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
1048: {

1052:   MatCreate(comm,A);
1053:   MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
1054:   MatSetType(*A,MATBLOCKMAT);
1055:   MatBlockMatSetPreallocation(*A,bs,nz,nnz);
1056:   return(0);
1057: }