Actual source code: mpibaij.c

  1: #define PETSCMAT_DLL

 3:  #include "../src/mat/impls/baij/mpi/mpibaij.h"

  5: EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
  6: EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
  7: EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
  8: EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
  9: EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
 10: EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
 11: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
 12: EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
 13: EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
 14: EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

 18: PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
 19: {
 20:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
 22:   PetscInt       i,*idxb = 0;
 23:   PetscScalar    *va,*vb;
 24:   Vec            vtmp;
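       /* Take the row-wise maxima of |entries| from the local diagonal block A first,
          then from the off-diagonal block B into a work vector, and keep the larger of
          the two for each row; column indices coming from B are mapped back to global
          numbering through garray. */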

 27:   MatGetRowMaxAbs(a->A,v,idx);
 28:   VecGetArray(v,&va);
 29:   if (idx) {
 30:     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
 31:   }

 33:   VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
 34:   if (idx) {PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);}
 35:   MatGetRowMaxAbs(a->B,vtmp,idxb);
 36:   VecGetArray(vtmp,&vb);

 38:   for (i=0; i<A->rmap->n; i++){
 39:     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
 40:   }

 42:   VecRestoreArray(v,&va);
 43:   VecRestoreArray(vtmp,&vb);
 44:   if (idxb) {PetscFree(idxb);}
 45:   VecDestroy(vtmp);
 46:   return(0);
 47: }

 52: PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
 53: {
 54:   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;

 58:   MatStoreValues(aij->A);
 59:   MatStoreValues(aij->B);
 60:   return(0);
 61: }

 67: PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
 68: {
 69:   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;

 73:   MatRetrieveValues(aij->A);
 74:   MatRetrieveValues(aij->B);
 75:   return(0);
 76: }

 79: /*
 80:      Local utility routine that creates a mapping from the global column
 81:    number to the local number in the off-diagonal part of the local
 82:    storage of the matrix.  This is done in a non-scalable way since the
 83:    length of colmap equals the global number of block columns.
 84: */
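/* Illustration (array case, bs = 2): if garray = {3,7,9} are the global block
   columns present in the off-diagonal block, the loop below sets colmap[3] = 1,
   colmap[7] = 3 and colmap[9] = 5, i.e. the local scalar offset plus one, so that a
   stored value of 0 means "this global block column is not present locally".  The
   PETSC_USE_CTABLE branch stores the analogous pairs in a PetscTable, keyed by
   garray[i]+1. */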
 87: PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
 88: {
 89:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
 90:   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
 92:   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

 95: #if defined (PETSC_USE_CTABLE)
 96:   PetscTableCreate(baij->nbs,&baij->colmap);
 97:   for (i=0; i<nbs; i++){
 98:     PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);
 99:   }
100: #else
101:   PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);
102:   PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));
103:   PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));
104:   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105: #endif
106:   return(0);
107: }

109: #define CHUNKSIZE  10
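/* The two macros below are inlined fast paths for inserting one scalar value into
   the diagonal (A) or off-diagonal (B) local block: they locate the block column by
   a short bisection followed by a linear scan, add or overwrite the entry if the
   bs x bs block already exists, and otherwise insert a new block, reallocating via
   MatSeqXAIJReallocateAIJ when the row has no free slot left. */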

111: #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
112: { \
113:  \
114:     brow = row/bs;  \
115:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
116:     rmax = aimax[brow]; nrow = ailen[brow]; \
117:       bcol = col/bs; \
118:       ridx = row % bs; cidx = col % bs; \
119:       low = 0; high = nrow; \
120:       while (high-low > 3) { \
121:         t = (low+high)/2; \
122:         if (rp[t] > bcol) high = t; \
123:         else              low  = t; \
124:       } \
125:       for (_i=low; _i<high; _i++) { \
126:         if (rp[_i] > bcol) break; \
127:         if (rp[_i] == bcol) { \
128:           bap  = ap +  bs2*_i + bs*cidx + ridx; \
129:           if (addv == ADD_VALUES) *bap += value;  \
130:           else                    *bap  = value;  \
131:           goto a_noinsert; \
132:         } \
133:       } \
134:       if (a->nonew == 1) goto a_noinsert; \
135:       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
136:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
137:       N = nrow++ - 1;  \
138:       /* shift up all the later entries in this row */ \
139:       for (ii=N; ii>=_i; ii--) { \
140:         rp[ii+1] = rp[ii]; \
141:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
142:       } \
143:       if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); }  \
144:       rp[_i]                      = bcol;  \
145:       ap[bs2*_i + bs*cidx + ridx] = value;  \
146:       a_noinsert:; \
147:     ailen[brow] = nrow; \
148: } 
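/* The B variant below is identical except that it works on the arrays of the
   off-diagonal block (bi, bj, ba, bimax, bilen) and tests b->nonew. */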

150: #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
151: { \
152:     brow = row/bs;  \
153:     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
154:     rmax = bimax[brow]; nrow = bilen[brow]; \
155:       bcol = col/bs; \
156:       ridx = row % bs; cidx = col % bs; \
157:       low = 0; high = nrow; \
158:       while (high-low > 3) { \
159:         t = (low+high)/2; \
160:         if (rp[t] > bcol) high = t; \
161:         else              low  = t; \
162:       } \
163:       for (_i=low; _i<high; _i++) { \
164:         if (rp[_i] > bcol) break; \
165:         if (rp[_i] == bcol) { \
166:           bap  = ap +  bs2*_i + bs*cidx + ridx; \
167:           if (addv == ADD_VALUES) *bap += value;  \
168:           else                    *bap  = value;  \
169:           goto b_noinsert; \
170:         } \
171:       } \
172:       if (b->nonew == 1) goto b_noinsert; \
173:       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
174:       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
175:       CHKMEMQ;\
176:       N = nrow++ - 1;  \
177:       /* shift up all the later entries in this row */ \
178:       for (ii=N; ii>=_i; ii--) { \
179:         rp[ii+1] = rp[ii]; \
180:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
181:       } \
182:       if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));}  \
183:       rp[_i]                      = bcol;  \
184:       ap[bs2*_i + bs*cidx + ridx] = value;  \
185:       b_noinsert:; \
186:     bilen[brow] = nrow; \
187: } 

191: PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
192: {
193:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
194:   MatScalar      value;
195:   PetscTruth     roworiented = baij->roworiented;
197:   PetscInt       i,j,row,col;
198:   PetscInt       rstart_orig=mat->rmap->rstart;
199:   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
200:   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

202:   /* Some Variables required in the macro */
203:   Mat            A = baij->A;
204:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
205:   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
206:   MatScalar      *aa=a->a;

208:   Mat            B = baij->B;
209:   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
210:   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
211:   MatScalar      *ba=b->a;

213:   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
214:   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
215:   MatScalar      *ap,*bap;
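       /* Rows owned by this process are split column-wise: entries whose global column
          lies in the local diagonal range go into baij->A, all other local-row entries
          go into baij->B (after translating the global column through colmap/garray);
          entries for rows owned by other processes are stashed and exchanged during
          assembly, unless MAT_IGNORE_OFF_PROC_ENTRIES was set. */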

218:   for (i=0; i<m; i++) {
219:     if (im[i] < 0) continue;
220: #if defined(PETSC_USE_DEBUG)
221:     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
222: #endif
223:     if (im[i] >= rstart_orig && im[i] < rend_orig) {
224:       row = im[i] - rstart_orig;
225:       for (j=0; j<n; j++) {
226:         if (in[j] >= cstart_orig && in[j] < cend_orig){
227:           col = in[j] - cstart_orig;
228:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229:           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
230:           /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
231:         } else if (in[j] < 0) continue;
232: #if defined(PETSC_USE_DEBUG)
233:         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
234: #endif
235:         else {
236:           if (mat->was_assembled) {
237:             if (!baij->colmap) {
238:               CreateColmap_MPIBAIJ_Private(mat);
239:             }
240: #if defined (PETSC_USE_CTABLE)
241:             PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
242:             col  = col - 1;
243: #else
244:             col = baij->colmap[in[j]/bs] - 1;
245: #endif
246:             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
247:               DisAssemble_MPIBAIJ(mat);
248:               col =  in[j];
249:               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
250:               B = baij->B;
251:               b = (Mat_SeqBAIJ*)(B)->data;
252:               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
253:               ba=b->a;
254:             } else col += in[j]%bs;
255:           } else col = in[j];
256:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
257:           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258:           /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
259:         }
260:       }
261:     } else {
262:       if (!baij->donotstash) {
263:         if (roworiented) {
264:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
265:         } else {
266:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
267:         }
268:       }
269:     }
270:   }
271:   return(0);
272: }

276: PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
277: {
278:   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
279:   const PetscScalar *value;
280:   MatScalar         *barray=baij->barray;
281:   PetscTruth        roworiented = baij->roworiented;
282:   PetscErrorCode    ierr;
283:   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
284:   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
285:   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
286: 
288:   if(!barray) {
289:     PetscMalloc(bs2*sizeof(MatScalar),&barray);
290:     baij->barray = barray;
291:   }

293:   if (roworiented) {
294:     stepval = (n-1)*bs;
295:   } else {
296:     stepval = (m-1)*bs;
297:   }
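       /* stepval is the jump (in scalars) from the end of one row of a bs x bs block to
          the start of that block's next row inside the caller's array v, which holds the
          m*bs by n*bs values in row-oriented or column-oriented order. */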
298:   for (i=0; i<m; i++) {
299:     if (im[i] < 0) continue;
300: #if defined(PETSC_USE_DEBUG)
301:     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302: #endif
303:     if (im[i] >= rstart && im[i] < rend) {
304:       row = im[i] - rstart;
305:       for (j=0; j<n; j++) {
306:         /* If NumCol = 1 then a copy is not required */
307:         if ((roworiented) && (n == 1)) {
308:           barray = (MatScalar*)v + i*bs2;
309:         } else if((!roworiented) && (m == 1)) {
310:           barray = (MatScalar*)v + j*bs2;
311:         } else { /* Here a copy is required */
312:           if (roworiented) {
313:             value = v + i*(stepval+bs)*bs + j*bs;
314:           } else {
315:             value = v + j*(stepval+bs)*bs + i*bs;
316:           }
317:           for (ii=0; ii<bs; ii++,value+=stepval) {
318:             for (jj=0; jj<bs; jj++) {
319:               *barray++  = *value++;
320:             }
321:           }
322:           barray -=bs2;
323:         }
324: 
325:         if (in[j] >= cstart && in[j] < cend){
326:           col  = in[j] - cstart;
327:           MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);
328:         }
329:         else if (in[j] < 0) continue;
330: #if defined(PETSC_USE_DEBUG)
331:         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
332: #endif
333:         else {
334:           if (mat->was_assembled) {
335:             if (!baij->colmap) {
336:               CreateColmap_MPIBAIJ_Private(mat);
337:             }

339: #if defined(PETSC_USE_DEBUG)
340: #if defined (PETSC_USE_CTABLE)
341:             { PetscInt data;
342:               PetscTableFind(baij->colmap,in[j]+1,&data);
343:               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
344:             }
345: #else
346:             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
347: #endif
348: #endif
349: #if defined (PETSC_USE_CTABLE)
350:             PetscTableFind(baij->colmap,in[j]+1,&col);
351:             col  = (col - 1)/bs;
352: #else
353:             col = (baij->colmap[in[j]] - 1)/bs;
354: #endif
355:             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
356:               DisAssemble_MPIBAIJ(mat);
357:               col =  in[j];
358:             }
359:           }
360:           else col = in[j];
361:           MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);
362:         }
363:       }
364:     } else {
365:       if (!baij->donotstash) {
366:         if (roworiented) {
367:           MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
368:         } else {
369:           MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
370:         }
371:       }
372:     }
373:   }
374:   return(0);
375: }

377: #define HASH_KEY 0.6180339887
378: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
379: /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380: /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
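/* This is the classical multiplicative hash: take the fractional part of
   key*(golden ratio - 1) and scale it by the table size.  For example, with
   size = 10 and key = 7, tmp = 4.3262..., so HASH() evaluates to
   (PetscInt)(10*0.3262...) = 3. */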
383: PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
384: {
385:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
386:   PetscTruth     roworiented = baij->roworiented;
388:   PetscInt       i,j,row,col;
389:   PetscInt       rstart_orig=mat->rmap->rstart;
390:   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
391:   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
392:   PetscReal      tmp;
393:   MatScalar      **HD = baij->hd,value;
394: #if defined(PETSC_USE_DEBUG)
395:   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
396: #endif


400:   for (i=0; i<m; i++) {
401: #if defined(PETSC_USE_DEBUG)
402:     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
403:     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
404: #endif
405:     row = im[i];
406:     if (row >= rstart_orig && row < rend_orig) {
407:       for (j=0; j<n; j++) {
408:         col = in[j];
409:         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410:         /* Look up into the Hash Table */
411:         key = (row/bs)*Nbs+(col/bs)+1;
412:         h1  = HASH(size,key,tmp);

414: 
415:         idx = h1;
416: #if defined(PETSC_USE_DEBUG)
417:         insert_ct++;
418:         total_ct++;
419:         if (HT[idx] != key) {
420:           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
421:           if (idx == size) {
422:             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
423:             if (idx == h1) {
424:               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
425:             }
426:           }
427:         }
428: #else
429:         if (HT[idx] != key) {
430:           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
431:           if (idx == size) {
432:             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
433:             if (idx == h1) {
434:               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
435:             }
436:           }
437:         }
438: #endif
439:         /* A HASH table entry is found, so insert the values at the correct address */
440:         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441:         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442:       }
443:     } else {
444:       if (!baij->donotstash) {
445:         if (roworiented) {
446:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
447:         } else {
448:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
449:         }
450:       }
451:     }
452:   }
453: #if defined(PETSC_USE_DEBUG)
454:   baij->ht_total_ct = total_ct;
455:   baij->ht_insert_ct = insert_ct;
456: #endif
457:   return(0);
458: }

462: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463: {
464:   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465:   PetscTruth        roworiented = baij->roworiented;
466:   PetscErrorCode    ierr;
467:   PetscInt          i,j,ii,jj,row,col;
468:   PetscInt          rstart=baij->rstartbs;
469:   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470:   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471:   PetscReal         tmp;
472:   MatScalar         **HD = baij->hd,*baij_a;
473:   const PetscScalar *v_t,*value;
474: #if defined(PETSC_USE_DEBUG)
475:   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476: #endif
477: 

480:   if (roworiented) {
481:     stepval = (n-1)*bs;
482:   } else {
483:     stepval = (m-1)*bs;
484:   }
485:   for (i=0; i<m; i++) {
486: #if defined(PETSC_USE_DEBUG)
487:     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
488:     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
489: #endif
490:     row   = im[i];
491:     v_t   = v + i*nbs2;
492:     if (row >= rstart && row < rend) {
493:       for (j=0; j<n; j++) {
494:         col = in[j];

496:         /* Look up into the Hash Table */
497:         key = row*Nbs+col+1;
498:         h1  = HASH(size,key,tmp);
499: 
500:         idx = h1;
501: #if defined(PETSC_USE_DEBUG)
502:         total_ct++;
503:         insert_ct++;
504:        if (HT[idx] != key) {
505:           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
506:           if (idx == size) {
507:             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
508:             if (idx == h1) {
509:               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510:             }
511:           }
512:         }
513: #else  
514:         if (HT[idx] != key) {
515:           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
516:           if (idx == size) {
517:             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
518:             if (idx == h1) {
519:               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
520:             }
521:           }
522:         }
523: #endif
524:         baij_a = HD[idx];
525:         if (roworiented) {
526:           /*value = v + i*(stepval+bs)*bs + j*bs;*/
527:           /* value = v + (i*(stepval+bs)+j)*bs; */
528:           value = v_t;
529:           v_t  += bs;
530:           if (addv == ADD_VALUES) {
531:             for (ii=0; ii<bs; ii++,value+=stepval) {
532:               for (jj=ii; jj<bs2; jj+=bs) {
533:                 baij_a[jj]  += *value++;
534:               }
535:             }
536:           } else {
537:             for (ii=0; ii<bs; ii++,value+=stepval) {
538:               for (jj=ii; jj<bs2; jj+=bs) {
539:                 baij_a[jj]  = *value++;
540:               }
541:             }
542:           }
543:         } else {
544:           value = v + j*(stepval+bs)*bs + i*bs;
545:           if (addv == ADD_VALUES) {
546:             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
547:               for (jj=0; jj<bs; jj++) {
548:                 baij_a[jj]  += *value++;
549:               }
550:             }
551:           } else {
552:             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
553:               for (jj=0; jj<bs; jj++) {
554:                 baij_a[jj]  = *value++;
555:               }
556:             }
557:           }
558:         }
559:       }
560:     } else {
561:       if (!baij->donotstash) {
562:         if (roworiented) {
563:           MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
564:         } else {
565:           MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
566:         }
567:       }
568:     }
569:   }
570: #if defined(PETSC_USE_DEBUG)
571:   baij->ht_total_ct = total_ct;
572:   baij->ht_insert_ct = insert_ct;
573: #endif
574:   return(0);
575: }

579: PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580: {
581:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
583:   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
584:   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

587:   for (i=0; i<m; i++) {
588:     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589:     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590:     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
591:       row = idxm[i] - bsrstart;
592:       for (j=0; j<n; j++) {
593:         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594:         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595:         if (idxn[j] >= bscstart && idxn[j] < bscend){
596:           col = idxn[j] - bscstart;
597:           MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
598:         } else {
599:           if (!baij->colmap) {
600:             CreateColmap_MPIBAIJ_Private(mat);
601:           }
602: #if defined (PETSC_USE_CTABLE)
603:           PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
604:           data --;
605: #else
606:           data = baij->colmap[idxn[j]/bs]-1;
607: #endif
608:           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
609:           else {
610:             col  = data + idxn[j]%bs;
611:             MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
612:           }
613:         }
614:       }
615:     } else {
616:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
617:     }
618:   }
619:   return(0);
620: }

624: PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
625: {
626:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
627:   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
629:   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
630:   PetscReal      sum = 0.0;
631:   MatScalar      *v;
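       /* Local contributions are accumulated over both the diagonal block (amat) and the
          off-diagonal block (bmat) and then combined across processes with MPI_Allreduce:
          a SUM reduction for the Frobenius and 1-norms, a MAX reduction for the infinity
          norm. */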

634:   if (baij->size == 1) {
635:      MatNorm(baij->A,type,nrm);
636:   } else {
637:     if (type == NORM_FROBENIUS) {
638:       v = amat->a;
639:       nz = amat->nz*bs2;
640:       for (i=0; i<nz; i++) {
641: #if defined(PETSC_USE_COMPLEX)
642:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
643: #else
644:         sum += (*v)*(*v); v++;
645: #endif
646:       }
647:       v = bmat->a;
648:       nz = bmat->nz*bs2;
649:       for (i=0; i<nz; i++) {
650: #if defined(PETSC_USE_COMPLEX)
651:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
652: #else
653:         sum += (*v)*(*v); v++;
654: #endif
655:       }
656:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
657:       *nrm = sqrt(*nrm);
658:     } else if (type == NORM_1) { /* max column sum */
659:       PetscReal *tmp,*tmp2;
660:       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
661:       PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);
662:       tmp2 = tmp + mat->cmap->N;
663:       PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
664:       v = amat->a; jj = amat->j;
665:       for (i=0; i<amat->nz; i++) {
666:         for (j=0; j<bs; j++){
667:           col = bs*(cstart + *jj) + j; /* column index */
668:           for (row=0; row<bs; row++){
669:             tmp[col] += PetscAbsScalar(*v);  v++;
670:           }
671:         }
672:         jj++;
673:       }
674:       v = bmat->a; jj = bmat->j;
675:       for (i=0; i<bmat->nz; i++) {
676:         for (j=0; j<bs; j++){
677:           col = bs*garray[*jj] + j;
678:           for (row=0; row<bs; row++){
679:             tmp[col] += PetscAbsScalar(*v); v++;
680:           }
681:         }
682:         jj++;
683:       }
684:       MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
685:       *nrm = 0.0;
686:       for (j=0; j<mat->cmap->N; j++) {
687:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
688:       }
689:       PetscFree(tmp);
690:     } else if (type == NORM_INFINITY) { /* max row sum */
691:       PetscReal *sums;
692:       PetscMalloc(bs*sizeof(PetscReal),&sums);
693:       sum = 0.0;
694:       for (j=0; j<amat->mbs; j++) {
695:         for (row=0; row<bs; row++) sums[row] = 0.0;
696:         v = amat->a + bs2*amat->i[j];
697:         nz = amat->i[j+1]-amat->i[j];
698:         for (i=0; i<nz; i++) {
699:           for (col=0; col<bs; col++){
700:             for (row=0; row<bs; row++){
701:               sums[row] += PetscAbsScalar(*v); v++;
702:             }
703:           }
704:         }
705:         v = bmat->a + bs2*bmat->i[j];
706:         nz = bmat->i[j+1]-bmat->i[j];
707:         for (i=0; i<nz; i++) {
708:           for (col=0; col<bs; col++){
709:             for (row=0; row<bs; row++){
710:               sums[row] += PetscAbsScalar(*v); v++;
711:             }
712:           }
713:         }
714:         for (row=0; row<bs; row++){
715:           if (sums[row] > sum) sum = sums[row];
716:         }
717:       }
718:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);
719:       PetscFree(sums);
720:     } else {
721:       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
722:     }
723:   }
724:   return(0);
725: }

727: /*
728:   Creates and fills the hash table.
729:   This table is created only once.
730:   If new entries need to be added to the matrix
731:   then the hash table has to be destroyed and
732:   recreated.
733: */
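/* The table uses open addressing with linear probing: the key for the block in block
   row `row' and global block column `col' is row*Nbs + col + 1 (the +1 lets a stored
   key of 0 mean "empty slot"), and on a collision the slots (h1+k) % size are tried
   in order. */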
736: PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737: {
738:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739:   Mat            A = baij->A,B=baij->B;
740:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741:   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
743:   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
744:   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745:   PetscInt       *HT,key;
746:   MatScalar      **HD;
747:   PetscReal      tmp;
748: #if defined(PETSC_USE_INFO)
749:   PetscInt       ct=0,max=0;
750: #endif

753:   baij->ht_size=(PetscInt)(factor*nz);
754:   size = baij->ht_size;

756:   if (baij->ht) {
757:     return(0);
758:   }
759: 
760:   /* Allocate Memory for Hash Table */
761:   PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);
762:   baij->ht = (PetscInt*)(baij->hd + size);
763:   HD       = baij->hd;
764:   HT       = baij->ht;


767:   PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));
768: 

770:   /* Loop Over A */
771:   for (i=0; i<a->mbs; i++) {
772:     for (j=ai[i]; j<ai[i+1]; j++) {
773:       row = i+rstart;
774:       col = aj[j]+cstart;
775: 
776:       key = row*Nbs + col + 1;
777:       h1  = HASH(size,key,tmp);
778:       for (k=0; k<size; k++){
779:         if (!HT[(h1+k)%size]) {
780:           HT[(h1+k)%size] = key;
781:           HD[(h1+k)%size] = a->a + j*bs2;
782:           break;
783: #if defined(PETSC_USE_INFO)
784:         } else {
785:           ct++;
786: #endif
787:         }
788:       }
789: #if defined(PETSC_USE_INFO)
790:       if (k> max) max = k;
791: #endif
792:     }
793:   }
794:   /* Loop Over B */
795:   for (i=0; i<b->mbs; i++) {
796:     for (j=bi[i]; j<bi[i+1]; j++) {
797:       row = i+rstart;
798:       col = garray[bj[j]];
799:       key = row*Nbs + col + 1;
800:       h1  = HASH(size,key,tmp);
801:       for (k=0; k<size; k++){
802:         if (!HT[(h1+k)%size]) {
803:           HT[(h1+k)%size] = key;
804:           HD[(h1+k)%size] = b->a + j*bs2;
805:           break;
806: #if defined(PETSC_USE_INFO)
807:         } else {
808:           ct++;
809: #endif
810:         }
811:       }
812: #if defined(PETSC_USE_INFO)
813:       if (k> max) max = k;
814: #endif
815:     }
816:   }
817: 
818:   /* Print Summary */
819: #if defined(PETSC_USE_INFO)
820:   for (i=0,j=0; i<size; i++) {
821:     if (HT[i]) {j++;}
822:   }
823:   PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
824: #endif
825:   return(0);
826: }

830: PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
831: {
832:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
834:   PetscInt       nstash,reallocs;
835:   InsertMode     addv;

838:   if (baij->donotstash) {
839:     return(0);
840:   }

842:   /* make sure all processors are either in INSERTMODE or ADDMODE */
843:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
844:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
845:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
846:   }
847:   mat->insertmode = addv; /* in case this processor had no cache */

849:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
850:   MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
851:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
852:   PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
853:   MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
854:   PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
855:   return(0);
856: }

860: PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
861: {
862:   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
863:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
865:   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
866:   PetscInt       *row,*col;
867:   PetscTruth     r1,r2,r3,other_disassembled;
868:   MatScalar      *val;
869:   InsertMode     addv = mat->insertmode;
870:   PetscMPIInt    n;

872:   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
874:   if (!baij->donotstash) {
875:     while (1) {
876:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
877:       if (!flg) break;

879:       for (i=0; i<n;) {
880:         /* Now identify the consecutive vals belonging to the same row */
881:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
882:         if (j < n) ncols = j-i;
883:         else       ncols = n-i;
884:         /* Now assemble all these values with a single function call */
885:         MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
886:         i = j;
887:       }
888:     }
889:     MatStashScatterEnd_Private(&mat->stash);
890:     /* Now process the block-stash. Since the values are stashed column-oriented,
891:        temporarily set the roworiented flags to PETSC_FALSE, and restore the
892:        original flags after MatSetValuesBlocked() */
893:     r1 = baij->roworiented;
894:     r2 = a->roworiented;
895:     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
896:     baij->roworiented = PETSC_FALSE;
897:     a->roworiented    = PETSC_FALSE;
898:     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
899:     while (1) {
900:       MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
901:       if (!flg) break;
902: 
903:       for (i=0; i<n;) {
904:         /* Now identify the consecutive vals belonging to the same row */
905:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
906:         if (j < n) ncols = j-i;
907:         else       ncols = n-i;
908:         MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
909:         i = j;
910:       }
911:     }
912:     MatStashScatterEnd_Private(&mat->bstash);
913:     baij->roworiented = r1;
914:     a->roworiented    = r2;
915:     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
916:   }
917: 
918:   MatAssemblyBegin(baij->A,mode);
919:   MatAssemblyEnd(baij->A,mode);

921:   /* determine if any processor has disassembled; if so we must
922:      also disassemble ourselves, in order that we may reassemble. */
923:   /*
924:      if nonzero structure of submatrix B cannot change then we know that
925:      no processor disassembled thus we can skip this stuff
926:   */
927:   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
928:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
929:     if (mat->was_assembled && !other_disassembled) {
930:       DisAssemble_MPIBAIJ(mat);
931:     }
932:   }

934:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
935:     MatSetUpMultiply_MPIBAIJ(mat);
936:   }
937:   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
938:   MatAssemblyBegin(baij->B,mode);
939:   MatAssemblyEnd(baij->B,mode);
940: 
941: #if defined(PETSC_USE_INFO)
942:   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
943:     PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
944:     baij->ht_total_ct  = 0;
945:     baij->ht_insert_ct = 0;
946:   }
947: #endif
948:   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
949:     MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);
950:     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
951:     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
952:   }

954:   PetscFree(baij->rowvalues);
955:   baij->rowvalues = 0;
956:   return(0);
957: }

961: static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
962: {
963:   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
964:   PetscErrorCode    ierr;
965:   PetscMPIInt       size = baij->size,rank = baij->rank;
966:   PetscInt          bs = mat->rmap->bs;
967:   PetscTruth        iascii,isdraw;
968:   PetscViewer       sviewer;
969:   PetscViewerFormat format;

972:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
973:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
974:   if (iascii) {
975:     PetscViewerGetFormat(viewer,&format);
976:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
977:       MatInfo info;
978:       MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
979:       MatGetInfo(mat,MAT_LOCAL,&info);
980:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
981:               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
982:               mat->rmap->bs,(PetscInt)info.memory);
983:       MatGetInfo(baij->A,MAT_LOCAL,&info);
984:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);
985:       MatGetInfo(baij->B,MAT_LOCAL,&info);
986:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);
987:       PetscViewerFlush(viewer);
988:       PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
989:       VecScatterView(baij->Mvctx,viewer);
990:       return(0);
991:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
992:       PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
993:       return(0);
994:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
995:       return(0);
996:     }
997:   }

999:   if (isdraw) {
1000:     PetscDraw       draw;
1001:     PetscTruth isnull;
1002:     PetscViewerDrawGetDraw(viewer,0,&draw);
1003:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1004:   }

1006:   if (size == 1) {
1007:     PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);
1008:     MatView(baij->A,viewer);
1009:   } else {
1010:     /* assemble the entire matrix onto first processor. */
1011:     Mat         A;
1012:     Mat_SeqBAIJ *Aloc;
1013:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1014:     MatScalar   *a;

1016:     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1017:     /* Perhaps this should be the type of mat? */
1018:     MatCreate(((PetscObject)mat)->comm,&A);
1019:     if (!rank) {
1020:       MatSetSizes(A,M,N,M,N);
1021:     } else {
1022:       MatSetSizes(A,0,0,M,N);
1023:     }
1024:     MatSetType(A,MATMPIBAIJ);
1025:     MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);
1026:     PetscLogObjectParent(mat,A);

1028:     /* copy over the A part */
1029:     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1030:     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1031:     PetscMalloc(bs*sizeof(PetscInt),&rvals);

1033:     for (i=0; i<mbs; i++) {
1034:       rvals[0] = bs*(baij->rstartbs + i);
1035:       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1036:       for (j=ai[i]; j<ai[i+1]; j++) {
1037:         col = (baij->cstartbs+aj[j])*bs;
1038:         for (k=0; k<bs; k++) {
1039:           MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1040:           col++; a += bs;
1041:         }
1042:       }
1043:     }
1044:     /* copy over the B part */
1045:     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1046:     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1047:     for (i=0; i<mbs; i++) {
1048:       rvals[0] = bs*(baij->rstartbs + i);
1049:       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1050:       for (j=ai[i]; j<ai[i+1]; j++) {
1051:         col = baij->garray[aj[j]]*bs;
1052:         for (k=0; k<bs; k++) {
1053:           MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1054:           col++; a += bs;
1055:         }
1056:       }
1057:     }
1058:     PetscFree(rvals);
1059:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1060:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1061:     /* 
1062:        Everyone has to call to draw the matrix since the graphics waits are
1063:        synchronized across all processors that share the PetscDraw object
1064:     */
1065:     PetscViewerGetSingleton(viewer,&sviewer);
1066:     if (!rank) {
1067:       PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);
1068:       MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);
1069:     }
1070:     PetscViewerRestoreSingleton(viewer,&sviewer);
1071:     MatDestroy(A);
1072:   }
1073:   return(0);
1074: }

1078: PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1079: {
1081:   PetscTruth     iascii,isdraw,issocket,isbinary;

1084:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1085:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1086:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1087:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1088:   if (iascii || isdraw || issocket || isbinary) {
1089:     MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);
1090:   } else {
1091:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1092:   }
1093:   return(0);
1094: }

1098: PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1099: {
1100:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;

1104: #if defined(PETSC_USE_LOG)
1105:   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1106: #endif
1107:   MatStashDestroy_Private(&mat->stash);
1108:   MatStashDestroy_Private(&mat->bstash);
1109:   MatDestroy(baij->A);
1110:   MatDestroy(baij->B);
1111: #if defined (PETSC_USE_CTABLE)
1112:   if (baij->colmap) {PetscTableDestroy(baij->colmap);}
1113: #else
1114:   PetscFree(baij->colmap);
1115: #endif
1116:   PetscFree(baij->garray);
1117:   if (baij->lvec)   {VecDestroy(baij->lvec);}
1118:   if (baij->Mvctx)  {VecScatterDestroy(baij->Mvctx);}
1119:   PetscFree(baij->rowvalues);
1120:   PetscFree(baij->barray);
1121:   PetscFree(baij->hd);
1122:   PetscFree(baij->rangebs);
1123:   PetscFree(baij);

1125:   PetscObjectChangeTypeName((PetscObject)mat,0);
1126:   PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1127:   PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1128:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1129:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);
1130:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);
1131:   PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1132:   PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);
1133:   return(0);
1134: }

1138: PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1139: {
1140:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1142:   PetscInt       nt;

1145:   VecGetLocalSize(xx,&nt);
1146:   if (nt != A->cmap->n) {
1147:     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1148:   }
1149:   VecGetLocalSize(yy,&nt);
1150:   if (nt != A->rmap->n) {
1151:     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1152:   }
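       /* Overlap communication with computation: start gathering the ghost entries of xx
          into lvec, apply the local diagonal block, then finish the scatter and add the
          off-diagonal contribution, i.e. yy = (diagonal block)*xx_local + (off-diagonal
          block)*xx_ghost. */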
1153:   VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1154:   (*a->A->ops->mult)(a->A,xx,yy);
1155:   VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1156:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1157:   return(0);
1158: }

1162: PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1163: {
1164:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

1168:   VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1169:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
1170:   VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1171:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1172:   return(0);
1173: }

1177: PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1178: {
1179:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1181:   PetscTruth     merged;

1184:   VecScatterGetMerged(a->Mvctx,&merged);
1185:   /* do nondiagonal part */
1186:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1187:   if (!merged) {
1188:     /* send it on its way */
1189:     VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1190:     /* do local part */
1191:     (*a->A->ops->multtranspose)(a->A,xx,yy);
1192:     /* receive remote parts: note this assumes the values are not actually */
1193:     /* inserted in yy until the next line */
1194:     VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1195:   } else {
1196:     /* do local part */
1197:     (*a->A->ops->multtranspose)(a->A,xx,yy);
1198:     /* send it on its way */
1199:     VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1200:     /* values actually were received in the Begin() but we need to call this nop */
1201:     VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1202:   }
1203:   return(0);
1204: }

1208: PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1209: {
1210:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

1214:   /* do nondiagonal part */
1215:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1216:   /* send it on its way */
1217:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1218:   /* do local part */
1219:   (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1220:   /* receive remote parts: note this assumes the values are not actually */
1221:   /* inserted in zz until the next line, which is true for this implementation */
1222:   /* but is perhaps not always true. */
1223:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1224:   return(0);
1225: }

1227: /*
1228:   This only works correctly for square matrices where the subblock A->A is the 
1229:    diagonal block
1230: */
1233: PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1234: {
1235:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

1239:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1240:   MatGetDiagonal(a->A,v);
1241:   return(0);
1242: }

1246: PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1247: {
1248:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

1252:   MatScale(a->A,aa);
1253:   MatScale(a->B,aa);
1254:   return(0);
1255: }

1259: PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1260: {
1261:   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1262:   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264:   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1265:   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1266:   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

1269:   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1270:   mat->getrowactive = PETSC_TRUE;

1272:   if (!mat->rowvalues && (idx || v)) {
1273:     /*
1274:         allocate enough space to hold information from the longest row.
1275:     */
1276:     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1277:     PetscInt     max = 1,mbs = mat->mbs,tmp;
1278:     for (i=0; i<mbs; i++) {
1279:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1280:       if (max < tmp) { max = tmp; }
1281:     }
1282:     PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);
1283:     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1284:   }
1285: 
1286:   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1287:   lrow = row - brstart;

1289:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1290:   if (!v)   {pvA = 0; pvB = 0;}
1291:   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1292:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1293:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1294:   nztot = nzA + nzB;

1296:   cmap  = mat->garray;
1297:   if (v  || idx) {
1298:     if (nztot) {
1299:       /* Sort by increasing column numbers, assuming A and B already sorted */
1300:       PetscInt imark = -1;
1301:       if (v) {
1302:         *v = v_p = mat->rowvalues;
1303:         for (i=0; i<nzB; i++) {
1304:           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1305:           else break;
1306:         }
1307:         imark = i;
1308:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1309:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1310:       }
1311:       if (idx) {
1312:         *idx = idx_p = mat->rowindices;
1313:         if (imark > -1) {
1314:           for (i=0; i<imark; i++) {
1315:             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1316:           }
1317:         } else {
1318:           for (i=0; i<nzB; i++) {
1319:             if (cmap[cworkB[i]/bs] < cstart)
1320:               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1321:             else break;
1322:           }
1323:           imark = i;
1324:         }
1325:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1326:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1327:       }
1328:     } else {
1329:       if (idx) *idx = 0;
1330:       if (v)   *v   = 0;
1331:     }
1332:   }
1333:   *nz = nztot;
1334:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1335:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1336:   return(0);
1337: }

1341: PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342: {
1343:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

1346:   if (!baij->getrowactive) {
1347:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1348:   }
1349:   baij->getrowactive = PETSC_FALSE;
1350:   return(0);
1351: }

1355: PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1356: {
1357:   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;

1361:   MatZeroEntries(l->A);
1362:   MatZeroEntries(l->B);
1363:   return(0);
1364: }

1368: PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369: {
1370:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1371:   Mat            A = a->A,B = a->B;
1373:   PetscReal      isend[5],irecv[5];

1376:   info->block_size     = (PetscReal)matin->rmap->bs;
1377:   MatGetInfo(A,MAT_LOCAL,info);
1378:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379:   isend[3] = info->memory;  isend[4] = info->mallocs;
1380:   MatGetInfo(B,MAT_LOCAL,info);
1381:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382:   isend[3] += info->memory;  isend[4] += info->mallocs;
1383:   if (flag == MAT_LOCAL) {
1384:     info->nz_used      = isend[0];
1385:     info->nz_allocated = isend[1];
1386:     info->nz_unneeded  = isend[2];
1387:     info->memory       = isend[3];
1388:     info->mallocs      = isend[4];
1389:   } else if (flag == MAT_GLOBAL_MAX) {
1390:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);
1391:     info->nz_used      = irecv[0];
1392:     info->nz_allocated = irecv[1];
1393:     info->nz_unneeded  = irecv[2];
1394:     info->memory       = irecv[3];
1395:     info->mallocs      = irecv[4];
1396:   } else if (flag == MAT_GLOBAL_SUM) {
1397:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);
1398:     info->nz_used      = irecv[0];
1399:     info->nz_allocated = irecv[1];
1400:     info->nz_unneeded  = irecv[2];
1401:     info->memory       = irecv[3];
1402:     info->mallocs      = irecv[4];
1403:   } else {
1404:     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1405:   }
1406:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1407:   info->fill_ratio_needed = 0;
1408:   info->factor_mallocs    = 0;
1409:   return(0);
1410: }

1414: PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1415: {
1416:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

1420:   switch (op) {
1421:   case MAT_NEW_NONZERO_LOCATIONS:
1422:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1423:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1424:   case MAT_KEEP_ZEROED_ROWS:
1425:   case MAT_NEW_NONZERO_LOCATION_ERR:
1426:     MatSetOption(a->A,op,flg);
1427:     MatSetOption(a->B,op,flg);
1428:     break;
1429:   case MAT_ROW_ORIENTED:
1430:     a->roworiented = flg;
1431:     MatSetOption(a->A,op,flg);
1432:     MatSetOption(a->B,op,flg);
1433:     break;
1434:   case MAT_NEW_DIAGONALS:
1435:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1436:     break;
1437:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1438:     a->donotstash = flg;
1439:     break;
1440:   case MAT_USE_HASH_TABLE:
1441:     a->ht_flag = flg;
1442:     break;
1443:   case MAT_SYMMETRIC:
1444:   case MAT_STRUCTURALLY_SYMMETRIC:
1445:   case MAT_HERMITIAN:
1446:   case MAT_SYMMETRY_ETERNAL:
1447:     MatSetOption(a->A,op,flg);
1448:     break;
1449:   default:
1450:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1451:   }
1452:   return(0);
1453: }

1457: PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1458: {
1459:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1460:   Mat_SeqBAIJ    *Aloc;
1461:   Mat            B;
1463:   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1464:   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1465:   MatScalar      *a;
1466: 
1468:   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1469:   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1470:     MatCreate(((PetscObject)A)->comm,&B);
1471:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1472:     MatSetType(B,((PetscObject)A)->type_name);
1473:     MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);
1474:   } else {
1475:     B = *matout;
1476:   }

1478:   /* copy over the A part */
1479:   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1480:   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1481:   PetscMalloc(bs*sizeof(PetscInt),&rvals);
1482: 
1483:   for (i=0; i<mbs; i++) {
1484:     rvals[0] = bs*(baij->rstartbs + i);
1485:     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1486:     for (j=ai[i]; j<ai[i+1]; j++) {
1487:       col = (baij->cstartbs+aj[j])*bs;
1488:       for (k=0; k<bs; k++) {
1489:         MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1490:         col++; a += bs;
1491:       }
1492:     }
1493:   }
1494:   /* copy over the B part */
1495:   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1496:   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1497:   for (i=0; i<mbs; i++) {
1498:     rvals[0] = bs*(baij->rstartbs + i);
1499:     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1500:     for (j=ai[i]; j<ai[i+1]; j++) {
1501:       col = baij->garray[aj[j]]*bs;
1502:       for (k=0; k<bs; k++) {
1503:         MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1504:         col++; a += bs;
1505:       }
1506:     }
1507:   }
1508:   PetscFree(rvals);
1509:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1510:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1511: 
1512:   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1513:     *matout = B;
1514:   } else {
1515:     MatHeaderCopy(A,B);
1516:   }
1517:   return(0);
1518: }

1522: PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1523: {
1524:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1525:   Mat            a = baij->A,b = baij->B;
1527:   PetscInt       s1,s2,s3;

1530:   MatGetLocalSize(mat,&s2,&s3);
1531:   if (rr) {
1532:     VecGetLocalSize(rr,&s1);
1533:     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1534:     /* Overlap communication with computation. */
1535:     VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1536:   }
1537:   if (ll) {
1538:     VecGetLocalSize(ll,&s1);
1539:     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1540:     (*b->ops->diagonalscale)(b,ll,PETSC_NULL);
1541:   }
1542:   /* scale  the diagonal block */
1543:   (*a->ops->diagonalscale)(a,ll,rr);

1545:   if (rr) {
1546:     /* Do a scatter end and then right scale the off-diagonal block */
1547:     VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1548:     (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);
1549:   }
1550: 
1551:   return(0);
1552: }

1556: PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1557: {
1558:   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1560:   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1561:   PetscInt       i,*owners = A->rmap->range;
1562:   PetscInt       *nprocs,j,idx,nsends,row;
1563:   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1564:   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1565:   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1566:   MPI_Comm       comm = ((PetscObject)A)->comm;
1567:   MPI_Request    *send_waits,*recv_waits;
1568:   MPI_Status     recv_status,*send_status;
1569: #if defined(PETSC_DEBUG)
1570:   PetscTruth     found = PETSC_FALSE;
1571: #endif
1572: 
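       /* Communication pattern: each process determines the owner of every global row it
          was asked to zero, ships those row indices to the owning processes with
          nonblocking MPI sends/receives, and gathers from the incoming messages the list
          of its own local rows (lrows), which are then zeroed below. */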
1574:   /*  first count number of contributors to each processor */
1575:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
1576:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
1577:   PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
1578:   j = 0;
1579:   for (i=0; i<N; i++) {
1580:     if (lastidx > (idx = rows[i])) j = 0;
1581:     lastidx = idx;
1582:     for (; j<size; j++) {
1583:       if (idx >= owners[j] && idx < owners[j+1]) {
1584:         nprocs[2*j]++;
1585:         nprocs[2*j+1] = 1;
1586:         owner[i] = j;
1587: #if defined(PETSC_DEBUG)
1588:         found = PETSC_TRUE;
1589: #endif
1590:         break;
1591:       }
1592:     }
1593: #if defined(PETSC_DEBUG)
1594:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1595:     found = PETSC_FALSE;
1596: #endif
1597:   }
1598:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1599: 
1600:   /* inform other processors of number of messages and max length*/
1601:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1602: 
1603:   /* post receives:   */
1604:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
1605:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1606:   for (i=0; i<nrecvs; i++) {
1607:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1608:   }
1609: 
1610:   /* do sends:
1611:      1) starts[i] gives the starting index in svalues for stuff going to 
1612:      the ith processor
1613:   */
1614:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
1615:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1616:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
1617:   starts[0]  = 0;
1618:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1619:   for (i=0; i<N; i++) {
1620:     svalues[starts[owner[i]]++] = rows[i];
1621:   }
1622: 
1623:   starts[0] = 0;
1624:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1625:   count = 0;
1626:   for (i=0; i<size; i++) {
1627:     if (nprocs[2*i+1]) {
1628:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
1629:     }
1630:   }
1631:   PetscFree(starts);

1633:   base = owners[rank];
1634: 
1635:   /*  wait on receives */
1636:   PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
1637:   source = lens + nrecvs;
1638:   count  = nrecvs; slen = 0;
1639:   while (count) {
1640:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1641:     /* unpack receives into our local space */
1642:     MPI_Get_count(&recv_status,MPIU_INT,&n);
1643:     source[imdex]  = recv_status.MPI_SOURCE;
1644:     lens[imdex]    = n;
1645:     slen          += n;
1646:     count--;
1647:   }
1648:   PetscFree(recv_waits);
1649: 
1650:   /* move the data into the send scatter */
1651:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
1652:   count = 0;
1653:   for (i=0; i<nrecvs; i++) {
1654:     values = rvalues + i*nmax;
1655:     for (j=0; j<lens[i]; j++) {
1656:       lrows[count++] = values[j] - base;
1657:     }
1658:   }
1659:   PetscFree(rvalues);
1660:   PetscFree(lens);
1661:   PetscFree(owner);
1662:   PetscFree(nprocs);
1663: 
1664:   /* actually zap the local rows */
1665:   /*
1666:         Zero the required rows. If the "diagonal block" of the matrix
1667:      is square and the user wishes to set the diagonal, we use separate
1668:      code so that MatSetValues() is not called for each diagonal entry,
1669:      which would allocate new memory each time and slow things down with many mallocs.

1671:        Contributed by: Matthew Knepley
1672:   */
1673:   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1674:   MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);
1675:   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1676:     MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);
1677:   } else if (diag != 0.0) {
1678:     MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);
1679:     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1680:       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1681: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1682:     }
1683:     for (i=0; i<slen; i++) {
1684:       row  = lrows[i] + rstart_bs;
1685:       MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
1686:     }
1687:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1688:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1689:   } else {
1690:     MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);
1691:   }

1693:   PetscFree(lrows);

1695:   /* wait on sends */
1696:   if (nsends) {
1697:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1698:     MPI_Waitall(nsends,send_waits,send_status);
1699:     PetscFree(send_status);
1700:   }
1701:   PetscFree(send_waits);
1702:   PetscFree(svalues);

1704:   return(0);
1705: }

1709: PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1710: {
1711:   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;

1715:   MatSetUnfactored(a->A);
1716:   return(0);
1717: }

1719: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

1723: PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1724: {
1725:   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1726:   Mat            a,b,c,d;
1727:   PetscTruth     flg;

1731:   a = matA->A; b = matA->B;
1732:   c = matB->A; d = matB->B;

1734:   MatEqual(a,c,&flg);
1735:   if (flg) {
1736:     MatEqual(b,d,&flg);
1737:   }
1738:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
1739:   return(0);
1740: }

1744: PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1745: {
1747:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1748:   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;

1751:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1752:   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1753:     MatCopy_Basic(A,B,str);
1754:   } else {
1755:     MatCopy(a->A,b->A,str);
1756:     MatCopy(a->B,b->B,str);
1757:   }
1758:   return(0);
1759: }

1763: PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1764: {

1768:    MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1769:   return(0);
1770: }

1772:  #include petscblaslapack.h
1775: PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1776: {
1778:   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1779:   PetscBLASInt   bnz,one=1;
1780:   Mat_SeqBAIJ    *x,*y;

1783:   if (str == SAME_NONZERO_PATTERN) {
1784:     PetscScalar alpha = a;
1785:     x = (Mat_SeqBAIJ *)xx->A->data;
1786:     y = (Mat_SeqBAIJ *)yy->A->data;
1787:     bnz = PetscBLASIntCast(x->nz);
1788:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1789:     x = (Mat_SeqBAIJ *)xx->B->data;
1790:     y = (Mat_SeqBAIJ *)yy->B->data;
1791:     bnz = PetscBLASIntCast(x->nz);
1792:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1793:   } else {
1794:     MatAXPY_Basic(Y,a,X,str);
1795:   }
1796:   return(0);
1797: }

1801: PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1802: {
1803:   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;

1807:   MatRealPart(a->A);
1808:   MatRealPart(a->B);
1809:   return(0);
1810: }

1814: PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1815: {
1816:   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;

1820:   MatImaginaryPart(a->A);
1821:   MatImaginaryPart(a->B);
1822:   return(0);
1823: }

1825: /* -------------------------------------------------------------------*/
1826: static struct _MatOps MatOps_Values = {
1827:        MatSetValues_MPIBAIJ,
1828:        MatGetRow_MPIBAIJ,
1829:        MatRestoreRow_MPIBAIJ,
1830:        MatMult_MPIBAIJ,
1831: /* 4*/ MatMultAdd_MPIBAIJ,
1832:        MatMultTranspose_MPIBAIJ,
1833:        MatMultTransposeAdd_MPIBAIJ,
1834:        0,
1835:        0,
1836:        0,
1837: /*10*/ 0,
1838:        0,
1839:        0,
1840:        0,
1841:        MatTranspose_MPIBAIJ,
1842: /*15*/ MatGetInfo_MPIBAIJ,
1843:        MatEqual_MPIBAIJ,
1844:        MatGetDiagonal_MPIBAIJ,
1845:        MatDiagonalScale_MPIBAIJ,
1846:        MatNorm_MPIBAIJ,
1847: /*20*/ MatAssemblyBegin_MPIBAIJ,
1848:        MatAssemblyEnd_MPIBAIJ,
1849:        0,
1850:        MatSetOption_MPIBAIJ,
1851:        MatZeroEntries_MPIBAIJ,
1852: /*25*/ MatZeroRows_MPIBAIJ,
1853:        0,
1854:        0,
1855:        0,
1856:        0,
1857: /*30*/ MatSetUpPreallocation_MPIBAIJ,
1858:        0,
1859:        0,
1860:        0,
1861:        0,
1862: /*35*/ MatDuplicate_MPIBAIJ,
1863:        0,
1864:        0,
1865:        0,
1866:        0,
1867: /*40*/ MatAXPY_MPIBAIJ,
1868:        MatGetSubMatrices_MPIBAIJ,
1869:        MatIncreaseOverlap_MPIBAIJ,
1870:        MatGetValues_MPIBAIJ,
1871:        MatCopy_MPIBAIJ,
1872: /*45*/ 0,
1873:        MatScale_MPIBAIJ,
1874:        0,
1875:        0,
1876:        0,
1877: /*50*/ 0,
1878:        0,
1879:        0,
1880:        0,
1881:        0,
1882: /*55*/ 0,
1883:        0,
1884:        MatSetUnfactored_MPIBAIJ,
1885:        0,
1886:        MatSetValuesBlocked_MPIBAIJ,
1887: /*60*/ 0,
1888:        MatDestroy_MPIBAIJ,
1889:        MatView_MPIBAIJ,
1890:        0,
1891:        0,
1892: /*65*/ 0,
1893:        0,
1894:        0,
1895:        0,
1896:        0,
1897: /*70*/ MatGetRowMaxAbs_MPIBAIJ,
1898:        0,
1899:        0,
1900:        0,
1901:        0,
1902: /*75*/ 0,
1903:        0,
1904:        0,
1905:        0,
1906:        0,
1907: /*80*/ 0,
1908:        0,
1909:        0,
1910:        0,
1911:        MatLoad_MPIBAIJ,
1912: /*85*/ 0,
1913:        0,
1914:        0,
1915:        0,
1916:        0,
1917: /*90*/ 0,
1918:        0,
1919:        0,
1920:        0,
1921:        0,
1922: /*95*/ 0,
1923:        0,
1924:        0,
1925:        0,
1926:        0,
1927: /*100*/0,
1928:        0,
1929:        0,
1930:        0,
1931:        0,
1932: /*105*/0,
1933:        MatRealPart_MPIBAIJ,
1934:        MatImaginaryPart_MPIBAIJ};


1940: PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1941: {
1943:   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1944:   *iscopy = PETSC_FALSE;
1945:   return(0);
1946: }


1956: PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1957: {
1958:   PetscInt       m,rstart,cstart,cend;
1959:   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1960:   const PetscInt *JJ=0;
1961:   PetscScalar    *values=0;


1966:   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1967:   PetscMapSetBlockSize(B->rmap,bs);
1968:   PetscMapSetBlockSize(B->cmap,bs);
1969:   PetscMapSetUp(B->rmap);
1970:   PetscMapSetUp(B->cmap);
1971:   m      = B->rmap->n/bs;
1972:   rstart = B->rmap->rstart/bs;
1973:   cstart = B->cmap->rstart/bs;
1974:   cend   = B->cmap->rend/bs;

1976:   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1977:   PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);
1978:   o_nnz = d_nnz + m;
1979:   for (i=0; i<m; i++) {
1980:     nz = ii[i+1] - ii[i];
1981:     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1982:     nz_max = PetscMax(nz_max,nz);
1983:     JJ  = jj + ii[i];
1984:     for (j=0; j<nz; j++) {
1985:       if (*JJ >= cstart) break;
1986:       JJ++;
1987:     }
1988:     d = 0;
1989:     for (; j<nz; j++) {
1990:       if (*JJ++ >= cend) break;
1991:       d++;
1992:     }
1993:     d_nnz[i] = d;
1994:     o_nnz[i] = nz - d;
1995:   }
1996:   MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
1997:   PetscFree(d_nnz);

1999:   values = (PetscScalar*)V;
2000:   if (!values) {
2001:     PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
2002:     PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
2003:   }
2004:   for (i=0; i<m; i++) {
2005:     PetscInt          row    = i + rstart;
2006:     PetscInt          ncols  = ii[i+1] - ii[i];
2007:     const PetscInt    *icols = jj + ii[i];
2008:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2009:     MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
2010:   }

2012:   if (!V) { PetscFree(values); }
2013:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2014:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2016:   return(0);
2017: }

2022: /*@C
2023:    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
2024:    (block compressed row).

2026:    Collective on MPI_Comm

2028:    Input Parameters:
2029: +  A - the matrix 
.  bs - size of block
2030: .  i - the indices into j for the start of each local block row (starts with zero)
2031: .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2032: -  v - optional values in the matrix, stored as bs*bs values per block
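
   Example (a rough sketch of typical usage; the block size, array contents and matrix sizes below
   are illustrative only, and error checking is omitted):

.vb
   Mat         B;
   PetscInt    bs   = 2;
   PetscInt    ii[] = {0,2,3};        /* block row i has ii[i+1]-ii[i] block entries */
   PetscInt    jj[] = {0,1,1};        /* sorted global block column indices for each block row */
   PetscScalar vv[3*2*2] = {0};       /* bs*bs values per block, or pass PETSC_NULL */

   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);
.ve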

2034:    Level: developer

2036: .keywords: matrix, aij, compressed row, sparse, parallel

2038: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2039: @*/
2040: PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2041: {
2042:   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

2045:   PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);
2046:   if (f) {
2047:     (*f)(B,bs,i,j,v);
2048:   }
2049:   return(0);
2050: }

2055: PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2056: {
2057:   Mat_MPIBAIJ    *b;
2059:   PetscInt       i, newbs = PetscAbs(bs);

2062:   B->preallocated = PETSC_TRUE;
2063:   if (bs < 0) {
2064:     PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");
2065:       PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);
2066:     PetscOptionsEnd();
2067:     bs   = PetscAbs(bs);
2068:   }
2069:   if ((d_nnz || o_nnz) && newbs != bs) {
2070:     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2071:   }
2072:   bs = newbs;


2075:   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2076:   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2077:   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2078:   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2079:   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2080: 
2081:   PetscMapSetBlockSize(B->rmap,bs);
2082:   PetscMapSetBlockSize(B->cmap,bs);
2083:   PetscMapSetUp(B->rmap);
2084:   PetscMapSetUp(B->cmap);

2086:   if (d_nnz) {
2087:     for (i=0; i<B->rmap->n/bs; i++) {
2088:       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2089:     }
2090:   }
2091:   if (o_nnz) {
2092:     for (i=0; i<B->rmap->n/bs; i++) {
2093:       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2094:     }
2095:   }

2097:   b = (Mat_MPIBAIJ*)B->data;
2098:   b->bs2 = bs*bs;
2099:   b->mbs = B->rmap->n/bs;
2100:   b->nbs = B->cmap->n/bs;
2101:   b->Mbs = B->rmap->N/bs;
2102:   b->Nbs = B->cmap->N/bs;

2104:   for (i=0; i<=b->size; i++) {
2105:     b->rangebs[i] = B->rmap->range[i]/bs;
2106:   }
2107:   b->rstartbs = B->rmap->rstart/bs;
2108:   b->rendbs   = B->rmap->rend/bs;
2109:   b->cstartbs = B->cmap->rstart/bs;
2110:   b->cendbs   = B->cmap->rend/bs;

2112:   MatCreate(PETSC_COMM_SELF,&b->A);
2113:   MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
2114:   MatSetType(b->A,MATSEQBAIJ);
2115:   MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
2116:   PetscLogObjectParent(B,b->A);
2117:   MatCreate(PETSC_COMM_SELF,&b->B);
2118:   MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
2119:   MatSetType(b->B,MATSEQBAIJ);
2120:   MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
2121:   PetscLogObjectParent(B,b->B);

2123:   MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);

2125:   return(0);
2126: }

2130: EXTERN PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2131: EXTERN PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);

2134: /*MC
2135:    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

2137:    Options Database Keys:
2138: + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2139: . -mat_block_size <bs> - set the blocksize used to store the matrix
2140: - -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix; fact is the hash table factor
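
  A sketch of how a matrix of this type is typically built (m,n,M,N,bs,d_nz,o_nz are placeholders
  for application-specific sizes; error checking omitted):

.vb
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,m,n,M,N);
  MatSetType(A,MATMPIBAIJ);
  MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve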

2142:   Level: beginner

2144: .seealso: MatCreateMPIBAIJ
2145: M*/

2150: PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
2151: {
2152:   Mat_MPIBAIJ    *b;
2154:   PetscTruth     flg;

2157:   PetscNewLog(B,Mat_MPIBAIJ,&b);
2158:   B->data = (void*)b;


2161:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2162:   B->mapping    = 0;
2163:   B->assembled  = PETSC_FALSE;

2165:   B->insertmode = NOT_SET_VALUES;
2166:   MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);
2167:   MPI_Comm_size(((PetscObject)B)->comm,&b->size);

2169:   /* build local table of row and column ownerships */
2170:   PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);

2172:   /* build cache for off array entries formed */
2173:   MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);
2174:   b->donotstash  = PETSC_FALSE;
2175:   b->colmap      = PETSC_NULL;
2176:   b->garray      = PETSC_NULL;
2177:   b->roworiented = PETSC_TRUE;

2179:   /* stuff used in block assembly */
2180:   b->barray       = 0;

2182:   /* stuff used for matrix vector multiply */
2183:   b->lvec         = 0;
2184:   b->Mvctx        = 0;

2186:   /* stuff for MatGetRow() */
2187:   b->rowindices   = 0;
2188:   b->rowvalues    = 0;
2189:   b->getrowactive = PETSC_FALSE;

2191:   /* hash table stuff */
2192:   b->ht           = 0;
2193:   b->hd           = 0;
2194:   b->ht_size      = 0;
2195:   b->ht_flag      = PETSC_FALSE;
2196:   b->ht_fact      = 0;
2197:   b->ht_total_ct  = 0;
2198:   b->ht_insert_ct = 0;

2200:   PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");
2201:     PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);
2202:     if (flg) {
2203:       PetscReal fact = 1.39;
2204:       MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
2205:       PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);
2206:       if (fact <= 1.0) fact = 1.39;
2207:       MatMPIBAIJSetHashTableFactor(B,fact);
2208:       PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
2209:     }
2210:   PetscOptionsEnd();

2212:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2213:                                      "MatStoreValues_MPIBAIJ",
2214:                                      MatStoreValues_MPIBAIJ);
2215:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2216:                                      "MatRetrieveValues_MPIBAIJ",
2217:                                      MatRetrieveValues_MPIBAIJ);
2218:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2219:                                      "MatGetDiagonalBlock_MPIBAIJ",
2220:                                      MatGetDiagonalBlock_MPIBAIJ);
2221:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2222:                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2223:                                      MatMPIBAIJSetPreallocation_MPIBAIJ);
2224:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2225:                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2226:                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);
2227:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2228:                                      "MatDiagonalScaleLocal_MPIBAIJ",
2229:                                      MatDiagonalScaleLocal_MPIBAIJ);
2230:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2231:                                      "MatSetHashTableFactor_MPIBAIJ",
2232:                                      MatSetHashTableFactor_MPIBAIJ);
2233:   PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);
2234:   return(0);
2235: }

2238: /*MC
2239:    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

2241:    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2242:    and MATMPIBAIJ otherwise.

2244:    Options Database Keys:
2245: . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
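
  For example, a minimal sketch of letting the type be selected from the options database (run with
  -mat_type baij; error checking omitted):

.vb
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,m,n,M,N);
  MatSetType(A,MATBAIJ);       /* becomes MATSEQBAIJ on one process, MATMPIBAIJ otherwise */
  MatSetFromOptions(A);
.ve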

2247:   Level: beginner

2249: .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2250: M*/

2255: PetscErrorCode  MatCreate_BAIJ(Mat A)
2256: {
2258:   PetscMPIInt    size;

2261:   MPI_Comm_size(((PetscObject)A)->comm,&size);
2262:   if (size == 1) {
2263:     MatSetType(A,MATSEQBAIJ);
2264:   } else {
2265:     MatSetType(A,MATMPIBAIJ);
2266:   }
2267:   return(0);
2268: }

2273: /*@C
2274:    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2275:    (block compressed row).  For good matrix assembly performance
2276:    the user should preallocate the matrix storage by setting the parameters 
2277:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2278:    performance can be increased by more than a factor of 50.

2280:    Collective on Mat

2282:    Input Parameters:
2283: +  A - the matrix 
2284: .  bs   - size of block
2285: .  d_nz  - number of block nonzeros per block row in diagonal portion of local 
2286:            submatrix  (same for all local rows)
2287: .  d_nnz - array containing the number of block nonzeros in the various block rows 
2288:            in the diagonal portion of the local submatrix (possibly different for each block
2289:            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2290: .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2291:            submatrix (same for all local rows).
2292: -  o_nnz - array containing the number of nonzeros in the various block rows of the
2293:            off-diagonal portion of the local submatrix (possibly different for
2294:            each block row) or PETSC_NULL.

2296:    If the *_nnz parameter is given then the *_nz parameter is ignored

2298:    Options Database Keys:
2299: +   -mat_block_size - size of the blocks to use
2300: -   -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix; fact is the hash table factor

2302:    Notes:
2303:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2304:    then it must be used on all processors that share the object for that argument.

2306:    Storage Information:
2307:    For a square global matrix we define each processor's diagonal portion 
2308:    to be its local rows and the corresponding columns (a square submatrix);  
2309:    each processor's off-diagonal portion encompasses the remainder of the
2310:    local matrix (a rectangular submatrix). 

2312:    The user can specify preallocated storage for the diagonal part of
2313:    the local submatrix with either d_nz or d_nnz (not both).  Set 
2314:    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2315:    memory allocation.  Likewise, specify preallocated storage for the
2316:    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

2318:    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2319:    the figure below we depict these three local rows and all columns (0-11).

2321: .vb
2322:            0 1 2 3 4 5 6 7 8 9 10 11
2323:           -------------------
2324:    row 3  |  o o o d d d o o o o o o
2325:    row 4  |  o o o d d d o o o o o o
2326:    row 5  |  o o o d d d o o o o o o
2327:           -------------------
2328: .ve
2329:   
2330:    Thus, any entries in the d locations are stored in the d (diagonal) 
2331:    submatrix, and any entries in the o locations are stored in the
2332:    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2333:    stored simply in the MATSEQBAIJ format for compressed row storage.

2335:    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2336:    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2337:    In general, for PDE problems in which most nonzeros are near the diagonal,
2338:    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2339:    or you will get TERRIBLE performance; see the users' manual chapter on
2340:    matrices.

2342:    You can call MatGetInfo() to get information on how effective the preallocation was;
2343:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2344:    You can also run with the option -info and look for messages with the string 
2345:    malloc in them to see if additional memory allocation was needed.
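
   For instance, a rough sketch of preallocating with per-block-row counts (the sizes and counts
   shown are placeholders for values that would come from the application's mesh or stencil; error
   checking omitted):

.vb
   Mat      B;
   PetscInt bs = 2, mbs = 100, i, *d_nnz, *o_nnz;

   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,mbs*bs,mbs*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIBAIJ);
   PetscMalloc(2*mbs*sizeof(PetscInt),&d_nnz);
   o_nnz = d_nnz + mbs;
   for (i=0; i<mbs; i++) { d_nnz[i] = 3; o_nnz[i] = 1; }   /* illustrative counts only */
   MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
   PetscFree(d_nnz);
.ve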

2347:    Level: intermediate

2349: .keywords: matrix, block, aij, compressed row, sparse, parallel

2351: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2352: @*/
2353: PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2354: {
2355:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

2358:   PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);
2359:   if (f) {
2360:     (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
2361:   }
2362:   return(0);
2363: }

2367: /*@C
2368:    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2369:    (block compressed row).  For good matrix assembly performance
2370:    the user should preallocate the matrix storage by setting the parameters 
2371:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2372:    performance can be increased by more than a factor of 50.

2374:    Collective on MPI_Comm

2376:    Input Parameters:
2377: +  comm - MPI communicator
2378: .  bs   - size of block
2379: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2380:            This value should be the same as the local size used in creating the 
2381:            y vector for the matrix-vector product y = Ax.
2382: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2383:            This value should be the same as the local size used in creating the 
2384:            x vector for the matrix-vector product y = Ax.
2385: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2386: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2387: .  d_nz  - number of nonzero blocks per block row in diagonal portion of local 
2388:            submatrix  (same for all local rows)
2389: .  d_nnz - array containing the number of nonzero blocks in the various block rows 
2390:            in the diagonal portion of the local submatrix (possibly different for each block
2391:            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2392: .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2393:            submatrix (same for all local rows).
2394: -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2395:            off-diagonal portion of the local submatrix (possibly different for
2396:            each block row) or PETSC_NULL.

2398:    Output Parameter:
2399: .  A - the matrix 

2401:    Options Database Keys:
2402: +   -mat_block_size - size of the blocks to use
2403: -   -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix; fact is the hash table factor

2405:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2406:    MatXXXXSetPreallocation() paradigm instead of calling this routine directly. This is especially
2407:    true if you plan to use external direct solvers such as SuperLU, MUMPS or Spooles.
2408:    [MatXXXXSetPreallocation() is, for example, MatMPIBAIJSetPreallocation()]

2410:    Notes:
2411:    If the *_nnz parameter is given then the *_nz parameter is ignored

2413:    A nonzero block is any block that has 1 or more nonzeros in it

2415:    The user MUST specify either the local or global matrix dimensions
2416:    (possibly both).

2418:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2419:    then it must be used on all processors that share the object for that argument.

2421:    Storage Information:
2422:    For a square global matrix we define each processor's diagonal portion 
2423:    to be its local rows and the corresponding columns (a square submatrix);  
2424:    each processor's off-diagonal portion encompasses the remainder of the
2425:    local matrix (a rectangular submatrix). 

2427:    The user can specify preallocated storage for the diagonal part of
2428:    the local submatrix with either d_nz or d_nnz (not both).  Set 
2429:    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2430:    memory allocation.  Likewise, specify preallocated storage for the
2431:    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

2433:    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2434:    the figure below we depict these three local rows and all columns (0-11).

2436: .vb
2437:            0 1 2 3 4 5 6 7 8 9 10 11
2438:           -------------------
2439:    row 3  |  o o o d d d o o o o o o
2440:    row 4  |  o o o d d d o o o o o o
2441:    row 5  |  o o o d d d o o o o o o
2442:           -------------------
2443: .ve
2444:   
2445:    Thus, any entries in the d locations are stored in the d (diagonal) 
2446:    submatrix, and any entries in the o locations are stored in the
2447:    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2448:    stored simply in the MATSEQBAIJ format for compressed row storage.

2450:    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2451:    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2452:    In general, for PDE problems in which most nonzeros are near the diagonal,
2453:    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2454:    or you will get TERRIBLE performance; see the users' manual chapter on
2455:    matrices.
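
   For example, a minimal sketch of a call with constant per-block-row preallocation (bs, M, N, d_nz
   and o_nz are placeholders; error checking omitted):

.vb
   Mat A;
   MatCreateMPIBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,N,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
.ve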

2457:    Level: intermediate

2459: .keywords: matrix, block, aij, compressed row, sparse, parallel

2461: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2462: @*/
2463: PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2464: {
2466:   PetscMPIInt    size;

2469:   MatCreate(comm,A);
2470:   MatSetSizes(*A,m,n,M,N);
2471:   MPI_Comm_size(comm,&size);
2472:   if (size > 1) {
2473:     MatSetType(*A,MATMPIBAIJ);
2474:     MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
2475:   } else {
2476:     MatSetType(*A,MATSEQBAIJ);
2477:     MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
2478:   }
2479:   return(0);
2480: }

2484: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2485: {
2486:   Mat            mat;
2487:   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2489:   PetscInt       len=0;

2492:   *newmat       = 0;
2493:   MatCreate(((PetscObject)matin)->comm,&mat);
2494:   MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
2495:   MatSetType(mat,((PetscObject)matin)->type_name);
2496:   PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));

2498:   mat->factor       = matin->factor;
2499:   mat->preallocated = PETSC_TRUE;
2500:   mat->assembled    = PETSC_TRUE;
2501:   mat->insertmode   = NOT_SET_VALUES;

2503:   a      = (Mat_MPIBAIJ*)mat->data;
2504:   mat->rmap->bs  = matin->rmap->bs;
2505:   a->bs2   = oldmat->bs2;
2506:   a->mbs   = oldmat->mbs;
2507:   a->nbs   = oldmat->nbs;
2508:   a->Mbs   = oldmat->Mbs;
2509:   a->Nbs   = oldmat->Nbs;
2510: 
2511:   PetscMapCopy(((PetscObject)matin)->comm,matin->rmap,mat->rmap);
2512:   PetscMapCopy(((PetscObject)matin)->comm,matin->cmap,mat->cmap);

2514:   a->size         = oldmat->size;
2515:   a->rank         = oldmat->rank;
2516:   a->donotstash   = oldmat->donotstash;
2517:   a->roworiented  = oldmat->roworiented;
2518:   a->rowindices   = 0;
2519:   a->rowvalues    = 0;
2520:   a->getrowactive = PETSC_FALSE;
2521:   a->barray       = 0;
2522:   a->rstartbs     = oldmat->rstartbs;
2523:   a->rendbs       = oldmat->rendbs;
2524:   a->cstartbs     = oldmat->cstartbs;
2525:   a->cendbs       = oldmat->cendbs;

2527:   /* hash table stuff */
2528:   a->ht           = 0;
2529:   a->hd           = 0;
2530:   a->ht_size      = 0;
2531:   a->ht_flag      = oldmat->ht_flag;
2532:   a->ht_fact      = oldmat->ht_fact;
2533:   a->ht_total_ct  = 0;
2534:   a->ht_insert_ct = 0;

2536:   PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));
2537:   MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);
2538:   MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);
2539:   if (oldmat->colmap) {
2540: #if defined (PETSC_USE_CTABLE)
2541:   PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2542: #else
2543:   PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);
2544:   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2545:   PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));
2546: #endif
2547:   } else a->colmap = 0;

2549:   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2550:     PetscMalloc(len*sizeof(PetscInt),&a->garray);
2551:     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2552:     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));
2553:   } else a->garray = 0;
2554: 
2555:   VecDuplicate(oldmat->lvec,&a->lvec);
2556:   PetscLogObjectParent(mat,a->lvec);
2557:   VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2558:   PetscLogObjectParent(mat,a->Mvctx);

2560:    MatDuplicate(oldmat->A,cpvalues,&a->A);
2561:   PetscLogObjectParent(mat,a->A);
2562:    MatDuplicate(oldmat->B,cpvalues,&a->B);
2563:   PetscLogObjectParent(mat,a->B);
2564:   PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
2565:   *newmat = mat;

2567:   return(0);
2568: }

2570:  #include petscsys.h

2574: PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2575: {
2576:   Mat            A;
2578:   int            fd;
2579:   PetscInt       i,nz,j,rstart,rend;
2580:   PetscScalar    *vals,*buf;
2581:   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2582:   MPI_Status     status;
2583:   PetscMPIInt    rank,size,maxnz;
2584:   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2585:   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2586:   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2587:   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2588:   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2589:   PetscInt       dcount,kmax,k,nzcount,tmp,mend;

2592:   PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");
2593:     PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2594:   PetscOptionsEnd();

2596:   MPI_Comm_size(comm,&size);
2597:   MPI_Comm_rank(comm,&rank);
2598:   if (!rank) {
2599:     PetscViewerBinaryGetDescriptor(viewer,&fd);
2600:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
2601:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2602:   }

2604:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2605:   M = header[1]; N = header[2];

2607:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

2609:   /* 
2610:      This code adds extra rows to make sure the number of rows is 
2611:      divisible by the blocksize
2612:   */
2613:   Mbs        = M/bs;
2614:   extra_rows = bs - M + bs*Mbs;
2615:   if (extra_rows == bs) extra_rows = 0;
2616:   else                  Mbs++;
2617:   if (extra_rows && !rank) {
2618:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2619:   }

2621:   /* determine ownership of all rows */
2622:   mbs        = Mbs/size + ((Mbs % size) > rank);
2623:   m          = mbs*bs;
2624:   PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);
2625:   MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);

2627:   /* process 0 needs enough room for process with most rows */
2628:   if (!rank) {
2629:     mmax = rowners[1];
2630:     for (i=2; i<size; i++) {
2631:       mmax = PetscMax(mmax,rowners[i]);
2632:     }
2633:     mmax*=bs;
2634:   } else mmax = m;

2636:   rowners[0] = 0;
2637:   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2638:   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2639:   rstart = rowners[rank];
2640:   rend   = rowners[rank+1];

2642:   /* distribute row lengths to all processors */
2643:   PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);
2644:   if (!rank) {
2645:     mend = m;
2646:     if (size == 1) mend = mend - extra_rows;
2647:     PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);
2648:     for (j=mend; j<m; j++) locrowlens[j] = 1;
2649:     PetscMalloc(m*sizeof(PetscInt),&rowlengths);
2650:     PetscMalloc(size*sizeof(PetscInt),&procsnz);
2651:     PetscMemzero(procsnz,size*sizeof(PetscInt));
2652:     for (j=0; j<m; j++) {
2653:       procsnz[0] += locrowlens[j];
2654:     }
2655:     for (i=1; i<size; i++) {
2656:       mend = browners[i+1] - browners[i];
2657:       if (i == size-1) mend = mend - extra_rows;
2658:       PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);
2659:       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2660:       /* calculate the number of nonzeros on each processor */
2661:       for (j=0; j<browners[i+1]-browners[i]; j++) {
2662:         procsnz[i] += rowlengths[j];
2663:       }
2664:       MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);
2665:     }
2666:     PetscFree(rowlengths);
2667:   } else {
2668:     MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);
2669:   }

2671:   if (!rank) {
2672:     /* determine max buffer needed and allocate it */
2673:     maxnz = procsnz[0];
2674:     for (i=1; i<size; i++) {
2675:       maxnz = PetscMax(maxnz,procsnz[i]);
2676:     }
2677:     PetscMalloc(maxnz*sizeof(PetscInt),&cols);

2679:     /* read in my part of the matrix column indices  */
2680:     nz     = procsnz[0];
2681:     PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);
2682:     mycols = ibuf;
2683:     if (size == 1)  nz -= extra_rows;
2684:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2685:     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

2687:     /* read in the other processes' column indices (except the last) and ship them off */
2688:     for (i=1; i<size-1; i++) {
2689:       nz   = procsnz[i];
2690:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
2691:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2692:     }
2693:     /* read in the stuff for the last proc */
2694:     if (size != 1) {
2695:       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2696:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
2697:       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2698:       MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
2699:     }
2700:     PetscFree(cols);
2701:   } else {
2702:     /* determine buffer space needed for message */
2703:     nz = 0;
2704:     for (i=0; i<m; i++) {
2705:       nz += locrowlens[i];
2706:     }
2707:     PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);
2708:     mycols = ibuf;
2709:     /* receive message of column indices*/
2710:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2711:     MPI_Get_count(&status,MPIU_INT,&maxnz);
2712:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2713:   }
2714: 
2715:   /* loop over local rows, determining number of off diagonal entries */
2716:   PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);
2717:   PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);
2718:   PetscMemzero(mask,Mbs*sizeof(PetscInt));
2719:   PetscMemzero(masked1,Mbs*sizeof(PetscInt));
2720:   PetscMemzero(masked2,Mbs*sizeof(PetscInt));
2721:   rowcount = 0; nzcount = 0;
2722:   for (i=0; i<mbs; i++) {
2723:     dcount  = 0;
2724:     odcount = 0;
2725:     for (j=0; j<bs; j++) {
2726:       kmax = locrowlens[rowcount];
2727:       for (k=0; k<kmax; k++) {
2728:         tmp = mycols[nzcount++]/bs;
2729:         if (!mask[tmp]) {
2730:           mask[tmp] = 1;
2731:           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2732:           else masked1[dcount++] = tmp;
2733:         }
2734:       }
2735:       rowcount++;
2736:     }
2737: 
2738:     dlens[i]  = dcount;
2739:     odlens[i] = odcount;

2741:     /* zero out the mask elements we set */
2742:     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2743:     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2744:   }

2746:   /* create our matrix */
2747:   MatCreate(comm,&A);
2748:   MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);
2749:   MatSetType(A,type);
2750:   MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);

2752:   if (!rank) {
2753:     PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);
2754:     /* read in my part of the matrix numerical values  */
2755:     nz = procsnz[0];
2756:     vals = buf;
2757:     mycols = ibuf;
2758:     if (size == 1)  nz -= extra_rows;
2759:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2760:     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

2762:     /* insert into matrix */
2763:     jj      = rstart*bs;
2764:     for (i=0; i<m; i++) {
2765:       MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2766:       mycols += locrowlens[i];
2767:       vals   += locrowlens[i];
2768:       jj++;
2769:     }
2770:     /* read in other processors (except the last one) and ship out */
2771:     for (i=1; i<size-1; i++) {
2772:       nz   = procsnz[i];
2773:       vals = buf;
2774:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2775:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);
2776:     }
2777:     /* the last proc */
2778:     if (size != 1){
2779:       nz   = procsnz[i] - extra_rows;
2780:       vals = buf;
2781:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2782:       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2783:       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);
2784:     }
2785:     PetscFree(procsnz);
2786:   } else {
2787:     /* receive numeric values */
2788:     PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);

2790:     /* receive message of values*/
2791:     vals   = buf;
2792:     mycols = ibuf;
2793:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);
2794:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2795:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

2797:     /* insert into matrix */
2798:     jj      = rstart*bs;
2799:     for (i=0; i<m; i++) {
2800:       MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2801:       mycols += locrowlens[i];
2802:       vals   += locrowlens[i];
2803:       jj++;
2804:     }
2805:   }
2806:   PetscFree(locrowlens);
2807:   PetscFree(buf);
2808:   PetscFree(ibuf);
2809:   PetscFree2(rowners,browners);
2810:   PetscFree2(dlens,odlens);
2811:   PetscFree3(mask,masked1,masked2);
2812:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2813:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

2815:   *newmat = A;
2816:   return(0);
2817: }

2821: /*@
2822:    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

2824:    Input Parameters:
2825: .  mat  - the matrix
2826: .  fact - factor used to compute the hash table size

2828:    Collective on Mat

2830:    Level: advanced

2832:   Notes:
2833:    This can also be set by the command line option: -mat_use_hash_table <fact>
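
   For example, a minimal sketch (the value 1.39 mirrors the default used when the hash table is
   enabled from the options database):

.vb
   MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(mat,1.39);
.ve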

2835: .keywords: matrix, hashtable, factor, HT

2837: .seealso: MatSetOption()
2838: @*/
2839: PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2840: {
2841:   PetscErrorCode ierr,(*f)(Mat,PetscReal);

2844:   PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);
2845:   if (f) {
2846:     (*f)(mat,fact);
2847:   }
2848:   return(0);
2849: }

2854: PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2855: {
2856:   Mat_MPIBAIJ *baij;

2859:   baij = (Mat_MPIBAIJ*)mat->data;
2860:   baij->ht_fact = fact;
2861:   return(0);
2862: }

2867: PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2868: {
2869:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2871:   *Ad     = a->A;
2872:   *Ao     = a->B;
2873:   *colmap = a->garray;
2874:   return(0);
2875: }

2877: /*
2878:     Special version for direct calls from Fortran (to eliminate two function call overheads)
2879: */
2880: #if defined(PETSC_HAVE_FORTRAN_CAPS)
2881: #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
2882: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
2883: #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
2884: #endif

2888: /*@C
2889:   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

2891:   Collective on Mat

2893:   Input Parameters:
2894: + mat - the matrix
2895: . min - number of input rows
2896: . im - input rows
2897: . nin - number of input columns
2898: . in - input columns
2899: . v - numerical values input
2900: - addvin - INSERT_VALUES or ADD_VALUES
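
  For reference, a minimal sketch of the standard C-side call that this Fortran shortcut replaces
  (m, im, n, in and v are placeholders for the caller's row/column indices and block values):

.vb
  MatSetValuesBlocked(mat,m,im,n,in,v,ADD_VALUES);
.ve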

2902:   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

2904:   Level: advanced

2906: .seealso:   MatSetValuesBlocked()
2907: @*/
2908: PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
2909: {
2910:   /* convert input arguments to C version */
2911:   Mat             mat = *matin;
2912:   PetscInt        m = *min, n = *nin;
2913:   InsertMode      addv = *addvin;

2915:   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
2916:   const MatScalar *value;
2917:   MatScalar       *barray=baij->barray;
2918:   PetscTruth      roworiented = baij->roworiented;
2919:   PetscErrorCode  ierr;
2920:   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
2921:   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
2922:   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
2923: 
2925:   /* tasks normally handled by MatSetValuesBlocked() */
2926:   if (mat->insertmode == NOT_SET_VALUES) {
2927:     mat->insertmode = addv;
2928:   }
2929: #if defined(PETSC_USE_DEBUG) 
2930:   else if (mat->insertmode != addv) {
2931:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
2932:   }
2933:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2934: #endif
2935:   if (mat->assembled) {
2936:     mat->was_assembled = PETSC_TRUE;
2937:     mat->assembled     = PETSC_FALSE;
2938:   }
2939:   PetscLogEventBegin(MAT_SetValues,mat,0,0,0);


2942:   if(!barray) {
2943:     PetscMalloc(bs2*sizeof(MatScalar),&barray);
2944:     baij->barray = barray;
2945:   }

2947:   if (roworiented) {
2948:     stepval = (n-1)*bs;
2949:   } else {
2950:     stepval = (m-1)*bs;
2951:   }
2952:   for (i=0; i<m; i++) {
2953:     if (im[i] < 0) continue;
2954: #if defined(PETSC_USE_DEBUG)
2955:     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
2956: #endif
2957:     if (im[i] >= rstart && im[i] < rend) {
2958:       row = im[i] - rstart;
2959:       for (j=0; j<n; j++) {
2960:         /* If NumCol = 1 then a copy is not required */
2961:         if ((roworiented) && (n == 1)) {
2962:           barray = (MatScalar*)v + i*bs2;
2963:         } else if((!roworiented) && (m == 1)) {
2964:           barray = (MatScalar*)v + j*bs2;
2965:         } else { /* Here a copy is required */
2966:           if (roworiented) {
2967:             value = v + i*(stepval+bs)*bs + j*bs;
2968:           } else {
2969:             value = v + j*(stepval+bs)*bs + i*bs;
2970:           }
2971:           for (ii=0; ii<bs; ii++,value+=stepval) {
2972:             for (jj=0; jj<bs; jj++) {
2973:               *barray++  = *value++;
2974:             }
2975:           }
2976:           barray -=bs2;
2977:         }
2978: 
2979:         if (in[j] >= cstart && in[j] < cend){
2980:           col  = in[j] - cstart;
2981:           MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);
2982:         }
2983:         else if (in[j] < 0) continue;
2984: #if defined(PETSC_USE_DEBUG)
2985:         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
2986: #endif
2987:         else {
2988:           if (mat->was_assembled) {
2989:             if (!baij->colmap) {
2990:               CreateColmap_MPIBAIJ_Private(mat);
2991:             }

2993: #if defined(PETSC_USE_DEBUG)
2994: #if defined (PETSC_USE_CTABLE)
2995:             { PetscInt data;
2996:               PetscTableFind(baij->colmap,in[j]+1,&data);
2997:               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
2998:             }
2999: #else
3000:             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3001: #endif
3002: #endif
3003: #if defined (PETSC_USE_CTABLE)
3004:             PetscTableFind(baij->colmap,in[j]+1,&col);
3005:             col  = (col - 1)/bs;
3006: #else
3007:             col = (baij->colmap[in[j]] - 1)/bs;
3008: #endif
3009:             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3010:               DisAssemble_MPIBAIJ(mat);
3011:               col =  in[j];
3012:             }
3013:           }
3014:           else col = in[j];
3015:           MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);
3016:         }
3017:       }
3018:     } else {
3019:       if (!baij->donotstash) {
3020:         if (roworiented) {
3021:           MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3022:         } else {
3023:           MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3024:         }
3025:       }
3026:     }
3027:   }
3028: 
3029:   /* task normally handled by MatSetValuesBlocked() */
3030:   PetscLogEventEnd(MAT_SetValues,mat,0,0,0);
3031:   return(0);
3032: }