Actual source code: sbaij2.c

  1: #define PETSCMAT_DLL

 3:  #include ../src/mat/impls/baij/seq/baij.h
 4:  #include ../src/inline/spops.h
 5:  #include ../src/inline/ilu.h
 6:  #include petscbt.h
 7:  #include ../src/mat/impls/sbaij/seq/sbaij.h

 11: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
 12: {
 13:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
 15:   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
 16:   const PetscInt *idx;
 17:   PetscBT        table,table0;

 20:   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 21:   mbs = a->mbs;
 22:   ai  = a->i;
 23:   aj  = a->j;
 24:   bs  = A->rmap->bs;
 25:   PetscBTCreate(mbs,table);
 26:   PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
 27:   PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);
 28:   PetscBTCreate(mbs,table0);

 30:   for (i=0; i<is_max; i++) { /* for each is */
 31:     isz  = 0;
 32:     PetscBTMemzero(mbs,table);
 33: 
 34:     /* Extract the indices, assume there can be duplicate entries */
 35:     ISGetIndices(is[i],&idx);
 36:     ISGetLocalSize(is[i],&n);

 38:     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
 39:     bcol_max = 0;
 40:     for (j=0; j<n ; ++j){
 41:       brow = idx[j]/bs; /* convert the indices into block indices */
 42:       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
 43:       if(!PetscBTLookupSet(table,brow)) {
 44:         nidx[isz++] = brow;
 45:         if (bcol_max < brow) bcol_max = brow;
 46:       }
 47:     }
 48:     ISRestoreIndices(is[i],&idx);
 49:     ISDestroy(is[i]);
 50: 
 51:     k = 0;
 52:     for (j=0; j<ov; j++){ /* for each overlap */
 53:       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
 54:       PetscBTMemzero(mbs,table0);
 55:       for (l=k; l<isz; l++) { PetscBTSet(table0,nidx[l]); }

 57:       n = isz;  /* length of the updated is[i] */
 58:       for (brow=0; brow<mbs; brow++){
 59:         start = ai[brow]; end   = ai[brow+1];
 60:         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
 61:           for (l = start; l<end ; l++){
 62:             bcol = aj[l];
 63:             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
 64:           }
 65:           k++;
 66:           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
 67:         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
 68:           for (l = start; l<end ; l++){
 69:             bcol = aj[l];
 70:             if (bcol > bcol_max) break;
 71:             if (PetscBTLookup(table0,bcol)){
 72:               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
 73:               break; /* for l = start; l<end ; l++) */
 74:             }
 75:           }
 76:         }
 77:       }
 78:     } /* for each overlap */

 80:     /* expand the Index Set */
 81:     for (j=0; j<isz; j++) {
 82:       for (k=0; k<bs; k++)
 83:         nidx2[j*bs+k] = nidx[j]*bs+k;
 84:     }
 85:     ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
 86:   }
 87:   PetscBTDestroy(table);
 88:   PetscFree(nidx);
 89:   PetscFree(nidx2);
 90:   PetscBTDestroy(table0);
 91:   return(0);
 92: }

 96: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
 97: {
 98:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
100:   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
101:   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
102:   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
103:   const PetscInt *irow,*aj = a->j,*ai = a->i;
104:   MatScalar      *mat_a;
105:   Mat            C;
106:   PetscTruth     flag,sorted;

109:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
110:   ISSorted(iscol,&sorted);
111:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");

113:   ISGetIndices(isrow,&irow);
114:   ISGetSize(isrow,&nrows);
115: 
116:   PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);
117:   ssmap = smap;
118:   PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
119:   PetscMemzero(smap,oldcols*sizeof(PetscInt));
120:   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
121:   /* determine lens of each row */
122:   for (i=0; i<nrows; i++) {
123:     kstart  = ai[irow[i]];
124:     kend    = kstart + a->ilen[irow[i]];
125:     lens[i] = 0;
126:       for (k=kstart; k<kend; k++) {
127:         if (ssmap[aj[k]]) {
128:           lens[i]++;
129:         }
130:       }
131:     }
132:   /* Create and fill new matrix */
133:   if (scall == MAT_REUSE_MATRIX) {
134:     c = (Mat_SeqSBAIJ *)((*B)->data);

136:     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
137:     PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
138:     if (!flag) {
139:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
140:     }
141:     PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
142:     C = *B;
143:   } else {
144:     MatCreate(((PetscObject)A)->comm,&C);
145:     MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
146:     MatSetType(C,((PetscObject)A)->type_name);
147:     MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
148:   }
149:   c = (Mat_SeqSBAIJ *)(C->data);
150:   for (i=0; i<nrows; i++) {
151:     row    = irow[i];
152:     kstart = ai[row];
153:     kend   = kstart + a->ilen[row];
154:     mat_i  = c->i[i];
155:     mat_j  = c->j + mat_i;
156:     mat_a  = c->a + mat_i*bs2;
157:     mat_ilen = c->ilen + i;
158:     for (k=kstart; k<kend; k++) {
159:       if ((tcol=ssmap[a->j[k]])) {
160:         *mat_j++ = tcol - 1;
161:         PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
162:         mat_a   += bs2;
163:         (*mat_ilen)++;
164:       }
165:     }
166:   }
167: 
168:   /* Free work space */
169:   PetscFree(smap);
170:   PetscFree(lens);
171:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
172:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
173: 
174:   ISRestoreIndices(isrow,&irow);
175:   *B = C;
176:   return(0);
177: }

181: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
182: {
183:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
184:   IS             is1;
186:   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
187:   const PetscInt *irow;

190:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
191: 
192:   ISGetIndices(isrow,&irow);
193:   ISGetSize(isrow,&nrows);
194: 
195:   /* Verify if the indices corespond to each element in a block 
196:    and form the IS with compressed IS */
197:   PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);
198:   iary = vary + a->mbs;
199:   PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
200:   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
201: 
202:   count = 0;
203:   for (i=0; i<a->mbs; i++) {
204:     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
205:     if (vary[i]==bs) iary[count++] = i;
206:   }
207:   ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
208: 
209:   ISRestoreIndices(isrow,&irow);
210:   PetscFree(vary);

212:   MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
213:   ISDestroy(is1);
214:   return(0);
215: }

219: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
220: {
222:   PetscInt       i;

225:   if (scall == MAT_INITIAL_MATRIX) {
226:     PetscMalloc((n+1)*sizeof(Mat),B);
227:   }

229:   for (i=0; i<n; i++) {
230:     MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
231:   }
232:   return(0);
233: }

235: /* -------------------------------------------------------*/
236: /* Should check that shapes of vectors and matrices match */
237: /* -------------------------------------------------------*/
238:  #include petscblaslapack.h

242: PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
243: {
244:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
245:   PetscScalar    *x,*z,*xb,x1,zero=0.0;
246:   MatScalar      *v;
248:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
249:   PetscInt       nonzerorow=0;

252:   VecSet(zz,zero);
253:   VecGetArray(xx,&x);
254:   VecGetArray(zz,&z);

256:   v  = a->a;
257:   xb = x;
258: 
259:   for (i=0; i<mbs; i++) {
260:     n    = ai[1] - ai[0];  /* length of i_th row of A */
261:     x1   = *xb++;
262:     ib   = aj + *ai++;
263:     jmin = 0;
264:     nonzerorow += (n>0);
265:     /* if we ALWAYS required a diagonal entry then could remove this if test */
266:     /* should we use a tmp to hold the accumulated z[i] */
267:     if (*ib == i) {      /* (diag of A)*x */
268:       z[i] += *v++ * x[*ib++];
269:       jmin++;
270:     }
271:     for (j=jmin; j<n; j++) {
272:       cval    = *ib;
273:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
274:       z[i]    += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
275:     }
276:   }

278:   VecRestoreArray(xx,&x);
279:   VecRestoreArray(zz,&z);
280:   PetscLogFlops(2*(a->nz*2 - nonzerorow) - nonzerorow);  /* nz = (nz+m)/2 */
281:   return(0);
282: }

286: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
287: {
288:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
289:   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
290:   MatScalar      *v;
292:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
293:   PetscInt       nonzerorow=0;

296:   VecSet(zz,zero);
297:   VecGetArray(xx,&x);
298:   VecGetArray(zz,&z);
299: 
300:   v     = a->a;
301:   xb = x;

303:   for (i=0; i<mbs; i++) {
304:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
305:     x1 = xb[0]; x2 = xb[1];
306:     ib = aj + *ai;
307:     jmin = 0;
308:     nonzerorow += (n>0);
309:     if (*ib == i){     /* (diag of A)*x */
310:       z[2*i]   += v[0]*x1 + v[2]*x2;
311:       z[2*i+1] += v[2]*x1 + v[3]*x2;
312:       v += 4; jmin++;
313:     }
314:     for (j=jmin; j<n; j++) {
315:       /* (strict lower triangular part of A)*x  */
316:       cval       = ib[j]*2;
317:       z[cval]     += v[0]*x1 + v[1]*x2;
318:       z[cval+1]   += v[2]*x1 + v[3]*x2;
319:       /* (strict upper triangular part of A)*x  */
320:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
321:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
322:       v  += 4;
323:     }
324:     xb +=2; ai++;
325:   }

327:   VecRestoreArray(xx,&x);
328:   VecRestoreArray(zz,&z);
329:   PetscLogFlops(8*(a->nz*2 - nonzerorow) - nonzerorow);
330:   return(0);
331: }

335: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
336: {
337:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
338:   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
339:   MatScalar      *v;
341:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
342:   PetscInt       nonzerorow=0;

345:   VecSet(zz,zero);
346:   VecGetArray(xx,&x);
347:   VecGetArray(zz,&z);
348: 
349:   v    = a->a;
350:   xb   = x;

352:   for (i=0; i<mbs; i++) {
353:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
354:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
355:     ib = aj + *ai;
356:     jmin = 0;
357:     nonzerorow += (n>0);
358:     if (*ib == i){     /* (diag of A)*x */
359:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
360:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
361:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
362:       v += 9; jmin++;
363:     }
364:     for (j=jmin; j<n; j++) {
365:       /* (strict lower triangular part of A)*x  */
366:       cval       = ib[j]*3;
367:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
368:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
369:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
370:       /* (strict upper triangular part of A)*x  */
371:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
372:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
373:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
374:       v  += 9;
375:     }
376:     xb +=3; ai++;
377:   }

379:   VecRestoreArray(xx,&x);
380:   VecRestoreArray(zz,&z);
381:   PetscLogFlops(18*(a->nz*2 - nonzerorow) - nonzerorow);
382:   return(0);
383: }

387: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
388: {
389:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
390:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
391:   MatScalar      *v;
393:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
394:   PetscInt       nonzerorow=0;

397:   VecSet(zz,zero);
398:   VecGetArray(xx,&x);
399:   VecGetArray(zz,&z);
400: 
401:   v     = a->a;
402:   xb = x;

404:   for (i=0; i<mbs; i++) {
405:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
406:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
407:     ib = aj + *ai;
408:     jmin = 0;
409:     nonzerorow += (n>0);
410:     if (*ib == i){     /* (diag of A)*x */
411:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
412:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
413:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
414:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
415:       v += 16; jmin++;
416:     }
417:     for (j=jmin; j<n; j++) {
418:       /* (strict lower triangular part of A)*x  */
419:       cval       = ib[j]*4;
420:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
421:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
422:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
423:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
424:       /* (strict upper triangular part of A)*x  */
425:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
426:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
427:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
428:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
429:       v  += 16;
430:     }
431:     xb +=4; ai++;
432:   }

434:   VecRestoreArray(xx,&x);
435:   VecRestoreArray(zz,&z);
436:   PetscLogFlops(32*(a->nz*2 - nonzerorow) - nonzerorow);
437:   return(0);
438: }

442: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
443: {
444:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
445:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
446:   MatScalar      *v;
448:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
449:   PetscInt       nonzerorow=0;

452:   VecSet(zz,zero);
453:   VecGetArray(xx,&x);
454:   VecGetArray(zz,&z);
455: 
456:   v     = a->a;
457:   xb = x;

459:   for (i=0; i<mbs; i++) {
460:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
461:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
462:     ib = aj + *ai;
463:     jmin = 0;
464:     nonzerorow += (n>0);
465:     if (*ib == i){      /* (diag of A)*x */
466:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
467:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
468:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
469:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
470:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
471:       v += 25; jmin++;
472:     }
473:     for (j=jmin; j<n; j++) {
474:       /* (strict lower triangular part of A)*x  */
475:       cval       = ib[j]*5;
476:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
477:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
478:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
479:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
480:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
481:       /* (strict upper triangular part of A)*x  */
482:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
483:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
484:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
485:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
486:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
487:       v  += 25;
488:     }
489:     xb +=5; ai++;
490:   }

492:   VecRestoreArray(xx,&x);
493:   VecRestoreArray(zz,&z);
494:   PetscLogFlops(50*(a->nz*2 - nonzerorow) - nonzerorow);
495:   return(0);
496: }


501: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
502: {
503:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
504:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
505:   MatScalar      *v;
507:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
508:   PetscInt       nonzerorow=0;

511:   VecSet(zz,zero);
512:   VecGetArray(xx,&x);
513:   VecGetArray(zz,&z);
514: 
515:   v     = a->a;
516:   xb = x;

518:   for (i=0; i<mbs; i++) {
519:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
520:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
521:     ib = aj + *ai;
522:     jmin = 0;
523:     nonzerorow += (n>0);
524:     if (*ib == i){      /* (diag of A)*x */
525:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
526:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
527:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
528:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
529:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
530:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
531:       v += 36; jmin++;
532:     }
533:     for (j=jmin; j<n; j++) {
534:       /* (strict lower triangular part of A)*x  */
535:       cval       = ib[j]*6;
536:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
537:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
538:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
539:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
540:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
541:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
542:       /* (strict upper triangular part of A)*x  */
543:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
544:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
545:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
546:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
547:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
548:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
549:       v  += 36;
550:     }
551:     xb +=6; ai++;
552:   }

554:   VecRestoreArray(xx,&x);
555:   VecRestoreArray(zz,&z);
556:   PetscLogFlops(72*(a->nz*2 - nonzerorow) - nonzerorow);
557:   return(0);
558: }
561: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
562: {
563:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
564:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
565:   MatScalar      *v;
567:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
568:   PetscInt       nonzerorow=0;

571:   VecSet(zz,zero);
572:   VecGetArray(xx,&x);
573:   VecGetArray(zz,&z);
574: 
575:   v     = a->a;
576:   xb = x;

578:   for (i=0; i<mbs; i++) {
579:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
580:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
581:     ib = aj + *ai;
582:     jmin = 0;
583:     nonzerorow += (n>0);
584:     if (*ib == i){      /* (diag of A)*x */
585:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
586:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
587:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
588:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
589:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
590:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
591:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
592:       v += 49; jmin++;
593:     }
594:     for (j=jmin; j<n; j++) {
595:       /* (strict lower triangular part of A)*x  */
596:       cval       = ib[j]*7;
597:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
598:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
599:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
600:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
601:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
602:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
603:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
604:       /* (strict upper triangular part of A)*x  */
605:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
606:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
607:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
608:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
609:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
610:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
611:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
612:       v  += 49;
613:     }
614:     xb +=7; ai++;
615:   }
616:   VecRestoreArray(xx,&x);
617:   VecRestoreArray(zz,&z);
618:   PetscLogFlops(98*(a->nz*2 - nonzerorow) - nonzerorow);
619:   return(0);
620: }

622: /*
623:     This will not work with MatScalar == float because it calls the BLAS
624: */
627: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
628: {
629:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
630:   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
631:   MatScalar      *v;
633:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
634:   PetscInt       nonzerorow=0;

637:   VecSet(zz,zero);
638:   VecGetArray(xx,&x); x_ptr=x;
639:   VecGetArray(zz,&z); z_ptr=z;

641:   aj   = a->j;
642:   v    = a->a;
643:   ii   = a->i;

645:   if (!a->mult_work) {
646:     PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);
647:   }
648:   work = a->mult_work;
649: 
650:   for (i=0; i<mbs; i++) {
651:     n     = ii[1] - ii[0]; ncols = n*bs;
652:     workt = work; idx=aj+ii[0];
653:     nonzerorow += (n>0);

655:     /* upper triangular part */
656:     for (j=0; j<n; j++) {
657:       xb = x_ptr + bs*(*idx++);
658:       for (k=0; k<bs; k++) workt[k] = xb[k];
659:       workt += bs;
660:     }
661:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
662:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
663: 
664:     /* strict lower triangular part */
665:     idx = aj+ii[0];
666:     if (*idx == i){
667:       ncols -= bs; v += bs2; idx++; n--;
668:     }
669: 
670:     if (ncols > 0){
671:       workt = work;
672:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
673:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
674:       for (j=0; j<n; j++) {
675:         zb = z_ptr + bs*(*idx++);
676:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
677:         workt += bs;
678:       }
679:     }
680:     x += bs; v += n*bs2; z += bs; ii++;
681:   }
682: 
683:   VecRestoreArray(xx,&x);
684:   VecRestoreArray(zz,&z);
685:   PetscLogFlops(2*(a->nz*2 - nonzerorow)*bs2 - nonzerorow);
686:   return(0);
687: }

692: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
693: {
694:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
695:   PetscScalar    *x,*z,*xb,x1;
696:   MatScalar      *v;
698:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
699:   PetscInt       nonzerorow=0;

702:   VecCopy_Seq(yy,zz);
703:   VecGetArray(xx,&x);
704:   VecGetArray(zz,&z);
705:   v  = a->a;
706:   xb = x;

708:   for (i=0; i<mbs; i++) {
709:     n  = ai[1] - ai[0];  /* length of i_th row of A */
710:     x1 = xb[0];
711:     ib = aj + *ai;
712:     jmin = 0;
713:     nonzerorow += (n>0);
714:     if (*ib == i) {            /* (diag of A)*x */
715:       z[i] += *v++ * x[*ib++]; jmin++;
716:     }
717:     for (j=jmin; j<n; j++) {
718:       cval    = *ib;
719:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
720:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
721:     }
722:     xb++; ai++;
723:   }

725:   VecRestoreArray(xx,&x);
726:   VecRestoreArray(zz,&z);
727: 
728:   PetscLogFlops(2*(a->nz*2 - nonzerorow));
729:   return(0);
730: }

734: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
735: {
736:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
737:   PetscScalar    *x,*z,*xb,x1,x2;
738:   MatScalar      *v;
740:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
741:   PetscInt       nonzerorow=0;

744:   VecCopy_Seq(yy,zz);
745:   VecGetArray(xx,&x);
746:   VecGetArray(zz,&z);

748:   v  = a->a;
749:   xb = x;

751:   for (i=0; i<mbs; i++) {
752:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
753:     x1 = xb[0]; x2 = xb[1];
754:     ib = aj + *ai;
755:     jmin = 0;
756:     nonzerorow += (n>0);
757:     if (*ib == i){      /* (diag of A)*x */
758:       z[2*i]   += v[0]*x1 + v[2]*x2;
759:       z[2*i+1] += v[2]*x1 + v[3]*x2;
760:       v += 4; jmin++;
761:     }
762:     for (j=jmin; j<n; j++) {
763:       /* (strict lower triangular part of A)*x  */
764:       cval       = ib[j]*2;
765:       z[cval]     += v[0]*x1 + v[1]*x2;
766:       z[cval+1]   += v[2]*x1 + v[3]*x2;
767:       /* (strict upper triangular part of A)*x  */
768:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
769:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
770:       v  += 4;
771:     }
772:     xb +=2; ai++;
773:   }
774:   VecRestoreArray(xx,&x);
775:   VecRestoreArray(zz,&z);

777:   PetscLogFlops(4*(a->nz*2 - nonzerorow));
778:   return(0);
779: }

783: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
784: {
785:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
786:   PetscScalar    *x,*z,*xb,x1,x2,x3;
787:   MatScalar      *v;
789:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
790:   PetscInt       nonzerorow=0;

793:   VecCopy_Seq(yy,zz);
794:   VecGetArray(xx,&x);
795:   VecGetArray(zz,&z);

797:   v     = a->a;
798:   xb = x;

800:   for (i=0; i<mbs; i++) {
801:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
802:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
803:     ib = aj + *ai;
804:     jmin = 0;
805:     nonzerorow += (n>0);
806:     if (*ib == i){     /* (diag of A)*x */
807:      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
808:      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
809:      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
810:      v += 9; jmin++;
811:     }
812:     for (j=jmin; j<n; j++) {
813:       /* (strict lower triangular part of A)*x  */
814:       cval       = ib[j]*3;
815:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
816:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
817:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
818:       /* (strict upper triangular part of A)*x  */
819:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
820:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
821:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
822:       v  += 9;
823:     }
824:     xb +=3; ai++;
825:   }

827:   VecRestoreArray(xx,&x);
828:   VecRestoreArray(zz,&z);

830:   PetscLogFlops(18*(a->nz*2 - nonzerorow));
831:   return(0);
832: }

836: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
837: {
838:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
839:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
840:   MatScalar      *v;
842:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
843:   PetscInt       nonzerorow=0;

846:   VecCopy_Seq(yy,zz);
847:   VecGetArray(xx,&x);
848:   VecGetArray(zz,&z);

850:   v     = a->a;
851:   xb = x;

853:   for (i=0; i<mbs; i++) {
854:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
855:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
856:     ib = aj + *ai;
857:     jmin = 0;
858:     nonzerorow += (n>0);
859:     if (*ib == i){      /* (diag of A)*x */
860:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
861:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
862:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
863:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
864:       v += 16; jmin++;
865:     }
866:     for (j=jmin; j<n; j++) {
867:       /* (strict lower triangular part of A)*x  */
868:       cval       = ib[j]*4;
869:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
870:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
871:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
872:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
873:       /* (strict upper triangular part of A)*x  */
874:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
875:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
876:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
877:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
878:       v  += 16;
879:     }
880:     xb +=4; ai++;
881:   }

883:   VecRestoreArray(xx,&x);
884:   VecRestoreArray(zz,&z);

886:   PetscLogFlops(32*(a->nz*2 - nonzerorow));
887:   return(0);
888: }

892: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
893: {
894:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
895:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
896:   MatScalar      *v;
898:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
899:   PetscInt       nonzerorow=0;

902:   VecCopy_Seq(yy,zz);
903:   VecGetArray(xx,&x);
904:   VecGetArray(zz,&z);

906:   v     = a->a;
907:   xb = x;

909:   for (i=0; i<mbs; i++) {
910:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
911:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
912:     ib = aj + *ai;
913:     jmin = 0;
914:     nonzerorow += (n>0);
915:     if (*ib == i){      /* (diag of A)*x */
916:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
917:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
918:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
919:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
920:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
921:       v += 25; jmin++;
922:     }
923:     for (j=jmin; j<n; j++) {
924:       /* (strict lower triangular part of A)*x  */
925:       cval       = ib[j]*5;
926:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
927:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
928:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
929:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
930:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
931:       /* (strict upper triangular part of A)*x  */
932:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
933:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
934:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
935:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
936:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
937:       v  += 25;
938:     }
939:     xb +=5; ai++;
940:   }

942:   VecRestoreArray(xx,&x);
943:   VecRestoreArray(zz,&z);

945:   PetscLogFlops(50*(a->nz*2 - nonzerorow));
946:   return(0);
947: }
950: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
951: {
952:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
953:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
954:   MatScalar      *v;
956:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
957:   PetscInt       nonzerorow=0;

960:   VecCopy_Seq(yy,zz);
961:   VecGetArray(xx,&x);
962:   VecGetArray(zz,&z);

964:   v     = a->a;
965:   xb = x;

967:   for (i=0; i<mbs; i++) {
968:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
969:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
970:     ib = aj + *ai;
971:     jmin = 0;
972:     nonzerorow += (n>0);
973:     if (*ib == i){     /* (diag of A)*x */
974:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
975:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
976:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
977:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
978:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
979:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
980:       v += 36; jmin++;
981:     }
982:     for (j=jmin; j<n; j++) {
983:       /* (strict lower triangular part of A)*x  */
984:       cval       = ib[j]*6;
985:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
986:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
987:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
988:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
989:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
990:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
991:       /* (strict upper triangular part of A)*x  */
992:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
993:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
994:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
995:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
996:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
997:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
998:       v  += 36;
999:     }
1000:     xb +=6; ai++;
1001:   }

1003:   VecRestoreArray(xx,&x);
1004:   VecRestoreArray(zz,&z);

1006:   PetscLogFlops(72*(a->nz*2 - nonzerorow));
1007:   return(0);
1008: }

1012: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1013: {
1014:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1015:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
1016:   MatScalar      *v;
1018:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
1019:   PetscInt       nonzerorow=0;

1022:   VecCopy_Seq(yy,zz);
1023:   VecGetArray(xx,&x);
1024:   VecGetArray(zz,&z);

1026:   v     = a->a;
1027:   xb = x;

1029:   for (i=0; i<mbs; i++) {
1030:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
1031:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1032:     ib = aj + *ai;
1033:     jmin = 0;
1034:     nonzerorow += (n>0);
1035:     if (*ib == i){     /* (diag of A)*x */
1036:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1037:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1038:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1039:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1040:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1041:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1042:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1043:       v += 49; jmin++;
1044:     }
1045:     for (j=jmin; j<n; j++) {
1046:       /* (strict lower triangular part of A)*x  */
1047:       cval       = ib[j]*7;
1048:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1049:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1050:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1051:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1052:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1053:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1054:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1055:       /* (strict upper triangular part of A)*x  */
1056:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1057:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1058:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1059:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1060:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1061:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1062:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1063:       v  += 49;
1064:     }
1065:     xb +=7; ai++;
1066:   }

1068:   VecRestoreArray(xx,&x);
1069:   VecRestoreArray(zz,&z);

1071:   PetscLogFlops(98*(a->nz*2 - nonzerorow));
1072:   return(0);
1073: }

1077: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1078: {
1079:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1080:   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1081:   MatScalar      *v;
1083:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
1084:   PetscInt       nonzerorow=0;

1087:   VecCopy_Seq(yy,zz);
1088:   VecGetArray(xx,&x); x_ptr=x;
1089:   VecGetArray(zz,&z); z_ptr=z;

1091:   aj   = a->j;
1092:   v    = a->a;
1093:   ii   = a->i;

1095:   if (!a->mult_work) {
1096:     PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);
1097:   }
1098:   work = a->mult_work;
1099: 
1100: 
1101:   for (i=0; i<mbs; i++) {
1102:     n     = ii[1] - ii[0]; ncols = n*bs;
1103:     workt = work; idx=aj+ii[0];
1104:     nonzerorow += (n>0);

1106:     /* upper triangular part */
1107:     for (j=0; j<n; j++) {
1108:       xb = x_ptr + bs*(*idx++);
1109:       for (k=0; k<bs; k++) workt[k] = xb[k];
1110:       workt += bs;
1111:     }
1112:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1113:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1115:     /* strict lower triangular part */
1116:     idx = aj+ii[0];
1117:     if (*idx == i){
1118:       ncols -= bs; v += bs2; idx++; n--;
1119:     }
1120:     if (ncols > 0){
1121:       workt = work;
1122:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
1123:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1124:       for (j=0; j<n; j++) {
1125:         zb = z_ptr + bs*(*idx++);
1126:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
1127:         workt += bs;
1128:       }
1129:     }

1131:     x += bs; v += n*bs2; z += bs; ii++;
1132:   }

1134:   VecRestoreArray(xx,&x);
1135:   VecRestoreArray(zz,&z);

1137:   PetscLogFlops(2*(a->nz*2 - nonzerorow));
1138:   return(0);
1139: }

1143: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1144: {
1145:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1146:   PetscScalar    oalpha = alpha;
1148:   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);

1151:   BLASscal_(&totalnz,&oalpha,a->a,&one);
1152:   PetscLogFlops(totalnz);
1153:   return(0);
1154: }

1158: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1159: {
1160:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1161:   MatScalar      *v = a->a;
1162:   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1163:   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1165:   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
1166: 
1168:   if (type == NORM_FROBENIUS) {
1169:     for (k=0; k<mbs; k++){
1170:       jmin = a->i[k]; jmax = a->i[k+1];
1171:       col  = aj + jmin;
1172:       if (*col == k){         /* diagonal block */
1173:         for (i=0; i<bs2; i++){
1174: #if defined(PETSC_USE_COMPLEX)
1175:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1176: #else
1177:           sum_diag += (*v)*(*v); v++;
1178: #endif
1179:         }
1180:         jmin++;
1181:       }
1182:       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
1183:         for (i=0; i<bs2; i++){
1184: #if defined(PETSC_USE_COMPLEX)
1185:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1186: #else
1187:           sum_off += (*v)*(*v); v++;
1188: #endif  
1189:         }
1190:       }
1191:     }
1192:     *norm = sqrt(sum_diag + 2*sum_off);
1193:   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1194:     PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);
1195:     jl   = il + mbs;
1196:     sum  = (PetscReal*)(jl + mbs);
1197:     for (i=0; i<mbs; i++) jl[i] = mbs;
1198:     il[0] = 0;

1200:     *norm = 0.0;
1201:     for (k=0; k<mbs; k++) { /* k_th block row */
1202:       for (j=0; j<bs; j++) sum[j]=0.0;
1203:       /*-- col sum --*/
1204:       i = jl[k]; /* first |A(i,k)| to be added */
1205:       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1206:                   at step k */
1207:       while (i<mbs){
1208:         nexti = jl[i];  /* next block row to be added */
1209:         ik    = il[i];  /* block index of A(i,k) in the array a */
1210:         for (j=0; j<bs; j++){
1211:           v = a->a + ik*bs2 + j*bs;
1212:           for (k1=0; k1<bs; k1++) {
1213:             sum[j] += PetscAbsScalar(*v); v++;
1214:           }
1215:         }
1216:         /* update il, jl */
1217:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1218:         jmax = a->i[i+1];
1219:         if (jmin < jmax){
1220:           il[i] = jmin;
1221:           j   = a->j[jmin];
1222:           jl[i] = jl[j]; jl[j]=i;
1223:         }
1224:         i = nexti;
1225:       }
1226:       /*-- row sum --*/
1227:       jmin = a->i[k]; jmax = a->i[k+1];
1228:       for (i=jmin; i<jmax; i++) {
1229:         for (j=0; j<bs; j++){
1230:           v = a->a + i*bs2 + j;
1231:           for (k1=0; k1<bs; k1++){
1232:             sum[j] += PetscAbsScalar(*v); v += bs;
1233:           }
1234:         }
1235:       }
1236:       /* add k_th block row to il, jl */
1237:       col = aj+jmin;
1238:       if (*col == k) jmin++;
1239:       if (jmin < jmax){
1240:         il[k] = jmin;
1241:         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1242:       }
1243:       for (j=0; j<bs; j++){
1244:         if (sum[j] > *norm) *norm = sum[j];
1245:       }
1246:     }
1247:     PetscFree(il);
1248:   } else {
1249:     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1250:   }
1251:   return(0);
1252: }

1256: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1257: {
1258:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;


1263:   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1264:   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1265:     *flg = PETSC_FALSE;
1266:     return(0);
1267:   }
1268: 
1269:   /* if the a->i are the same */
1270:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1271:   if (!*flg) {
1272:     return(0);
1273:   }
1274: 
1275:   /* if a->j are the same */
1276:   PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1277:   if (!*flg) {
1278:     return(0);
1279:   }
1280:   /* if a->a are the same */
1281:   PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);
1282:   return(0);
1283: }

1287: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1288: {
1289:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1291:   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1292:   PetscScalar    *x,zero = 0.0;
1293:   MatScalar      *aa,*aa_j;

1296:   bs   = A->rmap->bs;
1297:   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1298: 
1299:   aa   = a->a;
1300:   ai   = a->i;
1301:   aj   = a->j;
1302:   ambs = a->mbs;
1303:   bs2  = a->bs2;

1305:   VecSet(v,zero);
1306:   VecGetArray(v,&x);
1307:   VecGetLocalSize(v,&n);
1308:   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1309:   for (i=0; i<ambs; i++) {
1310:     j=ai[i];
1311:     if (aj[j] == i) {             /* if this is a diagonal element */
1312:       row  = i*bs;
1313:       aa_j = aa + j*bs2;
1314:       if (A->factor && bs==1){
1315:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1316:       } else {
1317:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1318:       }
1319:     }
1320:   }
1321: 
1322:   VecRestoreArray(v,&x);
1323:   return(0);
1324: }

1328: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1329: {
1330:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1331:   PetscScalar    *l,x,*li,*ri;
1332:   MatScalar      *aa,*v;
1334:   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1335:   PetscTruth     flg;

1338:   if (ll != rr){
1339:     VecEqual(ll,rr,&flg);
1340:     if (!flg)
1341:       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1342:   }
1343:   if (!ll) return(0);
1344:   ai  = a->i;
1345:   aj  = a->j;
1346:   aa  = a->a;
1347:   m   = A->rmap->N;
1348:   bs  = A->rmap->bs;
1349:   mbs = a->mbs;
1350:   bs2 = a->bs2;

1352:   VecGetArray(ll,&l);
1353:   VecGetLocalSize(ll,&lm);
1354:   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1355:   for (i=0; i<mbs; i++) { /* for each block row */
1356:     M  = ai[i+1] - ai[i];
1357:     li = l + i*bs;
1358:     v  = aa + bs2*ai[i];
1359:     for (j=0; j<M; j++) { /* for each block */
1360:       ri = l + bs*aj[ai[i]+j];
1361:       for (k=0; k<bs; k++) {
1362:         x = ri[k];
1363:         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1364:       }
1365:     }
1366:   }
1367:   VecRestoreArray(ll,&l);
1368:   PetscLogFlops(2*a->nz);
1369:   return(0);
1370: }

1374: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1375: {
1376:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1379:   info->block_size     = a->bs2;
1380:   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
1381:   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1382:   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1383:   info->assemblies   = A->num_ass;
1384:   info->mallocs      = a->reallocs;
1385:   info->memory       = ((PetscObject)A)->mem;
1386:   if (A->factor) {
1387:     info->fill_ratio_given  = A->info.fill_ratio_given;
1388:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1389:     info->factor_mallocs    = A->info.factor_mallocs;
1390:   } else {
1391:     info->fill_ratio_given  = 0;
1392:     info->fill_ratio_needed = 0;
1393:     info->factor_mallocs    = 0;
1394:   }
1395:   return(0);
1396: }


1401: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1402: {
1403:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1407:   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1408:   return(0);
1409: }

1413: /* 
1414:    This code does not work since it only checks the upper triangular part of
1415:   the matrix. Hence it is not listed in the function table.
1416: */
1417: PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1418: {
1419:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1421:   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1422:   PetscReal      atmp;
1423:   MatScalar      *aa;
1424:   PetscScalar    *x;
1425:   PetscInt       ncols,brow,bcol,krow,kcol;

1428:   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1429:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1430:   bs   = A->rmap->bs;
1431:   aa   = a->a;
1432:   ai   = a->i;
1433:   aj   = a->j;
1434:   mbs = a->mbs;

1436:   VecSet(v,0.0);
1437:   VecGetArray(v,&x);
1438:   VecGetLocalSize(v,&n);
1439:   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1440:   for (i=0; i<mbs; i++) {
1441:     ncols = ai[1] - ai[0]; ai++;
1442:     brow  = bs*i;
1443:     for (j=0; j<ncols; j++){
1444:       bcol = bs*(*aj);
1445:       for (kcol=0; kcol<bs; kcol++){
1446:         col = bcol + kcol;      /* col index */
1447:         for (krow=0; krow<bs; krow++){
1448:           atmp = PetscAbsScalar(*aa); aa++;
1449:           row = brow + krow;    /* row index */
1450:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1451:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1452:         }
1453:       }
1454:       aj++;
1455:     }
1456:   }
1457:   VecRestoreArray(v,&x);
1458:   return(0);
1459: }