Actual source code: baij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include ../src/mat/impls/baij/seq/baij.h
 8:  #include ../src/inline/spops.h
 9:  #include petscsys.h

 11:  #include ../src/inline/ilu.h

 15: /*@
 16:   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.

 18:   Collective on Mat

 20:   Input Parameters:
 21: . mat - the matrix

 23:   Level: advanced
 24: @*/
 25: PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
 26: {
 27:   PetscErrorCode ierr,(*f)(Mat);

 31:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
 32:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

 34:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
 35:   if (f) {
 36:     (*f)(mat);
 37:   } else {
 38:     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
 39:   }
 40:   return(0);
 41: }

 46: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
 47: {
 48:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 50:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
 51:   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
 52:   PetscReal      shift = 0.0;

 55:   if (a->idiagvalid) return(0);
 56:   MatMarkDiagonal_SeqBAIJ(A);
 57:   diag_offset = a->diag;
 58:   if (!a->idiag) {
 59:     PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
 60:   }
 61:   diag  = a->idiag;
 62:   mdiag = a->idiag+bs*bs*mbs;
 63:   /* factor and invert each block */
 64:   switch (bs){
 65:     case 2:
 66:       for (i=0; i<mbs; i++) {
 67:         odiag   = v + 4*diag_offset[i];
 68:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 69:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 70:         Kernel_A_gets_inverse_A_2(diag,shift);
 71:         diag    += 4;
 72:         mdiag   += 4;
 73:       }
 74:       break;
 75:     case 3:
 76:       for (i=0; i<mbs; i++) {
 77:         odiag    = v + 9*diag_offset[i];
 78:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 79:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 80:         diag[8]  = odiag[8];
 81:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 82:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 83:         mdiag[8] = odiag[8];
 84:         Kernel_A_gets_inverse_A_3(diag,shift);
 85:         diag    += 9;
 86:         mdiag   += 9;
 87:       }
 88:       break;
 89:     case 4:
 90:       for (i=0; i<mbs; i++) {
 91:         odiag  = v + 16*diag_offset[i];
 92:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 93:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 94:         Kernel_A_gets_inverse_A_4(diag,shift);
 95:         diag  += 16;
 96:         mdiag += 16;
 97:       }
 98:       break;
 99:     case 5:
100:       for (i=0; i<mbs; i++) {
101:         odiag = v + 25*diag_offset[i];
102:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
103:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
104:         Kernel_A_gets_inverse_A_5(diag,shift);
105:         diag  += 25;
106:         mdiag += 25;
107:       }
108:       break;
109:     default:
110:       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111:   }
112:   a->idiagvalid = PETSC_TRUE;
113:   return(0);
114: }

119: PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120: {
121:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122:   PetscScalar        *x,x1,s1;
123:   const PetscScalar  *b;
124:   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125:   PetscErrorCode     ierr;
126:   PetscInt           m = a->mbs,i,i2,nz,idx;
127:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

130:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131:   its = its*lits;
132:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

138:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

140:   diag  = a->diag;
141:   idiag = a->idiag;
142:   VecGetArray(xx,&x);
143:   VecGetArray(bb,(PetscScalar**)&b);

145:   if (flag & SOR_ZERO_INITIAL_GUESS) {
146:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147:       x[0] = b[0]*idiag[0];
148:       i2     = 1;
149:       idiag += 1;
150:       for (i=1; i<m; i++) {
151:         v     = aa + ai[i];
152:         vi    = aj + ai[i];
153:         nz    = diag[i] - ai[i];
154:         s1    = b[i2];
155:         while (nz--) {
156:           idx  = (*vi++);
157:           x1   = x[idx];
158:           s1  -= v[0]*x1;
159:           v   += 1;
160:         }
161:         x[i2]   = idiag[0]*s1;
162:         idiag   += 1;
163:         i2      += 1;
164:       }
165:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166:       PetscLogFlops(a->nz);
167:     }
168:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170:       i2    = 0;
171:       mdiag = a->idiag+a->mbs;
172:       for (i=0; i<m; i++) {
173:         x1      = x[i2];
174:         x[i2]   = mdiag[0]*x1;
175:         mdiag  += 1;
176:         i2     += 1;
177:       }
178:       PetscLogFlops(m);
179:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
181:     }
182:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183:       idiag   = a->idiag+a->mbs - 1;
184:       i2      = m - 1;
185:       x1      = x[i2];
186:       x[i2]   = idiag[0]*x1;
187:       idiag -= 1;
188:       i2    -= 1;
189:       for (i=m-2; i>=0; i--) {
190:         v     = aa + (diag[i]+1);
191:         vi    = aj + diag[i] + 1;
192:         nz    = ai[i+1] - diag[i] - 1;
193:         s1    = x[i2];
194:         while (nz--) {
195:           idx  = (*vi++);
196:           x1   = x[idx];
197:           s1  -= v[0]*x1;
198:           v   += 1;
199:         }
200:         x[i2]   = idiag[0]*s1;
201:         idiag   -= 2;
202:         i2      -= 1;
203:       }
204:       PetscLogFlops(a->nz);
205:     }
206:   } else {
207:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208:   }
209:   VecRestoreArray(xx,&x);
210:   VecRestoreArray(bb,(PetscScalar**)&b);
211:   return(0);
212: }

216: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217: {
218:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219:   PetscScalar        *x,x1,x2,s1,s2;
220:   const PetscScalar  *b;
221:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222:   PetscErrorCode     ierr;
223:   PetscInt           m = a->mbs,i,i2,nz,idx;
224:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

227:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228:   its = its*lits;
229:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

235:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

237:   diag  = a->diag;
238:   idiag = a->idiag;
239:   VecGetArray(xx,&x);
240:   VecGetArray(bb,(PetscScalar**)&b);

242:   if (flag & SOR_ZERO_INITIAL_GUESS) {
243:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246:       i2     = 2;
247:       idiag += 4;
248:       for (i=1; i<m; i++) {
249:         v     = aa + 4*ai[i];
250:         vi    = aj + ai[i];
251:         nz    = diag[i] - ai[i];
252:         s1    = b[i2]; s2 = b[i2+1];
253:         while (nz--) {
254:           idx  = 2*(*vi++);
255:           x1   = x[idx]; x2 = x[1+idx];
256:           s1  -= v[0]*x1 + v[2]*x2;
257:           s2  -= v[1]*x1 + v[3]*x2;
258:           v   += 4;
259:         }
260:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262:         idiag   += 4;
263:         i2      += 2;
264:       }
265:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266:       PetscLogFlops(4*(a->nz));
267:     }
268:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270:       i2    = 0;
271:       mdiag = a->idiag+4*a->mbs;
272:       for (i=0; i<m; i++) {
273:         x1      = x[i2]; x2 = x[i2+1];
274:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276:         mdiag  += 4;
277:         i2     += 2;
278:       }
279:       PetscLogFlops(6*m);
280:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
282:     }
283:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284:       idiag   = a->idiag+4*a->mbs - 4;
285:       i2      = 2*m - 2;
286:       x1      = x[i2]; x2 = x[i2+1];
287:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289:       idiag -= 4;
290:       i2    -= 2;
291:       for (i=m-2; i>=0; i--) {
292:         v     = aa + 4*(diag[i]+1);
293:         vi    = aj + diag[i] + 1;
294:         nz    = ai[i+1] - diag[i] - 1;
295:         s1    = x[i2]; s2 = x[i2+1];
296:         while (nz--) {
297:           idx  = 2*(*vi++);
298:           x1   = x[idx]; x2 = x[1+idx];
299:           s1  -= v[0]*x1 + v[2]*x2;
300:           s2  -= v[1]*x1 + v[3]*x2;
301:           v   += 4;
302:         }
303:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305:         idiag   -= 4;
306:         i2      -= 2;
307:       }
308:       PetscLogFlops(4*(a->nz));
309:     }
310:   } else {
311:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312:   }
313:   VecRestoreArray(xx,&x);
314:   VecRestoreArray(bb,(PetscScalar**)&b);
315:   return(0);
316: }

320: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321: {
322:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325:   const PetscScalar  *b;
326:   PetscErrorCode     ierr;
327:   PetscInt           m = a->mbs,i,i2,nz,idx;
328:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

331:   its = its*lits;
332:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

339:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

341:   diag  = a->diag;
342:   idiag = a->idiag;
343:   VecGetArray(xx,&x);
344:   VecGetArray(bb,(PetscScalar**)&b);

346:   if (flag & SOR_ZERO_INITIAL_GUESS) {
347:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351:       i2     = 3;
352:       idiag += 9;
353:       for (i=1; i<m; i++) {
354:         v     = aa + 9*ai[i];
355:         vi    = aj + ai[i];
356:         nz    = diag[i] - ai[i];
357:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358:         while (nz--) {
359:           idx  = 3*(*vi++);
360:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364:           v   += 9;
365:         }
366:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369:         idiag   += 9;
370:         i2      += 3;
371:       }
372:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373:       PetscLogFlops(9*(a->nz));
374:     }
375:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377:       i2    = 0;
378:       mdiag = a->idiag+9*a->mbs;
379:       for (i=0; i<m; i++) {
380:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384:         mdiag  += 9;
385:         i2     += 3;
386:       }
387:       PetscLogFlops(15*m);
388:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
390:     }
391:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392:       idiag   = a->idiag+9*a->mbs - 9;
393:       i2      = 3*m - 3;
394:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398:       idiag -= 9;
399:       i2    -= 3;
400:       for (i=m-2; i>=0; i--) {
401:         v     = aa + 9*(diag[i]+1);
402:         vi    = aj + diag[i] + 1;
403:         nz    = ai[i+1] - diag[i] - 1;
404:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405:         while (nz--) {
406:           idx  = 3*(*vi++);
407:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411:           v   += 9;
412:         }
413:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416:         idiag   -= 9;
417:         i2      -= 3;
418:       }
419:       PetscLogFlops(9*(a->nz));
420:     }
421:   } else {
422:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423:   }
424:   VecRestoreArray(xx,&x);
425:   VecRestoreArray(bb,(PetscScalar**)&b);
426:   return(0);
427: }

431: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432: {
433:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436:   const PetscScalar  *b;
437:   PetscErrorCode     ierr;
438:   PetscInt           m = a->mbs,i,i2,nz,idx;
439:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

442:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443:   its = its*lits;
444:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

450:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

452:   diag  = a->diag;
453:   idiag = a->idiag;
454:   VecGetArray(xx,&x);
455:   VecGetArray(bb,(PetscScalar**)&b);

457:   if (flag & SOR_ZERO_INITIAL_GUESS) {
458:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463:       i2     = 4;
464:       idiag += 16;
465:       for (i=1; i<m; i++) {
466:         v     = aa + 16*ai[i];
467:         vi    = aj + ai[i];
468:         nz    = diag[i] - ai[i];
469:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470:         while (nz--) {
471:           idx  = 4*(*vi++);
472:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477:           v   += 16;
478:         }
479:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483:         idiag   += 16;
484:         i2      += 4;
485:       }
486:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487:       PetscLogFlops(16*(a->nz));
488:     }
489:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491:       i2    = 0;
492:       mdiag = a->idiag+16*a->mbs;
493:       for (i=0; i<m; i++) {
494:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499:         mdiag  += 16;
500:         i2     += 4;
501:       }
502:       PetscLogFlops(28*m);
503:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
505:     }
506:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507:       idiag   = a->idiag+16*a->mbs - 16;
508:       i2      = 4*m - 4;
509:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514:       idiag -= 16;
515:       i2    -= 4;
516:       for (i=m-2; i>=0; i--) {
517:         v     = aa + 16*(diag[i]+1);
518:         vi    = aj + diag[i] + 1;
519:         nz    = ai[i+1] - diag[i] - 1;
520:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521:         while (nz--) {
522:           idx  = 4*(*vi++);
523:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528:           v   += 16;
529:         }
530:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534:         idiag   -= 16;
535:         i2      -= 4;
536:       }
537:       PetscLogFlops(16*(a->nz));
538:     }
539:   } else {
540:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541:   }
542:   VecRestoreArray(xx,&x);
543:   VecRestoreArray(bb,(PetscScalar**)&b);
544:   return(0);
545: }

549: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550: {
551:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554:   const PetscScalar  *b;
555:   PetscErrorCode     ierr;
556:   PetscInt           m = a->mbs,i,i2,nz,idx;
557:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

560:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561:   its = its*lits;
562:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

568:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

570:   diag  = a->diag;
571:   idiag = a->idiag;
572:   VecGetArray(xx,&x);
573:   VecGetArray(bb,(PetscScalar**)&b);

575:   if (flag & SOR_ZERO_INITIAL_GUESS) {
576:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582:       i2     = 5;
583:       idiag += 25;
584:       for (i=1; i<m; i++) {
585:         v     = aa + 25*ai[i];
586:         vi    = aj + ai[i];
587:         nz    = diag[i] - ai[i];
588:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589:         while (nz--) {
590:           idx  = 5*(*vi++);
591:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597:           v   += 25;
598:         }
599:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604:         idiag   += 25;
605:         i2      += 5;
606:       }
607:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608:       PetscLogFlops(25*(a->nz));
609:     }
610:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612:       i2    = 0;
613:       mdiag = a->idiag+25*a->mbs;
614:       for (i=0; i<m; i++) {
615:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621:         mdiag  += 25;
622:         i2     += 5;
623:       }
624:       PetscLogFlops(45*m);
625:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
627:     }
628:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629:       idiag   = a->idiag+25*a->mbs - 25;
630:       i2      = 5*m - 5;
631:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637:       idiag -= 25;
638:       i2    -= 5;
639:       for (i=m-2; i>=0; i--) {
640:         v     = aa + 25*(diag[i]+1);
641:         vi    = aj + diag[i] + 1;
642:         nz    = ai[i+1] - diag[i] - 1;
643:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644:         while (nz--) {
645:           idx  = 5*(*vi++);
646:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652:           v   += 25;
653:         }
654:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659:         idiag   -= 25;
660:         i2      -= 5;
661:       }
662:       PetscLogFlops(25*(a->nz));
663:     }
664:   } else {
665:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666:   }
667:   VecRestoreArray(xx,&x);
668:   VecRestoreArray(bb,(PetscScalar**)&b);
669:   return(0);
670: }

674: PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675: {
676:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679:   const PetscScalar  *b;
680:   PetscErrorCode     ierr;
681:   PetscInt           m = a->mbs,i,i2,nz,idx;
682:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

685:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686:   its = its*lits;
687:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

693:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

695:   diag  = a->diag;
696:   idiag = a->idiag;
697:   VecGetArray(xx,&x);
698:   VecGetArray(bb,(PetscScalar**)&b);

700:   if (flag & SOR_ZERO_INITIAL_GUESS) {
701:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702:       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703:       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704:       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705:       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706:       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707:       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708:       i2     = 6;
709:       idiag += 36;
710:       for (i=1; i<m; i++) {
711:         v     = aa + 36*ai[i];
712:         vi    = aj + ai[i];
713:         nz    = diag[i] - ai[i];
714:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715:         while (nz--) {
716:           idx  = 6*(*vi++);
717:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724:           v   += 36;
725:         }
726:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732:         idiag   += 36;
733:         i2      += 6;
734:       }
735:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736:       PetscLogFlops(36*(a->nz));
737:     }
738:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740:       i2    = 0;
741:       mdiag = a->idiag+36*a->mbs;
742:       for (i=0; i<m; i++) {
743:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744:         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745:         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746:         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747:         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748:         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749:         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750:         mdiag  += 36;
751:         i2     += 6;
752:       }
753:       PetscLogFlops(60*m);
754:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
756:     }
757:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758:       idiag   = a->idiag+36*a->mbs - 36;
759:       i2      = 6*m - 6;
760:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761:       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762:       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763:       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764:       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765:       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766:       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767:       idiag -= 36;
768:       i2    -= 6;
769:       for (i=m-2; i>=0; i--) {
770:         v     = aa + 36*(diag[i]+1);
771:         vi    = aj + diag[i] + 1;
772:         nz    = ai[i+1] - diag[i] - 1;
773:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774:         while (nz--) {
775:           idx  = 6*(*vi++);
776:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783:           v   += 36;
784:         }
785:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791:         idiag   -= 36;
792:         i2      -= 6;
793:       }
794:       PetscLogFlops(36*(a->nz));
795:     }
796:   } else {
797:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798:   }
799:   VecRestoreArray(xx,&x);
800:   VecRestoreArray(bb,(PetscScalar**)&b);
801:   return(0);
802: }

806: PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807: {
808:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811:   const PetscScalar  *b;
812:   PetscErrorCode     ierr;
813:   PetscInt           m = a->mbs,i,i2,nz,idx;
814:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

817:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818:   its = its*lits;
819:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

825:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

827:   diag  = a->diag;
828:   idiag = a->idiag;
829:   VecGetArray(xx,&x);
830:   VecGetArray(bb,(PetscScalar**)&b);

832:   if (flag & SOR_ZERO_INITIAL_GUESS) {
833:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834:       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835:       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836:       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837:       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838:       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839:       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840:       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841:       i2     = 7;
842:       idiag += 49;
843:       for (i=1; i<m; i++) {
844:         v     = aa + 49*ai[i];
845:         vi    = aj + ai[i];
846:         nz    = diag[i] - ai[i];
847:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848:         while (nz--) {
849:           idx  = 7*(*vi++);
850:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858:           v   += 49;
859:         }
860:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867:         idiag   += 49;
868:         i2      += 7;
869:       }
870:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871:       PetscLogFlops(49*(a->nz));
872:     }
873:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875:       i2    = 0;
876:       mdiag = a->idiag+49*a->mbs;
877:       for (i=0; i<m; i++) {
878:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879:         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880:         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881:         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882:         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883:         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884:         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885:         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886:         mdiag  += 36;
887:         i2     += 6;
888:       }
889:       PetscLogFlops(93*m);
890:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
892:     }
893:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894:       idiag   = a->idiag+49*a->mbs - 49;
895:       i2      = 7*m - 7;
896:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897:       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898:       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899:       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900:       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901:       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902:       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903:       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904:       idiag -= 49;
905:       i2    -= 7;
906:       for (i=m-2; i>=0; i--) {
907:         v     = aa + 49*(diag[i]+1);
908:         vi    = aj + diag[i] + 1;
909:         nz    = ai[i+1] - diag[i] - 1;
910:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911:         while (nz--) {
912:           idx  = 7*(*vi++);
913:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921:           v   += 49;
922:         }
923:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930:         idiag   -= 49;
931:         i2      -= 7;
932:       }
933:       PetscLogFlops(49*(a->nz));
934:     }
935:   } else {
936:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937:   }
938:   VecRestoreArray(xx,&x);
939:   VecRestoreArray(bb,(PetscScalar**)&b);
940:   return(0);
941: }

943: /*
944:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945: */
946: #if defined(PETSC_HAVE_FORTRAN_CAPS)
947: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949: #define matsetvaluesblocked4_ matsetvaluesblocked4
950: #endif

955: void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956: {
957:   Mat               A = *AA;
958:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960:   PetscInt          *ai=a->i,*ailen=a->ilen;
961:   PetscInt          *aj=a->j,stepval,lastcol = -1;
962:   const PetscScalar *value = v;
963:   MatScalar         *ap,*aa = a->a,*bap;

966:   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967:   stepval = (n-1)*4;
968:   for (k=0; k<m; k++) { /* loop over added rows */
969:     row  = im[k];
970:     rp   = aj + ai[row];
971:     ap   = aa + 16*ai[row];
972:     nrow = ailen[row];
973:     low  = 0;
974:     high = nrow;
975:     for (l=0; l<n; l++) { /* loop over added columns */
976:       col = in[l];
977:       if (col <= lastcol) low = 0; else high = nrow;
978:       lastcol = col;
979:       value = v + k*(stepval+4 + l)*4;
980:       while (high-low > 7) {
981:         t = (low+high)/2;
982:         if (rp[t] > col) high = t;
983:         else             low  = t;
984:       }
985:       for (i=low; i<high; i++) {
986:         if (rp[i] > col) break;
987:         if (rp[i] == col) {
988:           bap  = ap +  16*i;
989:           for (ii=0; ii<4; ii++,value+=stepval) {
990:             for (jj=ii; jj<16; jj+=4) {
991:               bap[jj] += *value++;
992:             }
993:           }
994:           goto noinsert2;
995:         }
996:       }
997:       N = nrow++ - 1;
998:       high++; /* added new column index thus must search to one higher than before */
999:       /* shift up all the later entries in this row */
1000:       for (ii=N; ii>=i; ii--) {
1001:         rp[ii+1] = rp[ii];
1002:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003:       }
1004:       if (N >= i) {
1005:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006:       }
1007:       rp[i] = col;
1008:       bap   = ap +  16*i;
1009:       for (ii=0; ii<4; ii++,value+=stepval) {
1010:         for (jj=ii; jj<16; jj+=4) {
1011:           bap[jj] = *value++;
1012:         }
1013:       }
1014:       noinsert2:;
1015:       low = i;
1016:     }
1017:     ailen[row] = nrow;
1018:   }
1019:   PetscFunctionReturnVoid();
1020: }

1023: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024: #define matsetvalues4_ MATSETVALUES4
1025: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026: #define matsetvalues4_ matsetvalues4
1027: #endif

1032: void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033: {
1034:   Mat         A = *AA;
1035:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037:   PetscInt    *ai=a->i,*ailen=a->ilen;
1038:   PetscInt    *aj=a->j,brow,bcol;
1039:   PetscInt    ridx,cidx,lastcol = -1;
1040:   MatScalar   *ap,value,*aa=a->a,*bap;
1041: 
1043:   for (k=0; k<m; k++) { /* loop over added rows */
1044:     row  = im[k]; brow = row/4;
1045:     rp   = aj + ai[brow];
1046:     ap   = aa + 16*ai[brow];
1047:     nrow = ailen[brow];
1048:     low  = 0;
1049:     high = nrow;
1050:     for (l=0; l<n; l++) { /* loop over added columns */
1051:       col = in[l]; bcol = col/4;
1052:       ridx = row % 4; cidx = col % 4;
1053:       value = v[l + k*n];
1054:       if (col <= lastcol) low = 0; else high = nrow;
1055:       lastcol = col;
1056:       while (high-low > 7) {
1057:         t = (low+high)/2;
1058:         if (rp[t] > bcol) high = t;
1059:         else              low  = t;
1060:       }
1061:       for (i=low; i<high; i++) {
1062:         if (rp[i] > bcol) break;
1063:         if (rp[i] == bcol) {
1064:           bap  = ap +  16*i + 4*cidx + ridx;
1065:           *bap += value;
1066:           goto noinsert1;
1067:         }
1068:       }
1069:       N = nrow++ - 1;
1070:       high++; /* added new column thus must search to one higher than before */
1071:       /* shift up all the later entries in this row */
1072:       for (ii=N; ii>=i; ii--) {
1073:         rp[ii+1] = rp[ii];
1074:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075:       }
1076:       if (N>=i) {
1077:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078:       }
1079:       rp[i]                    = bcol;
1080:       ap[16*i + 4*cidx + ridx] = value;
1081:       noinsert1:;
1082:       low = i;
1083:     }
1084:     ailen[brow] = nrow;
1085:   }
1086:   PetscFunctionReturnVoid();
1087: }

1090: /*
1091:      Checks for missing diagonals
1092: */
1095: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096: {
1097:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1099:   PetscInt       *diag,*jj = a->j,i;

1102:   MatMarkDiagonal_SeqBAIJ(A);
1103:   *missing = PETSC_FALSE;
1104:   diag     = a->diag;
1105:   for (i=0; i<a->mbs; i++) {
1106:     if (jj[diag[i]] != i) {
1107:       *missing  = PETSC_TRUE;
1108:       if (d) *d = i;
1109:     }
1110:   }
1111:   return(0);
1112: }

1116: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117: {
1118:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1120:   PetscInt       i,j,m = a->mbs;

1123:   if (!a->diag) {
1124:     PetscMalloc(m*sizeof(PetscInt),&a->diag);
1125:   }
1126:   for (i=0; i<m; i++) {
1127:     a->diag[i] = a->i[i+1];
1128:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1129:       if (a->j[j] == i) {
1130:         a->diag[i] = j;
1131:         break;
1132:       }
1133:     }
1134:   }
1135:   return(0);
1136: }


1139: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);

1143: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144: {
1145:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1147:   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1148:   PetscInt       *tia, *tja;

1151:   *nn = n;
1152:   if (!ia) return(0);
1153:   if (symmetric) {
1154:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1155:   } else {
1156:     tia = a->i; tja = a->j;
1157:   }
1158: 
1159:   if (!blockcompressed && bs > 1) {
1160:     (*nn) *= bs;
1161:     nbs    = bs;
1162:     /* malloc & create the natural set of indices */
1163:     PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1164:     if (n) {
1165:       (*ia)[0] = 0;
1166:       for (j=1; j<bs; j++) {
1167:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1168:       }
1169:     }

1171:     for (i=1; i<n; i++) {
1172:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1173:       for (j=1; j<bs; j++) {
1174:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1175:       }
1176:     }
1177:     if (n) {
1178:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1179:     }

1181:     if (ja) {
1182:       PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1183:       cnt = 0;
1184:       for (i=0; i<n; i++) {
1185:         for (j=0; j<bs; j++) {
1186:           for (k=tia[i]; k<tia[i+1]; k++) {
1187:             for (l=0; l<bs; l++) {
1188:               (*ja)[cnt++] = bs*tja[k] + l;
1189:             }
1190:           }
1191:         }
1192:       }
1193:     }

1195:     n     *= bs;
1196:     nz *= bs*bs;
1197:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198:       PetscFree(tia);
1199:       PetscFree(tja);
1200:     }
1201:   } else {
1202:     *ia = tia;
1203:     if (ja) *ja = tja;
1204:   }
1205:   if (oshift == 1) {
1206:     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1207:     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1208:   }
1209:   return(0);
1210: }

1214: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1215: {
1216:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1218:   PetscInt       i,n = a->mbs,nz = a->i[n];

1221:   if (!ia) return(0);
1222:   if (!blockcompressed && A->rmap->bs > 1) {
1223:     PetscFree(*ia);
1224:     if (ja) {PetscFree(*ja);}
1225:   } else if (symmetric) {
1226:     PetscFree(*ia);
1227:     if (ja) {PetscFree(*ja);}
1228:   } else if (oshift == 1) { /* blockcompressed */
1229:     for (i=0; i<n+1; i++) a->i[i]--;
1230:     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1231:   }
1232:   return(0);
1233: }

1237: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1238: {
1239:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1243: #if defined(PETSC_USE_LOG)
1244:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1245: #endif
1246:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1247:   if (a->row) {
1248:     ISDestroy(a->row);
1249:   }
1250:   if (a->col) {
1251:     ISDestroy(a->col);
1252:   }
1253:   PetscFree(a->diag);
1254:   PetscFree(a->idiag);
1255:   PetscFree2(a->imax,a->ilen);
1256:   PetscFree(a->solve_work);
1257:   PetscFree(a->mult_work);
1258:   if (a->icol) {ISDestroy(a->icol);}
1259:   PetscFree(a->saved_values);
1260:   PetscFree(a->xtoy);
1261:   if (a->compressedrow.use){PetscFree(a->compressedrow.i);}

1263:   if (a->sbaijMat) {MatDestroy(a->sbaijMat);}
1264:   PetscFree(a);

1266:   PetscObjectChangeTypeName((PetscObject)A,0);
1267:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
1268:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
1269:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
1270:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
1271:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
1272:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
1273:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
1274:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);
1275:   return(0);
1276: }

1280: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1281: {
1282:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1286:   switch (op) {
1287:   case MAT_ROW_ORIENTED:
1288:     a->roworiented    = flg;
1289:     break;
1290:   case MAT_KEEP_ZEROED_ROWS:
1291:     a->keepzeroedrows = flg;
1292:     break;
1293:   case MAT_NEW_NONZERO_LOCATIONS:
1294:     a->nonew          = (flg ? 0 : 1);
1295:     break;
1296:   case MAT_NEW_NONZERO_LOCATION_ERR:
1297:     a->nonew          = (flg ? -1 : 0);
1298:     break;
1299:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1300:     a->nonew          = (flg ? -2 : 0);
1301:     break;
1302:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1303:     a->nounused       = (flg ? -1 : 0);
1304:     break;
1305:   case MAT_NEW_DIAGONALS:
1306:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1307:   case MAT_USE_HASH_TABLE:
1308:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1309:     break;
1310:   case MAT_SYMMETRIC:
1311:   case MAT_STRUCTURALLY_SYMMETRIC:
1312:   case MAT_HERMITIAN:
1313:   case MAT_SYMMETRY_ETERNAL:
1314:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1315:     break;
1316:   default:
1317:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1318:   }
1319:   return(0);
1320: }

1324: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1325: {
1326:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1328:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1329:   MatScalar      *aa,*aa_i;
1330:   PetscScalar    *v_i;

1333:   bs  = A->rmap->bs;
1334:   ai  = a->i;
1335:   aj  = a->j;
1336:   aa  = a->a;
1337:   bs2 = a->bs2;
1338: 
1339:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1340: 
1341:   bn  = row/bs;   /* Block number */
1342:   bp  = row % bs; /* Block Position */
1343:   M   = ai[bn+1] - ai[bn];
1344:   *nz = bs*M;
1345: 
1346:   if (v) {
1347:     *v = 0;
1348:     if (*nz) {
1349:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
1350:       for (i=0; i<M; i++) { /* for each block in the block row */
1351:         v_i  = *v + i*bs;
1352:         aa_i = aa + bs2*(ai[bn] + i);
1353:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1354:       }
1355:     }
1356:   }

1358:   if (idx) {
1359:     *idx = 0;
1360:     if (*nz) {
1361:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
1362:       for (i=0; i<M; i++) { /* for each block in the block row */
1363:         idx_i = *idx + i*bs;
1364:         itmp  = bs*aj[ai[bn] + i];
1365:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1366:       }
1367:     }
1368:   }
1369:   return(0);
1370: }

1374: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1375: {

1379:   if (idx) {PetscFree(*idx);}
1380:   if (v)   {PetscFree(*v);}
1381:   return(0);
1382: }

1386: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1387: {
1388:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1389:   Mat            C;
1391:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1392:   PetscInt       *rows,*cols,bs2=a->bs2;
1393:   MatScalar      *array;

1396:   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1397:   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1398:     PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1399:     PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1401:     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1402:     MatCreate(((PetscObject)A)->comm,&C);
1403:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1404:     MatSetType(C,((PetscObject)A)->type_name);
1405:     MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1406:     PetscFree(col);
1407:   } else {
1408:     C = *B;
1409:   }

1411:   array = a->a;
1412:   PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1413:   cols = rows + bs;
1414:   for (i=0; i<mbs; i++) {
1415:     cols[0] = i*bs;
1416:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1417:     len = ai[i+1] - ai[i];
1418:     for (j=0; j<len; j++) {
1419:       rows[0] = (*aj++)*bs;
1420:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1421:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1422:       array += bs2;
1423:     }
1424:   }
1425:   PetscFree(rows);
1426: 
1427:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1428:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1429: 
1430:   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1431:     *B = C;
1432:   } else {
1433:     MatHeaderCopy(A,C);
1434:   }
1435:   return(0);
1436: }

1440: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1441: {
1442:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1444:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1445:   int            fd;
1446:   PetscScalar    *aa;
1447:   FILE           *file;

1450:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1451:   PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);
1452:   col_lens[0] = MAT_FILE_COOKIE;

1454:   col_lens[1] = A->rmap->N;
1455:   col_lens[2] = A->cmap->n;
1456:   col_lens[3] = a->nz*bs2;

1458:   /* store lengths of each row and write (including header) to file */
1459:   count = 0;
1460:   for (i=0; i<a->mbs; i++) {
1461:     for (j=0; j<bs; j++) {
1462:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1463:     }
1464:   }
1465:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1466:   PetscFree(col_lens);

1468:   /* store column indices (zero start index) */
1469:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1470:   count = 0;
1471:   for (i=0; i<a->mbs; i++) {
1472:     for (j=0; j<bs; j++) {
1473:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1474:         for (l=0; l<bs; l++) {
1475:           jj[count++] = bs*a->j[k] + l;
1476:         }
1477:       }
1478:     }
1479:   }
1480:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1481:   PetscFree(jj);

1483:   /* store nonzero values */
1484:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1485:   count = 0;
1486:   for (i=0; i<a->mbs; i++) {
1487:     for (j=0; j<bs; j++) {
1488:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1489:         for (l=0; l<bs; l++) {
1490:           aa[count++] = a->a[bs2*k + l*bs + j];
1491:         }
1492:       }
1493:     }
1494:   }
1495:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1496:   PetscFree(aa);

1498:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1499:   if (file) {
1500:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1501:   }
1502:   return(0);
1503: }

1507: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1508: {
1509:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1510:   PetscErrorCode    ierr;
1511:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1512:   PetscViewerFormat format;

1515:   PetscViewerGetFormat(viewer,&format);
1516:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1517:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1518:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1519:     Mat aij;
1520:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1521:     MatView(aij,viewer);
1522:     MatDestroy(aij);
1523:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1524:      return(0);
1525:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1526:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1527:     for (i=0; i<a->mbs; i++) {
1528:       for (j=0; j<bs; j++) {
1529:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1530:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1531:           for (l=0; l<bs; l++) {
1532: #if defined(PETSC_USE_COMPLEX)
1533:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1534:               PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1535:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1536:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1537:               PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1538:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1539:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1540:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1541:             }
1542: #else
1543:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1544:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1545:             }
1546: #endif
1547:           }
1548:         }
1549:         PetscViewerASCIIPrintf(viewer,"\n");
1550:       }
1551:     }
1552:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1553:   } else {
1554:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1555:     for (i=0; i<a->mbs; i++) {
1556:       for (j=0; j<bs; j++) {
1557:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1558:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1559:           for (l=0; l<bs; l++) {
1560: #if defined(PETSC_USE_COMPLEX)
1561:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1562:               PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1563:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1564:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1565:               PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1566:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1567:             } else {
1568:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1569:             }
1570: #else
1571:             PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1572: #endif
1573:           }
1574:         }
1575:         PetscViewerASCIIPrintf(viewer,"\n");
1576:       }
1577:     }
1578:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1579:   }
1580:   PetscViewerFlush(viewer);
1581:   return(0);
1582: }

1586: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1587: {
1588:   Mat            A = (Mat) Aa;
1589:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1591:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1592:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1593:   MatScalar      *aa;
1594:   PetscViewer    viewer;


1598:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1599:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

1601:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1603:   /* loop over matrix elements drawing boxes */
1604:   color = PETSC_DRAW_BLUE;
1605:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1606:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1607:       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1608:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1609:       aa = a->a + j*bs2;
1610:       for (k=0; k<bs; k++) {
1611:         for (l=0; l<bs; l++) {
1612:           if (PetscRealPart(*aa++) >=  0.) continue;
1613:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1614:         }
1615:       }
1616:     }
1617:   }
1618:   color = PETSC_DRAW_CYAN;
1619:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1620:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1621:       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1622:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1623:       aa = a->a + j*bs2;
1624:       for (k=0; k<bs; k++) {
1625:         for (l=0; l<bs; l++) {
1626:           if (PetscRealPart(*aa++) != 0.) continue;
1627:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1628:         }
1629:       }
1630:     }
1631:   }

1633:   color = PETSC_DRAW_RED;
1634:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1635:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1636:       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1637:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1638:       aa = a->a + j*bs2;
1639:       for (k=0; k<bs; k++) {
1640:         for (l=0; l<bs; l++) {
1641:           if (PetscRealPart(*aa++) <= 0.) continue;
1642:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1643:         }
1644:       }
1645:     }
1646:   }
1647:   return(0);
1648: }

1652: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1653: {
1655:   PetscReal      xl,yl,xr,yr,w,h;
1656:   PetscDraw      draw;
1657:   PetscTruth     isnull;


1661:   PetscViewerDrawGetDraw(viewer,0,&draw);
1662:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1664:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1665:   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1666:   xr += w;    yr += h;  xl = -w;     yl = -h;
1667:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1668:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1669:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1670:   return(0);
1671: }

1675: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1676: {
1678:   PetscTruth     iascii,isbinary,isdraw;

1681:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1682:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1683:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1684:   if (iascii){
1685:     MatView_SeqBAIJ_ASCII(A,viewer);
1686:   } else if (isbinary) {
1687:     MatView_SeqBAIJ_Binary(A,viewer);
1688:   } else if (isdraw) {
1689:     MatView_SeqBAIJ_Draw(A,viewer);
1690:   } else {
1691:     Mat B;
1692:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1693:     MatView(B,viewer);
1694:     MatDestroy(B);
1695:   }
1696:   return(0);
1697: }


1702: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1703: {
1704:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1705:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1706:   PetscInt    *ai = a->i,*ailen = a->ilen;
1707:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1708:   MatScalar   *ap,*aa = a->a;

1711:   for (k=0; k<m; k++) { /* loop over rows */
1712:     row  = im[k]; brow = row/bs;
1713:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1714:     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1715:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1716:     nrow = ailen[brow];
1717:     for (l=0; l<n; l++) { /* loop over columns */
1718:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1719:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1720:       col  = in[l] ;
1721:       bcol = col/bs;
1722:       cidx = col%bs;
1723:       ridx = row%bs;
1724:       high = nrow;
1725:       low  = 0; /* assume unsorted */
1726:       while (high-low > 5) {
1727:         t = (low+high)/2;
1728:         if (rp[t] > bcol) high = t;
1729:         else             low  = t;
1730:       }
1731:       for (i=low; i<high; i++) {
1732:         if (rp[i] > bcol) break;
1733:         if (rp[i] == bcol) {
1734:           *v++ = ap[bs2*i+bs*cidx+ridx];
1735:           goto finished;
1736:         }
1737:       }
1738:       *v++ = 0.0;
1739:       finished:;
1740:     }
1741:   }
1742:   return(0);
1743: }

1745: #define CHUNKSIZE 10
1748: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1749: {
1750:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1751:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1752:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1753:   PetscErrorCode    ierr;
1754:   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1755:   PetscTruth        roworiented=a->roworiented;
1756:   const PetscScalar *value = v;
1757:   MatScalar         *ap,*aa = a->a,*bap;

1760:   if (roworiented) {
1761:     stepval = (n-1)*bs;
1762:   } else {
1763:     stepval = (m-1)*bs;
1764:   }
1765:   for (k=0; k<m; k++) { /* loop over added rows */
1766:     row  = im[k];
1767:     if (row < 0) continue;
1768: #if defined(PETSC_USE_DEBUG)  
1769:     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1770: #endif
1771:     rp   = aj + ai[row];
1772:     ap   = aa + bs2*ai[row];
1773:     rmax = imax[row];
1774:     nrow = ailen[row];
1775:     low  = 0;
1776:     high = nrow;
1777:     for (l=0; l<n; l++) { /* loop over added columns */
1778:       if (in[l] < 0) continue;
1779: #if defined(PETSC_USE_DEBUG)  
1780:       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1781: #endif
1782:       col = in[l];
1783:       if (roworiented) {
1784:         value = v + k*(stepval+bs)*bs + l*bs;
1785:       } else {
1786:         value = v + l*(stepval+bs)*bs + k*bs;
1787:       }
1788:       if (col <= lastcol) low = 0; else high = nrow;
1789:       lastcol = col;
1790:       while (high-low > 7) {
1791:         t = (low+high)/2;
1792:         if (rp[t] > col) high = t;
1793:         else             low  = t;
1794:       }
1795:       for (i=low; i<high; i++) {
1796:         if (rp[i] > col) break;
1797:         if (rp[i] == col) {
1798:           bap  = ap +  bs2*i;
1799:           if (roworiented) {
1800:             if (is == ADD_VALUES) {
1801:               for (ii=0; ii<bs; ii++,value+=stepval) {
1802:                 for (jj=ii; jj<bs2; jj+=bs) {
1803:                   bap[jj] += *value++;
1804:                 }
1805:               }
1806:             } else {
1807:               for (ii=0; ii<bs; ii++,value+=stepval) {
1808:                 for (jj=ii; jj<bs2; jj+=bs) {
1809:                   bap[jj] = *value++;
1810:                 }
1811:               }
1812:             }
1813:           } else {
1814:             if (is == ADD_VALUES) {
1815:               for (ii=0; ii<bs; ii++,value+=stepval) {
1816:                 for (jj=0; jj<bs; jj++) {
1817:                   *bap++ += *value++;
1818:                 }
1819:               }
1820:             } else {
1821:               for (ii=0; ii<bs; ii++,value+=stepval) {
1822:                 for (jj=0; jj<bs; jj++) {
1823:                   *bap++  = *value++;
1824:                 }
1825:               }
1826:             }
1827:           }
1828:           goto noinsert2;
1829:         }
1830:       }
1831:       if (nonew == 1) goto noinsert2;
1832:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1833:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1834:       N = nrow++ - 1; high++;
1835:       /* shift up all the later entries in this row */
1836:       for (ii=N; ii>=i; ii--) {
1837:         rp[ii+1] = rp[ii];
1838:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1839:       }
1840:       if (N >= i) {
1841:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1842:       }
1843:       rp[i] = col;
1844:       bap   = ap +  bs2*i;
1845:       if (roworiented) {
1846:         for (ii=0; ii<bs; ii++,value+=stepval) {
1847:           for (jj=ii; jj<bs2; jj+=bs) {
1848:             bap[jj] = *value++;
1849:           }
1850:         }
1851:       } else {
1852:         for (ii=0; ii<bs; ii++,value+=stepval) {
1853:           for (jj=0; jj<bs; jj++) {
1854:             *bap++  = *value++;
1855:           }
1856:         }
1857:       }
1858:       noinsert2:;
1859:       low = i;
1860:     }
1861:     ailen[row] = nrow;
1862:   }
1863:   return(0);
1864: }

1868: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1869: {
1870:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1871:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1872:   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1874:   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1875:   MatScalar      *aa = a->a,*ap;
1876:   PetscReal      ratio=0.6;

1879:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1881:   if (m) rmax = ailen[0];
1882:   for (i=1; i<mbs; i++) {
1883:     /* move each row back by the amount of empty slots (fshift) before it*/
1884:     fshift += imax[i-1] - ailen[i-1];
1885:     rmax   = PetscMax(rmax,ailen[i]);
1886:     if (fshift) {
1887:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1888:       N = ailen[i];
1889:       for (j=0; j<N; j++) {
1890:         ip[j-fshift] = ip[j];
1891:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1892:       }
1893:     }
1894:     ai[i] = ai[i-1] + ailen[i-1];
1895:   }
1896:   if (mbs) {
1897:     fshift += imax[mbs-1] - ailen[mbs-1];
1898:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1899:   }
1900:   /* reset ilen and imax for each row */
1901:   for (i=0; i<mbs; i++) {
1902:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1903:   }
1904:   a->nz = ai[mbs];

1906:   /* diagonals may have moved, so kill the diagonal pointers */
1907:   a->idiagvalid = PETSC_FALSE;
1908:   if (fshift && a->diag) {
1909:     PetscFree(a->diag);
1910:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1911:     a->diag = 0;
1912:   }
1913:   if (fshift && a->nounused == -1) {
1914:     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1915:   }
1916:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1917:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1918:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1919:   a->reallocs          = 0;
1920:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1922:   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1923:   if (a->compressedrow.use){
1924:     Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1925:   }

1927:   A->same_nonzero = PETSC_TRUE;
1928:   return(0);
1929: }

1931: /* 
1932:    This function returns an array of flags which indicate the locations of contiguous
1933:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1934:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1935:    Assume: sizes should be long enough to hold all the values.
1936: */
1939: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1940: {
1941:   PetscInt   i,j,k,row;
1942:   PetscTruth flg;

1945:   for (i=0,j=0; i<n; j++) {
1946:     row = idx[i];
1947:     if (row%bs!=0) { /* Not the begining of a block */
1948:       sizes[j] = 1;
1949:       i++;
1950:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1951:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1952:       i++;
1953:     } else { /* Begining of the block, so check if the complete block exists */
1954:       flg = PETSC_TRUE;
1955:       for (k=1; k<bs; k++) {
1956:         if (row+k != idx[i+k]) { /* break in the block */
1957:           flg = PETSC_FALSE;
1958:           break;
1959:         }
1960:       }
1961:       if (flg) { /* No break in the bs */
1962:         sizes[j] = bs;
1963:         i+= bs;
1964:       } else {
1965:         sizes[j] = 1;
1966:         i++;
1967:       }
1968:     }
1969:   }
1970:   *bs_max = j;
1971:   return(0);
1972: }
1973: 
1976: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1977: {
1978:   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1980:   PetscInt       i,j,k,count,*rows;
1981:   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1982:   PetscScalar    zero = 0.0;
1983:   MatScalar      *aa;

1986:   /* Make a copy of the IS and  sort it */
1987:   /* allocate memory for rows,sizes */
1988:   PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1989:   sizes = rows + is_n;

1991:   /* copy IS values to rows, and sort them */
1992:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1993:   PetscSortInt(is_n,rows);
1994:   if (baij->keepzeroedrows) {
1995:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1996:     bs_max = is_n;
1997:     A->same_nonzero = PETSC_TRUE;
1998:   } else {
1999:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2000:     A->same_nonzero = PETSC_FALSE;
2001:   }

2003:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2004:     row   = rows[j];
2005:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2006:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2007:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2008:     if (sizes[i] == bs && !baij->keepzeroedrows) {
2009:       if (diag != 0.0) {
2010:         if (baij->ilen[row/bs] > 0) {
2011:           baij->ilen[row/bs]       = 1;
2012:           baij->j[baij->i[row/bs]] = row/bs;
2013:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2014:         }
2015:         /* Now insert all the diagonal values for this bs */
2016:         for (k=0; k<bs; k++) {
2017:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2018:         }
2019:       } else { /* (diag == 0.0) */
2020:         baij->ilen[row/bs] = 0;
2021:       } /* end (diag == 0.0) */
2022:     } else { /* (sizes[i] != bs) */
2023: #if defined (PETSC_USE_DEBUG)
2024:       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2025: #endif
2026:       for (k=0; k<count; k++) {
2027:         aa[0] =  zero;
2028:         aa    += bs;
2029:       }
2030:       if (diag != 0.0) {
2031:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2032:       }
2033:     }
2034:   }

2036:   PetscFree(rows);
2037:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2038:   return(0);
2039: }

2043: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2044: {
2045:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2046:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2047:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2048:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2050:   PetscInt       ridx,cidx,bs2=a->bs2;
2051:   PetscTruth     roworiented=a->roworiented;
2052:   MatScalar      *ap,value,*aa=a->a,*bap;

2055:   for (k=0; k<m; k++) { /* loop over added rows */
2056:     row  = im[k];
2057:     brow = row/bs;
2058:     if (row < 0) continue;
2059: #if defined(PETSC_USE_DEBUG)  
2060:     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2061: #endif
2062:     rp   = aj + ai[brow];
2063:     ap   = aa + bs2*ai[brow];
2064:     rmax = imax[brow];
2065:     nrow = ailen[brow];
2066:     low  = 0;
2067:     high = nrow;
2068:     for (l=0; l<n; l++) { /* loop over added columns */
2069:       if (in[l] < 0) continue;
2070: #if defined(PETSC_USE_DEBUG)  
2071:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2072: #endif
2073:       col = in[l]; bcol = col/bs;
2074:       ridx = row % bs; cidx = col % bs;
2075:       if (roworiented) {
2076:         value = v[l + k*n];
2077:       } else {
2078:         value = v[k + l*m];
2079:       }
2080:       if (col <= lastcol) low = 0; else high = nrow;
2081:       lastcol = col;
2082:       while (high-low > 7) {
2083:         t = (low+high)/2;
2084:         if (rp[t] > bcol) high = t;
2085:         else              low  = t;
2086:       }
2087:       for (i=low; i<high; i++) {
2088:         if (rp[i] > bcol) break;
2089:         if (rp[i] == bcol) {
2090:           bap  = ap +  bs2*i + bs*cidx + ridx;
2091:           if (is == ADD_VALUES) *bap += value;
2092:           else                  *bap  = value;
2093:           goto noinsert1;
2094:         }
2095:       }
2096:       if (nonew == 1) goto noinsert1;
2097:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2098:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2099:       N = nrow++ - 1; high++;
2100:       /* shift up all the later entries in this row */
2101:       for (ii=N; ii>=i; ii--) {
2102:         rp[ii+1] = rp[ii];
2103:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2104:       }
2105:       if (N>=i) {
2106:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2107:       }
2108:       rp[i]                      = bcol;
2109:       ap[bs2*i + bs*cidx + ridx] = value;
2110:       a->nz++;
2111:       noinsert1:;
2112:       low = i;
2113:     }
2114:     ailen[brow] = nrow;
2115:   }
2116:   A->same_nonzero = PETSC_FALSE;
2117:   return(0);
2118: }

2120: EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);

2124: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2125: {
2126:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2127:   Mat            outA;
2129:   PetscTruth     row_identity,col_identity;

2132:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2133:   ISIdentity(row,&row_identity);
2134:   ISIdentity(col,&col_identity);
2135:   if (!row_identity || !col_identity) {
2136:     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2137:   }

2139:   outA          = inA;
2140:   inA->factor   = MAT_FACTOR_LU;

2142:   MatMarkDiagonal_SeqBAIJ(inA);

2144:   PetscObjectReference((PetscObject)row);
2145:   if (a->row) { ISDestroy(a->row); }
2146:   a->row = row;
2147:   PetscObjectReference((PetscObject)col);
2148:   if (a->col) { ISDestroy(a->col); }
2149:   a->col = col;
2150: 
2151:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2152:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2153:   PetscLogObjectParent(inA,a->icol);
2154: 
2155:   MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));
2156:   if (!a->solve_work) {
2157:     PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
2158:     PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2159:   }
2160:   MatLUFactorNumeric(outA,inA,info);

2162:   return(0);
2163: }

2168: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2169: {
2170:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2171:   PetscInt    i,nz,nbs;

2174:   nz  = baij->maxnz/baij->bs2;
2175:   nbs = baij->nbs;
2176:   for (i=0; i<nz; i++) {
2177:     baij->j[i] = indices[i];
2178:   }
2179:   baij->nz = nz;
2180:   for (i=0; i<nbs; i++) {
2181:     baij->ilen[i] = baij->imax[i];
2182:   }

2184:   return(0);
2185: }

2190: /*@
2191:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2192:        in the matrix.

2194:   Input Parameters:
2195: +  mat - the SeqBAIJ matrix
2196: -  indices - the column indices

2198:   Level: advanced

2200:   Notes:
2201:     This can be called if you have precomputed the nonzero structure of the 
2202:   matrix and want to provide it to the matrix object to improve the performance
2203:   of the MatSetValues() operation.

2205:     You MUST have set the correct numbers of nonzeros per row in the call to 
2206:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2208:     MUST be called before any calls to MatSetValues();

2210: @*/
2211: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2212: {
2213:   PetscErrorCode ierr,(*f)(Mat,PetscInt *);

2218:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
2219:   if (f) {
2220:     (*f)(mat,indices);
2221:   } else {
2222:     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2223:   }
2224:   return(0);
2225: }

2229: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2230: {
2231:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2233:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2234:   PetscReal      atmp;
2235:   PetscScalar    *x,zero = 0.0;
2236:   MatScalar      *aa;
2237:   PetscInt       ncols,brow,krow,kcol;

2240:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2241:   bs   = A->rmap->bs;
2242:   aa   = a->a;
2243:   ai   = a->i;
2244:   aj   = a->j;
2245:   mbs  = a->mbs;

2247:   VecSet(v,zero);
2248:   VecGetArray(v,&x);
2249:   VecGetLocalSize(v,&n);
2250:   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2251:   for (i=0; i<mbs; i++) {
2252:     ncols = ai[1] - ai[0]; ai++;
2253:     brow  = bs*i;
2254:     for (j=0; j<ncols; j++){
2255:       for (kcol=0; kcol<bs; kcol++){
2256:         for (krow=0; krow<bs; krow++){
2257:           atmp = PetscAbsScalar(*aa);aa++;
2258:           row = brow + krow;    /* row index */
2259:           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2260:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2261:         }
2262:       }
2263:       aj++;
2264:     }
2265:   }
2266:   VecRestoreArray(v,&x);
2267:   return(0);
2268: }

2272: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2273: {

2277:   /* If the two matrices have the same copy implementation, use fast copy. */
2278:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2279:     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2280:     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;

2282:     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2283:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2284:     }
2285:     PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));
2286:   } else {
2287:     MatCopy_Basic(A,B,str);
2288:   }
2289:   return(0);
2290: }

2294: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2295: {

2299:    MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);
2300:   return(0);
2301: }

2305: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2306: {
2307:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2309:   *array = a->a;
2310:   return(0);
2311: }

2315: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2316: {
2318:   return(0);
2319: }

2321:  #include petscblaslapack.h
2324: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2325: {
2326:   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2328:   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2329:   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);

2332:   if (str == SAME_NONZERO_PATTERN) {
2333:     PetscScalar alpha = a;
2334:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2335:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2336:     if (y->xtoy && y->XtoY != X) {
2337:       PetscFree(y->xtoy);
2338:       MatDestroy(y->XtoY);
2339:     }
2340:     if (!y->xtoy) { /* get xtoy */
2341:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2342:       y->XtoY = X;
2343:       PetscObjectReference((PetscObject)X);
2344:     }
2345:     bs2 = bs*bs;
2346:     for (i=0; i<x->nz; i++) {
2347:       j = 0;
2348:       while (j < bs2){
2349:         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2350:         j++;
2351:       }
2352:     }
2353:     PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2354:   } else {
2355:     MatAXPY_Basic(Y,a,X,str);
2356:   }
2357:   return(0);
2358: }

2362: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2363: {
2364:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2365:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2366:   MatScalar      *aa = a->a;

2369:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2370:   return(0);
2371: }

2375: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2376: {
2377:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2378:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2379:   MatScalar      *aa = a->a;

2382:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2383:   return(0);
2384: }


2387: /* -------------------------------------------------------------------*/
2388: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2389:        MatGetRow_SeqBAIJ,
2390:        MatRestoreRow_SeqBAIJ,
2391:        MatMult_SeqBAIJ_N,
2392: /* 4*/ MatMultAdd_SeqBAIJ_N,
2393:        MatMultTranspose_SeqBAIJ,
2394:        MatMultTransposeAdd_SeqBAIJ,
2395:        0,
2396:        0,
2397:        0,
2398: /*10*/ 0,
2399:        MatLUFactor_SeqBAIJ,
2400:        0,
2401:        0,
2402:        MatTranspose_SeqBAIJ,
2403: /*15*/ MatGetInfo_SeqBAIJ,
2404:        MatEqual_SeqBAIJ,
2405:        MatGetDiagonal_SeqBAIJ,
2406:        MatDiagonalScale_SeqBAIJ,
2407:        MatNorm_SeqBAIJ,
2408: /*20*/ 0,
2409:        MatAssemblyEnd_SeqBAIJ,
2410:        0,
2411:        MatSetOption_SeqBAIJ,
2412:        MatZeroEntries_SeqBAIJ,
2413: /*25*/ MatZeroRows_SeqBAIJ,
2414:        0,
2415:        0,
2416:        0,
2417:        0,
2418: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2419:        0,
2420:        0,
2421:        MatGetArray_SeqBAIJ,
2422:        MatRestoreArray_SeqBAIJ,
2423: /*35*/ MatDuplicate_SeqBAIJ,
2424:        0,
2425:        0,
2426:        MatILUFactor_SeqBAIJ,
2427:        0,
2428: /*40*/ MatAXPY_SeqBAIJ,
2429:        MatGetSubMatrices_SeqBAIJ,
2430:        MatIncreaseOverlap_SeqBAIJ,
2431:        MatGetValues_SeqBAIJ,
2432:        MatCopy_SeqBAIJ,
2433: /*45*/ 0,
2434:        MatScale_SeqBAIJ,
2435:        0,
2436:        0,
2437:        0,
2438: /*50*/ 0,
2439:        MatGetRowIJ_SeqBAIJ,
2440:        MatRestoreRowIJ_SeqBAIJ,
2441:        0,
2442:        0,
2443: /*55*/ 0,
2444:        0,
2445:        0,
2446:        0,
2447:        MatSetValuesBlocked_SeqBAIJ,
2448: /*60*/ MatGetSubMatrix_SeqBAIJ,
2449:        MatDestroy_SeqBAIJ,
2450:        MatView_SeqBAIJ,
2451:        0,
2452:        0,
2453: /*65*/ 0,
2454:        0,
2455:        0,
2456:        0,
2457:        0,
2458: /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2459:        0,
2460:        MatConvert_Basic,
2461:        0,
2462:        0,
2463: /*75*/ 0,
2464:        0,
2465:        0,
2466:        0,
2467:        0,
2468: /*80*/ 0,
2469:        0,
2470:        0,
2471:        0,
2472:        MatLoad_SeqBAIJ,
2473: /*85*/ 0,
2474:        0,
2475:        0,
2476:        0,
2477:        0,
2478: /*90*/ 0,
2479:        0,
2480:        0,
2481:        0,
2482:        0,
2483: /*95*/ 0,
2484:        0,
2485:        0,
2486:        0,
2487:        0,
2488: /*100*/0,
2489:        0,
2490:        0,
2491:        0,
2492:        0,
2493: /*105*/0,
2494:        MatRealPart_SeqBAIJ,
2495:        MatImaginaryPart_SeqBAIJ,
2496:        0,
2497:        0,
2498: /*110*/0,
2499:        0,
2500:        0,
2501:        0,
2502:        MatMissingDiagonal_SeqBAIJ
2503: /*115*/
2504: };

2509: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2510: {
2511:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2512:   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;

2516:   if (aij->nonew != 1) {
2517:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2518:   }

2520:   /* allocate space for values if not already there */
2521:   if (!aij->saved_values) {
2522:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2523:   }

2525:   /* copy values over */
2526:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2527:   return(0);
2528: }

2534: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2535: {
2536:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2538:   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;

2541:   if (aij->nonew != 1) {
2542:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2543:   }
2544:   if (!aij->saved_values) {
2545:     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2546:   }

2548:   /* copy values over */
2549:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2550:   return(0);
2551: }


2562: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2563: {
2564:   Mat_SeqBAIJ    *b;
2566:   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2567:   PetscTruth     flg,skipallocation = PETSC_FALSE;


2571:   if (nz == MAT_SKIP_ALLOCATION) {
2572:     skipallocation = PETSC_TRUE;
2573:     nz             = 0;
2574:   }

2576:   if (bs < 0) {
2577:     PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2578:       PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);
2579:     PetscOptionsEnd();
2580:     bs   = PetscAbs(bs);
2581:   }
2582:   if (nnz && newbs != bs) {
2583:     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2584:   }
2585:   bs   = newbs;

2587:   PetscMapSetBlockSize(B->rmap,bs);
2588:   PetscMapSetBlockSize(B->cmap,bs);
2589:   PetscMapSetUp(B->rmap);
2590:   PetscMapSetUp(B->cmap);

2592:   B->preallocated = PETSC_TRUE;

2594:   mbs  = B->rmap->n/bs;
2595:   nbs  = B->cmap->n/bs;
2596:   bs2  = bs*bs;

2598:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2599:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2600:   }

2602:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2603:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2604:   if (nnz) {
2605:     for (i=0; i<mbs; i++) {
2606:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2607:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2608:     }
2609:   }

2611:   b       = (Mat_SeqBAIJ*)B->data;
2612:   PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2613:     PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2614:   PetscOptionsEnd();

2616:   if (!flg) {
2617:     switch (bs) {
2618:     case 1:
2619:       B->ops->mult            = MatMult_SeqBAIJ_1;
2620:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2621:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2622:       break;
2623:     case 2:
2624:       B->ops->mult            = MatMult_SeqBAIJ_2;
2625:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2626:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2627:       break;
2628:     case 3:
2629:       B->ops->mult            = MatMult_SeqBAIJ_3;
2630:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2631:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2632:       break;
2633:     case 4:
2634:       B->ops->mult            = MatMult_SeqBAIJ_4;
2635:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2636:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2637:       break;
2638:     case 5:
2639:       B->ops->mult            = MatMult_SeqBAIJ_5;
2640:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2641:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2642:       break;
2643:     case 6:
2644:       B->ops->mult            = MatMult_SeqBAIJ_6;
2645:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2646:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2647:       break;
2648:     case 7:
2649:       B->ops->mult            = MatMult_SeqBAIJ_7;
2650:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2651:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2652:       break;
2653:     default:
2654:       B->ops->mult            = MatMult_SeqBAIJ_N;
2655:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2656:       break;
2657:     }
2658:   }
2659:   B->rmap->bs      = bs;
2660:   b->mbs     = mbs;
2661:   b->nbs     = nbs;
2662:   if (!skipallocation) {
2663:     if (!b->imax) {
2664:       PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2665:     }
2666:     /* b->ilen will count nonzeros in each block row so far. */
2667:     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2668:     if (!nnz) {
2669:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2670:       else if (nz <= 0)        nz = 1;
2671:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2672:       nz = nz*mbs;
2673:     } else {
2674:       nz = 0;
2675:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2676:     }

2678:     /* allocate the matrix space */
2679:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2680:     PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
2681:     PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2682:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2683:     PetscMemzero(b->j,nz*sizeof(PetscInt));
2684:     b->singlemalloc = PETSC_TRUE;
2685:     b->i[0] = 0;
2686:     for (i=1; i<mbs+1; i++) {
2687:       b->i[i] = b->i[i-1] + b->imax[i-1];
2688:     }
2689:     b->free_a     = PETSC_TRUE;
2690:     b->free_ij    = PETSC_TRUE;
2691:   } else {
2692:     b->free_a     = PETSC_FALSE;
2693:     b->free_ij    = PETSC_FALSE;
2694:   }

2696:   B->rmap->bs          = bs;
2697:   b->bs2              = bs2;
2698:   b->mbs              = mbs;
2699:   b->nz               = 0;
2700:   b->maxnz            = nz*bs2;
2701:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2702:   return(0);
2703: }

2709: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2710: {
2711:   PetscInt       i,m,nz,nz_max=0,*nnz;
2712:   PetscScalar    *values=0;


2717:   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);

2719:   PetscMapSetBlockSize(B->rmap,bs);
2720:   PetscMapSetBlockSize(B->cmap,bs);
2721:   PetscMapSetUp(B->rmap);
2722:   PetscMapSetUp(B->cmap);
2723:   m = B->rmap->n/bs;

2725:   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2726:   PetscMalloc((m+1) * sizeof(PetscInt), &nnz);
2727:   for(i=0; i<m; i++) {
2728:     nz = ii[i+1]- ii[i];
2729:     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2730:     nz_max = PetscMax(nz_max, nz);
2731:     nnz[i] = nz;
2732:   }
2733:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2734:   PetscFree(nnz);

2736:   values = (PetscScalar*)V;
2737:   if (!values) {
2738:     PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
2739:     PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
2740:   }
2741:   for (i=0; i<m; i++) {
2742:     PetscInt          ncols  = ii[i+1] - ii[i];
2743:     const PetscInt    *icols = jj + ii[i];
2744:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2745:     MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
2746:   }
2747:   if (!V) { PetscFree(values); }
2748:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2749:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2751:   return(0);
2752: }



2761: /*MC
2762:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
2763:    block sparse compressed row format.

2765:    Options Database Keys:
2766: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

2768:   Level: beginner

2770: .seealso: MatCreateSeqBAIJ()
2771: M*/


2777: PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
2778: {
2780:   PetscMPIInt    size;
2781:   Mat_SeqBAIJ    *b;

2784:   MPI_Comm_size(((PetscObject)B)->comm,&size);
2785:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

2787:   PetscNewLog(B,Mat_SeqBAIJ,&b);
2788:   B->data = (void*)b;
2789:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2790:   B->mapping          = 0;
2791:   b->row              = 0;
2792:   b->col              = 0;
2793:   b->icol             = 0;
2794:   b->reallocs         = 0;
2795:   b->saved_values     = 0;

2797:   b->roworiented      = PETSC_TRUE;
2798:   b->nonew            = 0;
2799:   b->diag             = 0;
2800:   b->solve_work       = 0;
2801:   b->mult_work        = 0;
2802:   B->spptr            = 0;
2803:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2804:   b->keepzeroedrows   = PETSC_FALSE;
2805:   b->xtoy              = 0;
2806:   b->XtoY              = 0;
2807:   b->compressedrow.use     = PETSC_FALSE;
2808:   b->compressedrow.nrows   = 0;
2809:   b->compressedrow.i       = PETSC_NULL;
2810:   b->compressedrow.rindex  = PETSC_NULL;
2811:   b->compressedrow.checked = PETSC_FALSE;
2812:   B->same_nonzero          = PETSC_FALSE;

2814:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2815:                                      "MatGetFactorAvailable_seqbaij_petsc",
2816:                                      MatGetFactorAvailable_seqbaij_petsc);
2817:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2818:                                      "MatGetFactor_seqbaij_petsc",
2819:                                      MatGetFactor_seqbaij_petsc);
2820:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2821:                                      "MatInvertBlockDiagonal_SeqBAIJ",
2822:                                       MatInvertBlockDiagonal_SeqBAIJ);
2823:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2824:                                      "MatStoreValues_SeqBAIJ",
2825:                                       MatStoreValues_SeqBAIJ);
2826:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2827:                                      "MatRetrieveValues_SeqBAIJ",
2828:                                       MatRetrieveValues_SeqBAIJ);
2829:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2830:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2831:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
2832:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2833:                                      "MatConvert_SeqBAIJ_SeqAIJ",
2834:                                       MatConvert_SeqBAIJ_SeqAIJ);
2835:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2836:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2837:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
2838:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2839:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2840:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
2841:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2842:                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2843:                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
2844:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
2845:   return(0);
2846: }

2851: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2852: {
2853:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2855:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

2858:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

2860:   PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2861:   for (i=0; i<mbs; i++) {
2862:     c->imax[i] = a->imax[i];
2863:     c->ilen[i] = a->ilen[i];
2864:   }

2866:   /* allocate the matrix space */
2867:   PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2868:   c->singlemalloc = PETSC_TRUE;
2869:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2870:   if (mbs > 0) {
2871:     PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2872:     if (cpvalues == MAT_COPY_VALUES) {
2873:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2874:     } else {
2875:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2876:     }
2877:   }
2878:   c->roworiented = a->roworiented;
2879:   c->nonew       = a->nonew;
2880:   PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);
2881:   PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);
2882:   c->bs2         = a->bs2;
2883:   c->mbs         = a->mbs;
2884:   c->nbs         = a->nbs;

2886:   if (a->diag) {
2887:     PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2888:     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2889:     for (i=0; i<mbs; i++) {
2890:       c->diag[i] = a->diag[i];
2891:     }
2892:   } else c->diag        = 0;
2893:   c->nz                 = a->nz;
2894:   c->maxnz              = a->maxnz;
2895:   c->solve_work         = 0;
2896:   c->mult_work          = 0;
2897:   c->free_a             = PETSC_TRUE;
2898:   c->free_ij            = PETSC_TRUE;
2899:   C->preallocated       = PETSC_TRUE;
2900:   C->assembled          = PETSC_TRUE;

2902:   c->compressedrow.use     = a->compressedrow.use;
2903:   c->compressedrow.nrows   = a->compressedrow.nrows;
2904:   c->compressedrow.checked = a->compressedrow.checked;
2905:   if ( a->compressedrow.checked && a->compressedrow.use){
2906:     i = a->compressedrow.nrows;
2907:     PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2908:     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2909:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2910:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2911:   } else {
2912:     c->compressedrow.use    = PETSC_FALSE;
2913:     c->compressedrow.i      = PETSC_NULL;
2914:     c->compressedrow.rindex = PETSC_NULL;
2915:   }
2916:   C->same_nonzero = A->same_nonzero;
2917:   PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2918:   return(0);
2919: }

2923: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2924: {

2928:   MatCreate(((PetscObject)A)->comm,B);
2929:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
2930:   MatSetType(*B,MATSEQBAIJ);
2931:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
2932:   return(0);
2933: }

2937: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2938: {
2939:   Mat_SeqBAIJ    *a;
2940:   Mat            B;
2942:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2943:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2944:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2945:   PetscInt       *masked,nmask,tmp,bs2,ishift;
2946:   PetscMPIInt    size;
2947:   int            fd;
2948:   PetscScalar    *aa;
2949:   MPI_Comm       comm = ((PetscObject)viewer)->comm;

2952:   PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
2953:     PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2954:   PetscOptionsEnd();
2955:   bs2  = bs*bs;

2957:   MPI_Comm_size(comm,&size);
2958:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2959:   PetscViewerBinaryGetDescriptor(viewer,&fd);
2960:   PetscBinaryRead(fd,header,4,PETSC_INT);
2961:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2962:   M = header[1]; N = header[2]; nz = header[3];

2964:   if (header[3] < 0) {
2965:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2966:   }

2968:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

2970:   /* 
2971:      This code adds extra rows to make sure the number of rows is 
2972:     divisible by the blocksize
2973:   */
2974:   mbs        = M/bs;
2975:   extra_rows = bs - M + bs*(mbs);
2976:   if (extra_rows == bs) extra_rows = 0;
2977:   else                  mbs++;
2978:   if (extra_rows) {
2979:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2980:   }

2982:   /* read in row lengths */
2983:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2984:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2985:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

2987:   /* read in column indices */
2988:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2989:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
2990:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

2992:   /* loop over row lengths determining block row lengths */
2993:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2994:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2995:   PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2996:   PetscMemzero(mask,mbs*sizeof(PetscInt));
2997:   masked   = mask + mbs;
2998:   rowcount = 0; nzcount = 0;
2999:   for (i=0; i<mbs; i++) {
3000:     nmask = 0;
3001:     for (j=0; j<bs; j++) {
3002:       kmax = rowlengths[rowcount];
3003:       for (k=0; k<kmax; k++) {
3004:         tmp = jj[nzcount++]/bs;
3005:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3006:       }
3007:       rowcount++;
3008:     }
3009:     browlengths[i] += nmask;
3010:     /* zero out the mask elements we set */
3011:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3012:   }

3014:   /* create our matrix */
3015:   MatCreate(comm,&B);
3016:   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3017:   MatSetType(B,type);
3018:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
3019:   a = (Mat_SeqBAIJ*)B->data;

3021:   /* set matrix "i" values */
3022:   a->i[0] = 0;
3023:   for (i=1; i<= mbs; i++) {
3024:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3025:     a->ilen[i-1] = browlengths[i-1];
3026:   }
3027:   a->nz         = 0;
3028:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3030:   /* read in nonzero values */
3031:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3032:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3033:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3035:   /* set "a" and "j" values into matrix */
3036:   nzcount = 0; jcount = 0;
3037:   for (i=0; i<mbs; i++) {
3038:     nzcountb = nzcount;
3039:     nmask    = 0;
3040:     for (j=0; j<bs; j++) {
3041:       kmax = rowlengths[i*bs+j];
3042:       for (k=0; k<kmax; k++) {
3043:         tmp = jj[nzcount++]/bs;
3044:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3045:       }
3046:     }
3047:     /* sort the masked values */
3048:     PetscSortInt(nmask,masked);

3050:     /* set "j" values into matrix */
3051:     maskcount = 1;
3052:     for (j=0; j<nmask; j++) {
3053:       a->j[jcount++]  = masked[j];
3054:       mask[masked[j]] = maskcount++;
3055:     }
3056:     /* set "a" values into matrix */
3057:     ishift = bs2*a->i[i];
3058:     for (j=0; j<bs; j++) {
3059:       kmax = rowlengths[i*bs+j];
3060:       for (k=0; k<kmax; k++) {
3061:         tmp       = jj[nzcountb]/bs ;
3062:         block     = mask[tmp] - 1;
3063:         point     = jj[nzcountb] - bs*tmp;
3064:         idx       = ishift + bs2*block + j + bs*point;
3065:         a->a[idx] = (MatScalar)aa[nzcountb++];
3066:       }
3067:     }
3068:     /* zero out the mask elements we set */
3069:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3070:   }
3071:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3073:   PetscFree(rowlengths);
3074:   PetscFree(browlengths);
3075:   PetscFree(aa);
3076:   PetscFree(jj);
3077:   PetscFree(mask);

3079:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3080:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3081:   MatView_Private(B);

3083:   *A = B;
3084:   return(0);
3085: }

3089: /*@C
3090:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3091:    compressed row) format.  For good matrix assembly performance the
3092:    user should preallocate the matrix storage by setting the parameter nz
3093:    (or the array nnz).  By setting these parameters accurately, performance
3094:    during matrix assembly can be increased by more than a factor of 50.

3096:    Collective on MPI_Comm

3098:    Input Parameters:
3099: +  comm - MPI communicator, set to PETSC_COMM_SELF
3100: .  bs - size of block
3101: .  m - number of rows
3102: .  n - number of columns
3103: .  nz - number of nonzero blocks  per block row (same for all rows)
3104: -  nnz - array containing the number of nonzero blocks in the various block rows 
3105:          (possibly different for each block row) or PETSC_NULL

3107:    Output Parameter:
3108: .  A - the matrix 

3110:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3111:    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3112:    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3113:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3115:    Options Database Keys:
3116: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3117:                      block calculations (much slower)
3118: .    -mat_block_size - size of the blocks to use

3120:    Level: intermediate

3122:    Notes:
3123:    The number of rows and columns must be divisible by blocksize.

3125:    If the nnz parameter is given then the nz parameter is ignored

3127:    A nonzero block is any block that as 1 or more nonzeros in it

3129:    The block AIJ format is fully compatible with standard Fortran 77
3130:    storage.  That is, the stored row and column indices can begin at
3131:    either one (as in Fortran) or zero.  See the users' manual for details.

3133:    Specify the preallocated storage with either nz or nnz (not both).
3134:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3135:    allocation.  For additional details, see the users manual chapter on
3136:    matrices.

3138: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3139: @*/
3140: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3141: {
3143: 
3145:   MatCreate(comm,A);
3146:   MatSetSizes(*A,m,n,m,n);
3147:   MatSetType(*A,MATSEQBAIJ);
3148:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3149:   return(0);
3150: }

3154: /*@C
3155:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3156:    per row in the matrix. For good matrix assembly performance the
3157:    user should preallocate the matrix storage by setting the parameter nz
3158:    (or the array nnz).  By setting these parameters accurately, performance
3159:    during matrix assembly can be increased by more than a factor of 50.

3161:    Collective on MPI_Comm

3163:    Input Parameters:
3164: +  A - the matrix
3165: .  bs - size of block
3166: .  nz - number of block nonzeros per block row (same for all rows)
3167: -  nnz - array containing the number of block nonzeros in the various block rows 
3168:          (possibly different for each block row) or PETSC_NULL

3170:    Options Database Keys:
3171: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3172:                      block calculations (much slower)
3173: .    -mat_block_size - size of the blocks to use

3175:    Level: intermediate

3177:    Notes:
3178:    If the nnz parameter is given then the nz parameter is ignored

3180:    You can call MatGetInfo() to get information on how effective the preallocation was;
3181:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3182:    You can also run with the option -info and look for messages with the string 
3183:    malloc in them to see if additional memory allocation was needed.

3185:    The block AIJ format is fully compatible with standard Fortran 77
3186:    storage.  That is, the stored row and column indices can begin at
3187:    either one (as in Fortran) or zero.  See the users' manual for details.

3189:    Specify the preallocated storage with either nz or nnz (not both).
3190:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3191:    allocation.  For additional details, see the users manual chapter on
3192:    matrices.

3194: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3195: @*/
3196: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3197: {
3198:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);

3201:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
3202:   if (f) {
3203:     (*f)(B,bs,nz,nnz);
3204:   }
3205:   return(0);
3206: }

3210: /*@C
3211:    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3212:    (the default sequential PETSc format).  

3214:    Collective on MPI_Comm

3216:    Input Parameters:
3217: +  A - the matrix 
3218: .  i - the indices into j for the start of each local row (starts with zero)
3219: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3220: -  v - optional values in the matrix

3222:    Level: developer

3224: .keywords: matrix, aij, compressed row, sparse

3226: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3227: @*/
3228: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3229: {
3230:   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

3233:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);
3234:   if (f) {
3235:     (*f)(B,bs,i,j,v);
3236:   }
3237:   return(0);
3238: }


3243: /*@
3244:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 
3245:               (upper triangular entries in CSR format) provided by the user.

3247:      Collective on MPI_Comm

3249:    Input Parameters:
3250: +  comm - must be an MPI communicator of size 1
3251: .  bs - size of block
3252: .  m - number of rows
3253: .  n - number of columns
3254: .  i - row indices
3255: .  j - column indices
3256: -  a - matrix values

3258:    Output Parameter:
3259: .  mat - the matrix

3261:    Level: intermediate

3263:    Notes:
3264:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3265:     once the matrix is destroyed

3267:        You cannot set new nonzero locations into this matrix, that will generate an error.

3269:        The i and j indices are 0 based

3271: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()

3273: @*/
3274: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3275: {
3277:   PetscInt       ii;
3278:   Mat_SeqBAIJ    *baij;

3281:   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3282:   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3283: 
3284:   MatCreate(comm,mat);
3285:   MatSetSizes(*mat,m,n,m,n);
3286:   MatSetType(*mat,MATSEQBAIJ);
3287:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3288:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3289:   PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);

3291:   baij->i = i;
3292:   baij->j = j;
3293:   baij->a = a;
3294:   baij->singlemalloc = PETSC_FALSE;
3295:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3296:   baij->free_a       = PETSC_FALSE;
3297:   baij->free_ij       = PETSC_FALSE;

3299:   for (ii=0; ii<m; ii++) {
3300:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3301: #if defined(PETSC_USE_DEBUG)
3302:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3303: #endif    
3304:   }
3305: #if defined(PETSC_USE_DEBUG)
3306:   for (ii=0; ii<baij->i[m]; ii++) {
3307:     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3308:     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3309:   }
3310: #endif    

3312:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3313:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3314:   return(0);
3315: }