Actual source code: maij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  5:   This format is used for restriction and interpolation operations for 
  6:   multicomponent problems. It interpolates each component the same way
  7:   independently.

  9:      We provide:
 10:          MatMult()
 11:          MatMultTranspose()
 12:          MatMultTransposeAdd()
 13:          MatMultAdd()
 14:           and
 15:          MatCreateMAIJ(Mat,dof,Mat*)

 17:      This single directory handles both the sequential and parallel codes
 18: */

 20:  #include ../src/mat/impls/maij/maij.h
 21:  #include ../src/mat/utils/freespace.h
 22:  #include private/vecimpl.h

 26: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 27: {
 29:   PetscTruth     ismpimaij,isseqmaij;

 32:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 33:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 34:   if (ismpimaij) {
 35:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 37:     *B = b->A;
 38:   } else if (isseqmaij) {
 39:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 41:     *B = b->AIJ;
 42:   } else {
 43:     *B = A;
 44:   }
 45:   return(0);
 46: }

 50: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 51: {
 53:   Mat            Aij;

 56:   MatMAIJGetAIJ(A,&Aij);
 57:   MatCreateMAIJ(Aij,dof,B);
 58:   return(0);
 59: }

 63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 64: {
 66:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 69:   if (b->AIJ) {
 70:     MatDestroy(b->AIJ);
 71:   }
 72:   PetscFree(b);
 73:   return(0);
 74: }

 78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
 79: {
 81:   Mat            B;

 84:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
 85:   MatView(B,viewer);
 86:   MatDestroy(B);
 87:   return(0);
 88: }

 92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
 93: {
 95:   Mat            B;

 98:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
 99:   MatView(B,viewer);
100:   MatDestroy(B);
101:   return(0);
102: }

106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

112:   if (b->AIJ) {
113:     MatDestroy(b->AIJ);
114:   }
115:   if (b->OAIJ) {
116:     MatDestroy(b->OAIJ);
117:   }
118:   if (b->A) {
119:     MatDestroy(b->A);
120:   }
121:   if (b->ctx) {
122:     VecScatterDestroy(b->ctx);
123:   }
124:   if (b->w) {
125:     VecDestroy(b->w);
126:   }
127:   PetscFree(b);
128:   PetscObjectChangeTypeName((PetscObject)A,0);
129:   return(0);
130: }

132: /*MC
133:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
134:   multicomponent problems, interpolating or restricting each component the same way independently.
135:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

137:   Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd

143:   Level: advanced

145: .seealso: MatCreateMAIJ(), MatMAIJGetAIJ(), MatMAIJRedimension()
146: M*/

151: PetscErrorCode  MatCreate_MAIJ(Mat A)
152: {
154:   Mat_MPIMAIJ    *b;
155:   PetscMPIInt    size;

158:   PetscNewLog(A,Mat_MPIMAIJ,&b);
159:   A->data  = (void*)b;
160:   PetscMemzero(A->ops,sizeof(struct _MatOps));
161:   A->mapping          = 0;

163:   b->AIJ  = 0;
164:   b->dof  = 0;
165:   b->OAIJ = 0;
166:   b->ctx  = 0;
167:   b->w    = 0;
168:   MPI_Comm_size(((PetscObject)A)->comm,&size);
169:   if (size == 1){
170:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
171:   } else {
172:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
173:   }
174:   return(0);
175: }

178: /* --------------------------------------------------------------------------------------*/
181: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
182: {
183:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
184:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
185:   PetscScalar    *x,*y,*v,sum1, sum2;
187:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
188:   PetscInt       n,i,jrow,j;

191:   VecGetArray(xx,&x);
192:   VecGetArray(yy,&y);
193:   idx  = a->j;
194:   v    = a->a;
195:   ii   = a->i;

197:   for (i=0; i<m; i++) {
198:     jrow = ii[i];
199:     n    = ii[i+1] - jrow;
200:     sum1  = 0.0;
201:     sum2  = 0.0;
202:     nonzerorow += (n>0);
203:     for (j=0; j<n; j++) {
204:       sum1 += v[jrow]*x[2*idx[jrow]];
205:       sum2 += v[jrow]*x[2*idx[jrow]+1];
206:       jrow++;
207:      }
208:     y[2*i]   = sum1;
209:     y[2*i+1] = sum2;
210:   }

212:   PetscLogFlops(4*a->nz - 2*nonzerorow);
213:   VecRestoreArray(xx,&x);
214:   VecRestoreArray(yy,&y);
215:   return(0);
216: }

220: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
221: {
222:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
223:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
224:   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
226:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

229:   VecSet(yy,zero);
230:   VecGetArray(xx,&x);
231:   VecGetArray(yy,&y);
232: 
233:   for (i=0; i<m; i++) {
234:     idx    = a->j + a->i[i] ;
235:     v      = a->a + a->i[i] ;
236:     n      = a->i[i+1] - a->i[i];
237:     alpha1 = x[2*i];
238:     alpha2 = x[2*i+1];
239:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
240:   }
241:   PetscLogFlops(4*a->nz);
242:   VecRestoreArray(xx,&x);
243:   VecRestoreArray(yy,&y);
244:   return(0);
245: }

249: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250: {
251:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
252:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
253:   PetscScalar    *x,*y,*v,sum1, sum2;
255:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
256:   PetscInt       n,i,jrow,j;

259:   if (yy != zz) {VecCopy(yy,zz);}
260:   VecGetArray(xx,&x);
261:   VecGetArray(zz,&y);
262:   idx  = a->j;
263:   v    = a->a;
264:   ii   = a->i;

266:   for (i=0; i<m; i++) {
267:     jrow = ii[i];
268:     n    = ii[i+1] - jrow;
269:     sum1  = 0.0;
270:     sum2  = 0.0;
271:     for (j=0; j<n; j++) {
272:       sum1 += v[jrow]*x[2*idx[jrow]];
273:       sum2 += v[jrow]*x[2*idx[jrow]+1];
274:       jrow++;
275:      }
276:     y[2*i]   += sum1;
277:     y[2*i+1] += sum2;
278:   }

280:   PetscLogFlops(4*a->nz);
281:   VecRestoreArray(xx,&x);
282:   VecRestoreArray(zz,&y);
283:   return(0);
284: }
287: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
288: {
289:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
290:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
291:   PetscScalar    *x,*y,*v,alpha1,alpha2;
293:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

296:   if (yy != zz) {VecCopy(yy,zz);}
297:   VecGetArray(xx,&x);
298:   VecGetArray(zz,&y);
299: 
300:   for (i=0; i<m; i++) {
301:     idx   = a->j + a->i[i] ;
302:     v     = a->a + a->i[i] ;
303:     n     = a->i[i+1] - a->i[i];
304:     alpha1 = x[2*i];
305:     alpha2 = x[2*i+1];
306:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
307:   }
308:   PetscLogFlops(4*a->nz);
309:   VecRestoreArray(xx,&x);
310:   VecRestoreArray(zz,&y);
311:   return(0);
312: }
313: /* --------------------------------------------------------------------------------------*/
316: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
317: {
318:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
319:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
320:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
322:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
323:   PetscInt       n,i,jrow,j;

326:   VecGetArray(xx,&x);
327:   VecGetArray(yy,&y);
328:   idx  = a->j;
329:   v    = a->a;
330:   ii   = a->i;

332:   for (i=0; i<m; i++) {
333:     jrow = ii[i];
334:     n    = ii[i+1] - jrow;
335:     sum1  = 0.0;
336:     sum2  = 0.0;
337:     sum3  = 0.0;
338:     nonzerorow += (n>0);
339:     for (j=0; j<n; j++) {
340:       sum1 += v[jrow]*x[3*idx[jrow]];
341:       sum2 += v[jrow]*x[3*idx[jrow]+1];
342:       sum3 += v[jrow]*x[3*idx[jrow]+2];
343:       jrow++;
344:      }
345:     y[3*i]   = sum1;
346:     y[3*i+1] = sum2;
347:     y[3*i+2] = sum3;
348:   }

350:   PetscLogFlops(6*a->nz - 3*nonzerorow);
351:   VecRestoreArray(xx,&x);
352:   VecRestoreArray(yy,&y);
353:   return(0);
354: }

358: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
359: {
360:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
361:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
362:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
364:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

367:   VecSet(yy,zero);
368:   VecGetArray(xx,&x);
369:   VecGetArray(yy,&y);
370: 
371:   for (i=0; i<m; i++) {
372:     idx    = a->j + a->i[i];
373:     v      = a->a + a->i[i];
374:     n      = a->i[i+1] - a->i[i];
375:     alpha1 = x[3*i];
376:     alpha2 = x[3*i+1];
377:     alpha3 = x[3*i+2];
378:     while (n-->0) {
379:       y[3*(*idx)]   += alpha1*(*v);
380:       y[3*(*idx)+1] += alpha2*(*v);
381:       y[3*(*idx)+2] += alpha3*(*v);
382:       idx++; v++;
383:     }
384:   }
385:   PetscLogFlops(6*a->nz);
386:   VecRestoreArray(xx,&x);
387:   VecRestoreArray(yy,&y);
388:   return(0);
389: }

393: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
394: {
395:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
396:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
397:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
399:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
400:   PetscInt       n,i,jrow,j;

403:   if (yy != zz) {VecCopy(yy,zz);}
404:   VecGetArray(xx,&x);
405:   VecGetArray(zz,&y);
406:   idx  = a->j;
407:   v    = a->a;
408:   ii   = a->i;

410:   for (i=0; i<m; i++) {
411:     jrow = ii[i];
412:     n    = ii[i+1] - jrow;
413:     sum1  = 0.0;
414:     sum2  = 0.0;
415:     sum3  = 0.0;
416:     for (j=0; j<n; j++) {
417:       sum1 += v[jrow]*x[3*idx[jrow]];
418:       sum2 += v[jrow]*x[3*idx[jrow]+1];
419:       sum3 += v[jrow]*x[3*idx[jrow]+2];
420:       jrow++;
421:      }
422:     y[3*i]   += sum1;
423:     y[3*i+1] += sum2;
424:     y[3*i+2] += sum3;
425:   }

427:   PetscLogFlops(6*a->nz);
428:   VecRestoreArray(xx,&x);
429:   VecRestoreArray(zz,&y);
430:   return(0);
431: }
434: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
435: {
436:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
437:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
438:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
440:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

443:   if (yy != zz) {VecCopy(yy,zz);}
444:   VecGetArray(xx,&x);
445:   VecGetArray(zz,&y);
446:   for (i=0; i<m; i++) {
447:     idx    = a->j + a->i[i] ;
448:     v      = a->a + a->i[i] ;
449:     n      = a->i[i+1] - a->i[i];
450:     alpha1 = x[3*i];
451:     alpha2 = x[3*i+1];
452:     alpha3 = x[3*i+2];
453:     while (n-->0) {
454:       y[3*(*idx)]   += alpha1*(*v);
455:       y[3*(*idx)+1] += alpha2*(*v);
456:       y[3*(*idx)+2] += alpha3*(*v);
457:       idx++; v++;
458:     }
459:   }
460:   PetscLogFlops(6*a->nz);
461:   VecRestoreArray(xx,&x);
462:   VecRestoreArray(zz,&y);
463:   return(0);
464: }

466: /* ------------------------------------------------------------------------------*/
469: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
470: {
471:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
472:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
473:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
475:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
476:   PetscInt       n,i,jrow,j;

479:   VecGetArray(xx,&x);
480:   VecGetArray(yy,&y);
481:   idx  = a->j;
482:   v    = a->a;
483:   ii   = a->i;

485:   for (i=0; i<m; i++) {
486:     jrow = ii[i];
487:     n    = ii[i+1] - jrow;
488:     sum1  = 0.0;
489:     sum2  = 0.0;
490:     sum3  = 0.0;
491:     sum4  = 0.0;
492:     nonzerorow += (n>0);
493:     for (j=0; j<n; j++) {
494:       sum1 += v[jrow]*x[4*idx[jrow]];
495:       sum2 += v[jrow]*x[4*idx[jrow]+1];
496:       sum3 += v[jrow]*x[4*idx[jrow]+2];
497:       sum4 += v[jrow]*x[4*idx[jrow]+3];
498:       jrow++;
499:      }
500:     y[4*i]   = sum1;
501:     y[4*i+1] = sum2;
502:     y[4*i+2] = sum3;
503:     y[4*i+3] = sum4;
504:   }

506:   PetscLogFlops(8*a->nz - 4*nonzerorow);
507:   VecRestoreArray(xx,&x);
508:   VecRestoreArray(yy,&y);
509:   return(0);
510: }

514: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
515: {
516:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
517:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
518:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
520:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

523:   VecSet(yy,zero);
524:   VecGetArray(xx,&x);
525:   VecGetArray(yy,&y);
526:   for (i=0; i<m; i++) {
527:     idx    = a->j + a->i[i] ;
528:     v      = a->a + a->i[i] ;
529:     n      = a->i[i+1] - a->i[i];
530:     alpha1 = x[4*i];
531:     alpha2 = x[4*i+1];
532:     alpha3 = x[4*i+2];
533:     alpha4 = x[4*i+3];
534:     while (n-->0) {
535:       y[4*(*idx)]   += alpha1*(*v);
536:       y[4*(*idx)+1] += alpha2*(*v);
537:       y[4*(*idx)+2] += alpha3*(*v);
538:       y[4*(*idx)+3] += alpha4*(*v);
539:       idx++; v++;
540:     }
541:   }
542:   PetscLogFlops(8*a->nz);
543:   VecRestoreArray(xx,&x);
544:   VecRestoreArray(yy,&y);
545:   return(0);
546: }

550: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
551: {
552:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
553:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
554:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
556:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
557:   PetscInt       n,i,jrow,j;

560:   if (yy != zz) {VecCopy(yy,zz);}
561:   VecGetArray(xx,&x);
562:   VecGetArray(zz,&y);
563:   idx  = a->j;
564:   v    = a->a;
565:   ii   = a->i;

567:   for (i=0; i<m; i++) {
568:     jrow = ii[i];
569:     n    = ii[i+1] - jrow;
570:     sum1  = 0.0;
571:     sum2  = 0.0;
572:     sum3  = 0.0;
573:     sum4  = 0.0;
574:     for (j=0; j<n; j++) {
575:       sum1 += v[jrow]*x[4*idx[jrow]];
576:       sum2 += v[jrow]*x[4*idx[jrow]+1];
577:       sum3 += v[jrow]*x[4*idx[jrow]+2];
578:       sum4 += v[jrow]*x[4*idx[jrow]+3];
579:       jrow++;
580:      }
581:     y[4*i]   += sum1;
582:     y[4*i+1] += sum2;
583:     y[4*i+2] += sum3;
584:     y[4*i+3] += sum4;
585:   }

587:   PetscLogFlops(8*a->nz);
588:   VecRestoreArray(xx,&x);
589:   VecRestoreArray(zz,&y);
590:   return(0);
591: }
594: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
595: {
596:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
597:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
598:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
600:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

603:   if (yy != zz) {VecCopy(yy,zz);}
604:   VecGetArray(xx,&x);
605:   VecGetArray(zz,&y);
606: 
607:   for (i=0; i<m; i++) {
608:     idx    = a->j + a->i[i] ;
609:     v      = a->a + a->i[i] ;
610:     n      = a->i[i+1] - a->i[i];
611:     alpha1 = x[4*i];
612:     alpha2 = x[4*i+1];
613:     alpha3 = x[4*i+2];
614:     alpha4 = x[4*i+3];
615:     while (n-->0) {
616:       y[4*(*idx)]   += alpha1*(*v);
617:       y[4*(*idx)+1] += alpha2*(*v);
618:       y[4*(*idx)+2] += alpha3*(*v);
619:       y[4*(*idx)+3] += alpha4*(*v);
620:       idx++; v++;
621:     }
622:   }
623:   PetscLogFlops(8*a->nz);
624:   VecRestoreArray(xx,&x);
625:   VecRestoreArray(zz,&y);
626:   return(0);
627: }
628: /* ------------------------------------------------------------------------------*/

632: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
633: {
634:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
635:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
636:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
638:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
639:   PetscInt       n,i,jrow,j;

642:   VecGetArray(xx,&x);
643:   VecGetArray(yy,&y);
644:   idx  = a->j;
645:   v    = a->a;
646:   ii   = a->i;

648:   for (i=0; i<m; i++) {
649:     jrow = ii[i];
650:     n    = ii[i+1] - jrow;
651:     sum1  = 0.0;
652:     sum2  = 0.0;
653:     sum3  = 0.0;
654:     sum4  = 0.0;
655:     sum5  = 0.0;
656:     nonzerorow += (n>0);
657:     for (j=0; j<n; j++) {
658:       sum1 += v[jrow]*x[5*idx[jrow]];
659:       sum2 += v[jrow]*x[5*idx[jrow]+1];
660:       sum3 += v[jrow]*x[5*idx[jrow]+2];
661:       sum4 += v[jrow]*x[5*idx[jrow]+3];
662:       sum5 += v[jrow]*x[5*idx[jrow]+4];
663:       jrow++;
664:      }
665:     y[5*i]   = sum1;
666:     y[5*i+1] = sum2;
667:     y[5*i+2] = sum3;
668:     y[5*i+3] = sum4;
669:     y[5*i+4] = sum5;
670:   }

672:   PetscLogFlops(10*a->nz - 5*nonzerorow);
673:   VecRestoreArray(xx,&x);
674:   VecRestoreArray(yy,&y);
675:   return(0);
676: }

680: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
681: {
682:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
683:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
684:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
686:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

689:   VecSet(yy,zero);
690:   VecGetArray(xx,&x);
691:   VecGetArray(yy,&y);
692: 
693:   for (i=0; i<m; i++) {
694:     idx    = a->j + a->i[i] ;
695:     v      = a->a + a->i[i] ;
696:     n      = a->i[i+1] - a->i[i];
697:     alpha1 = x[5*i];
698:     alpha2 = x[5*i+1];
699:     alpha3 = x[5*i+2];
700:     alpha4 = x[5*i+3];
701:     alpha5 = x[5*i+4];
702:     while (n-->0) {
703:       y[5*(*idx)]   += alpha1*(*v);
704:       y[5*(*idx)+1] += alpha2*(*v);
705:       y[5*(*idx)+2] += alpha3*(*v);
706:       y[5*(*idx)+3] += alpha4*(*v);
707:       y[5*(*idx)+4] += alpha5*(*v);
708:       idx++; v++;
709:     }
710:   }
711:   PetscLogFlops(10*a->nz);
712:   VecRestoreArray(xx,&x);
713:   VecRestoreArray(yy,&y);
714:   return(0);
715: }

719: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
720: {
721:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
722:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
723:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
725:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
726:   PetscInt       n,i,jrow,j;

729:   if (yy != zz) {VecCopy(yy,zz);}
730:   VecGetArray(xx,&x);
731:   VecGetArray(zz,&y);
732:   idx  = a->j;
733:   v    = a->a;
734:   ii   = a->i;

736:   for (i=0; i<m; i++) {
737:     jrow = ii[i];
738:     n    = ii[i+1] - jrow;
739:     sum1  = 0.0;
740:     sum2  = 0.0;
741:     sum3  = 0.0;
742:     sum4  = 0.0;
743:     sum5  = 0.0;
744:     for (j=0; j<n; j++) {
745:       sum1 += v[jrow]*x[5*idx[jrow]];
746:       sum2 += v[jrow]*x[5*idx[jrow]+1];
747:       sum3 += v[jrow]*x[5*idx[jrow]+2];
748:       sum4 += v[jrow]*x[5*idx[jrow]+3];
749:       sum5 += v[jrow]*x[5*idx[jrow]+4];
750:       jrow++;
751:      }
752:     y[5*i]   += sum1;
753:     y[5*i+1] += sum2;
754:     y[5*i+2] += sum3;
755:     y[5*i+3] += sum4;
756:     y[5*i+4] += sum5;
757:   }

759:   PetscLogFlops(10*a->nz);
760:   VecRestoreArray(xx,&x);
761:   VecRestoreArray(zz,&y);
762:   return(0);
763: }

767: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
768: {
769:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
770:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
771:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
773:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

776:   if (yy != zz) {VecCopy(yy,zz);}
777:   VecGetArray(xx,&x);
778:   VecGetArray(zz,&y);
779: 
780:   for (i=0; i<m; i++) {
781:     idx    = a->j + a->i[i] ;
782:     v      = a->a + a->i[i] ;
783:     n      = a->i[i+1] - a->i[i];
784:     alpha1 = x[5*i];
785:     alpha2 = x[5*i+1];
786:     alpha3 = x[5*i+2];
787:     alpha4 = x[5*i+3];
788:     alpha5 = x[5*i+4];
789:     while (n-->0) {
790:       y[5*(*idx)]   += alpha1*(*v);
791:       y[5*(*idx)+1] += alpha2*(*v);
792:       y[5*(*idx)+2] += alpha3*(*v);
793:       y[5*(*idx)+3] += alpha4*(*v);
794:       y[5*(*idx)+4] += alpha5*(*v);
795:       idx++; v++;
796:     }
797:   }
798:   PetscLogFlops(10*a->nz);
799:   VecRestoreArray(xx,&x);
800:   VecRestoreArray(zz,&y);
801:   return(0);
802: }

804: /* ------------------------------------------------------------------------------*/
807: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
808: {
809:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
810:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
811:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
813:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
814:   PetscInt       n,i,jrow,j;

817:   VecGetArray(xx,&x);
818:   VecGetArray(yy,&y);
819:   idx  = a->j;
820:   v    = a->a;
821:   ii   = a->i;

823:   for (i=0; i<m; i++) {
824:     jrow = ii[i];
825:     n    = ii[i+1] - jrow;
826:     sum1  = 0.0;
827:     sum2  = 0.0;
828:     sum3  = 0.0;
829:     sum4  = 0.0;
830:     sum5  = 0.0;
831:     sum6  = 0.0;
832:     nonzerorow += (n>0);
833:     for (j=0; j<n; j++) {
834:       sum1 += v[jrow]*x[6*idx[jrow]];
835:       sum2 += v[jrow]*x[6*idx[jrow]+1];
836:       sum3 += v[jrow]*x[6*idx[jrow]+2];
837:       sum4 += v[jrow]*x[6*idx[jrow]+3];
838:       sum5 += v[jrow]*x[6*idx[jrow]+4];
839:       sum6 += v[jrow]*x[6*idx[jrow]+5];
840:       jrow++;
841:      }
842:     y[6*i]   = sum1;
843:     y[6*i+1] = sum2;
844:     y[6*i+2] = sum3;
845:     y[6*i+3] = sum4;
846:     y[6*i+4] = sum5;
847:     y[6*i+5] = sum6;
848:   }

850:   PetscLogFlops(12*a->nz - 6*nonzerorow);
851:   VecRestoreArray(xx,&x);
852:   VecRestoreArray(yy,&y);
853:   return(0);
854: }

858: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
859: {
860:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
861:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
862:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
864:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

867:   VecSet(yy,zero);
868:   VecGetArray(xx,&x);
869:   VecGetArray(yy,&y);

871:   for (i=0; i<m; i++) {
872:     idx    = a->j + a->i[i] ;
873:     v      = a->a + a->i[i] ;
874:     n      = a->i[i+1] - a->i[i];
875:     alpha1 = x[6*i];
876:     alpha2 = x[6*i+1];
877:     alpha3 = x[6*i+2];
878:     alpha4 = x[6*i+3];
879:     alpha5 = x[6*i+4];
880:     alpha6 = x[6*i+5];
881:     while (n-->0) {
882:       y[6*(*idx)]   += alpha1*(*v);
883:       y[6*(*idx)+1] += alpha2*(*v);
884:       y[6*(*idx)+2] += alpha3*(*v);
885:       y[6*(*idx)+3] += alpha4*(*v);
886:       y[6*(*idx)+4] += alpha5*(*v);
887:       y[6*(*idx)+5] += alpha6*(*v);
888:       idx++; v++;
889:     }
890:   }
891:   PetscLogFlops(12*a->nz);
892:   VecRestoreArray(xx,&x);
893:   VecRestoreArray(yy,&y);
894:   return(0);
895: }

899: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
900: {
901:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
902:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
903:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
905:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
906:   PetscInt       n,i,jrow,j;

909:   if (yy != zz) {VecCopy(yy,zz);}
910:   VecGetArray(xx,&x);
911:   VecGetArray(zz,&y);
912:   idx  = a->j;
913:   v    = a->a;
914:   ii   = a->i;

916:   for (i=0; i<m; i++) {
917:     jrow = ii[i];
918:     n    = ii[i+1] - jrow;
919:     sum1  = 0.0;
920:     sum2  = 0.0;
921:     sum3  = 0.0;
922:     sum4  = 0.0;
923:     sum5  = 0.0;
924:     sum6  = 0.0;
925:     for (j=0; j<n; j++) {
926:       sum1 += v[jrow]*x[6*idx[jrow]];
927:       sum2 += v[jrow]*x[6*idx[jrow]+1];
928:       sum3 += v[jrow]*x[6*idx[jrow]+2];
929:       sum4 += v[jrow]*x[6*idx[jrow]+3];
930:       sum5 += v[jrow]*x[6*idx[jrow]+4];
931:       sum6 += v[jrow]*x[6*idx[jrow]+5];
932:       jrow++;
933:      }
934:     y[6*i]   += sum1;
935:     y[6*i+1] += sum2;
936:     y[6*i+2] += sum3;
937:     y[6*i+3] += sum4;
938:     y[6*i+4] += sum5;
939:     y[6*i+5] += sum6;
940:   }

942:   PetscLogFlops(12*a->nz);
943:   VecRestoreArray(xx,&x);
944:   VecRestoreArray(zz,&y);
945:   return(0);
946: }

950: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
951: {
952:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
953:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
954:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
956:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

959:   if (yy != zz) {VecCopy(yy,zz);}
960:   VecGetArray(xx,&x);
961:   VecGetArray(zz,&y);
962: 
963:   for (i=0; i<m; i++) {
964:     idx    = a->j + a->i[i] ;
965:     v      = a->a + a->i[i] ;
966:     n      = a->i[i+1] - a->i[i];
967:     alpha1 = x[6*i];
968:     alpha2 = x[6*i+1];
969:     alpha3 = x[6*i+2];
970:     alpha4 = x[6*i+3];
971:     alpha5 = x[6*i+4];
972:     alpha6 = x[6*i+5];
973:     while (n-->0) {
974:       y[6*(*idx)]   += alpha1*(*v);
975:       y[6*(*idx)+1] += alpha2*(*v);
976:       y[6*(*idx)+2] += alpha3*(*v);
977:       y[6*(*idx)+3] += alpha4*(*v);
978:       y[6*(*idx)+4] += alpha5*(*v);
979:       y[6*(*idx)+5] += alpha6*(*v);
980:       idx++; v++;
981:     }
982:   }
983:   PetscLogFlops(12*a->nz);
984:   VecRestoreArray(xx,&x);
985:   VecRestoreArray(zz,&y);
986:   return(0);
987: }

989: /* ------------------------------------------------------------------------------*/
992: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
993: {
994:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
995:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
996:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
998:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
999:   PetscInt       n,i,jrow,j;

1002:   VecGetArray(xx,&x);
1003:   VecGetArray(yy,&y);
1004:   idx  = a->j;
1005:   v    = a->a;
1006:   ii   = a->i;

1008:   for (i=0; i<m; i++) {
1009:     jrow = ii[i];
1010:     n    = ii[i+1] - jrow;
1011:     sum1  = 0.0;
1012:     sum2  = 0.0;
1013:     sum3  = 0.0;
1014:     sum4  = 0.0;
1015:     sum5  = 0.0;
1016:     sum6  = 0.0;
1017:     sum7  = 0.0;
1018:     nonzerorow += (n>0);
1019:     for (j=0; j<n; j++) {
1020:       sum1 += v[jrow]*x[7*idx[jrow]];
1021:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1022:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1023:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1024:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1025:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1026:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1027:       jrow++;
1028:      }
1029:     y[7*i]   = sum1;
1030:     y[7*i+1] = sum2;
1031:     y[7*i+2] = sum3;
1032:     y[7*i+3] = sum4;
1033:     y[7*i+4] = sum5;
1034:     y[7*i+5] = sum6;
1035:     y[7*i+6] = sum7;
1036:   }

1038:   PetscLogFlops(14*a->nz - 7*nonzerorow);
1039:   VecRestoreArray(xx,&x);
1040:   VecRestoreArray(yy,&y);
1041:   return(0);
1042: }

1046: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1047: {
1048:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1049:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1050:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1052:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1055:   VecSet(yy,zero);
1056:   VecGetArray(xx,&x);
1057:   VecGetArray(yy,&y);

1059:   for (i=0; i<m; i++) {
1060:     idx    = a->j + a->i[i] ;
1061:     v      = a->a + a->i[i] ;
1062:     n      = a->i[i+1] - a->i[i];
1063:     alpha1 = x[7*i];
1064:     alpha2 = x[7*i+1];
1065:     alpha3 = x[7*i+2];
1066:     alpha4 = x[7*i+3];
1067:     alpha5 = x[7*i+4];
1068:     alpha6 = x[7*i+5];
1069:     alpha7 = x[7*i+6];
1070:     while (n-->0) {
1071:       y[7*(*idx)]   += alpha1*(*v);
1072:       y[7*(*idx)+1] += alpha2*(*v);
1073:       y[7*(*idx)+2] += alpha3*(*v);
1074:       y[7*(*idx)+3] += alpha4*(*v);
1075:       y[7*(*idx)+4] += alpha5*(*v);
1076:       y[7*(*idx)+5] += alpha6*(*v);
1077:       y[7*(*idx)+6] += alpha7*(*v);
1078:       idx++; v++;
1079:     }
1080:   }
1081:   PetscLogFlops(14*a->nz);
1082:   VecRestoreArray(xx,&x);
1083:   VecRestoreArray(yy,&y);
1084:   return(0);
1085: }

1089: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1090: {
1091:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1092:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1093:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1095:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1096:   PetscInt       n,i,jrow,j;

1099:   if (yy != zz) {VecCopy(yy,zz);}
1100:   VecGetArray(xx,&x);
1101:   VecGetArray(zz,&y);
1102:   idx  = a->j;
1103:   v    = a->a;
1104:   ii   = a->i;

1106:   for (i=0; i<m; i++) {
1107:     jrow = ii[i];
1108:     n    = ii[i+1] - jrow;
1109:     sum1  = 0.0;
1110:     sum2  = 0.0;
1111:     sum3  = 0.0;
1112:     sum4  = 0.0;
1113:     sum5  = 0.0;
1114:     sum6  = 0.0;
1115:     sum7  = 0.0;
1116:     for (j=0; j<n; j++) {
1117:       sum1 += v[jrow]*x[7*idx[jrow]];
1118:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1119:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1120:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1121:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1122:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1123:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1124:       jrow++;
1125:      }
1126:     y[7*i]   += sum1;
1127:     y[7*i+1] += sum2;
1128:     y[7*i+2] += sum3;
1129:     y[7*i+3] += sum4;
1130:     y[7*i+4] += sum5;
1131:     y[7*i+5] += sum6;
1132:     y[7*i+6] += sum7;
1133:   }

1135:   PetscLogFlops(14*a->nz);
1136:   VecRestoreArray(xx,&x);
1137:   VecRestoreArray(zz,&y);
1138:   return(0);
1139: }

1143: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1144: {
1145:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1146:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1147:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1149:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1152:   if (yy != zz) {VecCopy(yy,zz);}
1153:   VecGetArray(xx,&x);
1154:   VecGetArray(zz,&y);
1155:   for (i=0; i<m; i++) {
1156:     idx    = a->j + a->i[i] ;
1157:     v      = a->a + a->i[i] ;
1158:     n      = a->i[i+1] - a->i[i];
1159:     alpha1 = x[7*i];
1160:     alpha2 = x[7*i+1];
1161:     alpha3 = x[7*i+2];
1162:     alpha4 = x[7*i+3];
1163:     alpha5 = x[7*i+4];
1164:     alpha6 = x[7*i+5];
1165:     alpha7 = x[7*i+6];
1166:     while (n-->0) {
1167:       y[7*(*idx)]   += alpha1*(*v);
1168:       y[7*(*idx)+1] += alpha2*(*v);
1169:       y[7*(*idx)+2] += alpha3*(*v);
1170:       y[7*(*idx)+3] += alpha4*(*v);
1171:       y[7*(*idx)+4] += alpha5*(*v);
1172:       y[7*(*idx)+5] += alpha6*(*v);
1173:       y[7*(*idx)+6] += alpha7*(*v);
1174:       idx++; v++;
1175:     }
1176:   }
1177:   PetscLogFlops(14*a->nz);
1178:   VecRestoreArray(xx,&x);
1179:   VecRestoreArray(zz,&y);
1180:   return(0);
1181: }

1185: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1186: {
1187:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1188:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1189:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1191:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1192:   PetscInt       n,i,jrow,j;

1195:   VecGetArray(xx,&x);
1196:   VecGetArray(yy,&y);
1197:   idx  = a->j;
1198:   v    = a->a;
1199:   ii   = a->i;

1201:   for (i=0; i<m; i++) {
1202:     jrow = ii[i];
1203:     n    = ii[i+1] - jrow;
1204:     sum1  = 0.0;
1205:     sum2  = 0.0;
1206:     sum3  = 0.0;
1207:     sum4  = 0.0;
1208:     sum5  = 0.0;
1209:     sum6  = 0.0;
1210:     sum7  = 0.0;
1211:     sum8  = 0.0;
1212:     nonzerorow += (n>0);
1213:     for (j=0; j<n; j++) {
1214:       sum1 += v[jrow]*x[8*idx[jrow]];
1215:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1216:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1217:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1218:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1219:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1220:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1221:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1222:       jrow++;
1223:      }
1224:     y[8*i]   = sum1;
1225:     y[8*i+1] = sum2;
1226:     y[8*i+2] = sum3;
1227:     y[8*i+3] = sum4;
1228:     y[8*i+4] = sum5;
1229:     y[8*i+5] = sum6;
1230:     y[8*i+6] = sum7;
1231:     y[8*i+7] = sum8;
1232:   }

1234:   PetscLogFlops(16*a->nz - 8*nonzerorow);
1235:   VecRestoreArray(xx,&x);
1236:   VecRestoreArray(yy,&y);
1237:   return(0);
1238: }

1242: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1243: {
1244:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1245:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1246:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1248:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1251:   VecSet(yy,zero);
1252:   VecGetArray(xx,&x);
1253:   VecGetArray(yy,&y);

1255:   for (i=0; i<m; i++) {
1256:     idx    = a->j + a->i[i] ;
1257:     v      = a->a + a->i[i] ;
1258:     n      = a->i[i+1] - a->i[i];
1259:     alpha1 = x[8*i];
1260:     alpha2 = x[8*i+1];
1261:     alpha3 = x[8*i+2];
1262:     alpha4 = x[8*i+3];
1263:     alpha5 = x[8*i+4];
1264:     alpha6 = x[8*i+5];
1265:     alpha7 = x[8*i+6];
1266:     alpha8 = x[8*i+7];
1267:     while (n-->0) {
1268:       y[8*(*idx)]   += alpha1*(*v);
1269:       y[8*(*idx)+1] += alpha2*(*v);
1270:       y[8*(*idx)+2] += alpha3*(*v);
1271:       y[8*(*idx)+3] += alpha4*(*v);
1272:       y[8*(*idx)+4] += alpha5*(*v);
1273:       y[8*(*idx)+5] += alpha6*(*v);
1274:       y[8*(*idx)+6] += alpha7*(*v);
1275:       y[8*(*idx)+7] += alpha8*(*v);
1276:       idx++; v++;
1277:     }
1278:   }
1279:   PetscLogFlops(16*a->nz);
1280:   VecRestoreArray(xx,&x);
1281:   VecRestoreArray(yy,&y);
1282:   return(0);
1283: }

1287: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1288: {
1289:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1290:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1291:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1293:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1294:   PetscInt       n,i,jrow,j;

1297:   if (yy != zz) {VecCopy(yy,zz);}
1298:   VecGetArray(xx,&x);
1299:   VecGetArray(zz,&y);
1300:   idx  = a->j;
1301:   v    = a->a;
1302:   ii   = a->i;

1304:   for (i=0; i<m; i++) {
1305:     jrow = ii[i];
1306:     n    = ii[i+1] - jrow;
1307:     sum1  = 0.0;
1308:     sum2  = 0.0;
1309:     sum3  = 0.0;
1310:     sum4  = 0.0;
1311:     sum5  = 0.0;
1312:     sum6  = 0.0;
1313:     sum7  = 0.0;
1314:     sum8  = 0.0;
1315:     for (j=0; j<n; j++) {
1316:       sum1 += v[jrow]*x[8*idx[jrow]];
1317:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1318:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1319:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1320:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1321:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1322:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1323:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1324:       jrow++;
1325:      }
1326:     y[8*i]   += sum1;
1327:     y[8*i+1] += sum2;
1328:     y[8*i+2] += sum3;
1329:     y[8*i+3] += sum4;
1330:     y[8*i+4] += sum5;
1331:     y[8*i+5] += sum6;
1332:     y[8*i+6] += sum7;
1333:     y[8*i+7] += sum8;
1334:   }

1336:   PetscLogFlops(16*a->nz);
1337:   VecRestoreArray(xx,&x);
1338:   VecRestoreArray(zz,&y);
1339:   return(0);
1340: }

1344: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1345: {
1346:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1347:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1348:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1350:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1353:   if (yy != zz) {VecCopy(yy,zz);}
1354:   VecGetArray(xx,&x);
1355:   VecGetArray(zz,&y);
1356:   for (i=0; i<m; i++) {
1357:     idx    = a->j + a->i[i] ;
1358:     v      = a->a + a->i[i] ;
1359:     n      = a->i[i+1] - a->i[i];
1360:     alpha1 = x[8*i];
1361:     alpha2 = x[8*i+1];
1362:     alpha3 = x[8*i+2];
1363:     alpha4 = x[8*i+3];
1364:     alpha5 = x[8*i+4];
1365:     alpha6 = x[8*i+5];
1366:     alpha7 = x[8*i+6];
1367:     alpha8 = x[8*i+7];
1368:     while (n-->0) {
1369:       y[8*(*idx)]   += alpha1*(*v);
1370:       y[8*(*idx)+1] += alpha2*(*v);
1371:       y[8*(*idx)+2] += alpha3*(*v);
1372:       y[8*(*idx)+3] += alpha4*(*v);
1373:       y[8*(*idx)+4] += alpha5*(*v);
1374:       y[8*(*idx)+5] += alpha6*(*v);
1375:       y[8*(*idx)+6] += alpha7*(*v);
1376:       y[8*(*idx)+7] += alpha8*(*v);
1377:       idx++; v++;
1378:     }
1379:   }
1380:   PetscLogFlops(16*a->nz);
1381:   VecRestoreArray(xx,&x);
1382:   VecRestoreArray(zz,&y);
1383:   return(0);
1384: }

1386: /* ------------------------------------------------------------------------------*/
1389: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1390: {
1391:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1392:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1393:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1395:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1396:   PetscInt       n,i,jrow,j;

1399:   VecGetArray(xx,&x);
1400:   VecGetArray(yy,&y);
1401:   idx  = a->j;
1402:   v    = a->a;
1403:   ii   = a->i;

1405:   for (i=0; i<m; i++) {
1406:     jrow = ii[i];
1407:     n    = ii[i+1] - jrow;
1408:     sum1  = 0.0;
1409:     sum2  = 0.0;
1410:     sum3  = 0.0;
1411:     sum4  = 0.0;
1412:     sum5  = 0.0;
1413:     sum6  = 0.0;
1414:     sum7  = 0.0;
1415:     sum8  = 0.0;
1416:     sum9  = 0.0;
1417:     nonzerorow += (n>0);
1418:     for (j=0; j<n; j++) {
1419:       sum1 += v[jrow]*x[9*idx[jrow]];
1420:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1421:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1422:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1423:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1424:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1425:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1426:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1427:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1428:       jrow++;
1429:      }
1430:     y[9*i]   = sum1;
1431:     y[9*i+1] = sum2;
1432:     y[9*i+2] = sum3;
1433:     y[9*i+3] = sum4;
1434:     y[9*i+4] = sum5;
1435:     y[9*i+5] = sum6;
1436:     y[9*i+6] = sum7;
1437:     y[9*i+7] = sum8;
1438:     y[9*i+8] = sum9;
1439:   }

1441:   PetscLogFlops(18*a->nz - 9*nonzerorow);
1442:   VecRestoreArray(xx,&x);
1443:   VecRestoreArray(yy,&y);
1444:   return(0);
1445: }

1447: /* ------------------------------------------------------------------------------*/

1451: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1452: {
1453:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1454:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1455:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1457:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1460:   VecSet(yy,zero);
1461:   VecGetArray(xx,&x);
1462:   VecGetArray(yy,&y);

1464:   for (i=0; i<m; i++) {
1465:     idx    = a->j + a->i[i] ;
1466:     v      = a->a + a->i[i] ;
1467:     n      = a->i[i+1] - a->i[i];
1468:     alpha1 = x[9*i];
1469:     alpha2 = x[9*i+1];
1470:     alpha3 = x[9*i+2];
1471:     alpha4 = x[9*i+3];
1472:     alpha5 = x[9*i+4];
1473:     alpha6 = x[9*i+5];
1474:     alpha7 = x[9*i+6];
1475:     alpha8 = x[9*i+7];
1476:     alpha9 = x[9*i+8];
1477:     while (n-->0) {
1478:       y[9*(*idx)]   += alpha1*(*v);
1479:       y[9*(*idx)+1] += alpha2*(*v);
1480:       y[9*(*idx)+2] += alpha3*(*v);
1481:       y[9*(*idx)+3] += alpha4*(*v);
1482:       y[9*(*idx)+4] += alpha5*(*v);
1483:       y[9*(*idx)+5] += alpha6*(*v);
1484:       y[9*(*idx)+6] += alpha7*(*v);
1485:       y[9*(*idx)+7] += alpha8*(*v);
1486:       y[9*(*idx)+8] += alpha9*(*v);
1487:       idx++; v++;
1488:     }
1489:   }
1490:   PetscLogFlops(18*a->nz);
1491:   VecRestoreArray(xx,&x);
1492:   VecRestoreArray(yy,&y);
1493:   return(0);
1494: }

1498: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1499: {
1500:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1501:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1502:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1504:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1505:   PetscInt       n,i,jrow,j;

1508:   if (yy != zz) {VecCopy(yy,zz);}
1509:   VecGetArray(xx,&x);
1510:   VecGetArray(zz,&y);
1511:   idx  = a->j;
1512:   v    = a->a;
1513:   ii   = a->i;

1515:   for (i=0; i<m; i++) {
1516:     jrow = ii[i];
1517:     n    = ii[i+1] - jrow;
1518:     sum1  = 0.0;
1519:     sum2  = 0.0;
1520:     sum3  = 0.0;
1521:     sum4  = 0.0;
1522:     sum5  = 0.0;
1523:     sum6  = 0.0;
1524:     sum7  = 0.0;
1525:     sum8  = 0.0;
1526:     sum9  = 0.0;
1527:     for (j=0; j<n; j++) {
1528:       sum1 += v[jrow]*x[9*idx[jrow]];
1529:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1530:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1531:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1532:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1533:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1534:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1535:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1536:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1537:       jrow++;
1538:      }
1539:     y[9*i]   += sum1;
1540:     y[9*i+1] += sum2;
1541:     y[9*i+2] += sum3;
1542:     y[9*i+3] += sum4;
1543:     y[9*i+4] += sum5;
1544:     y[9*i+5] += sum6;
1545:     y[9*i+6] += sum7;
1546:     y[9*i+7] += sum8;
1547:     y[9*i+8] += sum9;
1548:   }

1550:   PetscLogFlops(18*a->nz);
1551:   VecRestoreArray(xx,&x);
1552:   VecRestoreArray(zz,&y);
1553:   return(0);
1554: }

1558: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1559: {
1560:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1561:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1562:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1564:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1567:   if (yy != zz) {VecCopy(yy,zz);}
1568:   VecGetArray(xx,&x);
1569:   VecGetArray(zz,&y);
1570:   for (i=0; i<m; i++) {
1571:     idx    = a->j + a->i[i] ;
1572:     v      = a->a + a->i[i] ;
1573:     n      = a->i[i+1] - a->i[i];
1574:     alpha1 = x[9*i];
1575:     alpha2 = x[9*i+1];
1576:     alpha3 = x[9*i+2];
1577:     alpha4 = x[9*i+3];
1578:     alpha5 = x[9*i+4];
1579:     alpha6 = x[9*i+5];
1580:     alpha7 = x[9*i+6];
1581:     alpha8 = x[9*i+7];
1582:     alpha9 = x[9*i+8];
1583:     while (n-->0) {
1584:       y[9*(*idx)]   += alpha1*(*v);
1585:       y[9*(*idx)+1] += alpha2*(*v);
1586:       y[9*(*idx)+2] += alpha3*(*v);
1587:       y[9*(*idx)+3] += alpha4*(*v);
1588:       y[9*(*idx)+4] += alpha5*(*v);
1589:       y[9*(*idx)+5] += alpha6*(*v);
1590:       y[9*(*idx)+6] += alpha7*(*v);
1591:       y[9*(*idx)+7] += alpha8*(*v);
1592:       y[9*(*idx)+8] += alpha9*(*v);
1593:       idx++; v++;
1594:     }
1595:   }
1596:   PetscLogFlops(18*a->nz);
1597:   VecRestoreArray(xx,&x);
1598:   VecRestoreArray(zz,&y);
1599:   return(0);
1600: }
1601: /*--------------------------------------------------------------------------------------------*/
1604: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1605: {
1606:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1607:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1608:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1610:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1611:   PetscInt       n,i,jrow,j;

1614:   VecGetArray(xx,&x);
1615:   VecGetArray(yy,&y);
1616:   idx  = a->j;
1617:   v    = a->a;
1618:   ii   = a->i;

1620:   for (i=0; i<m; i++) {
1621:     jrow = ii[i];
1622:     n    = ii[i+1] - jrow;
1623:     sum1  = 0.0;
1624:     sum2  = 0.0;
1625:     sum3  = 0.0;
1626:     sum4  = 0.0;
1627:     sum5  = 0.0;
1628:     sum6  = 0.0;
1629:     sum7  = 0.0;
1630:     sum8  = 0.0;
1631:     sum9  = 0.0;
1632:     sum10 = 0.0;
1633:     nonzerorow += (n>0);
1634:     for (j=0; j<n; j++) {
1635:       sum1  += v[jrow]*x[10*idx[jrow]];
1636:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1637:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1638:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1639:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1640:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1641:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1642:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1643:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1644:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1645:       jrow++;
1646:      }
1647:     y[10*i]   = sum1;
1648:     y[10*i+1] = sum2;
1649:     y[10*i+2] = sum3;
1650:     y[10*i+3] = sum4;
1651:     y[10*i+4] = sum5;
1652:     y[10*i+5] = sum6;
1653:     y[10*i+6] = sum7;
1654:     y[10*i+7] = sum8;
1655:     y[10*i+8] = sum9;
1656:     y[10*i+9] = sum10;
1657:   }

1659:   PetscLogFlops(20*a->nz - 10*nonzerorow);
1660:   VecRestoreArray(xx,&x);
1661:   VecRestoreArray(yy,&y);
1662:   return(0);
1663: }

1667: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1668: {
1669:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1670:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1671:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1673:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1674:   PetscInt       n,i,jrow,j;

1677:   if (yy != zz) {VecCopy(yy,zz);}
1678:   VecGetArray(xx,&x);
1679:   VecGetArray(zz,&y);
1680:   idx  = a->j;
1681:   v    = a->a;
1682:   ii   = a->i;

1684:   for (i=0; i<m; i++) {
1685:     jrow = ii[i];
1686:     n    = ii[i+1] - jrow;
1687:     sum1  = 0.0;
1688:     sum2  = 0.0;
1689:     sum3  = 0.0;
1690:     sum4  = 0.0;
1691:     sum5  = 0.0;
1692:     sum6  = 0.0;
1693:     sum7  = 0.0;
1694:     sum8  = 0.0;
1695:     sum9  = 0.0;
1696:     sum10 = 0.0;
1697:     for (j=0; j<n; j++) {
1698:       sum1  += v[jrow]*x[10*idx[jrow]];
1699:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1700:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1701:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1702:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1703:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1704:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1705:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1706:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1707:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1708:       jrow++;
1709:      }
1710:     y[10*i]   += sum1;
1711:     y[10*i+1] += sum2;
1712:     y[10*i+2] += sum3;
1713:     y[10*i+3] += sum4;
1714:     y[10*i+4] += sum5;
1715:     y[10*i+5] += sum6;
1716:     y[10*i+6] += sum7;
1717:     y[10*i+7] += sum8;
1718:     y[10*i+8] += sum9;
1719:     y[10*i+9] += sum10;
1720:   }

1722:   PetscLogFlops(20*a->nz);
1723:   VecRestoreArray(xx,&x);
1724:   VecRestoreArray(yy,&y);
1725:   return(0);
1726: }

1730: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1731: {
1732:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1733:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1734:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1736:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1739:   VecSet(yy,zero);
1740:   VecGetArray(xx,&x);
1741:   VecGetArray(yy,&y);

1743:   for (i=0; i<m; i++) {
1744:     idx    = a->j + a->i[i] ;
1745:     v      = a->a + a->i[i] ;
1746:     n      = a->i[i+1] - a->i[i];
1747:     alpha1 = x[10*i];
1748:     alpha2 = x[10*i+1];
1749:     alpha3 = x[10*i+2];
1750:     alpha4 = x[10*i+3];
1751:     alpha5 = x[10*i+4];
1752:     alpha6 = x[10*i+5];
1753:     alpha7 = x[10*i+6];
1754:     alpha8 = x[10*i+7];
1755:     alpha9 = x[10*i+8];
1756:     alpha10 = x[10*i+9];
1757:     while (n-->0) {
1758:       y[10*(*idx)]   += alpha1*(*v);
1759:       y[10*(*idx)+1] += alpha2*(*v);
1760:       y[10*(*idx)+2] += alpha3*(*v);
1761:       y[10*(*idx)+3] += alpha4*(*v);
1762:       y[10*(*idx)+4] += alpha5*(*v);
1763:       y[10*(*idx)+5] += alpha6*(*v);
1764:       y[10*(*idx)+6] += alpha7*(*v);
1765:       y[10*(*idx)+7] += alpha8*(*v);
1766:       y[10*(*idx)+8] += alpha9*(*v);
1767:       y[10*(*idx)+9] += alpha10*(*v);
1768:       idx++; v++;
1769:     }
1770:   }
1771:   PetscLogFlops(20*a->nz);
1772:   VecRestoreArray(xx,&x);
1773:   VecRestoreArray(yy,&y);
1774:   return(0);
1775: }

1779: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1780: {
1781:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1782:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1783:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1785:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1788:   if (yy != zz) {VecCopy(yy,zz);}
1789:   VecGetArray(xx,&x);
1790:   VecGetArray(zz,&y);
1791:   for (i=0; i<m; i++) {
1792:     idx    = a->j + a->i[i] ;
1793:     v      = a->a + a->i[i] ;
1794:     n      = a->i[i+1] - a->i[i];
1795:     alpha1 = x[10*i];
1796:     alpha2 = x[10*i+1];
1797:     alpha3 = x[10*i+2];
1798:     alpha4 = x[10*i+3];
1799:     alpha5 = x[10*i+4];
1800:     alpha6 = x[10*i+5];
1801:     alpha7 = x[10*i+6];
1802:     alpha8 = x[10*i+7];
1803:     alpha9 = x[10*i+8];
1804:     alpha10 = x[10*i+9];
1805:     while (n-->0) {
1806:       y[10*(*idx)]   += alpha1*(*v);
1807:       y[10*(*idx)+1] += alpha2*(*v);
1808:       y[10*(*idx)+2] += alpha3*(*v);
1809:       y[10*(*idx)+3] += alpha4*(*v);
1810:       y[10*(*idx)+4] += alpha5*(*v);
1811:       y[10*(*idx)+5] += alpha6*(*v);
1812:       y[10*(*idx)+6] += alpha7*(*v);
1813:       y[10*(*idx)+7] += alpha8*(*v);
1814:       y[10*(*idx)+8] += alpha9*(*v);
1815:       y[10*(*idx)+9] += alpha10*(*v);
1816:       idx++; v++;
1817:     }
1818:   }
1819:   PetscLogFlops(20*a->nz);
1820:   VecRestoreArray(xx,&x);
1821:   VecRestoreArray(zz,&y);
1822:   return(0);
1823: }


1826: /*--------------------------------------------------------------------------------------------*/
1829: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1830: {
1831:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1832:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1833:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1834:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1836:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1837:   PetscInt       n,i,jrow,j;

1840:   VecGetArray(xx,&x);
1841:   VecGetArray(yy,&y);
1842:   idx  = a->j;
1843:   v    = a->a;
1844:   ii   = a->i;

1846:   for (i=0; i<m; i++) {
1847:     jrow = ii[i];
1848:     n    = ii[i+1] - jrow;
1849:     sum1  = 0.0;
1850:     sum2  = 0.0;
1851:     sum3  = 0.0;
1852:     sum4  = 0.0;
1853:     sum5  = 0.0;
1854:     sum6  = 0.0;
1855:     sum7  = 0.0;
1856:     sum8  = 0.0;
1857:     sum9  = 0.0;
1858:     sum10 = 0.0;
1859:     sum11 = 0.0;
1860:     sum12 = 0.0;
1861:     sum13 = 0.0;
1862:     sum14 = 0.0;
1863:     sum15 = 0.0;
1864:     sum16 = 0.0;
1865:     nonzerorow += (n>0);
1866:     for (j=0; j<n; j++) {
1867:       sum1  += v[jrow]*x[16*idx[jrow]];
1868:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1869:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1870:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1871:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1872:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1873:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1874:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1875:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1876:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1877:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1878:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1879:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1880:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1881:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1882:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1883:       jrow++;
1884:      }
1885:     y[16*i]    = sum1;
1886:     y[16*i+1]  = sum2;
1887:     y[16*i+2]  = sum3;
1888:     y[16*i+3]  = sum4;
1889:     y[16*i+4]  = sum5;
1890:     y[16*i+5]  = sum6;
1891:     y[16*i+6]  = sum7;
1892:     y[16*i+7]  = sum8;
1893:     y[16*i+8]  = sum9;
1894:     y[16*i+9]  = sum10;
1895:     y[16*i+10] = sum11;
1896:     y[16*i+11] = sum12;
1897:     y[16*i+12] = sum13;
1898:     y[16*i+13] = sum14;
1899:     y[16*i+14] = sum15;
1900:     y[16*i+15] = sum16;
1901:   }

1903:   PetscLogFlops(32*a->nz - 16*nonzerorow);
1904:   VecRestoreArray(xx,&x);
1905:   VecRestoreArray(yy,&y);
1906:   return(0);
1907: }

1911: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1912: {
1913:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1914:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1915:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1916:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1918:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1921:   VecSet(yy,zero);
1922:   VecGetArray(xx,&x);
1923:   VecGetArray(yy,&y);

1925:   for (i=0; i<m; i++) {
1926:     idx    = a->j + a->i[i] ;
1927:     v      = a->a + a->i[i] ;
1928:     n      = a->i[i+1] - a->i[i];
1929:     alpha1  = x[16*i];
1930:     alpha2  = x[16*i+1];
1931:     alpha3  = x[16*i+2];
1932:     alpha4  = x[16*i+3];
1933:     alpha5  = x[16*i+4];
1934:     alpha6  = x[16*i+5];
1935:     alpha7  = x[16*i+6];
1936:     alpha8  = x[16*i+7];
1937:     alpha9  = x[16*i+8];
1938:     alpha10 = x[16*i+9];
1939:     alpha11 = x[16*i+10];
1940:     alpha12 = x[16*i+11];
1941:     alpha13 = x[16*i+12];
1942:     alpha14 = x[16*i+13];
1943:     alpha15 = x[16*i+14];
1944:     alpha16 = x[16*i+15];
1945:     while (n-->0) {
1946:       y[16*(*idx)]    += alpha1*(*v);
1947:       y[16*(*idx)+1]  += alpha2*(*v);
1948:       y[16*(*idx)+2]  += alpha3*(*v);
1949:       y[16*(*idx)+3]  += alpha4*(*v);
1950:       y[16*(*idx)+4]  += alpha5*(*v);
1951:       y[16*(*idx)+5]  += alpha6*(*v);
1952:       y[16*(*idx)+6]  += alpha7*(*v);
1953:       y[16*(*idx)+7]  += alpha8*(*v);
1954:       y[16*(*idx)+8]  += alpha9*(*v);
1955:       y[16*(*idx)+9]  += alpha10*(*v);
1956:       y[16*(*idx)+10] += alpha11*(*v);
1957:       y[16*(*idx)+11] += alpha12*(*v);
1958:       y[16*(*idx)+12] += alpha13*(*v);
1959:       y[16*(*idx)+13] += alpha14*(*v);
1960:       y[16*(*idx)+14] += alpha15*(*v);
1961:       y[16*(*idx)+15] += alpha16*(*v);
1962:       idx++; v++;
1963:     }
1964:   }
1965:   PetscLogFlops(32*a->nz);
1966:   VecRestoreArray(xx,&x);
1967:   VecRestoreArray(yy,&y);
1968:   return(0);
1969: }

1973: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1974: {
1975:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1976:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1977:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1978:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1980:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1981:   PetscInt       n,i,jrow,j;

1984:   if (yy != zz) {VecCopy(yy,zz);}
1985:   VecGetArray(xx,&x);
1986:   VecGetArray(zz,&y);
1987:   idx  = a->j;
1988:   v    = a->a;
1989:   ii   = a->i;

1991:   for (i=0; i<m; i++) {
1992:     jrow = ii[i];
1993:     n    = ii[i+1] - jrow;
1994:     sum1  = 0.0;
1995:     sum2  = 0.0;
1996:     sum3  = 0.0;
1997:     sum4  = 0.0;
1998:     sum5  = 0.0;
1999:     sum6  = 0.0;
2000:     sum7  = 0.0;
2001:     sum8  = 0.0;
2002:     sum9  = 0.0;
2003:     sum10 = 0.0;
2004:     sum11 = 0.0;
2005:     sum12 = 0.0;
2006:     sum13 = 0.0;
2007:     sum14 = 0.0;
2008:     sum15 = 0.0;
2009:     sum16 = 0.0;
2010:     for (j=0; j<n; j++) {
2011:       sum1  += v[jrow]*x[16*idx[jrow]];
2012:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2013:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2014:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2015:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2016:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2017:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2018:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2019:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2020:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2021:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2022:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2023:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2024:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2025:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2026:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2027:       jrow++;
2028:      }
2029:     y[16*i]    += sum1;
2030:     y[16*i+1]  += sum2;
2031:     y[16*i+2]  += sum3;
2032:     y[16*i+3]  += sum4;
2033:     y[16*i+4]  += sum5;
2034:     y[16*i+5]  += sum6;
2035:     y[16*i+6]  += sum7;
2036:     y[16*i+7]  += sum8;
2037:     y[16*i+8]  += sum9;
2038:     y[16*i+9]  += sum10;
2039:     y[16*i+10] += sum11;
2040:     y[16*i+11] += sum12;
2041:     y[16*i+12] += sum13;
2042:     y[16*i+13] += sum14;
2043:     y[16*i+14] += sum15;
2044:     y[16*i+15] += sum16;
2045:   }

2047:   PetscLogFlops(32*a->nz);
2048:   VecRestoreArray(xx,&x);
2049:   VecRestoreArray(zz,&y);
2050:   return(0);
2051: }

2055: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2056: {
2057:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2058:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2059:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2060:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2062:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

2065:   if (yy != zz) {VecCopy(yy,zz);}
2066:   VecGetArray(xx,&x);
2067:   VecGetArray(zz,&y);
2068:   for (i=0; i<m; i++) {
2069:     idx    = a->j + a->i[i] ;
2070:     v      = a->a + a->i[i] ;
2071:     n      = a->i[i+1] - a->i[i];
2072:     alpha1 = x[16*i];
2073:     alpha2 = x[16*i+1];
2074:     alpha3 = x[16*i+2];
2075:     alpha4 = x[16*i+3];
2076:     alpha5 = x[16*i+4];
2077:     alpha6 = x[16*i+5];
2078:     alpha7 = x[16*i+6];
2079:     alpha8 = x[16*i+7];
2080:     alpha9  = x[16*i+8];
2081:     alpha10 = x[16*i+9];
2082:     alpha11 = x[16*i+10];
2083:     alpha12 = x[16*i+11];
2084:     alpha13 = x[16*i+12];
2085:     alpha14 = x[16*i+13];
2086:     alpha15 = x[16*i+14];
2087:     alpha16 = x[16*i+15];
2088:     while (n-->0) {
2089:       y[16*(*idx)]   += alpha1*(*v);
2090:       y[16*(*idx)+1] += alpha2*(*v);
2091:       y[16*(*idx)+2] += alpha3*(*v);
2092:       y[16*(*idx)+3] += alpha4*(*v);
2093:       y[16*(*idx)+4] += alpha5*(*v);
2094:       y[16*(*idx)+5] += alpha6*(*v);
2095:       y[16*(*idx)+6] += alpha7*(*v);
2096:       y[16*(*idx)+7] += alpha8*(*v);
2097:       y[16*(*idx)+8]  += alpha9*(*v);
2098:       y[16*(*idx)+9]  += alpha10*(*v);
2099:       y[16*(*idx)+10] += alpha11*(*v);
2100:       y[16*(*idx)+11] += alpha12*(*v);
2101:       y[16*(*idx)+12] += alpha13*(*v);
2102:       y[16*(*idx)+13] += alpha14*(*v);
2103:       y[16*(*idx)+14] += alpha15*(*v);
2104:       y[16*(*idx)+15] += alpha16*(*v);
2105:       idx++; v++;
2106:     }
2107:   }
2108:   PetscLogFlops(32*a->nz);
2109:   VecRestoreArray(xx,&x);
2110:   VecRestoreArray(zz,&y);
2111:   return(0);
2112: }

2114: /*--------------------------------------------------------------------------------------------*/
2117: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2118: {
2119:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2120:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2121:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2122:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2124:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
2125:   PetscInt       n,i,jrow,j;

2128:   VecGetArray(xx,&x);
2129:   VecGetArray(yy,&y);
2130:   idx  = a->j;
2131:   v    = a->a;
2132:   ii   = a->i;

2134:   for (i=0; i<m; i++) {
2135:     jrow = ii[i];
2136:     n    = ii[i+1] - jrow;
2137:     sum1  = 0.0;
2138:     sum2  = 0.0;
2139:     sum3  = 0.0;
2140:     sum4  = 0.0;
2141:     sum5  = 0.0;
2142:     sum6  = 0.0;
2143:     sum7  = 0.0;
2144:     sum8  = 0.0;
2145:     sum9  = 0.0;
2146:     sum10 = 0.0;
2147:     sum11 = 0.0;
2148:     sum12 = 0.0;
2149:     sum13 = 0.0;
2150:     sum14 = 0.0;
2151:     sum15 = 0.0;
2152:     sum16 = 0.0;
2153:     sum17 = 0.0;
2154:     sum18 = 0.0;
2155:     nonzerorow += (n>0);
2156:     for (j=0; j<n; j++) {
2157:       sum1  += v[jrow]*x[18*idx[jrow]];
2158:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2159:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2160:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2161:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2162:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2163:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2164:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2165:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2166:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2167:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2168:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2169:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2170:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2171:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2172:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2173:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2174:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2175:       jrow++;
2176:      }
2177:     y[18*i]    = sum1;
2178:     y[18*i+1]  = sum2;
2179:     y[18*i+2]  = sum3;
2180:     y[18*i+3]  = sum4;
2181:     y[18*i+4]  = sum5;
2182:     y[18*i+5]  = sum6;
2183:     y[18*i+6]  = sum7;
2184:     y[18*i+7]  = sum8;
2185:     y[18*i+8]  = sum9;
2186:     y[18*i+9]  = sum10;
2187:     y[18*i+10] = sum11;
2188:     y[18*i+11] = sum12;
2189:     y[18*i+12] = sum13;
2190:     y[18*i+13] = sum14;
2191:     y[18*i+14] = sum15;
2192:     y[18*i+15] = sum16;
2193:     y[18*i+16] = sum17;
2194:     y[18*i+17] = sum18;
2195:   }

2197:   PetscLogFlops(36*a->nz - 18*nonzerorow);
2198:   VecRestoreArray(xx,&x);
2199:   VecRestoreArray(yy,&y);
2200:   return(0);
2201: }

2205: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2206: {
2207:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2208:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2209:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2210:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2212:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

2215:   VecSet(yy,zero);
2216:   VecGetArray(xx,&x);
2217:   VecGetArray(yy,&y);

2219:   for (i=0; i<m; i++) {
2220:     idx    = a->j + a->i[i] ;
2221:     v      = a->a + a->i[i] ;
2222:     n      = a->i[i+1] - a->i[i];
2223:     alpha1  = x[18*i];
2224:     alpha2  = x[18*i+1];
2225:     alpha3  = x[18*i+2];
2226:     alpha4  = x[18*i+3];
2227:     alpha5  = x[18*i+4];
2228:     alpha6  = x[18*i+5];
2229:     alpha7  = x[18*i+6];
2230:     alpha8  = x[18*i+7];
2231:     alpha9  = x[18*i+8];
2232:     alpha10 = x[18*i+9];
2233:     alpha11 = x[18*i+10];
2234:     alpha12 = x[18*i+11];
2235:     alpha13 = x[18*i+12];
2236:     alpha14 = x[18*i+13];
2237:     alpha15 = x[18*i+14];
2238:     alpha16 = x[18*i+15];
2239:     alpha17 = x[18*i+16];
2240:     alpha18 = x[18*i+17];
2241:     while (n-->0) {
2242:       y[18*(*idx)]    += alpha1*(*v);
2243:       y[18*(*idx)+1]  += alpha2*(*v);
2244:       y[18*(*idx)+2]  += alpha3*(*v);
2245:       y[18*(*idx)+3]  += alpha4*(*v);
2246:       y[18*(*idx)+4]  += alpha5*(*v);
2247:       y[18*(*idx)+5]  += alpha6*(*v);
2248:       y[18*(*idx)+6]  += alpha7*(*v);
2249:       y[18*(*idx)+7]  += alpha8*(*v);
2250:       y[18*(*idx)+8]  += alpha9*(*v);
2251:       y[18*(*idx)+9]  += alpha10*(*v);
2252:       y[18*(*idx)+10] += alpha11*(*v);
2253:       y[18*(*idx)+11] += alpha12*(*v);
2254:       y[18*(*idx)+12] += alpha13*(*v);
2255:       y[18*(*idx)+13] += alpha14*(*v);
2256:       y[18*(*idx)+14] += alpha15*(*v);
2257:       y[18*(*idx)+15] += alpha16*(*v);
2258:       y[18*(*idx)+16] += alpha17*(*v);
2259:       y[18*(*idx)+17] += alpha18*(*v);
2260:       idx++; v++;
2261:     }
2262:   }
2263:   PetscLogFlops(36*a->nz);
2264:   VecRestoreArray(xx,&x);
2265:   VecRestoreArray(yy,&y);
2266:   return(0);
2267: }

2271: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2272: {
2273:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2274:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2275:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2276:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2278:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
2279:   PetscInt       n,i,jrow,j;

2282:   if (yy != zz) {VecCopy(yy,zz);}
2283:   VecGetArray(xx,&x);
2284:   VecGetArray(zz,&y);
2285:   idx  = a->j;
2286:   v    = a->a;
2287:   ii   = a->i;

2289:   for (i=0; i<m; i++) {
2290:     jrow = ii[i];
2291:     n    = ii[i+1] - jrow;
2292:     sum1  = 0.0;
2293:     sum2  = 0.0;
2294:     sum3  = 0.0;
2295:     sum4  = 0.0;
2296:     sum5  = 0.0;
2297:     sum6  = 0.0;
2298:     sum7  = 0.0;
2299:     sum8  = 0.0;
2300:     sum9  = 0.0;
2301:     sum10 = 0.0;
2302:     sum11 = 0.0;
2303:     sum12 = 0.0;
2304:     sum13 = 0.0;
2305:     sum14 = 0.0;
2306:     sum15 = 0.0;
2307:     sum16 = 0.0;
2308:     sum17 = 0.0;
2309:     sum18 = 0.0;
2310:     for (j=0; j<n; j++) {
2311:       sum1  += v[jrow]*x[18*idx[jrow]];
2312:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2313:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2314:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2315:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2316:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2317:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2318:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2319:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2320:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2321:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2322:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2323:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2324:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2325:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2326:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2327:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2328:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2329:       jrow++;
2330:      }
2331:     y[18*i]    += sum1;
2332:     y[18*i+1]  += sum2;
2333:     y[18*i+2]  += sum3;
2334:     y[18*i+3]  += sum4;
2335:     y[18*i+4]  += sum5;
2336:     y[18*i+5]  += sum6;
2337:     y[18*i+6]  += sum7;
2338:     y[18*i+7]  += sum8;
2339:     y[18*i+8]  += sum9;
2340:     y[18*i+9]  += sum10;
2341:     y[18*i+10] += sum11;
2342:     y[18*i+11] += sum12;
2343:     y[18*i+12] += sum13;
2344:     y[18*i+13] += sum14;
2345:     y[18*i+14] += sum15;
2346:     y[18*i+15] += sum16;
2347:     y[18*i+16] += sum17;
2348:     y[18*i+17] += sum18;
2349:   }

2351:   PetscLogFlops(36*a->nz);
2352:   VecRestoreArray(xx,&x);
2353:   VecRestoreArray(zz,&y);
2354:   return(0);
2355: }

2359: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2360: {
2361:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2362:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2363:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2364:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2366:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

2369:   if (yy != zz) {VecCopy(yy,zz);}
2370:   VecGetArray(xx,&x);
2371:   VecGetArray(zz,&y);
2372:   for (i=0; i<m; i++) {
2373:     idx    = a->j + a->i[i] ;
2374:     v      = a->a + a->i[i] ;
2375:     n      = a->i[i+1] - a->i[i];
2376:     alpha1 = x[18*i];
2377:     alpha2 = x[18*i+1];
2378:     alpha3 = x[18*i+2];
2379:     alpha4 = x[18*i+3];
2380:     alpha5 = x[18*i+4];
2381:     alpha6 = x[18*i+5];
2382:     alpha7 = x[18*i+6];
2383:     alpha8 = x[18*i+7];
2384:     alpha9  = x[18*i+8];
2385:     alpha10 = x[18*i+9];
2386:     alpha11 = x[18*i+10];
2387:     alpha12 = x[18*i+11];
2388:     alpha13 = x[18*i+12];
2389:     alpha14 = x[18*i+13];
2390:     alpha15 = x[18*i+14];
2391:     alpha16 = x[18*i+15];
2392:     alpha17 = x[18*i+16];
2393:     alpha18 = x[18*i+17];
2394:     while (n-->0) {
2395:       y[18*(*idx)]   += alpha1*(*v);
2396:       y[18*(*idx)+1] += alpha2*(*v);
2397:       y[18*(*idx)+2] += alpha3*(*v);
2398:       y[18*(*idx)+3] += alpha4*(*v);
2399:       y[18*(*idx)+4] += alpha5*(*v);
2400:       y[18*(*idx)+5] += alpha6*(*v);
2401:       y[18*(*idx)+6] += alpha7*(*v);
2402:       y[18*(*idx)+7] += alpha8*(*v);
2403:       y[18*(*idx)+8]  += alpha9*(*v);
2404:       y[18*(*idx)+9]  += alpha10*(*v);
2405:       y[18*(*idx)+10] += alpha11*(*v);
2406:       y[18*(*idx)+11] += alpha12*(*v);
2407:       y[18*(*idx)+12] += alpha13*(*v);
2408:       y[18*(*idx)+13] += alpha14*(*v);
2409:       y[18*(*idx)+14] += alpha15*(*v);
2410:       y[18*(*idx)+15] += alpha16*(*v);
2411:       y[18*(*idx)+16] += alpha17*(*v);
2412:       y[18*(*idx)+17] += alpha18*(*v);
2413:       idx++; v++;
2414:     }
2415:   }
2416:   PetscLogFlops(36*a->nz);
2417:   VecRestoreArray(xx,&x);
2418:   VecRestoreArray(zz,&y);
2419:   return(0);
2420: }

2422: /*===================================================================================*/
2425: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2426: {
2427:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2431:   /* start the scatter */
2432:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2433:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2434:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2435:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2436:   return(0);
2437: }

2441: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2442: {
2443:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2447:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2448:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2449:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2450:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2451:   return(0);
2452: }

2456: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2457: {
2458:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2462:   /* start the scatter */
2463:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2464:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2465:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2466:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2467:   return(0);
2468: }

2472: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2473: {
2474:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2478:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2479:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2480:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2481:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2482:   return(0);
2483: }

2485: /* ----------------------------------------------------------------*/
2488: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2489: {
2490:   /* This routine requires testing -- but it's getting better. */
2491:   PetscErrorCode     ierr;
2492:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2493:   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2494:   Mat                P=pp->AIJ;
2495:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2496:   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2497:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2498:   PetscInt           an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof,cn;
2499:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2500:   MatScalar          *ca;

2503:   /* Start timer */
2504:   PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);

2506:   /* Get ij structure of P^T */
2507:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2509:   cn = pn*ppdof;
2510:   /* Allocate ci array, arrays for fill computation and */
2511:   /* free space for accumulating nonzero column info */
2512:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2513:   ci[0] = 0;

2515:   /* Work arrays for rows of P^T*A */
2516:   PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2517:   PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2518:   ptasparserow = ptadenserow  + an;
2519:   denserow     = ptasparserow + an;
2520:   sparserow    = denserow     + cn;

2522:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2523:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2524:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2525:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2526:   current_space = free_space;

2528:   /* Determine symbolic info for each row of C: */
2529:   for (i=0;i<pn;i++) {
2530:     ptnzi  = pti[i+1] - pti[i];
2531:     ptJ    = ptj + pti[i];
2532:     for (dof=0;dof<ppdof;dof++) {
2533:       ptanzi = 0;
2534:       /* Determine symbolic row of PtA: */
2535:       for (j=0;j<ptnzi;j++) {
2536:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2537:         arow = ptJ[j]*ppdof + dof;
2538:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2539:         anzj = ai[arow+1] - ai[arow];
2540:         ajj  = aj + ai[arow];
2541:         for (k=0;k<anzj;k++) {
2542:           if (!ptadenserow[ajj[k]]) {
2543:             ptadenserow[ajj[k]]    = -1;
2544:             ptasparserow[ptanzi++] = ajj[k];
2545:           }
2546:         }
2547:       }
2548:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2549:       ptaj = ptasparserow;
2550:       cnzi   = 0;
2551:       for (j=0;j<ptanzi;j++) {
2552:         /* Get offset within block of P */
2553:         pshift = *ptaj%ppdof;
2554:         /* Get block row of P */
2555:         prow = (*ptaj++)/ppdof; /* integer division */
2556:         /* P has same number of nonzeros per row as the compressed form */
2557:         pnzj = pi[prow+1] - pi[prow];
2558:         pjj  = pj + pi[prow];
2559:         for (k=0;k<pnzj;k++) {
2560:           /* Locations in C are shifted by the offset within the block */
2561:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2562:           if (!denserow[pjj[k]*ppdof+pshift]) {
2563:             denserow[pjj[k]*ppdof+pshift] = -1;
2564:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2565:           }
2566:         }
2567:       }

2569:       /* sort sparserow */
2570:       PetscSortInt(cnzi,sparserow);
2571: 
2572:       /* If free space is not available, make more free space */
2573:       /* Double the amount of total space in the list */
2574:       if (current_space->local_remaining<cnzi) {
2575:         PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);
2576:       }

2578:       /* Copy data into free space, and zero out denserows */
2579:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2580:       current_space->array           += cnzi;
2581:       current_space->local_used      += cnzi;
2582:       current_space->local_remaining -= cnzi;

2584:       for (j=0;j<ptanzi;j++) {
2585:         ptadenserow[ptasparserow[j]] = 0;
2586:       }
2587:       for (j=0;j<cnzi;j++) {
2588:         denserow[sparserow[j]] = 0;
2589:       }
2590:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2591:       /*        For now, we will recompute what is needed. */
2592:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2593:     }
2594:   }
2595:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2596:   /* Allocate space for cj, initialize cj, and */
2597:   /* destroy list of free space and other temporary array(s) */
2598:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2599:   PetscFreeSpaceContiguous(&free_space,cj);
2600:   PetscFree(ptadenserow);
2601: 
2602:   /* Allocate space for ca */
2603:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2604:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2605: 
2606:   /* put together the new matrix */
2607:   MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);

2609:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2610:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2611:   c          = (Mat_SeqAIJ *)((*C)->data);
2612:   c->free_a  = PETSC_TRUE;
2613:   c->free_ij = PETSC_TRUE;
2614:   c->nonew   = 0;

2616:   /* Clean up. */
2617:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2619:   PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
2620:   return(0);
2621: }

2625: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2626: {
2627:   /* This routine requires testing -- first draft only */
2629:   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2630:   Mat            P=pp->AIJ;
2631:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2632:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2633:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2634:   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2635:   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2636:   PetscInt       am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
2637:   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2638:   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

2641:   /* Allocate temporary array for storage of one row of A*P */
2642:   PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2643:   PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));

2645:   apj      = (PetscInt *)(apa + cn);
2646:   apjdense = apj + cn;

2648:   /* Clear old values in C */
2649:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

2651:   for (i=0;i<am;i++) {
2652:     /* Form sparse row of A*P */
2653:     anzi  = ai[i+1] - ai[i];
2654:     apnzj = 0;
2655:     for (j=0;j<anzi;j++) {
2656:       /* Get offset within block of P */
2657:       pshift = *aj%ppdof;
2658:       /* Get block row of P */
2659:       prow   = *aj++/ppdof; /* integer division */
2660:       pnzj = pi[prow+1] - pi[prow];
2661:       pjj  = pj + pi[prow];
2662:       paj  = pa + pi[prow];
2663:       for (k=0;k<pnzj;k++) {
2664:         poffset = pjj[k]*ppdof+pshift;
2665:         if (!apjdense[poffset]) {
2666:           apjdense[poffset] = -1;
2667:           apj[apnzj++]      = poffset;
2668:         }
2669:         apa[poffset] += (*aa)*paj[k];
2670:       }
2671:       PetscLogFlops(2*pnzj);
2672:       aa++;
2673:     }

2675:     /* Sort the j index array for quick sparse axpy. */
2676:     /* Note: a array does not need sorting as it is in dense storage locations. */
2677:     PetscSortInt(apnzj,apj);

2679:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2680:     prow    = i/ppdof; /* integer division */
2681:     pshift  = i%ppdof;
2682:     poffset = pi[prow];
2683:     pnzi = pi[prow+1] - poffset;
2684:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2685:     pJ   = pj+poffset;
2686:     pA   = pa+poffset;
2687:     for (j=0;j<pnzi;j++) {
2688:       crow   = (*pJ)*ppdof+pshift;
2689:       cjj    = cj + ci[crow];
2690:       caj    = ca + ci[crow];
2691:       pJ++;
2692:       /* Perform sparse axpy operation.  Note cjj includes apj. */
2693:       for (k=0,nextap=0;nextap<apnzj;k++) {
2694:         if (cjj[k]==apj[nextap]) {
2695:           caj[k] += (*pA)*apa[apj[nextap++]];
2696:         }
2697:       }
2698:       PetscLogFlops(2*apnzj);
2699:       pA++;
2700:     }

2702:     /* Zero the current row info for A*P */
2703:     for (j=0;j<apnzj;j++) {
2704:       apa[apj[j]]      = 0.;
2705:       apjdense[apj[j]] = 0;
2706:     }
2707:   }

2709:   /* Assemble the final matrix and clean up */
2710:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2711:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2712:   PetscFree(apa);
2713:   return(0);
2714: }

2718: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2719: {
2720:   PetscErrorCode    ierr;

2723:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2724:   MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
2725:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
2726:   return(0);
2727: }

2731: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2732: {
2734:   SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2735:   return(0);
2736: }

2741: PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2742: {
2743:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2744:   Mat               a = b->AIJ,B;
2745:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2746:   PetscErrorCode    ierr;
2747:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2748:   PetscInt          *cols;
2749:   PetscScalar       *vals;

2752:   MatGetSize(a,&m,&n);
2753:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2754:   for (i=0; i<m; i++) {
2755:     nmax = PetscMax(nmax,aij->ilen[i]);
2756:     for (j=0; j<dof; j++) {
2757:       ilen[dof*i+j] = aij->ilen[i];
2758:     }
2759:   }
2760:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2761:   PetscFree(ilen);
2762:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
2763:   ii   = 0;
2764:   for (i=0; i<m; i++) {
2765:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2766:     for (j=0; j<dof; j++) {
2767:       for (k=0; k<ncols; k++) {
2768:         icols[k] = dof*cols[k]+j;
2769:       }
2770:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2771:       ii++;
2772:     }
2773:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2774:   }
2775:   PetscFree(icols);
2776:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2777:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2779:   if (reuse == MAT_REUSE_MATRIX) {
2780:     MatHeaderReplace(A,B);
2781:   } else {
2782:     *newmat = B;
2783:   }
2784:   return(0);
2785: }

2788:  #include ../src/mat/impls/aij/mpi/mpiaij.h

2793: PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2794: {
2795:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2796:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2797:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2798:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2799:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2800:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2801:   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2802:   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2803:   PetscInt          rstart,cstart,*garray,ii,k;
2804:   PetscErrorCode    ierr;
2805:   PetscScalar       *vals,*ovals;

2808:   PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
2809:   for (i=0; i<A->rmap->n/dof; i++) {
2810:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2811:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2812:     for (j=0; j<dof; j++) {
2813:       dnz[dof*i+j] = AIJ->ilen[i];
2814:       onz[dof*i+j] = OAIJ->ilen[i];
2815:     }
2816:   }
2817:   MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
2818:   PetscFree2(dnz,onz);

2820:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2821:   rstart = dof*maij->A->rmap->rstart;
2822:   cstart = dof*maij->A->cmap->rstart;
2823:   garray = mpiaij->garray;

2825:   ii = rstart;
2826:   for (i=0; i<A->rmap->n/dof; i++) {
2827:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2828:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2829:     for (j=0; j<dof; j++) {
2830:       for (k=0; k<ncols; k++) {
2831:         icols[k] = cstart + dof*cols[k]+j;
2832:       }
2833:       for (k=0; k<oncols; k++) {
2834:         oicols[k] = dof*garray[ocols[k]]+j;
2835:       }
2836:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2837:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2838:       ii++;
2839:     }
2840:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2841:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2842:   }
2843:   PetscFree2(icols,oicols);

2845:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2846:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2848:   if (reuse == MAT_REUSE_MATRIX) {
2849:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
2850:     ((PetscObject)A)->refct = 1;
2851:     MatHeaderReplace(A,B);
2852:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
2853:   } else {
2854:     *newmat = B;
2855:   }
2856:   return(0);
2857: }


2861: /* ---------------------------------------------------------------------------------- */
2862: /*MC
2863:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
2864:   operations for multicomponent problems.  It interpolates each component the same
2865:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2866:   and MATMPIAIJ for distributed matrices.

2868:   Operations provided:
2869: + MatMult
2870: . MatMultTranspose
2871: . MatMultAdd
2872: . MatMultTransposeAdd
2873: - MatView

2875:   Level: advanced

2877: M*/
2880: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2881: {
2883:   PetscMPIInt    size;
2884:   PetscInt       n;
2885:   Mat_MPIMAIJ    *b;
2886:   Mat            B;

2889:   PetscObjectReference((PetscObject)A);

2891:   if (dof == 1) {
2892:     *maij = A;
2893:   } else {
2894:     MatCreate(((PetscObject)A)->comm,&B);
2895:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
2896:     B->assembled    = PETSC_TRUE;

2898:     MPI_Comm_size(((PetscObject)A)->comm,&size);
2899:     if (size == 1) {
2900:       MatSetType(B,MATSEQMAIJ);
2901:       B->ops->destroy = MatDestroy_SeqMAIJ;
2902:       B->ops->view    = MatView_SeqMAIJ;
2903:       b      = (Mat_MPIMAIJ*)B->data;
2904:       b->dof = dof;
2905:       b->AIJ = A;
2906:       if (dof == 2) {
2907:         B->ops->mult             = MatMult_SeqMAIJ_2;
2908:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2909:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2910:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2911:       } else if (dof == 3) {
2912:         B->ops->mult             = MatMult_SeqMAIJ_3;
2913:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2914:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2915:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2916:       } else if (dof == 4) {
2917:         B->ops->mult             = MatMult_SeqMAIJ_4;
2918:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2919:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2920:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2921:       } else if (dof == 5) {
2922:         B->ops->mult             = MatMult_SeqMAIJ_5;
2923:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2924:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2925:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2926:       } else if (dof == 6) {
2927:         B->ops->mult             = MatMult_SeqMAIJ_6;
2928:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2929:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2930:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2931:       } else if (dof == 7) {
2932:         B->ops->mult             = MatMult_SeqMAIJ_7;
2933:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2934:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2935:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2936:       } else if (dof == 8) {
2937:         B->ops->mult             = MatMult_SeqMAIJ_8;
2938:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2939:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2940:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2941:       } else if (dof == 9) {
2942:         B->ops->mult             = MatMult_SeqMAIJ_9;
2943:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2944:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2945:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2946:       } else if (dof == 10) {
2947:         B->ops->mult             = MatMult_SeqMAIJ_10;
2948:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2949:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2950:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2951:       } else if (dof == 16) {
2952:         B->ops->mult             = MatMult_SeqMAIJ_16;
2953:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2954:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2955:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2956:       } else if (dof == 18) {
2957:         B->ops->mult             = MatMult_SeqMAIJ_18;
2958:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
2959:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
2960:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
2961:       } else {
2962:         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2963:       }
2964:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2965:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2966:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2967:     } else {
2968:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2969:       IS         from,to;
2970:       Vec        gvec;
2971:       PetscInt   *garray,i;

2973:       MatSetType(B,MATMPIMAIJ);
2974:       B->ops->destroy = MatDestroy_MPIMAIJ;
2975:       B->ops->view    = MatView_MPIMAIJ;
2976:       b      = (Mat_MPIMAIJ*)B->data;
2977:       b->dof = dof;
2978:       b->A   = A;
2979:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2980:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

2982:       VecGetSize(mpiaij->lvec,&n);
2983:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2984:       VecSetBlockSize(b->w,dof);

2986:       /* create two temporary Index sets for build scatter gather */
2987:       PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2988:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2989:       ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);
2990:       PetscFree(garray);
2991:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

2993:       /* create temporary global vector to generate scatter context */
2994:       VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);
2995:       VecSetBlockSize(gvec,dof);

2997:       /* generate the scatter context */
2998:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3000:       ISDestroy(from);
3001:       ISDestroy(to);
3002:       VecDestroy(gvec);

3004:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3005:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3006:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3007:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3008:       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3009:       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3010:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3011:     }
3012:     *maij = B;
3013:     MatView_Private(B);
3014:   }
3015:   return(0);
3016: }