Actual source code: maij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the MAIJ matrix storage format.
5: This format is used for restriction and interpolation operations for
6: multicomponent problems. It interpolates each component the same way
7: independently.
9: We provide:
10: MatMult()
11: MatMultTranspose()
12: MatMultTransposeAdd()
13: MatMultAdd()
14: and
15: MatCreateMAIJ(Mat,dof,Mat*)
17: This single directory handles both the sequential and parallel codes
18: */
20: #include ../src/mat/impls/maij/maij.h
21: #include ../src/mat/utils/freespace.h
22: #include private/vecimpl.h
26: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
27: {
29: PetscTruth ismpimaij,isseqmaij;
32: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
33: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
34: if (ismpimaij) {
35: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
37: *B = b->A;
38: } else if (isseqmaij) {
39: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
41: *B = b->AIJ;
42: } else {
43: *B = A;
44: }
45: return(0);
46: }
50: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51: {
53: Mat Aij;
56: MatMAIJGetAIJ(A,&Aij);
57: MatCreateMAIJ(Aij,dof,B);
58: return(0);
59: }
63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64: {
66: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
69: if (b->AIJ) {
70: MatDestroy(b->AIJ);
71: }
72: PetscFree(b);
73: return(0);
74: }
78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79: {
81: Mat B;
84: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85: MatView(B,viewer);
86: MatDestroy(B);
87: return(0);
88: }
92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93: {
95: Mat B;
98: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99: MatView(B,viewer);
100: MatDestroy(B);
101: return(0);
102: }
106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
112: if (b->AIJ) {
113: MatDestroy(b->AIJ);
114: }
115: if (b->OAIJ) {
116: MatDestroy(b->OAIJ);
117: }
118: if (b->A) {
119: MatDestroy(b->A);
120: }
121: if (b->ctx) {
122: VecScatterDestroy(b->ctx);
123: }
124: if (b->w) {
125: VecDestroy(b->w);
126: }
127: PetscFree(b);
128: PetscObjectChangeTypeName((PetscObject)A,0);
129: return(0);
130: }
132: /*MC
133: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
134: multicomponent problems, interpolating or restricting each component the same way independently.
135: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
137: Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd
143: Level: advanced
145: .seealso: MatCreateMAIJ(), MatMAIJGetAIJ(), MatMAIJRedimension()
146: M*/
151: PetscErrorCode MatCreate_MAIJ(Mat A)
152: {
154: Mat_MPIMAIJ *b;
155: PetscMPIInt size;
158: PetscNewLog(A,Mat_MPIMAIJ,&b);
159: A->data = (void*)b;
160: PetscMemzero(A->ops,sizeof(struct _MatOps));
161: A->mapping = 0;
163: b->AIJ = 0;
164: b->dof = 0;
165: b->OAIJ = 0;
166: b->ctx = 0;
167: b->w = 0;
168: MPI_Comm_size(((PetscObject)A)->comm,&size);
169: if (size == 1){
170: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
171: } else {
172: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
173: }
174: return(0);
175: }
178: /* --------------------------------------------------------------------------------------*/
181: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
182: {
183: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
184: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
185: PetscScalar *x,*y,*v,sum1, sum2;
187: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
188: PetscInt n,i,jrow,j;
191: VecGetArray(xx,&x);
192: VecGetArray(yy,&y);
193: idx = a->j;
194: v = a->a;
195: ii = a->i;
197: for (i=0; i<m; i++) {
198: jrow = ii[i];
199: n = ii[i+1] - jrow;
200: sum1 = 0.0;
201: sum2 = 0.0;
202: nonzerorow += (n>0);
203: for (j=0; j<n; j++) {
204: sum1 += v[jrow]*x[2*idx[jrow]];
205: sum2 += v[jrow]*x[2*idx[jrow]+1];
206: jrow++;
207: }
208: y[2*i] = sum1;
209: y[2*i+1] = sum2;
210: }
212: PetscLogFlops(4*a->nz - 2*nonzerorow);
213: VecRestoreArray(xx,&x);
214: VecRestoreArray(yy,&y);
215: return(0);
216: }
220: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
221: {
222: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
223: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
224: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
226: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
229: VecSet(yy,zero);
230: VecGetArray(xx,&x);
231: VecGetArray(yy,&y);
232:
233: for (i=0; i<m; i++) {
234: idx = a->j + a->i[i] ;
235: v = a->a + a->i[i] ;
236: n = a->i[i+1] - a->i[i];
237: alpha1 = x[2*i];
238: alpha2 = x[2*i+1];
239: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
240: }
241: PetscLogFlops(4*a->nz);
242: VecRestoreArray(xx,&x);
243: VecRestoreArray(yy,&y);
244: return(0);
245: }
249: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250: {
251: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
252: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
253: PetscScalar *x,*y,*v,sum1, sum2;
255: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
256: PetscInt n,i,jrow,j;
259: if (yy != zz) {VecCopy(yy,zz);}
260: VecGetArray(xx,&x);
261: VecGetArray(zz,&y);
262: idx = a->j;
263: v = a->a;
264: ii = a->i;
266: for (i=0; i<m; i++) {
267: jrow = ii[i];
268: n = ii[i+1] - jrow;
269: sum1 = 0.0;
270: sum2 = 0.0;
271: for (j=0; j<n; j++) {
272: sum1 += v[jrow]*x[2*idx[jrow]];
273: sum2 += v[jrow]*x[2*idx[jrow]+1];
274: jrow++;
275: }
276: y[2*i] += sum1;
277: y[2*i+1] += sum2;
278: }
280: PetscLogFlops(4*a->nz);
281: VecRestoreArray(xx,&x);
282: VecRestoreArray(zz,&y);
283: return(0);
284: }
287: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
288: {
289: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
290: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
291: PetscScalar *x,*y,*v,alpha1,alpha2;
293: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
296: if (yy != zz) {VecCopy(yy,zz);}
297: VecGetArray(xx,&x);
298: VecGetArray(zz,&y);
299:
300: for (i=0; i<m; i++) {
301: idx = a->j + a->i[i] ;
302: v = a->a + a->i[i] ;
303: n = a->i[i+1] - a->i[i];
304: alpha1 = x[2*i];
305: alpha2 = x[2*i+1];
306: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
307: }
308: PetscLogFlops(4*a->nz);
309: VecRestoreArray(xx,&x);
310: VecRestoreArray(zz,&y);
311: return(0);
312: }
313: /* --------------------------------------------------------------------------------------*/
316: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
317: {
318: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
319: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
320: PetscScalar *x,*y,*v,sum1, sum2, sum3;
322: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
323: PetscInt n,i,jrow,j;
326: VecGetArray(xx,&x);
327: VecGetArray(yy,&y);
328: idx = a->j;
329: v = a->a;
330: ii = a->i;
332: for (i=0; i<m; i++) {
333: jrow = ii[i];
334: n = ii[i+1] - jrow;
335: sum1 = 0.0;
336: sum2 = 0.0;
337: sum3 = 0.0;
338: nonzerorow += (n>0);
339: for (j=0; j<n; j++) {
340: sum1 += v[jrow]*x[3*idx[jrow]];
341: sum2 += v[jrow]*x[3*idx[jrow]+1];
342: sum3 += v[jrow]*x[3*idx[jrow]+2];
343: jrow++;
344: }
345: y[3*i] = sum1;
346: y[3*i+1] = sum2;
347: y[3*i+2] = sum3;
348: }
350: PetscLogFlops(6*a->nz - 3*nonzerorow);
351: VecRestoreArray(xx,&x);
352: VecRestoreArray(yy,&y);
353: return(0);
354: }
358: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
359: {
360: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
361: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
362: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
364: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
367: VecSet(yy,zero);
368: VecGetArray(xx,&x);
369: VecGetArray(yy,&y);
370:
371: for (i=0; i<m; i++) {
372: idx = a->j + a->i[i];
373: v = a->a + a->i[i];
374: n = a->i[i+1] - a->i[i];
375: alpha1 = x[3*i];
376: alpha2 = x[3*i+1];
377: alpha3 = x[3*i+2];
378: while (n-->0) {
379: y[3*(*idx)] += alpha1*(*v);
380: y[3*(*idx)+1] += alpha2*(*v);
381: y[3*(*idx)+2] += alpha3*(*v);
382: idx++; v++;
383: }
384: }
385: PetscLogFlops(6*a->nz);
386: VecRestoreArray(xx,&x);
387: VecRestoreArray(yy,&y);
388: return(0);
389: }
393: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
394: {
395: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
396: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
397: PetscScalar *x,*y,*v,sum1, sum2, sum3;
399: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
400: PetscInt n,i,jrow,j;
403: if (yy != zz) {VecCopy(yy,zz);}
404: VecGetArray(xx,&x);
405: VecGetArray(zz,&y);
406: idx = a->j;
407: v = a->a;
408: ii = a->i;
410: for (i=0; i<m; i++) {
411: jrow = ii[i];
412: n = ii[i+1] - jrow;
413: sum1 = 0.0;
414: sum2 = 0.0;
415: sum3 = 0.0;
416: for (j=0; j<n; j++) {
417: sum1 += v[jrow]*x[3*idx[jrow]];
418: sum2 += v[jrow]*x[3*idx[jrow]+1];
419: sum3 += v[jrow]*x[3*idx[jrow]+2];
420: jrow++;
421: }
422: y[3*i] += sum1;
423: y[3*i+1] += sum2;
424: y[3*i+2] += sum3;
425: }
427: PetscLogFlops(6*a->nz);
428: VecRestoreArray(xx,&x);
429: VecRestoreArray(zz,&y);
430: return(0);
431: }
434: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
435: {
436: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
437: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
438: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
440: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
443: if (yy != zz) {VecCopy(yy,zz);}
444: VecGetArray(xx,&x);
445: VecGetArray(zz,&y);
446: for (i=0; i<m; i++) {
447: idx = a->j + a->i[i] ;
448: v = a->a + a->i[i] ;
449: n = a->i[i+1] - a->i[i];
450: alpha1 = x[3*i];
451: alpha2 = x[3*i+1];
452: alpha3 = x[3*i+2];
453: while (n-->0) {
454: y[3*(*idx)] += alpha1*(*v);
455: y[3*(*idx)+1] += alpha2*(*v);
456: y[3*(*idx)+2] += alpha3*(*v);
457: idx++; v++;
458: }
459: }
460: PetscLogFlops(6*a->nz);
461: VecRestoreArray(xx,&x);
462: VecRestoreArray(zz,&y);
463: return(0);
464: }
466: /* ------------------------------------------------------------------------------*/
469: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
470: {
471: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
472: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
473: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
475: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
476: PetscInt n,i,jrow,j;
479: VecGetArray(xx,&x);
480: VecGetArray(yy,&y);
481: idx = a->j;
482: v = a->a;
483: ii = a->i;
485: for (i=0; i<m; i++) {
486: jrow = ii[i];
487: n = ii[i+1] - jrow;
488: sum1 = 0.0;
489: sum2 = 0.0;
490: sum3 = 0.0;
491: sum4 = 0.0;
492: nonzerorow += (n>0);
493: for (j=0; j<n; j++) {
494: sum1 += v[jrow]*x[4*idx[jrow]];
495: sum2 += v[jrow]*x[4*idx[jrow]+1];
496: sum3 += v[jrow]*x[4*idx[jrow]+2];
497: sum4 += v[jrow]*x[4*idx[jrow]+3];
498: jrow++;
499: }
500: y[4*i] = sum1;
501: y[4*i+1] = sum2;
502: y[4*i+2] = sum3;
503: y[4*i+3] = sum4;
504: }
506: PetscLogFlops(8*a->nz - 4*nonzerorow);
507: VecRestoreArray(xx,&x);
508: VecRestoreArray(yy,&y);
509: return(0);
510: }
514: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
515: {
516: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
517: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
518: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
520: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
523: VecSet(yy,zero);
524: VecGetArray(xx,&x);
525: VecGetArray(yy,&y);
526: for (i=0; i<m; i++) {
527: idx = a->j + a->i[i] ;
528: v = a->a + a->i[i] ;
529: n = a->i[i+1] - a->i[i];
530: alpha1 = x[4*i];
531: alpha2 = x[4*i+1];
532: alpha3 = x[4*i+2];
533: alpha4 = x[4*i+3];
534: while (n-->0) {
535: y[4*(*idx)] += alpha1*(*v);
536: y[4*(*idx)+1] += alpha2*(*v);
537: y[4*(*idx)+2] += alpha3*(*v);
538: y[4*(*idx)+3] += alpha4*(*v);
539: idx++; v++;
540: }
541: }
542: PetscLogFlops(8*a->nz);
543: VecRestoreArray(xx,&x);
544: VecRestoreArray(yy,&y);
545: return(0);
546: }
550: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
551: {
552: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
553: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
554: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
556: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
557: PetscInt n,i,jrow,j;
560: if (yy != zz) {VecCopy(yy,zz);}
561: VecGetArray(xx,&x);
562: VecGetArray(zz,&y);
563: idx = a->j;
564: v = a->a;
565: ii = a->i;
567: for (i=0; i<m; i++) {
568: jrow = ii[i];
569: n = ii[i+1] - jrow;
570: sum1 = 0.0;
571: sum2 = 0.0;
572: sum3 = 0.0;
573: sum4 = 0.0;
574: for (j=0; j<n; j++) {
575: sum1 += v[jrow]*x[4*idx[jrow]];
576: sum2 += v[jrow]*x[4*idx[jrow]+1];
577: sum3 += v[jrow]*x[4*idx[jrow]+2];
578: sum4 += v[jrow]*x[4*idx[jrow]+3];
579: jrow++;
580: }
581: y[4*i] += sum1;
582: y[4*i+1] += sum2;
583: y[4*i+2] += sum3;
584: y[4*i+3] += sum4;
585: }
587: PetscLogFlops(8*a->nz);
588: VecRestoreArray(xx,&x);
589: VecRestoreArray(zz,&y);
590: return(0);
591: }
594: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
595: {
596: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
597: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
598: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
600: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
603: if (yy != zz) {VecCopy(yy,zz);}
604: VecGetArray(xx,&x);
605: VecGetArray(zz,&y);
606:
607: for (i=0; i<m; i++) {
608: idx = a->j + a->i[i] ;
609: v = a->a + a->i[i] ;
610: n = a->i[i+1] - a->i[i];
611: alpha1 = x[4*i];
612: alpha2 = x[4*i+1];
613: alpha3 = x[4*i+2];
614: alpha4 = x[4*i+3];
615: while (n-->0) {
616: y[4*(*idx)] += alpha1*(*v);
617: y[4*(*idx)+1] += alpha2*(*v);
618: y[4*(*idx)+2] += alpha3*(*v);
619: y[4*(*idx)+3] += alpha4*(*v);
620: idx++; v++;
621: }
622: }
623: PetscLogFlops(8*a->nz);
624: VecRestoreArray(xx,&x);
625: VecRestoreArray(zz,&y);
626: return(0);
627: }
628: /* ------------------------------------------------------------------------------*/
632: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
633: {
634: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
635: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
636: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
638: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
639: PetscInt n,i,jrow,j;
642: VecGetArray(xx,&x);
643: VecGetArray(yy,&y);
644: idx = a->j;
645: v = a->a;
646: ii = a->i;
648: for (i=0; i<m; i++) {
649: jrow = ii[i];
650: n = ii[i+1] - jrow;
651: sum1 = 0.0;
652: sum2 = 0.0;
653: sum3 = 0.0;
654: sum4 = 0.0;
655: sum5 = 0.0;
656: nonzerorow += (n>0);
657: for (j=0; j<n; j++) {
658: sum1 += v[jrow]*x[5*idx[jrow]];
659: sum2 += v[jrow]*x[5*idx[jrow]+1];
660: sum3 += v[jrow]*x[5*idx[jrow]+2];
661: sum4 += v[jrow]*x[5*idx[jrow]+3];
662: sum5 += v[jrow]*x[5*idx[jrow]+4];
663: jrow++;
664: }
665: y[5*i] = sum1;
666: y[5*i+1] = sum2;
667: y[5*i+2] = sum3;
668: y[5*i+3] = sum4;
669: y[5*i+4] = sum5;
670: }
672: PetscLogFlops(10*a->nz - 5*nonzerorow);
673: VecRestoreArray(xx,&x);
674: VecRestoreArray(yy,&y);
675: return(0);
676: }
680: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
681: {
682: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
683: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
684: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
686: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
689: VecSet(yy,zero);
690: VecGetArray(xx,&x);
691: VecGetArray(yy,&y);
692:
693: for (i=0; i<m; i++) {
694: idx = a->j + a->i[i] ;
695: v = a->a + a->i[i] ;
696: n = a->i[i+1] - a->i[i];
697: alpha1 = x[5*i];
698: alpha2 = x[5*i+1];
699: alpha3 = x[5*i+2];
700: alpha4 = x[5*i+3];
701: alpha5 = x[5*i+4];
702: while (n-->0) {
703: y[5*(*idx)] += alpha1*(*v);
704: y[5*(*idx)+1] += alpha2*(*v);
705: y[5*(*idx)+2] += alpha3*(*v);
706: y[5*(*idx)+3] += alpha4*(*v);
707: y[5*(*idx)+4] += alpha5*(*v);
708: idx++; v++;
709: }
710: }
711: PetscLogFlops(10*a->nz);
712: VecRestoreArray(xx,&x);
713: VecRestoreArray(yy,&y);
714: return(0);
715: }
719: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
720: {
721: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
722: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
723: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
725: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
726: PetscInt n,i,jrow,j;
729: if (yy != zz) {VecCopy(yy,zz);}
730: VecGetArray(xx,&x);
731: VecGetArray(zz,&y);
732: idx = a->j;
733: v = a->a;
734: ii = a->i;
736: for (i=0; i<m; i++) {
737: jrow = ii[i];
738: n = ii[i+1] - jrow;
739: sum1 = 0.0;
740: sum2 = 0.0;
741: sum3 = 0.0;
742: sum4 = 0.0;
743: sum5 = 0.0;
744: for (j=0; j<n; j++) {
745: sum1 += v[jrow]*x[5*idx[jrow]];
746: sum2 += v[jrow]*x[5*idx[jrow]+1];
747: sum3 += v[jrow]*x[5*idx[jrow]+2];
748: sum4 += v[jrow]*x[5*idx[jrow]+3];
749: sum5 += v[jrow]*x[5*idx[jrow]+4];
750: jrow++;
751: }
752: y[5*i] += sum1;
753: y[5*i+1] += sum2;
754: y[5*i+2] += sum3;
755: y[5*i+3] += sum4;
756: y[5*i+4] += sum5;
757: }
759: PetscLogFlops(10*a->nz);
760: VecRestoreArray(xx,&x);
761: VecRestoreArray(zz,&y);
762: return(0);
763: }
767: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
768: {
769: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
770: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
771: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
773: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
776: if (yy != zz) {VecCopy(yy,zz);}
777: VecGetArray(xx,&x);
778: VecGetArray(zz,&y);
779:
780: for (i=0; i<m; i++) {
781: idx = a->j + a->i[i] ;
782: v = a->a + a->i[i] ;
783: n = a->i[i+1] - a->i[i];
784: alpha1 = x[5*i];
785: alpha2 = x[5*i+1];
786: alpha3 = x[5*i+2];
787: alpha4 = x[5*i+3];
788: alpha5 = x[5*i+4];
789: while (n-->0) {
790: y[5*(*idx)] += alpha1*(*v);
791: y[5*(*idx)+1] += alpha2*(*v);
792: y[5*(*idx)+2] += alpha3*(*v);
793: y[5*(*idx)+3] += alpha4*(*v);
794: y[5*(*idx)+4] += alpha5*(*v);
795: idx++; v++;
796: }
797: }
798: PetscLogFlops(10*a->nz);
799: VecRestoreArray(xx,&x);
800: VecRestoreArray(zz,&y);
801: return(0);
802: }
804: /* ------------------------------------------------------------------------------*/
807: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
808: {
809: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
810: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
811: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
813: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
814: PetscInt n,i,jrow,j;
817: VecGetArray(xx,&x);
818: VecGetArray(yy,&y);
819: idx = a->j;
820: v = a->a;
821: ii = a->i;
823: for (i=0; i<m; i++) {
824: jrow = ii[i];
825: n = ii[i+1] - jrow;
826: sum1 = 0.0;
827: sum2 = 0.0;
828: sum3 = 0.0;
829: sum4 = 0.0;
830: sum5 = 0.0;
831: sum6 = 0.0;
832: nonzerorow += (n>0);
833: for (j=0; j<n; j++) {
834: sum1 += v[jrow]*x[6*idx[jrow]];
835: sum2 += v[jrow]*x[6*idx[jrow]+1];
836: sum3 += v[jrow]*x[6*idx[jrow]+2];
837: sum4 += v[jrow]*x[6*idx[jrow]+3];
838: sum5 += v[jrow]*x[6*idx[jrow]+4];
839: sum6 += v[jrow]*x[6*idx[jrow]+5];
840: jrow++;
841: }
842: y[6*i] = sum1;
843: y[6*i+1] = sum2;
844: y[6*i+2] = sum3;
845: y[6*i+3] = sum4;
846: y[6*i+4] = sum5;
847: y[6*i+5] = sum6;
848: }
850: PetscLogFlops(12*a->nz - 6*nonzerorow);
851: VecRestoreArray(xx,&x);
852: VecRestoreArray(yy,&y);
853: return(0);
854: }
858: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
859: {
860: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
861: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
862: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
864: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
867: VecSet(yy,zero);
868: VecGetArray(xx,&x);
869: VecGetArray(yy,&y);
871: for (i=0; i<m; i++) {
872: idx = a->j + a->i[i] ;
873: v = a->a + a->i[i] ;
874: n = a->i[i+1] - a->i[i];
875: alpha1 = x[6*i];
876: alpha2 = x[6*i+1];
877: alpha3 = x[6*i+2];
878: alpha4 = x[6*i+3];
879: alpha5 = x[6*i+4];
880: alpha6 = x[6*i+5];
881: while (n-->0) {
882: y[6*(*idx)] += alpha1*(*v);
883: y[6*(*idx)+1] += alpha2*(*v);
884: y[6*(*idx)+2] += alpha3*(*v);
885: y[6*(*idx)+3] += alpha4*(*v);
886: y[6*(*idx)+4] += alpha5*(*v);
887: y[6*(*idx)+5] += alpha6*(*v);
888: idx++; v++;
889: }
890: }
891: PetscLogFlops(12*a->nz);
892: VecRestoreArray(xx,&x);
893: VecRestoreArray(yy,&y);
894: return(0);
895: }
899: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
900: {
901: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
902: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
903: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
905: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
906: PetscInt n,i,jrow,j;
909: if (yy != zz) {VecCopy(yy,zz);}
910: VecGetArray(xx,&x);
911: VecGetArray(zz,&y);
912: idx = a->j;
913: v = a->a;
914: ii = a->i;
916: for (i=0; i<m; i++) {
917: jrow = ii[i];
918: n = ii[i+1] - jrow;
919: sum1 = 0.0;
920: sum2 = 0.0;
921: sum3 = 0.0;
922: sum4 = 0.0;
923: sum5 = 0.0;
924: sum6 = 0.0;
925: for (j=0; j<n; j++) {
926: sum1 += v[jrow]*x[6*idx[jrow]];
927: sum2 += v[jrow]*x[6*idx[jrow]+1];
928: sum3 += v[jrow]*x[6*idx[jrow]+2];
929: sum4 += v[jrow]*x[6*idx[jrow]+3];
930: sum5 += v[jrow]*x[6*idx[jrow]+4];
931: sum6 += v[jrow]*x[6*idx[jrow]+5];
932: jrow++;
933: }
934: y[6*i] += sum1;
935: y[6*i+1] += sum2;
936: y[6*i+2] += sum3;
937: y[6*i+3] += sum4;
938: y[6*i+4] += sum5;
939: y[6*i+5] += sum6;
940: }
942: PetscLogFlops(12*a->nz);
943: VecRestoreArray(xx,&x);
944: VecRestoreArray(zz,&y);
945: return(0);
946: }
950: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
951: {
952: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
953: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
954: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
956: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
959: if (yy != zz) {VecCopy(yy,zz);}
960: VecGetArray(xx,&x);
961: VecGetArray(zz,&y);
962:
963: for (i=0; i<m; i++) {
964: idx = a->j + a->i[i] ;
965: v = a->a + a->i[i] ;
966: n = a->i[i+1] - a->i[i];
967: alpha1 = x[6*i];
968: alpha2 = x[6*i+1];
969: alpha3 = x[6*i+2];
970: alpha4 = x[6*i+3];
971: alpha5 = x[6*i+4];
972: alpha6 = x[6*i+5];
973: while (n-->0) {
974: y[6*(*idx)] += alpha1*(*v);
975: y[6*(*idx)+1] += alpha2*(*v);
976: y[6*(*idx)+2] += alpha3*(*v);
977: y[6*(*idx)+3] += alpha4*(*v);
978: y[6*(*idx)+4] += alpha5*(*v);
979: y[6*(*idx)+5] += alpha6*(*v);
980: idx++; v++;
981: }
982: }
983: PetscLogFlops(12*a->nz);
984: VecRestoreArray(xx,&x);
985: VecRestoreArray(zz,&y);
986: return(0);
987: }
989: /* ------------------------------------------------------------------------------*/
992: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
993: {
994: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
995: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
996: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
998: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
999: PetscInt n,i,jrow,j;
1002: VecGetArray(xx,&x);
1003: VecGetArray(yy,&y);
1004: idx = a->j;
1005: v = a->a;
1006: ii = a->i;
1008: for (i=0; i<m; i++) {
1009: jrow = ii[i];
1010: n = ii[i+1] - jrow;
1011: sum1 = 0.0;
1012: sum2 = 0.0;
1013: sum3 = 0.0;
1014: sum4 = 0.0;
1015: sum5 = 0.0;
1016: sum6 = 0.0;
1017: sum7 = 0.0;
1018: nonzerorow += (n>0);
1019: for (j=0; j<n; j++) {
1020: sum1 += v[jrow]*x[7*idx[jrow]];
1021: sum2 += v[jrow]*x[7*idx[jrow]+1];
1022: sum3 += v[jrow]*x[7*idx[jrow]+2];
1023: sum4 += v[jrow]*x[7*idx[jrow]+3];
1024: sum5 += v[jrow]*x[7*idx[jrow]+4];
1025: sum6 += v[jrow]*x[7*idx[jrow]+5];
1026: sum7 += v[jrow]*x[7*idx[jrow]+6];
1027: jrow++;
1028: }
1029: y[7*i] = sum1;
1030: y[7*i+1] = sum2;
1031: y[7*i+2] = sum3;
1032: y[7*i+3] = sum4;
1033: y[7*i+4] = sum5;
1034: y[7*i+5] = sum6;
1035: y[7*i+6] = sum7;
1036: }
1038: PetscLogFlops(14*a->nz - 7*nonzerorow);
1039: VecRestoreArray(xx,&x);
1040: VecRestoreArray(yy,&y);
1041: return(0);
1042: }
1046: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1047: {
1048: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1049: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1050: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1052: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1055: VecSet(yy,zero);
1056: VecGetArray(xx,&x);
1057: VecGetArray(yy,&y);
1059: for (i=0; i<m; i++) {
1060: idx = a->j + a->i[i] ;
1061: v = a->a + a->i[i] ;
1062: n = a->i[i+1] - a->i[i];
1063: alpha1 = x[7*i];
1064: alpha2 = x[7*i+1];
1065: alpha3 = x[7*i+2];
1066: alpha4 = x[7*i+3];
1067: alpha5 = x[7*i+4];
1068: alpha6 = x[7*i+5];
1069: alpha7 = x[7*i+6];
1070: while (n-->0) {
1071: y[7*(*idx)] += alpha1*(*v);
1072: y[7*(*idx)+1] += alpha2*(*v);
1073: y[7*(*idx)+2] += alpha3*(*v);
1074: y[7*(*idx)+3] += alpha4*(*v);
1075: y[7*(*idx)+4] += alpha5*(*v);
1076: y[7*(*idx)+5] += alpha6*(*v);
1077: y[7*(*idx)+6] += alpha7*(*v);
1078: idx++; v++;
1079: }
1080: }
1081: PetscLogFlops(14*a->nz);
1082: VecRestoreArray(xx,&x);
1083: VecRestoreArray(yy,&y);
1084: return(0);
1085: }
1089: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1090: {
1091: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1092: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1093: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1095: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1096: PetscInt n,i,jrow,j;
1099: if (yy != zz) {VecCopy(yy,zz);}
1100: VecGetArray(xx,&x);
1101: VecGetArray(zz,&y);
1102: idx = a->j;
1103: v = a->a;
1104: ii = a->i;
1106: for (i=0; i<m; i++) {
1107: jrow = ii[i];
1108: n = ii[i+1] - jrow;
1109: sum1 = 0.0;
1110: sum2 = 0.0;
1111: sum3 = 0.0;
1112: sum4 = 0.0;
1113: sum5 = 0.0;
1114: sum6 = 0.0;
1115: sum7 = 0.0;
1116: for (j=0; j<n; j++) {
1117: sum1 += v[jrow]*x[7*idx[jrow]];
1118: sum2 += v[jrow]*x[7*idx[jrow]+1];
1119: sum3 += v[jrow]*x[7*idx[jrow]+2];
1120: sum4 += v[jrow]*x[7*idx[jrow]+3];
1121: sum5 += v[jrow]*x[7*idx[jrow]+4];
1122: sum6 += v[jrow]*x[7*idx[jrow]+5];
1123: sum7 += v[jrow]*x[7*idx[jrow]+6];
1124: jrow++;
1125: }
1126: y[7*i] += sum1;
1127: y[7*i+1] += sum2;
1128: y[7*i+2] += sum3;
1129: y[7*i+3] += sum4;
1130: y[7*i+4] += sum5;
1131: y[7*i+5] += sum6;
1132: y[7*i+6] += sum7;
1133: }
1135: PetscLogFlops(14*a->nz);
1136: VecRestoreArray(xx,&x);
1137: VecRestoreArray(zz,&y);
1138: return(0);
1139: }
1143: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1144: {
1145: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1146: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1147: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1149: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1152: if (yy != zz) {VecCopy(yy,zz);}
1153: VecGetArray(xx,&x);
1154: VecGetArray(zz,&y);
1155: for (i=0; i<m; i++) {
1156: idx = a->j + a->i[i] ;
1157: v = a->a + a->i[i] ;
1158: n = a->i[i+1] - a->i[i];
1159: alpha1 = x[7*i];
1160: alpha2 = x[7*i+1];
1161: alpha3 = x[7*i+2];
1162: alpha4 = x[7*i+3];
1163: alpha5 = x[7*i+4];
1164: alpha6 = x[7*i+5];
1165: alpha7 = x[7*i+6];
1166: while (n-->0) {
1167: y[7*(*idx)] += alpha1*(*v);
1168: y[7*(*idx)+1] += alpha2*(*v);
1169: y[7*(*idx)+2] += alpha3*(*v);
1170: y[7*(*idx)+3] += alpha4*(*v);
1171: y[7*(*idx)+4] += alpha5*(*v);
1172: y[7*(*idx)+5] += alpha6*(*v);
1173: y[7*(*idx)+6] += alpha7*(*v);
1174: idx++; v++;
1175: }
1176: }
1177: PetscLogFlops(14*a->nz);
1178: VecRestoreArray(xx,&x);
1179: VecRestoreArray(zz,&y);
1180: return(0);
1181: }
1185: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1186: {
1187: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1188: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1189: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1191: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1192: PetscInt n,i,jrow,j;
1195: VecGetArray(xx,&x);
1196: VecGetArray(yy,&y);
1197: idx = a->j;
1198: v = a->a;
1199: ii = a->i;
1201: for (i=0; i<m; i++) {
1202: jrow = ii[i];
1203: n = ii[i+1] - jrow;
1204: sum1 = 0.0;
1205: sum2 = 0.0;
1206: sum3 = 0.0;
1207: sum4 = 0.0;
1208: sum5 = 0.0;
1209: sum6 = 0.0;
1210: sum7 = 0.0;
1211: sum8 = 0.0;
1212: nonzerorow += (n>0);
1213: for (j=0; j<n; j++) {
1214: sum1 += v[jrow]*x[8*idx[jrow]];
1215: sum2 += v[jrow]*x[8*idx[jrow]+1];
1216: sum3 += v[jrow]*x[8*idx[jrow]+2];
1217: sum4 += v[jrow]*x[8*idx[jrow]+3];
1218: sum5 += v[jrow]*x[8*idx[jrow]+4];
1219: sum6 += v[jrow]*x[8*idx[jrow]+5];
1220: sum7 += v[jrow]*x[8*idx[jrow]+6];
1221: sum8 += v[jrow]*x[8*idx[jrow]+7];
1222: jrow++;
1223: }
1224: y[8*i] = sum1;
1225: y[8*i+1] = sum2;
1226: y[8*i+2] = sum3;
1227: y[8*i+3] = sum4;
1228: y[8*i+4] = sum5;
1229: y[8*i+5] = sum6;
1230: y[8*i+6] = sum7;
1231: y[8*i+7] = sum8;
1232: }
1234: PetscLogFlops(16*a->nz - 8*nonzerorow);
1235: VecRestoreArray(xx,&x);
1236: VecRestoreArray(yy,&y);
1237: return(0);
1238: }
1242: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1243: {
1244: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1245: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1246: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1248: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1251: VecSet(yy,zero);
1252: VecGetArray(xx,&x);
1253: VecGetArray(yy,&y);
1255: for (i=0; i<m; i++) {
1256: idx = a->j + a->i[i] ;
1257: v = a->a + a->i[i] ;
1258: n = a->i[i+1] - a->i[i];
1259: alpha1 = x[8*i];
1260: alpha2 = x[8*i+1];
1261: alpha3 = x[8*i+2];
1262: alpha4 = x[8*i+3];
1263: alpha5 = x[8*i+4];
1264: alpha6 = x[8*i+5];
1265: alpha7 = x[8*i+6];
1266: alpha8 = x[8*i+7];
1267: while (n-->0) {
1268: y[8*(*idx)] += alpha1*(*v);
1269: y[8*(*idx)+1] += alpha2*(*v);
1270: y[8*(*idx)+2] += alpha3*(*v);
1271: y[8*(*idx)+3] += alpha4*(*v);
1272: y[8*(*idx)+4] += alpha5*(*v);
1273: y[8*(*idx)+5] += alpha6*(*v);
1274: y[8*(*idx)+6] += alpha7*(*v);
1275: y[8*(*idx)+7] += alpha8*(*v);
1276: idx++; v++;
1277: }
1278: }
1279: PetscLogFlops(16*a->nz);
1280: VecRestoreArray(xx,&x);
1281: VecRestoreArray(yy,&y);
1282: return(0);
1283: }
1287: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1288: {
1289: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1290: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1291: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1293: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1294: PetscInt n,i,jrow,j;
1297: if (yy != zz) {VecCopy(yy,zz);}
1298: VecGetArray(xx,&x);
1299: VecGetArray(zz,&y);
1300: idx = a->j;
1301: v = a->a;
1302: ii = a->i;
1304: for (i=0; i<m; i++) {
1305: jrow = ii[i];
1306: n = ii[i+1] - jrow;
1307: sum1 = 0.0;
1308: sum2 = 0.0;
1309: sum3 = 0.0;
1310: sum4 = 0.0;
1311: sum5 = 0.0;
1312: sum6 = 0.0;
1313: sum7 = 0.0;
1314: sum8 = 0.0;
1315: for (j=0; j<n; j++) {
1316: sum1 += v[jrow]*x[8*idx[jrow]];
1317: sum2 += v[jrow]*x[8*idx[jrow]+1];
1318: sum3 += v[jrow]*x[8*idx[jrow]+2];
1319: sum4 += v[jrow]*x[8*idx[jrow]+3];
1320: sum5 += v[jrow]*x[8*idx[jrow]+4];
1321: sum6 += v[jrow]*x[8*idx[jrow]+5];
1322: sum7 += v[jrow]*x[8*idx[jrow]+6];
1323: sum8 += v[jrow]*x[8*idx[jrow]+7];
1324: jrow++;
1325: }
1326: y[8*i] += sum1;
1327: y[8*i+1] += sum2;
1328: y[8*i+2] += sum3;
1329: y[8*i+3] += sum4;
1330: y[8*i+4] += sum5;
1331: y[8*i+5] += sum6;
1332: y[8*i+6] += sum7;
1333: y[8*i+7] += sum8;
1334: }
1336: PetscLogFlops(16*a->nz);
1337: VecRestoreArray(xx,&x);
1338: VecRestoreArray(zz,&y);
1339: return(0);
1340: }
1344: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1345: {
1346: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1347: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1348: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1350: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1353: if (yy != zz) {VecCopy(yy,zz);}
1354: VecGetArray(xx,&x);
1355: VecGetArray(zz,&y);
1356: for (i=0; i<m; i++) {
1357: idx = a->j + a->i[i] ;
1358: v = a->a + a->i[i] ;
1359: n = a->i[i+1] - a->i[i];
1360: alpha1 = x[8*i];
1361: alpha2 = x[8*i+1];
1362: alpha3 = x[8*i+2];
1363: alpha4 = x[8*i+3];
1364: alpha5 = x[8*i+4];
1365: alpha6 = x[8*i+5];
1366: alpha7 = x[8*i+6];
1367: alpha8 = x[8*i+7];
1368: while (n-->0) {
1369: y[8*(*idx)] += alpha1*(*v);
1370: y[8*(*idx)+1] += alpha2*(*v);
1371: y[8*(*idx)+2] += alpha3*(*v);
1372: y[8*(*idx)+3] += alpha4*(*v);
1373: y[8*(*idx)+4] += alpha5*(*v);
1374: y[8*(*idx)+5] += alpha6*(*v);
1375: y[8*(*idx)+6] += alpha7*(*v);
1376: y[8*(*idx)+7] += alpha8*(*v);
1377: idx++; v++;
1378: }
1379: }
1380: PetscLogFlops(16*a->nz);
1381: VecRestoreArray(xx,&x);
1382: VecRestoreArray(zz,&y);
1383: return(0);
1384: }
1386: /* ------------------------------------------------------------------------------*/
1389: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1390: {
1391: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1392: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1393: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1395: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1396: PetscInt n,i,jrow,j;
1399: VecGetArray(xx,&x);
1400: VecGetArray(yy,&y);
1401: idx = a->j;
1402: v = a->a;
1403: ii = a->i;
1405: for (i=0; i<m; i++) {
1406: jrow = ii[i];
1407: n = ii[i+1] - jrow;
1408: sum1 = 0.0;
1409: sum2 = 0.0;
1410: sum3 = 0.0;
1411: sum4 = 0.0;
1412: sum5 = 0.0;
1413: sum6 = 0.0;
1414: sum7 = 0.0;
1415: sum8 = 0.0;
1416: sum9 = 0.0;
1417: nonzerorow += (n>0);
1418: for (j=0; j<n; j++) {
1419: sum1 += v[jrow]*x[9*idx[jrow]];
1420: sum2 += v[jrow]*x[9*idx[jrow]+1];
1421: sum3 += v[jrow]*x[9*idx[jrow]+2];
1422: sum4 += v[jrow]*x[9*idx[jrow]+3];
1423: sum5 += v[jrow]*x[9*idx[jrow]+4];
1424: sum6 += v[jrow]*x[9*idx[jrow]+5];
1425: sum7 += v[jrow]*x[9*idx[jrow]+6];
1426: sum8 += v[jrow]*x[9*idx[jrow]+7];
1427: sum9 += v[jrow]*x[9*idx[jrow]+8];
1428: jrow++;
1429: }
1430: y[9*i] = sum1;
1431: y[9*i+1] = sum2;
1432: y[9*i+2] = sum3;
1433: y[9*i+3] = sum4;
1434: y[9*i+4] = sum5;
1435: y[9*i+5] = sum6;
1436: y[9*i+6] = sum7;
1437: y[9*i+7] = sum8;
1438: y[9*i+8] = sum9;
1439: }
1441: PetscLogFlops(18*a->nz - 9*nonzerorow);
1442: VecRestoreArray(xx,&x);
1443: VecRestoreArray(yy,&y);
1444: return(0);
1445: }
1447: /* ------------------------------------------------------------------------------*/
1451: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1452: {
1453: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1454: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1455: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1457: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1460: VecSet(yy,zero);
1461: VecGetArray(xx,&x);
1462: VecGetArray(yy,&y);
1464: for (i=0; i<m; i++) {
1465: idx = a->j + a->i[i] ;
1466: v = a->a + a->i[i] ;
1467: n = a->i[i+1] - a->i[i];
1468: alpha1 = x[9*i];
1469: alpha2 = x[9*i+1];
1470: alpha3 = x[9*i+2];
1471: alpha4 = x[9*i+3];
1472: alpha5 = x[9*i+4];
1473: alpha6 = x[9*i+5];
1474: alpha7 = x[9*i+6];
1475: alpha8 = x[9*i+7];
1476: alpha9 = x[9*i+8];
1477: while (n-->0) {
1478: y[9*(*idx)] += alpha1*(*v);
1479: y[9*(*idx)+1] += alpha2*(*v);
1480: y[9*(*idx)+2] += alpha3*(*v);
1481: y[9*(*idx)+3] += alpha4*(*v);
1482: y[9*(*idx)+4] += alpha5*(*v);
1483: y[9*(*idx)+5] += alpha6*(*v);
1484: y[9*(*idx)+6] += alpha7*(*v);
1485: y[9*(*idx)+7] += alpha8*(*v);
1486: y[9*(*idx)+8] += alpha9*(*v);
1487: idx++; v++;
1488: }
1489: }
1490: PetscLogFlops(18*a->nz);
1491: VecRestoreArray(xx,&x);
1492: VecRestoreArray(yy,&y);
1493: return(0);
1494: }
1498: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1499: {
1500: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1501: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1502: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1504: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1505: PetscInt n,i,jrow,j;
1508: if (yy != zz) {VecCopy(yy,zz);}
1509: VecGetArray(xx,&x);
1510: VecGetArray(zz,&y);
1511: idx = a->j;
1512: v = a->a;
1513: ii = a->i;
1515: for (i=0; i<m; i++) {
1516: jrow = ii[i];
1517: n = ii[i+1] - jrow;
1518: sum1 = 0.0;
1519: sum2 = 0.0;
1520: sum3 = 0.0;
1521: sum4 = 0.0;
1522: sum5 = 0.0;
1523: sum6 = 0.0;
1524: sum7 = 0.0;
1525: sum8 = 0.0;
1526: sum9 = 0.0;
1527: for (j=0; j<n; j++) {
1528: sum1 += v[jrow]*x[9*idx[jrow]];
1529: sum2 += v[jrow]*x[9*idx[jrow]+1];
1530: sum3 += v[jrow]*x[9*idx[jrow]+2];
1531: sum4 += v[jrow]*x[9*idx[jrow]+3];
1532: sum5 += v[jrow]*x[9*idx[jrow]+4];
1533: sum6 += v[jrow]*x[9*idx[jrow]+5];
1534: sum7 += v[jrow]*x[9*idx[jrow]+6];
1535: sum8 += v[jrow]*x[9*idx[jrow]+7];
1536: sum9 += v[jrow]*x[9*idx[jrow]+8];
1537: jrow++;
1538: }
1539: y[9*i] += sum1;
1540: y[9*i+1] += sum2;
1541: y[9*i+2] += sum3;
1542: y[9*i+3] += sum4;
1543: y[9*i+4] += sum5;
1544: y[9*i+5] += sum6;
1545: y[9*i+6] += sum7;
1546: y[9*i+7] += sum8;
1547: y[9*i+8] += sum9;
1548: }
1550: PetscLogFlops(18*a->nz);
1551: VecRestoreArray(xx,&x);
1552: VecRestoreArray(zz,&y);
1553: return(0);
1554: }
1558: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1559: {
1560: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1561: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1562: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1564: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1567: if (yy != zz) {VecCopy(yy,zz);}
1568: VecGetArray(xx,&x);
1569: VecGetArray(zz,&y);
1570: for (i=0; i<m; i++) {
1571: idx = a->j + a->i[i] ;
1572: v = a->a + a->i[i] ;
1573: n = a->i[i+1] - a->i[i];
1574: alpha1 = x[9*i];
1575: alpha2 = x[9*i+1];
1576: alpha3 = x[9*i+2];
1577: alpha4 = x[9*i+3];
1578: alpha5 = x[9*i+4];
1579: alpha6 = x[9*i+5];
1580: alpha7 = x[9*i+6];
1581: alpha8 = x[9*i+7];
1582: alpha9 = x[9*i+8];
1583: while (n-->0) {
1584: y[9*(*idx)] += alpha1*(*v);
1585: y[9*(*idx)+1] += alpha2*(*v);
1586: y[9*(*idx)+2] += alpha3*(*v);
1587: y[9*(*idx)+3] += alpha4*(*v);
1588: y[9*(*idx)+4] += alpha5*(*v);
1589: y[9*(*idx)+5] += alpha6*(*v);
1590: y[9*(*idx)+6] += alpha7*(*v);
1591: y[9*(*idx)+7] += alpha8*(*v);
1592: y[9*(*idx)+8] += alpha9*(*v);
1593: idx++; v++;
1594: }
1595: }
1596: PetscLogFlops(18*a->nz);
1597: VecRestoreArray(xx,&x);
1598: VecRestoreArray(zz,&y);
1599: return(0);
1600: }
1601: /*--------------------------------------------------------------------------------------------*/
1604: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1605: {
1606: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1607: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1608: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1610: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1611: PetscInt n,i,jrow,j;
1614: VecGetArray(xx,&x);
1615: VecGetArray(yy,&y);
1616: idx = a->j;
1617: v = a->a;
1618: ii = a->i;
1620: for (i=0; i<m; i++) {
1621: jrow = ii[i];
1622: n = ii[i+1] - jrow;
1623: sum1 = 0.0;
1624: sum2 = 0.0;
1625: sum3 = 0.0;
1626: sum4 = 0.0;
1627: sum5 = 0.0;
1628: sum6 = 0.0;
1629: sum7 = 0.0;
1630: sum8 = 0.0;
1631: sum9 = 0.0;
1632: sum10 = 0.0;
1633: nonzerorow += (n>0);
1634: for (j=0; j<n; j++) {
1635: sum1 += v[jrow]*x[10*idx[jrow]];
1636: sum2 += v[jrow]*x[10*idx[jrow]+1];
1637: sum3 += v[jrow]*x[10*idx[jrow]+2];
1638: sum4 += v[jrow]*x[10*idx[jrow]+3];
1639: sum5 += v[jrow]*x[10*idx[jrow]+4];
1640: sum6 += v[jrow]*x[10*idx[jrow]+5];
1641: sum7 += v[jrow]*x[10*idx[jrow]+6];
1642: sum8 += v[jrow]*x[10*idx[jrow]+7];
1643: sum9 += v[jrow]*x[10*idx[jrow]+8];
1644: sum10 += v[jrow]*x[10*idx[jrow]+9];
1645: jrow++;
1646: }
1647: y[10*i] = sum1;
1648: y[10*i+1] = sum2;
1649: y[10*i+2] = sum3;
1650: y[10*i+3] = sum4;
1651: y[10*i+4] = sum5;
1652: y[10*i+5] = sum6;
1653: y[10*i+6] = sum7;
1654: y[10*i+7] = sum8;
1655: y[10*i+8] = sum9;
1656: y[10*i+9] = sum10;
1657: }
1659: PetscLogFlops(20*a->nz - 10*nonzerorow);
1660: VecRestoreArray(xx,&x);
1661: VecRestoreArray(yy,&y);
1662: return(0);
1663: }
1667: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1668: {
1669: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1670: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1671: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1673: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1674: PetscInt n,i,jrow,j;
1677: if (yy != zz) {VecCopy(yy,zz);}
1678: VecGetArray(xx,&x);
1679: VecGetArray(zz,&y);
1680: idx = a->j;
1681: v = a->a;
1682: ii = a->i;
1684: for (i=0; i<m; i++) {
1685: jrow = ii[i];
1686: n = ii[i+1] - jrow;
1687: sum1 = 0.0;
1688: sum2 = 0.0;
1689: sum3 = 0.0;
1690: sum4 = 0.0;
1691: sum5 = 0.0;
1692: sum6 = 0.0;
1693: sum7 = 0.0;
1694: sum8 = 0.0;
1695: sum9 = 0.0;
1696: sum10 = 0.0;
1697: for (j=0; j<n; j++) {
1698: sum1 += v[jrow]*x[10*idx[jrow]];
1699: sum2 += v[jrow]*x[10*idx[jrow]+1];
1700: sum3 += v[jrow]*x[10*idx[jrow]+2];
1701: sum4 += v[jrow]*x[10*idx[jrow]+3];
1702: sum5 += v[jrow]*x[10*idx[jrow]+4];
1703: sum6 += v[jrow]*x[10*idx[jrow]+5];
1704: sum7 += v[jrow]*x[10*idx[jrow]+6];
1705: sum8 += v[jrow]*x[10*idx[jrow]+7];
1706: sum9 += v[jrow]*x[10*idx[jrow]+8];
1707: sum10 += v[jrow]*x[10*idx[jrow]+9];
1708: jrow++;
1709: }
1710: y[10*i] += sum1;
1711: y[10*i+1] += sum2;
1712: y[10*i+2] += sum3;
1713: y[10*i+3] += sum4;
1714: y[10*i+4] += sum5;
1715: y[10*i+5] += sum6;
1716: y[10*i+6] += sum7;
1717: y[10*i+7] += sum8;
1718: y[10*i+8] += sum9;
1719: y[10*i+9] += sum10;
1720: }
1722: PetscLogFlops(20*a->nz);
1723: VecRestoreArray(xx,&x);
1724: VecRestoreArray(yy,&y);
1725: return(0);
1726: }
1730: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1731: {
1732: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1733: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1734: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1736: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1739: VecSet(yy,zero);
1740: VecGetArray(xx,&x);
1741: VecGetArray(yy,&y);
1743: for (i=0; i<m; i++) {
1744: idx = a->j + a->i[i] ;
1745: v = a->a + a->i[i] ;
1746: n = a->i[i+1] - a->i[i];
1747: alpha1 = x[10*i];
1748: alpha2 = x[10*i+1];
1749: alpha3 = x[10*i+2];
1750: alpha4 = x[10*i+3];
1751: alpha5 = x[10*i+4];
1752: alpha6 = x[10*i+5];
1753: alpha7 = x[10*i+6];
1754: alpha8 = x[10*i+7];
1755: alpha9 = x[10*i+8];
1756: alpha10 = x[10*i+9];
1757: while (n-->0) {
1758: y[10*(*idx)] += alpha1*(*v);
1759: y[10*(*idx)+1] += alpha2*(*v);
1760: y[10*(*idx)+2] += alpha3*(*v);
1761: y[10*(*idx)+3] += alpha4*(*v);
1762: y[10*(*idx)+4] += alpha5*(*v);
1763: y[10*(*idx)+5] += alpha6*(*v);
1764: y[10*(*idx)+6] += alpha7*(*v);
1765: y[10*(*idx)+7] += alpha8*(*v);
1766: y[10*(*idx)+8] += alpha9*(*v);
1767: y[10*(*idx)+9] += alpha10*(*v);
1768: idx++; v++;
1769: }
1770: }
1771: PetscLogFlops(20*a->nz);
1772: VecRestoreArray(xx,&x);
1773: VecRestoreArray(yy,&y);
1774: return(0);
1775: }
1779: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1780: {
1781: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1782: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1783: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1785: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1788: if (yy != zz) {VecCopy(yy,zz);}
1789: VecGetArray(xx,&x);
1790: VecGetArray(zz,&y);
1791: for (i=0; i<m; i++) {
1792: idx = a->j + a->i[i] ;
1793: v = a->a + a->i[i] ;
1794: n = a->i[i+1] - a->i[i];
1795: alpha1 = x[10*i];
1796: alpha2 = x[10*i+1];
1797: alpha3 = x[10*i+2];
1798: alpha4 = x[10*i+3];
1799: alpha5 = x[10*i+4];
1800: alpha6 = x[10*i+5];
1801: alpha7 = x[10*i+6];
1802: alpha8 = x[10*i+7];
1803: alpha9 = x[10*i+8];
1804: alpha10 = x[10*i+9];
1805: while (n-->0) {
1806: y[10*(*idx)] += alpha1*(*v);
1807: y[10*(*idx)+1] += alpha2*(*v);
1808: y[10*(*idx)+2] += alpha3*(*v);
1809: y[10*(*idx)+3] += alpha4*(*v);
1810: y[10*(*idx)+4] += alpha5*(*v);
1811: y[10*(*idx)+5] += alpha6*(*v);
1812: y[10*(*idx)+6] += alpha7*(*v);
1813: y[10*(*idx)+7] += alpha8*(*v);
1814: y[10*(*idx)+8] += alpha9*(*v);
1815: y[10*(*idx)+9] += alpha10*(*v);
1816: idx++; v++;
1817: }
1818: }
1819: PetscLogFlops(20*a->nz);
1820: VecRestoreArray(xx,&x);
1821: VecRestoreArray(zz,&y);
1822: return(0);
1823: }
1826: /*--------------------------------------------------------------------------------------------*/
1829: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1830: {
1831: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1832: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1833: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1834: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1836: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1837: PetscInt n,i,jrow,j;
1840: VecGetArray(xx,&x);
1841: VecGetArray(yy,&y);
1842: idx = a->j;
1843: v = a->a;
1844: ii = a->i;
1846: for (i=0; i<m; i++) {
1847: jrow = ii[i];
1848: n = ii[i+1] - jrow;
1849: sum1 = 0.0;
1850: sum2 = 0.0;
1851: sum3 = 0.0;
1852: sum4 = 0.0;
1853: sum5 = 0.0;
1854: sum6 = 0.0;
1855: sum7 = 0.0;
1856: sum8 = 0.0;
1857: sum9 = 0.0;
1858: sum10 = 0.0;
1859: sum11 = 0.0;
1860: sum12 = 0.0;
1861: sum13 = 0.0;
1862: sum14 = 0.0;
1863: sum15 = 0.0;
1864: sum16 = 0.0;
1865: nonzerorow += (n>0);
1866: for (j=0; j<n; j++) {
1867: sum1 += v[jrow]*x[16*idx[jrow]];
1868: sum2 += v[jrow]*x[16*idx[jrow]+1];
1869: sum3 += v[jrow]*x[16*idx[jrow]+2];
1870: sum4 += v[jrow]*x[16*idx[jrow]+3];
1871: sum5 += v[jrow]*x[16*idx[jrow]+4];
1872: sum6 += v[jrow]*x[16*idx[jrow]+5];
1873: sum7 += v[jrow]*x[16*idx[jrow]+6];
1874: sum8 += v[jrow]*x[16*idx[jrow]+7];
1875: sum9 += v[jrow]*x[16*idx[jrow]+8];
1876: sum10 += v[jrow]*x[16*idx[jrow]+9];
1877: sum11 += v[jrow]*x[16*idx[jrow]+10];
1878: sum12 += v[jrow]*x[16*idx[jrow]+11];
1879: sum13 += v[jrow]*x[16*idx[jrow]+12];
1880: sum14 += v[jrow]*x[16*idx[jrow]+13];
1881: sum15 += v[jrow]*x[16*idx[jrow]+14];
1882: sum16 += v[jrow]*x[16*idx[jrow]+15];
1883: jrow++;
1884: }
1885: y[16*i] = sum1;
1886: y[16*i+1] = sum2;
1887: y[16*i+2] = sum3;
1888: y[16*i+3] = sum4;
1889: y[16*i+4] = sum5;
1890: y[16*i+5] = sum6;
1891: y[16*i+6] = sum7;
1892: y[16*i+7] = sum8;
1893: y[16*i+8] = sum9;
1894: y[16*i+9] = sum10;
1895: y[16*i+10] = sum11;
1896: y[16*i+11] = sum12;
1897: y[16*i+12] = sum13;
1898: y[16*i+13] = sum14;
1899: y[16*i+14] = sum15;
1900: y[16*i+15] = sum16;
1901: }
1903: PetscLogFlops(32*a->nz - 16*nonzerorow);
1904: VecRestoreArray(xx,&x);
1905: VecRestoreArray(yy,&y);
1906: return(0);
1907: }
1911: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1912: {
1913: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1914: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1915: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1916: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1918: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1921: VecSet(yy,zero);
1922: VecGetArray(xx,&x);
1923: VecGetArray(yy,&y);
1925: for (i=0; i<m; i++) {
1926: idx = a->j + a->i[i] ;
1927: v = a->a + a->i[i] ;
1928: n = a->i[i+1] - a->i[i];
1929: alpha1 = x[16*i];
1930: alpha2 = x[16*i+1];
1931: alpha3 = x[16*i+2];
1932: alpha4 = x[16*i+3];
1933: alpha5 = x[16*i+4];
1934: alpha6 = x[16*i+5];
1935: alpha7 = x[16*i+6];
1936: alpha8 = x[16*i+7];
1937: alpha9 = x[16*i+8];
1938: alpha10 = x[16*i+9];
1939: alpha11 = x[16*i+10];
1940: alpha12 = x[16*i+11];
1941: alpha13 = x[16*i+12];
1942: alpha14 = x[16*i+13];
1943: alpha15 = x[16*i+14];
1944: alpha16 = x[16*i+15];
1945: while (n-->0) {
1946: y[16*(*idx)] += alpha1*(*v);
1947: y[16*(*idx)+1] += alpha2*(*v);
1948: y[16*(*idx)+2] += alpha3*(*v);
1949: y[16*(*idx)+3] += alpha4*(*v);
1950: y[16*(*idx)+4] += alpha5*(*v);
1951: y[16*(*idx)+5] += alpha6*(*v);
1952: y[16*(*idx)+6] += alpha7*(*v);
1953: y[16*(*idx)+7] += alpha8*(*v);
1954: y[16*(*idx)+8] += alpha9*(*v);
1955: y[16*(*idx)+9] += alpha10*(*v);
1956: y[16*(*idx)+10] += alpha11*(*v);
1957: y[16*(*idx)+11] += alpha12*(*v);
1958: y[16*(*idx)+12] += alpha13*(*v);
1959: y[16*(*idx)+13] += alpha14*(*v);
1960: y[16*(*idx)+14] += alpha15*(*v);
1961: y[16*(*idx)+15] += alpha16*(*v);
1962: idx++; v++;
1963: }
1964: }
1965: PetscLogFlops(32*a->nz);
1966: VecRestoreArray(xx,&x);
1967: VecRestoreArray(yy,&y);
1968: return(0);
1969: }
1973: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1974: {
1975: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1976: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1977: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1978: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1980: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1981: PetscInt n,i,jrow,j;
1984: if (yy != zz) {VecCopy(yy,zz);}
1985: VecGetArray(xx,&x);
1986: VecGetArray(zz,&y);
1987: idx = a->j;
1988: v = a->a;
1989: ii = a->i;
1991: for (i=0; i<m; i++) {
1992: jrow = ii[i];
1993: n = ii[i+1] - jrow;
1994: sum1 = 0.0;
1995: sum2 = 0.0;
1996: sum3 = 0.0;
1997: sum4 = 0.0;
1998: sum5 = 0.0;
1999: sum6 = 0.0;
2000: sum7 = 0.0;
2001: sum8 = 0.0;
2002: sum9 = 0.0;
2003: sum10 = 0.0;
2004: sum11 = 0.0;
2005: sum12 = 0.0;
2006: sum13 = 0.0;
2007: sum14 = 0.0;
2008: sum15 = 0.0;
2009: sum16 = 0.0;
2010: for (j=0; j<n; j++) {
2011: sum1 += v[jrow]*x[16*idx[jrow]];
2012: sum2 += v[jrow]*x[16*idx[jrow]+1];
2013: sum3 += v[jrow]*x[16*idx[jrow]+2];
2014: sum4 += v[jrow]*x[16*idx[jrow]+3];
2015: sum5 += v[jrow]*x[16*idx[jrow]+4];
2016: sum6 += v[jrow]*x[16*idx[jrow]+5];
2017: sum7 += v[jrow]*x[16*idx[jrow]+6];
2018: sum8 += v[jrow]*x[16*idx[jrow]+7];
2019: sum9 += v[jrow]*x[16*idx[jrow]+8];
2020: sum10 += v[jrow]*x[16*idx[jrow]+9];
2021: sum11 += v[jrow]*x[16*idx[jrow]+10];
2022: sum12 += v[jrow]*x[16*idx[jrow]+11];
2023: sum13 += v[jrow]*x[16*idx[jrow]+12];
2024: sum14 += v[jrow]*x[16*idx[jrow]+13];
2025: sum15 += v[jrow]*x[16*idx[jrow]+14];
2026: sum16 += v[jrow]*x[16*idx[jrow]+15];
2027: jrow++;
2028: }
2029: y[16*i] += sum1;
2030: y[16*i+1] += sum2;
2031: y[16*i+2] += sum3;
2032: y[16*i+3] += sum4;
2033: y[16*i+4] += sum5;
2034: y[16*i+5] += sum6;
2035: y[16*i+6] += sum7;
2036: y[16*i+7] += sum8;
2037: y[16*i+8] += sum9;
2038: y[16*i+9] += sum10;
2039: y[16*i+10] += sum11;
2040: y[16*i+11] += sum12;
2041: y[16*i+12] += sum13;
2042: y[16*i+13] += sum14;
2043: y[16*i+14] += sum15;
2044: y[16*i+15] += sum16;
2045: }
2047: PetscLogFlops(32*a->nz);
2048: VecRestoreArray(xx,&x);
2049: VecRestoreArray(zz,&y);
2050: return(0);
2051: }
2055: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2056: {
2057: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2058: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2059: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2060: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2062: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
2065: if (yy != zz) {VecCopy(yy,zz);}
2066: VecGetArray(xx,&x);
2067: VecGetArray(zz,&y);
2068: for (i=0; i<m; i++) {
2069: idx = a->j + a->i[i] ;
2070: v = a->a + a->i[i] ;
2071: n = a->i[i+1] - a->i[i];
2072: alpha1 = x[16*i];
2073: alpha2 = x[16*i+1];
2074: alpha3 = x[16*i+2];
2075: alpha4 = x[16*i+3];
2076: alpha5 = x[16*i+4];
2077: alpha6 = x[16*i+5];
2078: alpha7 = x[16*i+6];
2079: alpha8 = x[16*i+7];
2080: alpha9 = x[16*i+8];
2081: alpha10 = x[16*i+9];
2082: alpha11 = x[16*i+10];
2083: alpha12 = x[16*i+11];
2084: alpha13 = x[16*i+12];
2085: alpha14 = x[16*i+13];
2086: alpha15 = x[16*i+14];
2087: alpha16 = x[16*i+15];
2088: while (n-->0) {
2089: y[16*(*idx)] += alpha1*(*v);
2090: y[16*(*idx)+1] += alpha2*(*v);
2091: y[16*(*idx)+2] += alpha3*(*v);
2092: y[16*(*idx)+3] += alpha4*(*v);
2093: y[16*(*idx)+4] += alpha5*(*v);
2094: y[16*(*idx)+5] += alpha6*(*v);
2095: y[16*(*idx)+6] += alpha7*(*v);
2096: y[16*(*idx)+7] += alpha8*(*v);
2097: y[16*(*idx)+8] += alpha9*(*v);
2098: y[16*(*idx)+9] += alpha10*(*v);
2099: y[16*(*idx)+10] += alpha11*(*v);
2100: y[16*(*idx)+11] += alpha12*(*v);
2101: y[16*(*idx)+12] += alpha13*(*v);
2102: y[16*(*idx)+13] += alpha14*(*v);
2103: y[16*(*idx)+14] += alpha15*(*v);
2104: y[16*(*idx)+15] += alpha16*(*v);
2105: idx++; v++;
2106: }
2107: }
2108: PetscLogFlops(32*a->nz);
2109: VecRestoreArray(xx,&x);
2110: VecRestoreArray(zz,&y);
2111: return(0);
2112: }
2114: /*--------------------------------------------------------------------------------------------*/
2117: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2118: {
2119: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2120: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2121: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2122: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2124: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
2125: PetscInt n,i,jrow,j;
2128: VecGetArray(xx,&x);
2129: VecGetArray(yy,&y);
2130: idx = a->j;
2131: v = a->a;
2132: ii = a->i;
2134: for (i=0; i<m; i++) {
2135: jrow = ii[i];
2136: n = ii[i+1] - jrow;
2137: sum1 = 0.0;
2138: sum2 = 0.0;
2139: sum3 = 0.0;
2140: sum4 = 0.0;
2141: sum5 = 0.0;
2142: sum6 = 0.0;
2143: sum7 = 0.0;
2144: sum8 = 0.0;
2145: sum9 = 0.0;
2146: sum10 = 0.0;
2147: sum11 = 0.0;
2148: sum12 = 0.0;
2149: sum13 = 0.0;
2150: sum14 = 0.0;
2151: sum15 = 0.0;
2152: sum16 = 0.0;
2153: sum17 = 0.0;
2154: sum18 = 0.0;
2155: nonzerorow += (n>0);
2156: for (j=0; j<n; j++) {
2157: sum1 += v[jrow]*x[18*idx[jrow]];
2158: sum2 += v[jrow]*x[18*idx[jrow]+1];
2159: sum3 += v[jrow]*x[18*idx[jrow]+2];
2160: sum4 += v[jrow]*x[18*idx[jrow]+3];
2161: sum5 += v[jrow]*x[18*idx[jrow]+4];
2162: sum6 += v[jrow]*x[18*idx[jrow]+5];
2163: sum7 += v[jrow]*x[18*idx[jrow]+6];
2164: sum8 += v[jrow]*x[18*idx[jrow]+7];
2165: sum9 += v[jrow]*x[18*idx[jrow]+8];
2166: sum10 += v[jrow]*x[18*idx[jrow]+9];
2167: sum11 += v[jrow]*x[18*idx[jrow]+10];
2168: sum12 += v[jrow]*x[18*idx[jrow]+11];
2169: sum13 += v[jrow]*x[18*idx[jrow]+12];
2170: sum14 += v[jrow]*x[18*idx[jrow]+13];
2171: sum15 += v[jrow]*x[18*idx[jrow]+14];
2172: sum16 += v[jrow]*x[18*idx[jrow]+15];
2173: sum17 += v[jrow]*x[18*idx[jrow]+16];
2174: sum18 += v[jrow]*x[18*idx[jrow]+17];
2175: jrow++;
2176: }
2177: y[18*i] = sum1;
2178: y[18*i+1] = sum2;
2179: y[18*i+2] = sum3;
2180: y[18*i+3] = sum4;
2181: y[18*i+4] = sum5;
2182: y[18*i+5] = sum6;
2183: y[18*i+6] = sum7;
2184: y[18*i+7] = sum8;
2185: y[18*i+8] = sum9;
2186: y[18*i+9] = sum10;
2187: y[18*i+10] = sum11;
2188: y[18*i+11] = sum12;
2189: y[18*i+12] = sum13;
2190: y[18*i+13] = sum14;
2191: y[18*i+14] = sum15;
2192: y[18*i+15] = sum16;
2193: y[18*i+16] = sum17;
2194: y[18*i+17] = sum18;
2195: }
2197: PetscLogFlops(36*a->nz - 18*nonzerorow);
2198: VecRestoreArray(xx,&x);
2199: VecRestoreArray(yy,&y);
2200: return(0);
2201: }
2205: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2206: {
2207: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2208: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2209: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2210: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2212: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
2215: VecSet(yy,zero);
2216: VecGetArray(xx,&x);
2217: VecGetArray(yy,&y);
2219: for (i=0; i<m; i++) {
2220: idx = a->j + a->i[i] ;
2221: v = a->a + a->i[i] ;
2222: n = a->i[i+1] - a->i[i];
2223: alpha1 = x[18*i];
2224: alpha2 = x[18*i+1];
2225: alpha3 = x[18*i+2];
2226: alpha4 = x[18*i+3];
2227: alpha5 = x[18*i+4];
2228: alpha6 = x[18*i+5];
2229: alpha7 = x[18*i+6];
2230: alpha8 = x[18*i+7];
2231: alpha9 = x[18*i+8];
2232: alpha10 = x[18*i+9];
2233: alpha11 = x[18*i+10];
2234: alpha12 = x[18*i+11];
2235: alpha13 = x[18*i+12];
2236: alpha14 = x[18*i+13];
2237: alpha15 = x[18*i+14];
2238: alpha16 = x[18*i+15];
2239: alpha17 = x[18*i+16];
2240: alpha18 = x[18*i+17];
2241: while (n-->0) {
2242: y[18*(*idx)] += alpha1*(*v);
2243: y[18*(*idx)+1] += alpha2*(*v);
2244: y[18*(*idx)+2] += alpha3*(*v);
2245: y[18*(*idx)+3] += alpha4*(*v);
2246: y[18*(*idx)+4] += alpha5*(*v);
2247: y[18*(*idx)+5] += alpha6*(*v);
2248: y[18*(*idx)+6] += alpha7*(*v);
2249: y[18*(*idx)+7] += alpha8*(*v);
2250: y[18*(*idx)+8] += alpha9*(*v);
2251: y[18*(*idx)+9] += alpha10*(*v);
2252: y[18*(*idx)+10] += alpha11*(*v);
2253: y[18*(*idx)+11] += alpha12*(*v);
2254: y[18*(*idx)+12] += alpha13*(*v);
2255: y[18*(*idx)+13] += alpha14*(*v);
2256: y[18*(*idx)+14] += alpha15*(*v);
2257: y[18*(*idx)+15] += alpha16*(*v);
2258: y[18*(*idx)+16] += alpha17*(*v);
2259: y[18*(*idx)+17] += alpha18*(*v);
2260: idx++; v++;
2261: }
2262: }
2263: PetscLogFlops(36*a->nz);
2264: VecRestoreArray(xx,&x);
2265: VecRestoreArray(yy,&y);
2266: return(0);
2267: }
2271: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2272: {
2273: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2274: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2275: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2276: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2278: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2279: PetscInt n,i,jrow,j;
2282: if (yy != zz) {VecCopy(yy,zz);}
2283: VecGetArray(xx,&x);
2284: VecGetArray(zz,&y);
2285: idx = a->j;
2286: v = a->a;
2287: ii = a->i;
2289: for (i=0; i<m; i++) {
2290: jrow = ii[i];
2291: n = ii[i+1] - jrow;
2292: sum1 = 0.0;
2293: sum2 = 0.0;
2294: sum3 = 0.0;
2295: sum4 = 0.0;
2296: sum5 = 0.0;
2297: sum6 = 0.0;
2298: sum7 = 0.0;
2299: sum8 = 0.0;
2300: sum9 = 0.0;
2301: sum10 = 0.0;
2302: sum11 = 0.0;
2303: sum12 = 0.0;
2304: sum13 = 0.0;
2305: sum14 = 0.0;
2306: sum15 = 0.0;
2307: sum16 = 0.0;
2308: sum17 = 0.0;
2309: sum18 = 0.0;
2310: for (j=0; j<n; j++) {
2311: sum1 += v[jrow]*x[18*idx[jrow]];
2312: sum2 += v[jrow]*x[18*idx[jrow]+1];
2313: sum3 += v[jrow]*x[18*idx[jrow]+2];
2314: sum4 += v[jrow]*x[18*idx[jrow]+3];
2315: sum5 += v[jrow]*x[18*idx[jrow]+4];
2316: sum6 += v[jrow]*x[18*idx[jrow]+5];
2317: sum7 += v[jrow]*x[18*idx[jrow]+6];
2318: sum8 += v[jrow]*x[18*idx[jrow]+7];
2319: sum9 += v[jrow]*x[18*idx[jrow]+8];
2320: sum10 += v[jrow]*x[18*idx[jrow]+9];
2321: sum11 += v[jrow]*x[18*idx[jrow]+10];
2322: sum12 += v[jrow]*x[18*idx[jrow]+11];
2323: sum13 += v[jrow]*x[18*idx[jrow]+12];
2324: sum14 += v[jrow]*x[18*idx[jrow]+13];
2325: sum15 += v[jrow]*x[18*idx[jrow]+14];
2326: sum16 += v[jrow]*x[18*idx[jrow]+15];
2327: sum17 += v[jrow]*x[18*idx[jrow]+16];
2328: sum18 += v[jrow]*x[18*idx[jrow]+17];
2329: jrow++;
2330: }
2331: y[18*i] += sum1;
2332: y[18*i+1] += sum2;
2333: y[18*i+2] += sum3;
2334: y[18*i+3] += sum4;
2335: y[18*i+4] += sum5;
2336: y[18*i+5] += sum6;
2337: y[18*i+6] += sum7;
2338: y[18*i+7] += sum8;
2339: y[18*i+8] += sum9;
2340: y[18*i+9] += sum10;
2341: y[18*i+10] += sum11;
2342: y[18*i+11] += sum12;
2343: y[18*i+12] += sum13;
2344: y[18*i+13] += sum14;
2345: y[18*i+14] += sum15;
2346: y[18*i+15] += sum16;
2347: y[18*i+16] += sum17;
2348: y[18*i+17] += sum18;
2349: }
2351: PetscLogFlops(36*a->nz);
2352: VecRestoreArray(xx,&x);
2353: VecRestoreArray(zz,&y);
2354: return(0);
2355: }
2359: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2360: {
2361: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2362: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2363: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2364: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2366: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
2369: if (yy != zz) {VecCopy(yy,zz);}
2370: VecGetArray(xx,&x);
2371: VecGetArray(zz,&y);
2372: for (i=0; i<m; i++) {
2373: idx = a->j + a->i[i] ;
2374: v = a->a + a->i[i] ;
2375: n = a->i[i+1] - a->i[i];
2376: alpha1 = x[18*i];
2377: alpha2 = x[18*i+1];
2378: alpha3 = x[18*i+2];
2379: alpha4 = x[18*i+3];
2380: alpha5 = x[18*i+4];
2381: alpha6 = x[18*i+5];
2382: alpha7 = x[18*i+6];
2383: alpha8 = x[18*i+7];
2384: alpha9 = x[18*i+8];
2385: alpha10 = x[18*i+9];
2386: alpha11 = x[18*i+10];
2387: alpha12 = x[18*i+11];
2388: alpha13 = x[18*i+12];
2389: alpha14 = x[18*i+13];
2390: alpha15 = x[18*i+14];
2391: alpha16 = x[18*i+15];
2392: alpha17 = x[18*i+16];
2393: alpha18 = x[18*i+17];
2394: while (n-->0) {
2395: y[18*(*idx)] += alpha1*(*v);
2396: y[18*(*idx)+1] += alpha2*(*v);
2397: y[18*(*idx)+2] += alpha3*(*v);
2398: y[18*(*idx)+3] += alpha4*(*v);
2399: y[18*(*idx)+4] += alpha5*(*v);
2400: y[18*(*idx)+5] += alpha6*(*v);
2401: y[18*(*idx)+6] += alpha7*(*v);
2402: y[18*(*idx)+7] += alpha8*(*v);
2403: y[18*(*idx)+8] += alpha9*(*v);
2404: y[18*(*idx)+9] += alpha10*(*v);
2405: y[18*(*idx)+10] += alpha11*(*v);
2406: y[18*(*idx)+11] += alpha12*(*v);
2407: y[18*(*idx)+12] += alpha13*(*v);
2408: y[18*(*idx)+13] += alpha14*(*v);
2409: y[18*(*idx)+14] += alpha15*(*v);
2410: y[18*(*idx)+15] += alpha16*(*v);
2411: y[18*(*idx)+16] += alpha17*(*v);
2412: y[18*(*idx)+17] += alpha18*(*v);
2413: idx++; v++;
2414: }
2415: }
2416: PetscLogFlops(36*a->nz);
2417: VecRestoreArray(xx,&x);
2418: VecRestoreArray(zz,&y);
2419: return(0);
2420: }
2422: /*===================================================================================*/
2425: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2426: {
2427: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2431: /* start the scatter */
2432: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2433: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2434: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2435: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2436: return(0);
2437: }
2441: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2442: {
2443: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2447: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2448: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2449: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2450: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2451: return(0);
2452: }
2456: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2457: {
2458: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2462: /* start the scatter */
2463: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2464: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2465: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2466: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2467: return(0);
2468: }
2472: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2473: {
2474: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2478: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2479: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2480: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2481: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2482: return(0);
2483: }
2485: /* ----------------------------------------------------------------*/
2488: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2489: {
2490: /* This routine requires testing -- but it's getting better. */
2491: PetscErrorCode ierr;
2492: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2493: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2494: Mat P=pp->AIJ;
2495: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2496: PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2497: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2498: PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof,cn;
2499: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2500: MatScalar *ca;
2503: /* Start timer */
2504: PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);
2506: /* Get ij structure of P^T */
2507: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2509: cn = pn*ppdof;
2510: /* Allocate ci array, arrays for fill computation and */
2511: /* free space for accumulating nonzero column info */
2512: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2513: ci[0] = 0;
2515: /* Work arrays for rows of P^T*A */
2516: PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2517: PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2518: ptasparserow = ptadenserow + an;
2519: denserow = ptasparserow + an;
2520: sparserow = denserow + cn;
2522: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2523: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2524: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2525: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2526: current_space = free_space;
2528: /* Determine symbolic info for each row of C: */
2529: for (i=0;i<pn;i++) {
2530: ptnzi = pti[i+1] - pti[i];
2531: ptJ = ptj + pti[i];
2532: for (dof=0;dof<ppdof;dof++) {
2533: ptanzi = 0;
2534: /* Determine symbolic row of PtA: */
2535: for (j=0;j<ptnzi;j++) {
2536: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2537: arow = ptJ[j]*ppdof + dof;
2538: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2539: anzj = ai[arow+1] - ai[arow];
2540: ajj = aj + ai[arow];
2541: for (k=0;k<anzj;k++) {
2542: if (!ptadenserow[ajj[k]]) {
2543: ptadenserow[ajj[k]] = -1;
2544: ptasparserow[ptanzi++] = ajj[k];
2545: }
2546: }
2547: }
2548: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2549: ptaj = ptasparserow;
2550: cnzi = 0;
2551: for (j=0;j<ptanzi;j++) {
2552: /* Get offset within block of P */
2553: pshift = *ptaj%ppdof;
2554: /* Get block row of P */
2555: prow = (*ptaj++)/ppdof; /* integer division */
2556: /* P has same number of nonzeros per row as the compressed form */
2557: pnzj = pi[prow+1] - pi[prow];
2558: pjj = pj + pi[prow];
2559: for (k=0;k<pnzj;k++) {
2560: /* Locations in C are shifted by the offset within the block */
2561: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2562: if (!denserow[pjj[k]*ppdof+pshift]) {
2563: denserow[pjj[k]*ppdof+pshift] = -1;
2564: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2565: }
2566: }
2567: }
2569: /* sort sparserow */
2570: PetscSortInt(cnzi,sparserow);
2571:
2572: /* If free space is not available, make more free space */
2573: /* Double the amount of total space in the list */
2574: if (current_space->local_remaining<cnzi) {
2575: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
2576: }
2578: /* Copy data into free space, and zero out denserows */
2579: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2580: current_space->array += cnzi;
2581: current_space->local_used += cnzi;
2582: current_space->local_remaining -= cnzi;
2584: for (j=0;j<ptanzi;j++) {
2585: ptadenserow[ptasparserow[j]] = 0;
2586: }
2587: for (j=0;j<cnzi;j++) {
2588: denserow[sparserow[j]] = 0;
2589: }
2590: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2591: /* For now, we will recompute what is needed. */
2592: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2593: }
2594: }
2595: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2596: /* Allocate space for cj, initialize cj, and */
2597: /* destroy list of free space and other temporary array(s) */
2598: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2599: PetscFreeSpaceContiguous(&free_space,cj);
2600: PetscFree(ptadenserow);
2601:
2602: /* Allocate space for ca */
2603: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2604: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2605:
2606: /* put together the new matrix */
2607: MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);
2609: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2610: /* Since these are PETSc arrays, change flags to free them as necessary. */
2611: c = (Mat_SeqAIJ *)((*C)->data);
2612: c->free_a = PETSC_TRUE;
2613: c->free_ij = PETSC_TRUE;
2614: c->nonew = 0;
2616: /* Clean up. */
2617: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2619: PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
2620: return(0);
2621: }
/*
   MatPtAPNumeric_SeqAIJ_SeqMAIJ - Accumulates the numerical values of P^T*A*P into C, where
   P is described by the SeqMAIJ matrix PP (the SeqAIJ matrix pp->AIJ applied to pp->dof
   interleaved components).

   Parameters:
+  A  - matrix whose data is read as Mat_SeqAIJ (row pointers i, column indices j, values a)
.  PP - matrix whose data is read as Mat_SeqMAIJ; only pp->AIJ and pp->dof are used
-  C  - matrix whose data is read as Mat_SeqAIJ; c->i and c->j are only read, c->a is
        zeroed and then accumulated into, and C is assembled before returning

   Returns 0.

   Notes:
   Row i of A*P is built in a dense work row (apa) together with a list of touched columns
   (apj); it is then scattered into every row of C selected by block row i/dof of P.  The
   scatter loop is bounded only by "all of apj was matched", so it relies on the column
   pattern already stored in C containing every column of that A*P row.
*/
2625: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2626: {
2627: /* This routine requires testing -- first draft only */
2629: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2630: Mat P=pp->AIJ;
2631: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
2632: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
2633: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
/* aj (and aa below) are running cursors: they are advanced once per nonzero of A and never reset */
/* pJ (and pA below) get an initial value here but are re-pointed for every row i before use */
2634: PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2635: PetscInt *ci=c->i,*cj=c->j,*cjj;
2636: PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
2637: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2638: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2641: /* Allocate temporary array for storage of one row of A*P */
/* One buffer holds three work arrays back to back: cn values (apa), then cn indices (apj), */
/* then cn "already seen" flags (apjdense). Freeing apa releases all three. */
2642: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2643: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));
2645: apj = (PetscInt *)(apa + cn);
2646: apjdense = apj + cn;
2648: /* Clear old values in C */
2649: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
2651: for (i=0;i<am;i++) {
2652: /* Form sparse row of A*P */
2653: anzi = ai[i+1] - ai[i];
2654: apnzj = 0;
2655: for (j=0;j<anzi;j++) {
2656: /* Get offset within block of P */
2657: pshift = *aj%ppdof;
2658: /* Get block row of P */
2659: prow = *aj++/ppdof; /* integer division */
2660: pnzj = pi[prow+1] - pi[prow];
2661: pjj = pj + pi[prow];
2662: paj = pa + pi[prow];
2663: for (k=0;k<pnzj;k++) {
/* Column of the expanded P: block column pjj[k] scaled by dof, plus the component offset */
2664: poffset = pjj[k]*ppdof+pshift;
/* First touch of this column in the current row: flag it and append it to the sparse list */
2665: if (!apjdense[poffset]) {
2666: apjdense[poffset] = -1;
2667: apj[apnzj++] = poffset;
2668: }
2669: apa[poffset] += (*aa)*paj[k];
2670: }
2671: PetscLogFlops(2*pnzj);
2672: aa++;
2673: }
2675: /* Sort the j index array for quick sparse axpy. */
2676: /* Note: a array does not need sorting as it is in dense storage locations. */
2677: PetscSortInt(apnzj,apj);
2679: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2680: prow = i/ppdof; /* integer division */
2681: pshift = i%ppdof;
2682: poffset = pi[prow];
2683: pnzi = pi[prow+1] - poffset;
2684: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2685: pJ = pj+poffset;
2686: pA = pa+poffset;
2687: for (j=0;j<pnzi;j++) {
/* Target row of C: block column *pJ of P expanded by dof, shifted by the component of row i */
2688: crow = (*pJ)*ppdof+pshift;
2689: cjj = cj + ci[crow];
2690: caj = ca + ci[crow];
2691: pJ++;
2692: /* Perform sparse axpy operation. Note cjj includes apj. */
/* k walks the stored columns of C's row, nextap walks the sorted apj list; the loop ends */
/* only once every apj entry has been matched, there is no separate bound on k. */
2693: for (k=0,nextap=0;nextap<apnzj;k++) {
2694: if (cjj[k]==apj[nextap]) {
2695: caj[k] += (*pA)*apa[apj[nextap++]];
2696: }
2697: }
2698: PetscLogFlops(2*apnzj);
2699: pA++;
2700: }
2702: /* Zero the current row info for A*P */
/* Only the touched entries are cleared, so the dense work row is all-zero again for row i+1 */
2703: for (j=0;j<apnzj;j++) {
2704: apa[apj[j]] = 0.;
2705: apjdense[apj[j]] = 0;
2706: }
2707: }
2709: /* Assemble the final matrix and clean up */
2710: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2711: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2712: PetscFree(apa);
2713: return(0);
2714: }
/*
   MatPtAPSymbolic_MPIAIJ_MPIMAIJ - Symbolic P^T*A*P for an MPIMAIJ PP, done by converting
   PP to MPIAIJ and forwarding to the ptapsymbolic operation of the converted matrix.

   Parameters:
+  A    - passed through unchanged to the forwarded operation
.  PP   - converted with MatConvert(...,MATMPIAIJ,MAT_REUSE_MATRIX,...) before forwarding
.  fill - passed through unchanged to the forwarded operation
-  C    - passed through unchanged to the forwarded operation

   Returns 0.

   NOTE(review): ierr receives the result of the forwarded call but is never examined in
   this listing, and the function returns 0 unconditionally -- confirm whether an error
   check was dropped here.
*/
2718: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2719: {
2720: PetscErrorCode ierr;
2723: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
/* MAT_REUSE_MATRIX is requested with &PP as the output; presumably this reaches */
/* MatConvert_MPIMAIJ_MPIAIJ, which replaces the header of its input -- so the caller's */
/* matrix would no longer be MPIMAIJ afterwards. TODO confirm the dispatch. */
2724: MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
/* Forward to whatever ptapsymbolic operation PP carries after the conversion */
2725: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
2726: return(0);
2727: }
/*
   MatPtAPNumeric_MPIAIJ_MPIMAIJ - Placeholder for the numeric P^T*A*P with an MPIMAIJ PP.

   None of the arguments A, PP or C is used: the body only raises PETSC_ERR_SUP with a
   "not implemented" message.
*/
2731: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2732: {
2734: SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
/* presumably unreachable because SETERRQ returns; kept so the function has a return on every path */
2735: return(0);
2736: }
/*
   MatConvert_SeqMAIJ_SeqAIJ - Expands a SeqMAIJ matrix into an explicitly stored SeqAIJ
   matrix of size (dof*m) x (dof*n), where m x n is the size of the wrapped matrix b->AIJ.

   Parameters:
+  A       - matrix whose data is read as Mat_SeqMAIJ
.  newtype - not used in this routine
.  reuse   - MAT_REUSE_MATRIX: the new matrix replaces A via MatHeaderReplace();
             any other value: the new matrix is returned in *newmat
-  newmat  - receives the new matrix when reuse is not MAT_REUSE_MATRIX

   Returns 0.

   Notes:
   Each row i of the wrapped matrix produces dof consecutive rows in the result; row
   dof*i+j carries the same values as row i, placed in columns dof*col+j.
*/
2741: PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2742: {
2743: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2744: Mat a = b->AIJ,B;
2745: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
2746: PetscErrorCode ierr; /* declared but not referenced anywhere in this listing */
2747: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2748: PetscInt *cols;
2749: PetscScalar *vals;
2752: MatGetSize(a,&m,&n);
/* Per-row lengths for the expanded matrix: every one of the dof copies of row i gets */
/* aij->ilen[i] entries. nmax tracks the longest row so icols can be sized once. */
2753: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2754: for (i=0; i<m; i++) {
2755: nmax = PetscMax(nmax,aij->ilen[i]);
2756: for (j=0; j<dof; j++) {
2757: ilen[dof*i+j] = aij->ilen[i];
2758: }
2759: }
2760: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2761: PetscFree(ilen);
2762: PetscMalloc(nmax*sizeof(PetscInt),&icols);
/* ii is the current row of B; it advances once per (row, component) pair */
2763: ii = 0;
2764: for (i=0; i<m; i++) {
2765: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2766: for (j=0; j<dof; j++) {
/* Shift the column indices into component j; the values are reused unchanged */
2767: for (k=0; k<ncols; k++) {
2768: icols[k] = dof*cols[k]+j;
2769: }
2770: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2771: ii++;
2772: }
2773: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2774: }
2775: PetscFree(icols);
2776: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2777: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2779: if (reuse == MAT_REUSE_MATRIX) {
/* In-place conversion: *newmat is not written on this path */
2780: MatHeaderReplace(A,B);
2781: } else {
2782: *newmat = B;
2783: }
2784: return(0);
2785: }
2788: #include ../src/mat/impls/aij/mpi/mpiaij.h
/*
   MatConvert_MPIMAIJ_MPIAIJ - Expands an MPIMAIJ matrix into an explicitly stored MPIAIJ
   matrix with the same local and global sizes as A.

   Parameters:
+  A       - matrix whose data is read as Mat_MPIMAIJ
.  newtype - not used in this routine
.  reuse   - MAT_REUSE_MATRIX: the new matrix replaces A via MatHeaderReplace();
             any other value: the new matrix is returned in *newmat
-  newmat  - receives the new matrix when reuse is not MAT_REUSE_MATRIX

   Returns 0.

   Notes:
   The two SeqAIJ matrices wrapped by maij->AIJ and maij->OAIJ are walked row by row.
   Each of their rows produces dof rows of the result; columns coming from AIJ are
   offset by cstart, columns coming from OAIJ are translated through the garray of
   maij->A.
*/
2793: PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2794: {
2795: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
/* maij->AIJ and maij->OAIJ are themselves read as SeqMAIJ wrappers; unwrap to the SeqAIJ inside */
2796: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2797: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2798: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2799: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2800: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2801: PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2802: PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2803: PetscInt rstart,cstart,*garray,ii,k;
2804: PetscErrorCode ierr; /* declared but not referenced anywhere in this listing */
/* Preallocation counts, one entry per local row of A: each of the dof rows derived from */
/* unexpanded row i gets the row length of AIJ (dnz) and of OAIJ (onz). */
2808: PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
2809: for (i=0; i<A->rmap->n/dof; i++) {
2810: nmax = PetscMax(nmax,AIJ->ilen[i]);
2811: onmax = PetscMax(onmax,OAIJ->ilen[i]);
2812: for (j=0; j<dof; j++) {
2813: dnz[dof*i+j] = AIJ->ilen[i];
2814: onz[dof*i+j] = OAIJ->ilen[i];
2815: }
2816: }
2817: MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
2818: PetscFree2(dnz,onz);
/* Scratch column buffers sized for the longest AIJ row and the longest OAIJ row */
2820: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
/* Row/column starts of the unexpanded matrix, scaled by dof */
2821: rstart = dof*maij->A->rmap->rstart;
2822: cstart = dof*maij->A->cmap->rstart;
2823: garray = mpiaij->garray;
/* ii is the row index passed to MatSetValues_MPIAIJ; it starts at rstart and advances */
/* once per (row, component) pair */
2825: ii = rstart;
2826: for (i=0; i<A->rmap->n/dof; i++) {
2827: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2828: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2829: for (j=0; j<dof; j++) {
/* AIJ columns: expand by dof, add component j, offset by cstart */
2830: for (k=0; k<ncols; k++) {
2831: icols[k] = cstart + dof*cols[k]+j;
2832: }
/* OAIJ columns: translate through garray first, then expand by dof and add component j */
2833: for (k=0; k<oncols; k++) {
2834: oicols[k] = dof*garray[ocols[k]]+j;
2835: }
2836: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2837: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2838: ii++;
2839: }
2840: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2841: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2842: }
2843: PetscFree2(icols,oicols);
2845: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2846: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2848: if (reuse == MAT_REUSE_MATRIX) {
/* A's reference count is forced to 1 for the duration of MatHeaderReplace() and then put */
/* back, so the count callers hold on A is the same before and after the replacement. */
/* NOTE(review): presumably MatHeaderReplace() needs refct==1 to tear down the old data -- confirm. */
2849: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
2850: ((PetscObject)A)->refct = 1;
2851: MatHeaderReplace(A,B);
2852: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
2853: } else {
2854: *newmat = B;
2855: }
2856: return(0);
2857: }
2861: /* ---------------------------------------------------------------------------------- */
2862: /*MC
2863: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2864: operations for multicomponent problems. It interpolates each component the same
2865: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
2866: and MATMPIAIJ for distributed matrices.
2868: Operations provided:
2869: + MatMult
2870: . MatMultTranspose
2871: . MatMultAdd
2872: . MatMultTransposeAdd
2873: - MatView
2875: Level: advanced
2877: M*/
/*
   MatCreateMAIJ - Builds the MAIJ matrix that applies A to dof components.

   Parameters:
+  A    - the matrix to wrap; one extra reference is taken on it in every case
.  dof  - number of components
-  maij - receives the result

   Returns 0.

   Notes:
   dof == 1 returns A itself (carrying the extra reference) instead of a wrapper.
   Otherwise a matrix B with dof-times the local and global sizes of A is created:
   on a one-process communicator it is a MATSEQMAIJ with a hand-written kernel set
   chosen by dof (2..10, 16, 18; any other value raises PETSC_ERR_SUP); on a larger
   communicator it is a MATMPIMAIJ holding SeqMAIJ wrappers of the two blocks of A,
   a work vector and a scatter context.

   NOTE(review): on the unsupported-dof path B has already been created and A already
   referenced; presumably SETERRQ1 returns immediately, in which case neither is
   released -- confirm.
*/
2880: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2881: {
2883: PetscMPIInt size;
2884: PetscInt n;
2885: Mat_MPIMAIJ *b;
2886: Mat B;
/* The result keeps using A (directly for dof==1, as b->AIJ / b->A otherwise) */
2889: PetscObjectReference((PetscObject)A);
2891: if (dof == 1) {
2892: *maij = A;
2893: } else {
2894: MatCreate(((PetscObject)A)->comm,&B);
2895: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
2896: B->assembled = PETSC_TRUE;
2898: MPI_Comm_size(((PetscObject)A)->comm,&size);
2899: if (size == 1) {
2900: MatSetType(B,MATSEQMAIJ);
2901: B->ops->destroy = MatDestroy_SeqMAIJ;
2902: B->ops->view = MatView_SeqMAIJ;
2903: /* MatDestroy_SeqMAIJ and the SeqMAIJ kernels read B->data as Mat_SeqMAIJ, so fill it in through that type (not Mat_MPIMAIJ) */
2904: ((Mat_SeqMAIJ*)B->data)->dof = dof;
2905: ((Mat_SeqMAIJ*)B->data)->AIJ = A;
/* Select the mult/multadd/multtranspose/multtransposeadd kernels written for this dof */
2906: if (dof == 2) {
2907: B->ops->mult = MatMult_SeqMAIJ_2;
2908: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
2909: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
2910: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2911: } else if (dof == 3) {
2912: B->ops->mult = MatMult_SeqMAIJ_3;
2913: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
2914: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
2915: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2916: } else if (dof == 4) {
2917: B->ops->mult = MatMult_SeqMAIJ_4;
2918: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
2919: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
2920: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2921: } else if (dof == 5) {
2922: B->ops->mult = MatMult_SeqMAIJ_5;
2923: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
2924: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
2925: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2926: } else if (dof == 6) {
2927: B->ops->mult = MatMult_SeqMAIJ_6;
2928: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
2929: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
2930: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2931: } else if (dof == 7) {
2932: B->ops->mult = MatMult_SeqMAIJ_7;
2933: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
2934: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
2935: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2936: } else if (dof == 8) {
2937: B->ops->mult = MatMult_SeqMAIJ_8;
2938: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
2939: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
2940: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2941: } else if (dof == 9) {
2942: B->ops->mult = MatMult_SeqMAIJ_9;
2943: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
2944: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
2945: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2946: } else if (dof == 10) {
2947: B->ops->mult = MatMult_SeqMAIJ_10;
2948: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
2949: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
2950: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2951: } else if (dof == 16) {
2952: B->ops->mult = MatMult_SeqMAIJ_16;
2953: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
2954: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
2955: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2956: } else if (dof == 18) {
2957: B->ops->mult = MatMult_SeqMAIJ_18;
2958: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
2959: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
2960: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
2961: } else {
2962: SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2963: }
2964: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2965: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2966: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2967: } else {
2968: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2969: IS from,to;
2970: Vec gvec;
2971: PetscInt *garray,i;
2973: MatSetType(B,MATMPIMAIJ);
2974: B->ops->destroy = MatDestroy_MPIMAIJ;
2975: B->ops->view = MatView_MPIMAIJ;
2976: b = (Mat_MPIMAIJ*)B->data;
2977: b->dof = dof;
2978: b->A = A;
/* Wrap the two blocks of A separately; dof != 1 here, so each call creates a new wrapper */
2979: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2980: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
/* Work vector: dof entries for every entry of A's lvec */
2982: VecGetSize(mpiaij->lvec,&n);
2983: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2984: VecSetBlockSize(b->w,dof);
2986: /* create two temporary Index sets for build scatter gather */
/* 'from' lists the entries of A's garray scaled by dof, as blocks of size dof; */
/* 'to' is the contiguous range 0..n*dof-1 of the work vector */
2987: PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2988: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2989: ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);
2990: PetscFree(garray);
2991: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
2993: /* create temporary global vector to generate scatter context */
/* gvec is created with a PETSC_NULL array: only its layout is needed to build the scatter */
2994: VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);
2995: VecSetBlockSize(gvec,dof);
2997: /* generate the scatter context */
2998: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3000: ISDestroy(from);
3001: ISDestroy(to);
3002: VecDestroy(gvec);
3004: B->ops->mult = MatMult_MPIMAIJ_dof;
3005: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3006: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3007: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3008: B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3009: B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3010: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3011: }
3012: *maij = B;
3013: MatView_Private(B);
3014: }
3015: return(0);
3016: }