Actual source code: sbaij2.c
1: #define PETSCMAT_DLL
3: #include ../src/mat/impls/baij/seq/baij.h
4: #include ../src/inline/spops.h
5: #include ../src/inline/ilu.h
6: #include petscbt.h
7: #include ../src/mat/impls/sbaij/seq/sbaij.h
11: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
12: {
13: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
15: PetscInt brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
16: const PetscInt *idx;
17: PetscBT table,table0;
20: if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
21: mbs = a->mbs;
22: ai = a->i;
23: aj = a->j;
24: bs = A->rmap->bs;
25: PetscBTCreate(mbs,table);
26: PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
27: PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);
28: PetscBTCreate(mbs,table0);
30: for (i=0; i<is_max; i++) { /* for each is */
31: isz = 0;
32: PetscBTMemzero(mbs,table);
33:
34: /* Extract the indices, assume there can be duplicate entries */
35: ISGetIndices(is[i],&idx);
36: ISGetLocalSize(is[i],&n);
38: /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
39: bcol_max = 0;
40: for (j=0; j<n ; ++j){
41: brow = idx[j]/bs; /* convert the indices into block indices */
42: if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43: if(!PetscBTLookupSet(table,brow)) {
44: nidx[isz++] = brow;
45: if (bcol_max < brow) bcol_max = brow;
46: }
47: }
48: ISRestoreIndices(is[i],&idx);
49: ISDestroy(is[i]);
50:
51: k = 0;
52: for (j=0; j<ov; j++){ /* for each overlap */
53: /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
54: PetscBTMemzero(mbs,table0);
55: for (l=k; l<isz; l++) { PetscBTSet(table0,nidx[l]); }
57: n = isz; /* length of the updated is[i] */
58: for (brow=0; brow<mbs; brow++){
59: start = ai[brow]; end = ai[brow+1];
60: if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
61: for (l = start; l<end ; l++){
62: bcol = aj[l];
63: if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
64: }
65: k++;
66: if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
67: } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
68: for (l = start; l<end ; l++){
69: bcol = aj[l];
70: if (bcol > bcol_max) break;
71: if (PetscBTLookup(table0,bcol)){
72: if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
73: break; /* for l = start; l<end ; l++) */
74: }
75: }
76: }
77: }
78: } /* for each overlap */
80: /* expand the Index Set */
81: for (j=0; j<isz; j++) {
82: for (k=0; k<bs; k++)
83: nidx2[j*bs+k] = nidx[j]*bs+k;
84: }
85: ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
86: }
87: PetscBTDestroy(table);
88: PetscFree(nidx);
89: PetscFree(nidx2);
90: PetscBTDestroy(table0);
91: return(0);
92: }
96: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
97: {
98: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c;
100: PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
101: PetscInt row,mat_i,*mat_j,tcol,*mat_ilen;
102: PetscInt nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
103: const PetscInt *irow,*aj = a->j,*ai = a->i;
104: MatScalar *mat_a;
105: Mat C;
106: PetscTruth flag,sorted;
109: if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
110: ISSorted(iscol,&sorted);
111: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
113: ISGetIndices(isrow,&irow);
114: ISGetSize(isrow,&nrows);
115:
116: PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);
117: ssmap = smap;
118: PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
119: PetscMemzero(smap,oldcols*sizeof(PetscInt));
120: for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
121: /* determine lens of each row */
122: for (i=0; i<nrows; i++) {
123: kstart = ai[irow[i]];
124: kend = kstart + a->ilen[irow[i]];
125: lens[i] = 0;
126: for (k=kstart; k<kend; k++) {
127: if (ssmap[aj[k]]) {
128: lens[i]++;
129: }
130: }
131: }
132: /* Create and fill new matrix */
133: if (scall == MAT_REUSE_MATRIX) {
134: c = (Mat_SeqSBAIJ *)((*B)->data);
136: if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
137: PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
138: if (!flag) {
139: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
140: }
141: PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
142: C = *B;
143: } else {
144: MatCreate(((PetscObject)A)->comm,&C);
145: MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
146: MatSetType(C,((PetscObject)A)->type_name);
147: MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
148: }
149: c = (Mat_SeqSBAIJ *)(C->data);
150: for (i=0; i<nrows; i++) {
151: row = irow[i];
152: kstart = ai[row];
153: kend = kstart + a->ilen[row];
154: mat_i = c->i[i];
155: mat_j = c->j + mat_i;
156: mat_a = c->a + mat_i*bs2;
157: mat_ilen = c->ilen + i;
158: for (k=kstart; k<kend; k++) {
159: if ((tcol=ssmap[a->j[k]])) {
160: *mat_j++ = tcol - 1;
161: PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
162: mat_a += bs2;
163: (*mat_ilen)++;
164: }
165: }
166: }
167:
168: /* Free work space */
169: PetscFree(smap);
170: PetscFree(lens);
171: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
172: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
173:
174: ISRestoreIndices(isrow,&irow);
175: *B = C;
176: return(0);
177: }
181: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
182: {
183: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
184: IS is1;
186: PetscInt *vary,*iary,nrows,i,bs=A->rmap->bs,count;
187: const PetscInt *irow;
190: if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
191:
192: ISGetIndices(isrow,&irow);
193: ISGetSize(isrow,&nrows);
194:
195: /* Verify if the indices corespond to each element in a block
196: and form the IS with compressed IS */
197: PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);
198: iary = vary + a->mbs;
199: PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
200: for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
201:
202: count = 0;
203: for (i=0; i<a->mbs; i++) {
204: if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
205: if (vary[i]==bs) iary[count++] = i;
206: }
207: ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
208:
209: ISRestoreIndices(isrow,&irow);
210: PetscFree(vary);
212: MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
213: ISDestroy(is1);
214: return(0);
215: }
219: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
220: {
222: PetscInt i;
225: if (scall == MAT_INITIAL_MATRIX) {
226: PetscMalloc((n+1)*sizeof(Mat),B);
227: }
229: for (i=0; i<n; i++) {
230: MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
231: }
232: return(0);
233: }
235: /* -------------------------------------------------------*/
236: /* Should check that shapes of vectors and matrices match */
237: /* -------------------------------------------------------*/
238: #include petscblaslapack.h
242: PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
243: {
244: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
245: PetscScalar *x,*z,*xb,x1,zero=0.0;
246: MatScalar *v;
248: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
249: PetscInt nonzerorow=0;
252: VecSet(zz,zero);
253: VecGetArray(xx,&x);
254: VecGetArray(zz,&z);
256: v = a->a;
257: xb = x;
258:
259: for (i=0; i<mbs; i++) {
260: n = ai[1] - ai[0]; /* length of i_th row of A */
261: x1 = *xb++;
262: ib = aj + *ai++;
263: jmin = 0;
264: nonzerorow += (n>0);
265: /* if we ALWAYS required a diagonal entry then could remove this if test */
266: /* should we use a tmp to hold the accumulated z[i] */
267: if (*ib == i) { /* (diag of A)*x */
268: z[i] += *v++ * x[*ib++];
269: jmin++;
270: }
271: for (j=jmin; j<n; j++) {
272: cval = *ib;
273: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
274: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
275: }
276: }
278: VecRestoreArray(xx,&x);
279: VecRestoreArray(zz,&z);
280: PetscLogFlops(2*(a->nz*2 - nonzerorow) - nonzerorow); /* nz = (nz+m)/2 */
281: return(0);
282: }
286: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
287: {
288: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
289: PetscScalar *x,*z,*xb,x1,x2,zero=0.0;
290: MatScalar *v;
292: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
293: PetscInt nonzerorow=0;
296: VecSet(zz,zero);
297: VecGetArray(xx,&x);
298: VecGetArray(zz,&z);
299:
300: v = a->a;
301: xb = x;
303: for (i=0; i<mbs; i++) {
304: n = ai[1] - ai[0]; /* length of i_th block row of A */
305: x1 = xb[0]; x2 = xb[1];
306: ib = aj + *ai;
307: jmin = 0;
308: nonzerorow += (n>0);
309: if (*ib == i){ /* (diag of A)*x */
310: z[2*i] += v[0]*x1 + v[2]*x2;
311: z[2*i+1] += v[2]*x1 + v[3]*x2;
312: v += 4; jmin++;
313: }
314: for (j=jmin; j<n; j++) {
315: /* (strict lower triangular part of A)*x */
316: cval = ib[j]*2;
317: z[cval] += v[0]*x1 + v[1]*x2;
318: z[cval+1] += v[2]*x1 + v[3]*x2;
319: /* (strict upper triangular part of A)*x */
320: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
321: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
322: v += 4;
323: }
324: xb +=2; ai++;
325: }
327: VecRestoreArray(xx,&x);
328: VecRestoreArray(zz,&z);
329: PetscLogFlops(8*(a->nz*2 - nonzerorow) - nonzerorow);
330: return(0);
331: }
335: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
336: {
337: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
338: PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0;
339: MatScalar *v;
341: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
342: PetscInt nonzerorow=0;
345: VecSet(zz,zero);
346: VecGetArray(xx,&x);
347: VecGetArray(zz,&z);
348:
349: v = a->a;
350: xb = x;
352: for (i=0; i<mbs; i++) {
353: n = ai[1] - ai[0]; /* length of i_th block row of A */
354: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
355: ib = aj + *ai;
356: jmin = 0;
357: nonzerorow += (n>0);
358: if (*ib == i){ /* (diag of A)*x */
359: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
360: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
361: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
362: v += 9; jmin++;
363: }
364: for (j=jmin; j<n; j++) {
365: /* (strict lower triangular part of A)*x */
366: cval = ib[j]*3;
367: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
368: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
369: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
370: /* (strict upper triangular part of A)*x */
371: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
372: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
373: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
374: v += 9;
375: }
376: xb +=3; ai++;
377: }
379: VecRestoreArray(xx,&x);
380: VecRestoreArray(zz,&z);
381: PetscLogFlops(18*(a->nz*2 - nonzerorow) - nonzerorow);
382: return(0);
383: }
387: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
388: {
389: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
390: PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
391: MatScalar *v;
393: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
394: PetscInt nonzerorow=0;
397: VecSet(zz,zero);
398: VecGetArray(xx,&x);
399: VecGetArray(zz,&z);
400:
401: v = a->a;
402: xb = x;
404: for (i=0; i<mbs; i++) {
405: n = ai[1] - ai[0]; /* length of i_th block row of A */
406: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
407: ib = aj + *ai;
408: jmin = 0;
409: nonzerorow += (n>0);
410: if (*ib == i){ /* (diag of A)*x */
411: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
412: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
413: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
414: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
415: v += 16; jmin++;
416: }
417: for (j=jmin; j<n; j++) {
418: /* (strict lower triangular part of A)*x */
419: cval = ib[j]*4;
420: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
421: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
422: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
423: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
424: /* (strict upper triangular part of A)*x */
425: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
426: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
427: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
428: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
429: v += 16;
430: }
431: xb +=4; ai++;
432: }
434: VecRestoreArray(xx,&x);
435: VecRestoreArray(zz,&z);
436: PetscLogFlops(32*(a->nz*2 - nonzerorow) - nonzerorow);
437: return(0);
438: }
442: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
443: {
444: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
445: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
446: MatScalar *v;
448: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
449: PetscInt nonzerorow=0;
452: VecSet(zz,zero);
453: VecGetArray(xx,&x);
454: VecGetArray(zz,&z);
455:
456: v = a->a;
457: xb = x;
459: for (i=0; i<mbs; i++) {
460: n = ai[1] - ai[0]; /* length of i_th block row of A */
461: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
462: ib = aj + *ai;
463: jmin = 0;
464: nonzerorow += (n>0);
465: if (*ib == i){ /* (diag of A)*x */
466: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
467: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
468: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
469: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
470: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
471: v += 25; jmin++;
472: }
473: for (j=jmin; j<n; j++) {
474: /* (strict lower triangular part of A)*x */
475: cval = ib[j]*5;
476: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
477: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
478: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
479: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
480: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
481: /* (strict upper triangular part of A)*x */
482: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
483: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
484: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
485: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
486: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
487: v += 25;
488: }
489: xb +=5; ai++;
490: }
492: VecRestoreArray(xx,&x);
493: VecRestoreArray(zz,&z);
494: PetscLogFlops(50*(a->nz*2 - nonzerorow) - nonzerorow);
495: return(0);
496: }
501: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
502: {
503: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
504: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
505: MatScalar *v;
507: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
508: PetscInt nonzerorow=0;
511: VecSet(zz,zero);
512: VecGetArray(xx,&x);
513: VecGetArray(zz,&z);
514:
515: v = a->a;
516: xb = x;
518: for (i=0; i<mbs; i++) {
519: n = ai[1] - ai[0]; /* length of i_th block row of A */
520: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
521: ib = aj + *ai;
522: jmin = 0;
523: nonzerorow += (n>0);
524: if (*ib == i){ /* (diag of A)*x */
525: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
526: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
527: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
528: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
529: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
530: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
531: v += 36; jmin++;
532: }
533: for (j=jmin; j<n; j++) {
534: /* (strict lower triangular part of A)*x */
535: cval = ib[j]*6;
536: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
537: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
538: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
539: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
540: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
541: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
542: /* (strict upper triangular part of A)*x */
543: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
544: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
545: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
546: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
547: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
548: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
549: v += 36;
550: }
551: xb +=6; ai++;
552: }
554: VecRestoreArray(xx,&x);
555: VecRestoreArray(zz,&z);
556: PetscLogFlops(72*(a->nz*2 - nonzerorow) - nonzerorow);
557: return(0);
558: }
561: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
562: {
563: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
564: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
565: MatScalar *v;
567: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
568: PetscInt nonzerorow=0;
571: VecSet(zz,zero);
572: VecGetArray(xx,&x);
573: VecGetArray(zz,&z);
574:
575: v = a->a;
576: xb = x;
578: for (i=0; i<mbs; i++) {
579: n = ai[1] - ai[0]; /* length of i_th block row of A */
580: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
581: ib = aj + *ai;
582: jmin = 0;
583: nonzerorow += (n>0);
584: if (*ib == i){ /* (diag of A)*x */
585: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
586: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
587: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
588: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
589: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
590: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
591: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
592: v += 49; jmin++;
593: }
594: for (j=jmin; j<n; j++) {
595: /* (strict lower triangular part of A)*x */
596: cval = ib[j]*7;
597: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
598: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
599: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
600: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
601: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
602: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
603: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
604: /* (strict upper triangular part of A)*x */
605: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
606: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
607: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
608: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
609: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
610: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
611: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
612: v += 49;
613: }
614: xb +=7; ai++;
615: }
616: VecRestoreArray(xx,&x);
617: VecRestoreArray(zz,&z);
618: PetscLogFlops(98*(a->nz*2 - nonzerorow) - nonzerorow);
619: return(0);
620: }
622: /*
623: This will not work with MatScalar == float because it calls the BLAS
624: */
627: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
628: {
629: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
630: PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
631: MatScalar *v;
633: PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
634: PetscInt nonzerorow=0;
637: VecSet(zz,zero);
638: VecGetArray(xx,&x); x_ptr=x;
639: VecGetArray(zz,&z); z_ptr=z;
641: aj = a->j;
642: v = a->a;
643: ii = a->i;
645: if (!a->mult_work) {
646: PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);
647: }
648: work = a->mult_work;
649:
650: for (i=0; i<mbs; i++) {
651: n = ii[1] - ii[0]; ncols = n*bs;
652: workt = work; idx=aj+ii[0];
653: nonzerorow += (n>0);
655: /* upper triangular part */
656: for (j=0; j<n; j++) {
657: xb = x_ptr + bs*(*idx++);
658: for (k=0; k<bs; k++) workt[k] = xb[k];
659: workt += bs;
660: }
661: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
662: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
663:
664: /* strict lower triangular part */
665: idx = aj+ii[0];
666: if (*idx == i){
667: ncols -= bs; v += bs2; idx++; n--;
668: }
669:
670: if (ncols > 0){
671: workt = work;
672: PetscMemzero(workt,ncols*sizeof(PetscScalar));
673: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
674: for (j=0; j<n; j++) {
675: zb = z_ptr + bs*(*idx++);
676: for (k=0; k<bs; k++) zb[k] += workt[k] ;
677: workt += bs;
678: }
679: }
680: x += bs; v += n*bs2; z += bs; ii++;
681: }
682:
683: VecRestoreArray(xx,&x);
684: VecRestoreArray(zz,&z);
685: PetscLogFlops(2*(a->nz*2 - nonzerorow)*bs2 - nonzerorow);
686: return(0);
687: }
692: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
693: {
694: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
695: PetscScalar *x,*z,*xb,x1;
696: MatScalar *v;
698: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
699: PetscInt nonzerorow=0;
702: VecCopy_Seq(yy,zz);
703: VecGetArray(xx,&x);
704: VecGetArray(zz,&z);
705: v = a->a;
706: xb = x;
708: for (i=0; i<mbs; i++) {
709: n = ai[1] - ai[0]; /* length of i_th row of A */
710: x1 = xb[0];
711: ib = aj + *ai;
712: jmin = 0;
713: nonzerorow += (n>0);
714: if (*ib == i) { /* (diag of A)*x */
715: z[i] += *v++ * x[*ib++]; jmin++;
716: }
717: for (j=jmin; j<n; j++) {
718: cval = *ib;
719: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
720: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
721: }
722: xb++; ai++;
723: }
725: VecRestoreArray(xx,&x);
726: VecRestoreArray(zz,&z);
727:
728: PetscLogFlops(2*(a->nz*2 - nonzerorow));
729: return(0);
730: }
734: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
735: {
736: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
737: PetscScalar *x,*z,*xb,x1,x2;
738: MatScalar *v;
740: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
741: PetscInt nonzerorow=0;
744: VecCopy_Seq(yy,zz);
745: VecGetArray(xx,&x);
746: VecGetArray(zz,&z);
748: v = a->a;
749: xb = x;
751: for (i=0; i<mbs; i++) {
752: n = ai[1] - ai[0]; /* length of i_th block row of A */
753: x1 = xb[0]; x2 = xb[1];
754: ib = aj + *ai;
755: jmin = 0;
756: nonzerorow += (n>0);
757: if (*ib == i){ /* (diag of A)*x */
758: z[2*i] += v[0]*x1 + v[2]*x2;
759: z[2*i+1] += v[2]*x1 + v[3]*x2;
760: v += 4; jmin++;
761: }
762: for (j=jmin; j<n; j++) {
763: /* (strict lower triangular part of A)*x */
764: cval = ib[j]*2;
765: z[cval] += v[0]*x1 + v[1]*x2;
766: z[cval+1] += v[2]*x1 + v[3]*x2;
767: /* (strict upper triangular part of A)*x */
768: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
769: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
770: v += 4;
771: }
772: xb +=2; ai++;
773: }
774: VecRestoreArray(xx,&x);
775: VecRestoreArray(zz,&z);
777: PetscLogFlops(4*(a->nz*2 - nonzerorow));
778: return(0);
779: }
783: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
784: {
785: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
786: PetscScalar *x,*z,*xb,x1,x2,x3;
787: MatScalar *v;
789: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
790: PetscInt nonzerorow=0;
793: VecCopy_Seq(yy,zz);
794: VecGetArray(xx,&x);
795: VecGetArray(zz,&z);
797: v = a->a;
798: xb = x;
800: for (i=0; i<mbs; i++) {
801: n = ai[1] - ai[0]; /* length of i_th block row of A */
802: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
803: ib = aj + *ai;
804: jmin = 0;
805: nonzerorow += (n>0);
806: if (*ib == i){ /* (diag of A)*x */
807: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
808: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
809: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
810: v += 9; jmin++;
811: }
812: for (j=jmin; j<n; j++) {
813: /* (strict lower triangular part of A)*x */
814: cval = ib[j]*3;
815: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
816: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
817: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
818: /* (strict upper triangular part of A)*x */
819: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
820: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
821: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
822: v += 9;
823: }
824: xb +=3; ai++;
825: }
827: VecRestoreArray(xx,&x);
828: VecRestoreArray(zz,&z);
830: PetscLogFlops(18*(a->nz*2 - nonzerorow));
831: return(0);
832: }
836: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
837: {
838: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
839: PetscScalar *x,*z,*xb,x1,x2,x3,x4;
840: MatScalar *v;
842: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
843: PetscInt nonzerorow=0;
846: VecCopy_Seq(yy,zz);
847: VecGetArray(xx,&x);
848: VecGetArray(zz,&z);
850: v = a->a;
851: xb = x;
853: for (i=0; i<mbs; i++) {
854: n = ai[1] - ai[0]; /* length of i_th block row of A */
855: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
856: ib = aj + *ai;
857: jmin = 0;
858: nonzerorow += (n>0);
859: if (*ib == i){ /* (diag of A)*x */
860: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
861: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
862: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
863: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
864: v += 16; jmin++;
865: }
866: for (j=jmin; j<n; j++) {
867: /* (strict lower triangular part of A)*x */
868: cval = ib[j]*4;
869: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
870: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
871: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
872: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
873: /* (strict upper triangular part of A)*x */
874: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
875: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
876: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
877: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
878: v += 16;
879: }
880: xb +=4; ai++;
881: }
883: VecRestoreArray(xx,&x);
884: VecRestoreArray(zz,&z);
886: PetscLogFlops(32*(a->nz*2 - nonzerorow));
887: return(0);
888: }
892: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
893: {
894: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
895: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5;
896: MatScalar *v;
898: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
899: PetscInt nonzerorow=0;
902: VecCopy_Seq(yy,zz);
903: VecGetArray(xx,&x);
904: VecGetArray(zz,&z);
906: v = a->a;
907: xb = x;
909: for (i=0; i<mbs; i++) {
910: n = ai[1] - ai[0]; /* length of i_th block row of A */
911: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
912: ib = aj + *ai;
913: jmin = 0;
914: nonzerorow += (n>0);
915: if (*ib == i){ /* (diag of A)*x */
916: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
917: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
918: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
919: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
920: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
921: v += 25; jmin++;
922: }
923: for (j=jmin; j<n; j++) {
924: /* (strict lower triangular part of A)*x */
925: cval = ib[j]*5;
926: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
927: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
928: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
929: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
930: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
931: /* (strict upper triangular part of A)*x */
932: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
933: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
934: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
935: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
936: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
937: v += 25;
938: }
939: xb +=5; ai++;
940: }
942: VecRestoreArray(xx,&x);
943: VecRestoreArray(zz,&z);
945: PetscLogFlops(50*(a->nz*2 - nonzerorow));
946: return(0);
947: }
950: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
951: {
952: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
953: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6;
954: MatScalar *v;
956: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
957: PetscInt nonzerorow=0;
960: VecCopy_Seq(yy,zz);
961: VecGetArray(xx,&x);
962: VecGetArray(zz,&z);
964: v = a->a;
965: xb = x;
967: for (i=0; i<mbs; i++) {
968: n = ai[1] - ai[0]; /* length of i_th block row of A */
969: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
970: ib = aj + *ai;
971: jmin = 0;
972: nonzerorow += (n>0);
973: if (*ib == i){ /* (diag of A)*x */
974: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
975: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
976: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
977: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
978: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
979: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
980: v += 36; jmin++;
981: }
982: for (j=jmin; j<n; j++) {
983: /* (strict lower triangular part of A)*x */
984: cval = ib[j]*6;
985: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
986: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
987: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
988: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
989: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
990: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
991: /* (strict upper triangular part of A)*x */
992: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
993: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
994: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
995: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
996: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
997: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
998: v += 36;
999: }
1000: xb +=6; ai++;
1001: }
1003: VecRestoreArray(xx,&x);
1004: VecRestoreArray(zz,&z);
1006: PetscLogFlops(72*(a->nz*2 - nonzerorow));
1007: return(0);
1008: }
1012: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1013: {
1014: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1015: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
1016: MatScalar *v;
1018: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
1019: PetscInt nonzerorow=0;
1022: VecCopy_Seq(yy,zz);
1023: VecGetArray(xx,&x);
1024: VecGetArray(zz,&z);
1026: v = a->a;
1027: xb = x;
1029: for (i=0; i<mbs; i++) {
1030: n = ai[1] - ai[0]; /* length of i_th block row of A */
1031: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1032: ib = aj + *ai;
1033: jmin = 0;
1034: nonzerorow += (n>0);
1035: if (*ib == i){ /* (diag of A)*x */
1036: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1037: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1038: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1039: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1040: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1041: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1042: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1043: v += 49; jmin++;
1044: }
1045: for (j=jmin; j<n; j++) {
1046: /* (strict lower triangular part of A)*x */
1047: cval = ib[j]*7;
1048: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1049: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1050: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1051: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1052: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1053: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1054: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1055: /* (strict upper triangular part of A)*x */
1056: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1057: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1058: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1059: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1060: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1061: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1062: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1063: v += 49;
1064: }
1065: xb +=7; ai++;
1066: }
1068: VecRestoreArray(xx,&x);
1069: VecRestoreArray(zz,&z);
1071: PetscLogFlops(98*(a->nz*2 - nonzerorow));
1072: return(0);
1073: }
1077: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1078: {
1079: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1080: PetscScalar *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1081: MatScalar *v;
1083: PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
1084: PetscInt nonzerorow=0;
1087: VecCopy_Seq(yy,zz);
1088: VecGetArray(xx,&x); x_ptr=x;
1089: VecGetArray(zz,&z); z_ptr=z;
1091: aj = a->j;
1092: v = a->a;
1093: ii = a->i;
1095: if (!a->mult_work) {
1096: PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);
1097: }
1098: work = a->mult_work;
1099:
1100:
1101: for (i=0; i<mbs; i++) {
1102: n = ii[1] - ii[0]; ncols = n*bs;
1103: workt = work; idx=aj+ii[0];
1104: nonzerorow += (n>0);
1106: /* upper triangular part */
1107: for (j=0; j<n; j++) {
1108: xb = x_ptr + bs*(*idx++);
1109: for (k=0; k<bs; k++) workt[k] = xb[k];
1110: workt += bs;
1111: }
1112: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1113: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1115: /* strict lower triangular part */
1116: idx = aj+ii[0];
1117: if (*idx == i){
1118: ncols -= bs; v += bs2; idx++; n--;
1119: }
1120: if (ncols > 0){
1121: workt = work;
1122: PetscMemzero(workt,ncols*sizeof(PetscScalar));
1123: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1124: for (j=0; j<n; j++) {
1125: zb = z_ptr + bs*(*idx++);
1126: for (k=0; k<bs; k++) zb[k] += workt[k] ;
1127: workt += bs;
1128: }
1129: }
1131: x += bs; v += n*bs2; z += bs; ii++;
1132: }
1134: VecRestoreArray(xx,&x);
1135: VecRestoreArray(zz,&z);
1137: PetscLogFlops(2*(a->nz*2 - nonzerorow));
1138: return(0);
1139: }
1143: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1144: {
1145: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1146: PetscScalar oalpha = alpha;
1148: PetscBLASInt one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
1151: BLASscal_(&totalnz,&oalpha,a->a,&one);
1152: PetscLogFlops(totalnz);
1153: return(0);
1154: }
1158: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1159: {
1160: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1161: MatScalar *v = a->a;
1162: PetscReal sum_diag = 0.0, sum_off = 0.0, *sum;
1163: PetscInt i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1165: PetscInt *jl,*il,jmin,jmax,nexti,ik,*col;
1166:
1168: if (type == NORM_FROBENIUS) {
1169: for (k=0; k<mbs; k++){
1170: jmin = a->i[k]; jmax = a->i[k+1];
1171: col = aj + jmin;
1172: if (*col == k){ /* diagonal block */
1173: for (i=0; i<bs2; i++){
1174: #if defined(PETSC_USE_COMPLEX)
1175: sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1176: #else
1177: sum_diag += (*v)*(*v); v++;
1178: #endif
1179: }
1180: jmin++;
1181: }
1182: for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */
1183: for (i=0; i<bs2; i++){
1184: #if defined(PETSC_USE_COMPLEX)
1185: sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1186: #else
1187: sum_off += (*v)*(*v); v++;
1188: #endif
1189: }
1190: }
1191: }
1192: *norm = sqrt(sum_diag + 2*sum_off);
1193: } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1194: PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);
1195: jl = il + mbs;
1196: sum = (PetscReal*)(jl + mbs);
1197: for (i=0; i<mbs; i++) jl[i] = mbs;
1198: il[0] = 0;
1200: *norm = 0.0;
1201: for (k=0; k<mbs; k++) { /* k_th block row */
1202: for (j=0; j<bs; j++) sum[j]=0.0;
1203: /*-- col sum --*/
1204: i = jl[k]; /* first |A(i,k)| to be added */
1205: /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1206: at step k */
1207: while (i<mbs){
1208: nexti = jl[i]; /* next block row to be added */
1209: ik = il[i]; /* block index of A(i,k) in the array a */
1210: for (j=0; j<bs; j++){
1211: v = a->a + ik*bs2 + j*bs;
1212: for (k1=0; k1<bs; k1++) {
1213: sum[j] += PetscAbsScalar(*v); v++;
1214: }
1215: }
1216: /* update il, jl */
1217: jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1218: jmax = a->i[i+1];
1219: if (jmin < jmax){
1220: il[i] = jmin;
1221: j = a->j[jmin];
1222: jl[i] = jl[j]; jl[j]=i;
1223: }
1224: i = nexti;
1225: }
1226: /*-- row sum --*/
1227: jmin = a->i[k]; jmax = a->i[k+1];
1228: for (i=jmin; i<jmax; i++) {
1229: for (j=0; j<bs; j++){
1230: v = a->a + i*bs2 + j;
1231: for (k1=0; k1<bs; k1++){
1232: sum[j] += PetscAbsScalar(*v); v += bs;
1233: }
1234: }
1235: }
1236: /* add k_th block row to il, jl */
1237: col = aj+jmin;
1238: if (*col == k) jmin++;
1239: if (jmin < jmax){
1240: il[k] = jmin;
1241: j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1242: }
1243: for (j=0; j<bs; j++){
1244: if (sum[j] > *norm) *norm = sum[j];
1245: }
1246: }
1247: PetscFree(il);
1248: } else {
1249: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1250: }
1251: return(0);
1252: }
1256: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1257: {
1258: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1263: /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1264: if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1265: *flg = PETSC_FALSE;
1266: return(0);
1267: }
1268:
1269: /* if the a->i are the same */
1270: PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1271: if (!*flg) {
1272: return(0);
1273: }
1274:
1275: /* if a->j are the same */
1276: PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1277: if (!*flg) {
1278: return(0);
1279: }
1280: /* if a->a are the same */
1281: PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);
1282: return(0);
1283: }
1287: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1288: {
1289: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1291: PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1292: PetscScalar *x,zero = 0.0;
1293: MatScalar *aa,*aa_j;
1296: bs = A->rmap->bs;
1297: if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1298:
1299: aa = a->a;
1300: ai = a->i;
1301: aj = a->j;
1302: ambs = a->mbs;
1303: bs2 = a->bs2;
1305: VecSet(v,zero);
1306: VecGetArray(v,&x);
1307: VecGetLocalSize(v,&n);
1308: if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1309: for (i=0; i<ambs; i++) {
1310: j=ai[i];
1311: if (aj[j] == i) { /* if this is a diagonal element */
1312: row = i*bs;
1313: aa_j = aa + j*bs2;
1314: if (A->factor && bs==1){
1315: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1316: } else {
1317: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1318: }
1319: }
1320: }
1321:
1322: VecRestoreArray(v,&x);
1323: return(0);
1324: }
1328: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1329: {
1330: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1331: PetscScalar *l,x,*li,*ri;
1332: MatScalar *aa,*v;
1334: PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1335: PetscTruth flg;
1338: if (ll != rr){
1339: VecEqual(ll,rr,&flg);
1340: if (!flg)
1341: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1342: }
1343: if (!ll) return(0);
1344: ai = a->i;
1345: aj = a->j;
1346: aa = a->a;
1347: m = A->rmap->N;
1348: bs = A->rmap->bs;
1349: mbs = a->mbs;
1350: bs2 = a->bs2;
1352: VecGetArray(ll,&l);
1353: VecGetLocalSize(ll,&lm);
1354: if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1355: for (i=0; i<mbs; i++) { /* for each block row */
1356: M = ai[i+1] - ai[i];
1357: li = l + i*bs;
1358: v = aa + bs2*ai[i];
1359: for (j=0; j<M; j++) { /* for each block */
1360: ri = l + bs*aj[ai[i]+j];
1361: for (k=0; k<bs; k++) {
1362: x = ri[k];
1363: for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1364: }
1365: }
1366: }
1367: VecRestoreArray(ll,&l);
1368: PetscLogFlops(2*a->nz);
1369: return(0);
1370: }
1374: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1375: {
1376: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1379: info->block_size = a->bs2;
1380: info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */
1381: info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1382: info->nz_unneeded = (double)(info->nz_allocated - info->nz_used);
1383: info->assemblies = A->num_ass;
1384: info->mallocs = a->reallocs;
1385: info->memory = ((PetscObject)A)->mem;
1386: if (A->factor) {
1387: info->fill_ratio_given = A->info.fill_ratio_given;
1388: info->fill_ratio_needed = A->info.fill_ratio_needed;
1389: info->factor_mallocs = A->info.factor_mallocs;
1390: } else {
1391: info->fill_ratio_given = 0;
1392: info->fill_ratio_needed = 0;
1393: info->factor_mallocs = 0;
1394: }
1395: return(0);
1396: }
1401: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1402: {
1403: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1407: PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1408: return(0);
1409: }
1413: /*
1414: This code does not work since it only checks the upper triangular part of
1415: the matrix. Hence it is not listed in the function table.
1416: */
1417: PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1418: {
1419: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1421: PetscInt i,j,n,row,col,bs,*ai,*aj,mbs;
1422: PetscReal atmp;
1423: MatScalar *aa;
1424: PetscScalar *x;
1425: PetscInt ncols,brow,bcol,krow,kcol;
1428: if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1429: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1430: bs = A->rmap->bs;
1431: aa = a->a;
1432: ai = a->i;
1433: aj = a->j;
1434: mbs = a->mbs;
1436: VecSet(v,0.0);
1437: VecGetArray(v,&x);
1438: VecGetLocalSize(v,&n);
1439: if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1440: for (i=0; i<mbs; i++) {
1441: ncols = ai[1] - ai[0]; ai++;
1442: brow = bs*i;
1443: for (j=0; j<ncols; j++){
1444: bcol = bs*(*aj);
1445: for (kcol=0; kcol<bs; kcol++){
1446: col = bcol + kcol; /* col index */
1447: for (krow=0; krow<bs; krow++){
1448: atmp = PetscAbsScalar(*aa); aa++;
1449: row = brow + krow; /* row index */
1450: if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1451: if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1452: }
1453: }
1454: aj++;
1455: }
1456: }
1457: VecRestoreArray(v,&x);
1458: return(0);
1459: }