Actual source code: csrperm.c
#define PETSCMAT_DLL

/*
  Defines basic operations for the MATSEQCSRPERM matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but augments
  it with some permutation information that enables some operations
  to be more vectorizable.  A physically rearranged copy of the matrix
  may be stored if the user desires.

  Eventually a variety of permutations may be supported.
*/
#include "../src/mat/impls/aij/seq/aij.h"
#define NDIM 512
/* NDIM specifies how many rows at a time we should work with when
 * performing the vectorized mat-vec.  This depends on various factors
 * such as vector register length, etc., and I really need to add a
 * way for the user (or the library) to tune this.  I'm setting it to
 * 512 for now since that is what Ed D'Azevedo was using in his Fortran
 * routines. */
typedef struct {
  PetscInt   ngroup;
  PetscInt   *xgroup;
  /* Denotes where groups of rows with the same number of nonzeros
   * begin and end, i.e., xgroup[i] gives us the position in iperm[]
   * where the ith group begins. */
  PetscInt   *nzgroup;  /* How many nonzeros each row that is a member of group i has. */
  PetscInt   *iperm;    /* The permutation vector. */

  /* Flag that indicates whether we need to clean up the permutation
   * information during MatDestroy. */
  PetscTruth CleanUpCSRPERM;

  /* Some of this stuff is for Ed's recursive triangular solve.
   * I'm not sure what I need yet. */
  PetscInt   blocksize;
  PetscInt   nstep;
  PetscInt   *jstart_list;
  PetscInt   *jend_list;
  PetscInt   *action_list;
  PetscInt   *ngroup_list;
  PetscInt   **ipointer_list;
  PetscInt   **xgroup_list;
  PetscInt   **nzgroup_list;
  PetscInt   **iperm_list;

  /* We need to keep a pointer to MatAssemblyEnd_SeqAIJ because we
   * actually want to call this function from within the
   * MatAssemblyEnd_SeqCSRPERM function.  Similarly, we also need
   * MatDestroy_SeqAIJ and MatDuplicate_SeqAIJ. */
  PetscErrorCode (*AssemblyEnd_SeqAIJ)(Mat,MatAssemblyType);
  PetscErrorCode (*MatDestroy_SeqAIJ)(Mat);
  PetscErrorCode (*MatDuplicate_SeqAIJ)(Mat,MatDuplicateOption,Mat*);
} Mat_SeqCSRPERM;
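
/* A small worked example (hypothetical data): consider a 5-row matrix whose
 * rows contain 2, 0, 2, 3, and 2 nonzeros, respectively.  Sorting the rows
 * by nonzero count yields three groups, represented as
 *
 *   ngroup  = 3
 *   nzgroup = {0, 2, 3}        (nonzeros per row in each group)
 *   iperm   = {1, 0, 2, 4, 3}  (row 1 first, then rows 0, 2, 4, then row 3)
 *   xgroup  = {0, 1, 4, 5}     (group i occupies iperm[xgroup[i]..xgroup[i+1]-1])
 *
 * The mat-vec kernels below process each group with a fixed row length,
 * which is what makes their inner loops vectorizable. */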
PetscErrorCode MatConvert_SeqCSRPERM_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATCSRPERM to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat            B = *newmat;
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM*)A->spptr;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->assemblyend = csrperm->AssemblyEnd_SeqAIJ;
  B->ops->destroy     = csrperm->MatDestroy_SeqAIJ;
  B->ops->duplicate   = csrperm->MatDuplicate_SeqAIJ;

  /* Free everything in the Mat_SeqCSRPERM data structure.
   * We don't free the Mat_SeqCSRPERM struct itself, as this will
   * cause problems later when MatDestroy() tries to free it. */
  if (csrperm->CleanUpCSRPERM) {
    PetscFree(csrperm->xgroup);
    PetscFree(csrperm->nzgroup);
    PetscFree(csrperm->iperm);
  }

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);

  *newmat = B;
  return(0);
}
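
/* A usage sketch (assumes B is an assembled MATSEQCSRPERM matrix; error
 * checking omitted): this routine is normally reached through the generic
 * interface, e.g.
 *
 *   Mat C;
 *   MatConvert(B,MATSEQAIJ,MAT_INITIAL_MATRIX,&C);
 *
 * which dispatches here via the function composed onto the object in
 * MatConvert_SeqAIJ_SeqCSRPERM() below. */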
PetscErrorCode MatDestroy_SeqCSRPERM(Mat A)
{
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;

  /* We are going to convert A back into a SEQAIJ matrix, since we are
   * eventually going to use MatDestroy_SeqAIJ() to destroy everything
   * that is not specific to CSRPERM.
   * In preparation for this, reset the operations pointers in A to
   * their SeqAIJ versions. */
  A->ops->assemblyend = csrperm->AssemblyEnd_SeqAIJ;
  /* I suppose I don't actually need to point A->ops->assemblyend back
   * to the right thing, but I do it anyway for completeness. */
  A->ops->destroy     = csrperm->MatDestroy_SeqAIJ;
  A->ops->duplicate   = csrperm->MatDuplicate_SeqAIJ;

  /* Free everything in the Mat_SeqCSRPERM data structure.
   * Note that we don't need to free the Mat_SeqCSRPERM struct
   * itself, as MatDestroy() will do so. */
  if (csrperm->CleanUpCSRPERM) {
    PetscFree(csrperm->xgroup);
    PetscFree(csrperm->nzgroup);
    PetscFree(csrperm->iperm);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A,MATSEQAIJ);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  (*A->ops->destroy)(A);
  return(0);
}
PetscErrorCode MatDuplicate_SeqCSRPERM(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;
  Mat_SeqCSRPERM *csrperm_dest;

  (*csrperm->MatDuplicate_SeqAIJ)(A,op,M);
  /* Grab the destination's CSRPERM data only after the duplicate call
   * above, since *M is not valid before it. */
  csrperm_dest = (Mat_SeqCSRPERM *) (*M)->spptr;
  PetscMemcpy(csrperm_dest,csrperm,sizeof(Mat_SeqCSRPERM));
  /* Allocate space for, and copy, the grouping and permutation info.
   * I note that when the groups are initially determined in
   * SeqCSRPERM_create_perm, xgroup and nzgroup may be sized larger than
   * necessary.  But at this point, we know how large they need to be, and
   * allocate only the necessary amount of memory.  So the duplicated matrix
   * may actually use slightly less storage than the original! */
  PetscMalloc(A->rmap->n*sizeof(PetscInt), &csrperm_dest->iperm);
  PetscMalloc((csrperm->ngroup+1)*sizeof(PetscInt), &csrperm_dest->xgroup);
  PetscMalloc((csrperm->ngroup)*sizeof(PetscInt), &csrperm_dest->nzgroup);
  PetscMemcpy(csrperm_dest->iperm,csrperm->iperm,sizeof(PetscInt)*A->rmap->n);
  PetscMemcpy(csrperm_dest->xgroup,csrperm->xgroup,sizeof(PetscInt)*(csrperm->ngroup+1));
  PetscMemcpy(csrperm_dest->nzgroup,csrperm->nzgroup,sizeof(PetscInt)*csrperm->ngroup);
  return(0);
}
PetscErrorCode SeqCSRPERM_create_perm(Mat A)
{
  PetscInt       m;      /* Number of rows in the matrix. */
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)(A)->data;
  PetscInt       *ia;    /* From the CSR representation; points to the beginning of each row. */
  PetscInt       maxnz;  /* Maximum number of nonzeros in any row. */
  PetscInt       *rows_in_bucket;
  /* To construct the permutation, we sort each row into one of maxnz
   * buckets based on how many nonzeros are in the row. */
  PetscInt       nz;
  PetscInt       *nz_in_row;  /* The number of nonzero elements in row k. */
  PetscInt       *ipnz;
  /* When constructing the iperm permutation vector,
   * ipnz[nz] is used to point to the next place in the permutation vector
   * that a row with nz nonzero elements should be placed. */
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM*) A->spptr;
  /* Points to the MATSEQCSRPERM-specific data in the matrix A. */
  PetscInt       i, ngroup, istart, ipos;

  /* I really ought to put something in here to check if A is of
   * type MATSEQCSRPERM and return an error code if it is not.
   * Come back and do this! */

  m  = A->rmap->n;
  ia = a->i;

  /* Allocate the array that will hold the permutation vector. */
  PetscMalloc(m*sizeof(PetscInt), &csrperm->iperm);

  /* Allocate some temporary work arrays that will be used in
   * calculating the permutation vector and groupings. */
  PetscMalloc((m+1)*sizeof(PetscInt), &rows_in_bucket);
  PetscMalloc((m+1)*sizeof(PetscInt), &ipnz);
  PetscMalloc(m*sizeof(PetscInt), &nz_in_row);

  /* Now actually figure out the permutation and grouping. */

  /* First pass: Determine the number of nonzeros in each row, the maximum
   * number of nonzeros in any row, and how many rows fall into each
   * "bucket" of rows with the same number of nonzeros. */
  maxnz = 0;
  for (i=0; i<m; i++) {
    nz_in_row[i] = ia[i+1]-ia[i];
    if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i];
  }

  for (i=0; i<=maxnz; i++) {
    rows_in_bucket[i] = 0;
  }
  for (i=0; i<m; i++) {
    nz = nz_in_row[i];
    rows_in_bucket[nz]++;
  }

  /* Allocate space for the grouping info.  There will be at most (maxnz + 1)
   * groups.  (It is maxnz + 1 instead of simply maxnz because there may be
   * rows with no nonzero elements.)  If there are (maxnz + 1) groups,
   * then xgroup[] must consist of (maxnz + 2) elements, since the last
   * element of xgroup will tell us where the (maxnz + 1)th group ends.
   * We allocate space for the maximum number of groups;
   * that is potentially a little wasteful, but not too much so.
   * Perhaps I should fix it later. */
  PetscMalloc((maxnz+2)*sizeof(PetscInt), &csrperm->xgroup);
  PetscMalloc((maxnz+1)*sizeof(PetscInt), &csrperm->nzgroup);

  /* Second pass.  Look at what is in the buckets and create the groupings.
   * Note that it is OK to have a group of rows with no nonzero values. */
  ngroup = 0;
  istart = 0;
  for (i=0; i<=maxnz; i++) {
    if (rows_in_bucket[i] > 0) {
      csrperm->nzgroup[ngroup] = i;
      csrperm->xgroup[ngroup]  = istart;
      ngroup++;
      istart += rows_in_bucket[i];
    }
  }

  csrperm->xgroup[ngroup] = istart;
  csrperm->ngroup         = ngroup;

  /* Now fill in the permutation vector iperm. */
  ipnz[0] = 0;
  for (i=0; i<maxnz; i++) {
    ipnz[i+1] = ipnz[i] + rows_in_bucket[i];
  }

  for (i=0; i<m; i++) {
    nz   = nz_in_row[i];
    ipos = ipnz[nz];
    csrperm->iperm[ipos] = i;
    ipnz[nz]++;
  }

  /* Clean up the temporary work arrays. */
  PetscFree(rows_in_bucket);
  PetscFree(ipnz);
  PetscFree(nz_in_row);

  /* Since we've allocated some memory to hold the permutation info,
   * flip the CleanUpCSRPERM flag to true. */
  csrperm->CleanUpCSRPERM = PETSC_TRUE;
  return(0);
}
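
/* Continuing the small worked example given with the Mat_SeqCSRPERM struct
 * above (hypothetical data): for row nonzero counts {2, 0, 2, 3, 2} the
 * first pass finds maxnz = 3 and rows_in_bucket = {1, 0, 3, 1} for
 * nz = 0..3.  The prefix sum then gives ipnz = {0, 1, 1, 4}, so the nz = 0
 * row lands at iperm[0], the three nz = 2 rows at iperm[1..3], and the
 * nz = 3 row at iperm[4], reproducing iperm = {1, 0, 2, 4, 3}. */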
PetscErrorCode MatAssemblyEnd_SeqCSRPERM(Mat A, MatAssemblyType mode)
{
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM*) A->spptr;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQCSRPERM matrix is really just a MATSEQAIJ with some
   * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication.
   * I also note that currently MATSEQCSRPERM doesn't know anything about
   * the Mat_CompressedRow data structure that SeqAIJ now uses when there
   * are many zero rows.  If the SeqAIJ assembly end routine decides to use
   * this, it may break things.  (Don't know... haven't looked at it.) */
  a->inode.use = PETSC_FALSE;
  (*csrperm->AssemblyEnd_SeqAIJ)(A, mode);

  /* Now calculate the permutation and grouping information. */
  SeqCSRPERM_create_perm(A);
  return(0);
}
#include "../src/inline/dot.h"
PetscErrorCode MatMult_SeqCSRPERM(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  PetscScalar     *x,*y;
  const MatScalar *aa;
  PetscErrorCode  ierr;
  PetscInt        *aj,*ai;
#if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM) && defined(notworking))
  PetscInt        i,j;
#endif

  /* Variables that don't appear in MatMult_SeqAIJ. */
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;
  PetscInt       *iperm;  /* Points to the permutation vector. */
  PetscInt       *xgroup;
  /* Denotes where groups of rows with the same number of nonzeros
   * begin and end in iperm. */
  PetscInt       *nzgroup;
  PetscInt       ngroup;
  PetscInt       igroup;
  PetscInt       jstart,jend;
  /* jstart is used in loops to denote the position in iperm where a
   * group starts; jend denotes the position where it ends.
   * (jend + 1 is where the next group starts.) */
  PetscInt       iold,nz;
  PetscInt       istart,iend,isize;
  PetscInt       ipos;
  PetscScalar    yp[NDIM];
  PetscInt       ip[NDIM];  /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */

#if defined(PETSC_HAVE_PRAGMA_DISJOINT)
#pragma disjoint(*x,*y,*aa)
#endif

  VecGetArray(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives the column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Get the info we need about the permutations and groupings. */
  iperm   = csrperm->iperm;
  ngroup  = csrperm->ngroup;
  xgroup  = csrperm->xgroup;
  nzgroup = csrperm->nzgroup;

#if defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM) && defined(notworking)
  fortranmultcsrperm_(&m,x,ii,aj,aa,y);
#else

  for (igroup=0; igroup<ngroup; igroup++) {
    jstart = xgroup[igroup];
    jend   = xgroup[igroup+1] - 1;
    nz     = nzgroup[igroup];

    /* Handle the special cases where the number of nonzeros per row
     * in the group is either 0 or 1. */
    if (nz == 0) {
      for (i=jstart; i<=jend; i++) {
        y[iperm[i]] = 0.0;
      }
    } else if (nz == 1) {
      for (i=jstart; i<=jend; i++) {
        iold = iperm[i];
        ipos = ai[iold];
        y[iold] = aa[ipos] * x[aj[ipos]];
      }
    } else {

      /* We work our way through the current group in chunks of NDIM rows
       * at a time. */
      for (istart=jstart; istart<=jend; istart+=NDIM) {
        /* Figure out where the chunk of 'isize' rows ends in iperm.
         * 'isize' may of course be less than NDIM for the last chunk. */
        iend = istart + (NDIM - 1);
        if (iend > jend) { iend = jend; }
        isize = iend - istart + 1;

        /* Initialize the yp[] array that will be used to hold part of
         * the permuted results vector, and figure out where in aa each
         * row of the chunk will begin. */
        for (i=0; i<isize; i++) {
          iold = iperm[istart + i];
          /* iold is a row number from the matrix A *before* reordering. */
          ip[i] = ai[iold];
          /* ip[i] tells us where the ith row of the chunk begins in aa. */
          yp[i] = (PetscScalar) 0.0;
        }

        /* If the number of nonzeros per row exceeds the number of rows in
         * the chunk, we should vectorize along nz, that is, perform the
         * mat-vec one row at a time as in the usual CSR case. */
        if (nz > isize) {
#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI preferstream
#endif
          for (i=0; i<isize; i++) {
#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI prefervector
#endif
            for (j=0; j<nz; j++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        } else {
          /* Otherwise, there are enough rows in the chunk to make it
           * worthwhile to vectorize across the rows, that is, to do the
           * matvec by operating with "columns" of the chunk. */
          for (j=0; j<nz; j++) {
            for (i=0; i<isize; i++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        }

#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI ivdep
#endif
        /* Put the results from yp[] into the non-permuted result vector y. */
        for (i=0; i<isize; i++) {
          y[iperm[istart+i]] = yp[i];
        }
      }  /* End processing chunk of isize rows of a group. */
    }  /* End handling matvec for chunk with nz > 1. */
  }  /* End loop over igroup. */
#endif
  PetscLogFlops(2*a->nz - A->rmap->n);
  VecRestoreArray(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
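
/* An illustration of the two strategies above (hypothetical data): for a
 * group with nz = 3 and a chunk of isize = 4 rows whose starting offsets in
 * aa[] are ip[] = {0, 3, 6, 9}, vectorizing across the rows computes, for
 * each j = 0, 1, 2,
 *
 *   yp[0] += aa[0+j]*x[aj[0+j]];   yp[1] += aa[3+j]*x[aj[3+j]];
 *   yp[2] += aa[6+j]*x[aj[6+j]];   yp[3] += aa[9+j]*x[aj[9+j]];
 *
 * The inner i-loop is stride-1 in yp[] and ip[] and carries no dependence
 * between iterations, which is the access pattern the grouping is designed
 * to expose. */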
/* MatMultAdd_SeqCSRPERM() calculates yy = ww + A * xx.
 * Note that the names I used to designate the vectors differ from those
 * used in MatMultAdd_SeqAIJ().  I did this to keep my notation consistent
 * with the MatMult_SeqCSRPERM() routine, which is very similar to this one. */
/*
    I hate having virtually identical code for the mult and the multadd!!!
*/
PetscErrorCode MatMultAdd_SeqCSRPERM(Mat A,Vec xx,Vec ww,Vec yy)
{
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  PetscScalar     *x,*y,*w;
  const MatScalar *aa;
  PetscErrorCode  ierr;
  PetscInt        *aj,*ai;
#if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDCSRPERM)
  PetscInt        i,j;
#endif

  /* Variables that don't appear in MatMultAdd_SeqAIJ. */
  Mat_SeqCSRPERM *csrperm;
  PetscInt       *iperm;  /* Points to the permutation vector. */
  PetscInt       *xgroup;
  /* Denotes where groups of rows with the same number of nonzeros
   * begin and end in iperm. */
  PetscInt       *nzgroup;
  PetscInt       ngroup;
  PetscInt       igroup;
  PetscInt       jstart,jend;
  /* jstart is used in loops to denote the position in iperm where a
   * group starts; jend denotes the position where it ends.
   * (jend + 1 is where the next group starts.) */
  PetscInt       iold,nz;
  PetscInt       istart,iend,isize;
  PetscInt       ipos;
  PetscScalar    yp[NDIM];
  PetscInt       ip[NDIM];
  /* yp[] and ip[] are treated as vector "registers" for performing
   * the mat-vec. */

#if defined(PETSC_HAVE_PRAGMA_DISJOINT)
#pragma disjoint(*x,*y,*aa)
#endif

  VecGetArray(xx,&x);
  VecGetArray(yy,&y);
  if (yy != ww) {
    VecGetArray(ww,&w);
  } else {
    w = y;
  }

  aj = a->j;  /* aj[k] gives the column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Get the info we need about the permutations and groupings. */
  csrperm = (Mat_SeqCSRPERM *) A->spptr;
  iperm   = csrperm->iperm;
  ngroup  = csrperm->ngroup;
  xgroup  = csrperm->xgroup;
  nzgroup = csrperm->nzgroup;

#if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDCSRPERM)
  fortranmultaddcsrperm_(&m,x,ii,aj,aa,y,w);
#else

  for (igroup=0; igroup<ngroup; igroup++) {
    jstart = xgroup[igroup];
    jend   = xgroup[igroup+1] - 1;

    nz = nzgroup[igroup];

    /* Handle the special cases where the number of nonzeros per row
     * in the group is either 0 or 1. */
    if (nz == 0) {
      for (i=jstart; i<=jend; i++) {
        iold = iperm[i];
        y[iold] = w[iold];
      }
    } else if (nz == 1) {
      for (i=jstart; i<=jend; i++) {
        iold = iperm[i];
        ipos = ai[iold];
        y[iold] = w[iold] + aa[ipos] * x[aj[ipos]];
      }
    } else {
      /* The general case. */

      /* We work our way through the current group in chunks of NDIM rows
       * at a time. */
      for (istart=jstart; istart<=jend; istart+=NDIM) {
        /* Figure out where the chunk of 'isize' rows ends in iperm.
         * 'isize' may of course be less than NDIM for the last chunk. */
        iend = istart + (NDIM - 1);
        if (iend > jend) { iend = jend; }
        isize = iend - istart + 1;

        /* Initialize the yp[] array that will be used to hold part of
         * the permuted results vector, and figure out where in aa each
         * row of the chunk will begin. */
        for (i=0; i<isize; i++) {
          iold = iperm[istart + i];
          /* iold is a row number from the matrix A *before* reordering. */
          ip[i] = ai[iold];
          /* ip[i] tells us where the ith row of the chunk begins in aa. */
          yp[i] = w[iold];
        }

        /* If the number of nonzeros per row exceeds the number of rows in
         * the chunk, we should vectorize along nz, that is, perform the
         * mat-vec one row at a time as in the usual CSR case. */
        if (nz > isize) {
#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI preferstream
#endif
          for (i=0; i<isize; i++) {
#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI prefervector
#endif
            for (j=0; j<nz; j++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        } else {
          /* Otherwise, there are enough rows in the chunk to make it
           * worthwhile to vectorize across the rows, that is, to do the
           * matvec by operating with "columns" of the chunk. */
          for (j=0; j<nz; j++) {
            for (i=0; i<isize; i++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        }

#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI ivdep
#endif
        /* Put the results from yp[] into the non-permuted result vector y. */
        for (i=0; i<isize; i++) {
          y[iperm[istart+i]] = yp[i];
        }
      }  /* End processing chunk of isize rows of a group. */

    }  /* End handling matvec for chunk with nz > 1. */
  }  /* End loop over igroup. */

#endif
  PetscLogFlops(2*a->nz - A->rmap->n);
  VecRestoreArray(xx,&x);
  VecRestoreArray(yy,&y);
  if (yy != ww) {
    VecRestoreArray(ww,&w);
  }
  return(0);
}
/* MatConvert_SeqAIJ_SeqCSRPERM converts a SeqAIJ matrix into a
 * SeqCSRPERM matrix.  This routine is called by the MatCreate_SeqCSRPERM()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqCSRPERM one. */
PetscErrorCode MatConvert_SeqAIJ_SeqCSRPERM(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqCSRPERM *csrperm;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscNewLog(B,Mat_SeqCSRPERM,&csrperm);
  B->spptr = (void *) csrperm;

  /* Save the original function pointers so that they can be restored
   * when converting back or destroying the matrix. */
  csrperm->AssemblyEnd_SeqAIJ  = A->ops->assemblyend;
  csrperm->MatDestroy_SeqAIJ   = A->ops->destroy;
  csrperm->MatDuplicate_SeqAIJ = A->ops->duplicate;

  /* Set function pointers for methods that we inherit from AIJ but override. */
  B->ops->duplicate   = MatDuplicate_SeqCSRPERM;
  B->ops->assemblyend = MatAssemblyEnd_SeqCSRPERM;
  B->ops->destroy     = MatDestroy_SeqCSRPERM;
  B->ops->mult        = MatMult_SeqCSRPERM;
  B->ops->multadd     = MatMultAdd_SeqCSRPERM;

  /* If A has already been assembled, compute the permutation. */
  if (A->assembled == PETSC_TRUE) {
    SeqCSRPERM_create_perm(B);
  }

  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqcsrperm_seqaij_C",
                                    "MatConvert_SeqCSRPERM_SeqAIJ",MatConvert_SeqCSRPERM_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQCSRPERM);
  *newmat = B;
  return(0);
}
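
/* A usage sketch (assumes A is an assembled MATSEQAIJ matrix and x, y are
 * compatible vectors; error checking omitted):
 *
 *   Mat B;
 *   MatConvert(A,MATSEQCSRPERM,MAT_INITIAL_MATRIX,&B);
 *   MatMult(B,x,y);    (now uses MatMult_SeqCSRPERM)
 *
 * Alternatively, calling MatSetType(A,MATSEQCSRPERM) before assembly
 * reaches the same code through MatCreate_SeqCSRPERM() below. */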
/*@C
   MatCreateSeqCSRPERM - Creates a sparse matrix of type SEQCSRPERM.
   This type inherits from AIJ, but calculates some additional permutation
   information that is used to allow better vectorization of some
   operations.  At the cost of increased storage, the AIJ formatted
   matrix can be copied to a format in which pieces of the matrix are
   stored in ELLPACK format, allowing the vectorized matrix multiply
   routine to use stride-1 memory accesses.  As with the AIJ type, it is
   important to preallocate matrix storage in order to get good assembly
   performance.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or PETSC_NULL

   Output Parameter:
.  A - the matrix

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, cray, sparse, parallel

.seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqCSRPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQCSRPERM);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);
  return(0);
}
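
/* A minimal usage sketch (hypothetical sizes and values; vector setup and
 * error checking omitted):
 *
 *   Mat         A;
 *   PetscInt    row = 0, col = 0;
 *   PetscScalar val = 1.0;
 *
 *   MatCreateSeqCSRPERM(PETSC_COMM_SELF,100,100,5,PETSC_NULL,&A);
 *   MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES);
 *   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 *   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 *
 * MatAssemblyEnd() dispatches to MatAssemblyEnd_SeqCSRPERM() above, which
 * computes the permutation and grouping, so a subsequent MatMult() uses
 * the grouped kernel. */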
PetscErrorCode MatCreate_SeqCSRPERM(Mat A)
{
  /* Build a SEQAIJ matrix first, then convert it in place to SEQCSRPERM. */
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqCSRPERM(A,MATSEQCSRPERM,MAT_REUSE_MATRIX,&A);
  return(0);
}